diff --git a/.VERSION b/.VERSION
new file mode 100644
index 00000000..1263e476
--- /dev/null
+++ b/.VERSION
@@ -0,0 +1,12 @@
+$Format:%d%n%n$
+# Fall back version, probably last release:
+3.9.0
+
+# PSBLAS version file.
+#
+# Release archive created from commit:
+# $Format:%H %d$
+# $Format:Created on %ci by %cN, and$
+# $Format:signed by %GS using %GK.$
+# $Format:Signature status: %G?$
+$Format:%GG$
diff --git a/.gitignore b/.gitignore
index 7227f784..a1719f69 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,13 @@
 *.a
 *.o
 *.mod
+*.smod
 *~
 
 # header files generated
 /cbind/*.h
 /util/psb_metis_int.h
+/base/modules/psb_config.h
 
 # Make.inc generated
 /Make.inc
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..9910c612
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,1267 @@
+# FDEFINES : -DHAVE_LAPACK -DHAVE_FINAL -DHAVE_ISO_FORTRAN_ENV -DHAVE_FLUSH_STMT -DHAVE_VOLATILE -DSERIAL_MPI -DMPI_MOD
+# CDEFINES : -DLowerUnderscore -DPtr64Bits
+
+#-----------------------------------
+# Set oldest allowable CMake version
+#-----------------------------------
+cmake_minimum_required(VERSION 3.11)
+cmake_policy(VERSION 3.11.1...3.13.3)
+
+set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
+
+#----------------------------------------------
+# Define canonical CMake build types and extras
+#----------------------------------------------
+set ( CMAKE_CONFIGURATION_TYPES "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "CodeCoverage" )
+set ( CMAKE_BUILD_TYPE "Release"
+  CACHE STRING "Select which configuration to build."
+  )
+set_property ( CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${CMAKE_CONFIGURATION_TYPES} )
+
+#-----------------------------------------------------
+# Determine version from .VERSION file or git describe
+#-----------------------------------------------------
+include(setVersion)
+set_version(
+  VERSION_VARIABLE PSBLAS_Version
+  GIT_DESCRIBE_VAR full_git_describe
+  CUSTOM_VERSION_FILE "${CMAKE_SOURCE_DIR}/.VERSION")
+message( STATUS "Building PSBLAS1 version: ${full_git_describe}" )
+#------------------------------------------
+# Name project and specify source languages
+#------------------------------------------
+project(psblas
+  VERSION "${PSBLAS_Version}"
+  LANGUAGES C Fortran)
+
+#--------------------------------------------------
+# Set option to allow building against OpenCoarrays
+#--------------------------------------------------
+if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
+  option(PSBLAS_USE_OpenCoarrays "Build enabling linkage to programs using OpenCoarrays" OFF)
+endif()
+
+#-----------------------------------------------------------------
+# Define a target to create a checksummed & signed release archive
+#-----------------------------------------------------------------
+set(${CMAKE_PROJECT_NAME}_dist_string "${CMAKE_PROJECT_NAME}-${full_git_describe}")
+if(GIT_FOUND)
+  # The COMMENT uses the variables that are set above; the names it used
+  # before (_package_stem_name, _full_git_describe) are not set anywhere in
+  # this file, so the message was printed with empty placeholders.
+  # NOTE(review): GIT_FOUND is presumably set by the setVersion module - confirm.
+  add_custom_target(dist # OUTPUT "${CMAKE_BINARY_DIR}/${${CMAKE_PROJECT_NAME}_dist_string}.tar.gz"
+    COMMAND "${CMAKE_COMMAND}" -P "${CMAKE_SOURCE_DIR}/cmake/makeDist.cmake" "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}"
+    COMMENT "Creating source release asset, ${${CMAKE_PROJECT_NAME}_dist_string}.tar.gz, from ${full_git_describe} using the `git archive` command."
+    VERBATIM)
+endif()
+
+#--------------------------
+# Prohibit in-source builds
+#--------------------------
+include(CheckOutOfSourceBuild)
+
+#----------------------------------------------------
+# Define coverage flags and report untested compilers
+#----------------------------------------------------
+if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU" )
+  set(gfortran_compiler true)
+  #TODO: check if it is needed an mpi compiler set(CMAKE_Fortran_COMPILER mpifort)
+
+  set ( CMAKE_C_FLAGS_CODECOVERAGE "-fprofile-arcs -ftest-coverage -O0"
+    CACHE STRING "Code coverage C compiler flags")
+  set ( CMAKE_Fortran_FLAGS_CODECOVERAGE "-fprofile-arcs -ftest-coverage -O0"
+    CACHE STRING "Code coverage Fortran compiler flags")
+else()
+  message(WARNING
+    "\n"
+    "Attempting untested CMake build with Fortran compiler: ${CMAKE_Fortran_COMPILER_ID}. "
+    "Please report any failures at https://github.com/sfilippone/psblas3\n\n"
+    )
+endif()
+
+#----------------------------------------------------
+# Define -frecursive for GNU Fortran Compiler
+#----------------------------------------------------
+if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU" )
+  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -frecursive")
+  message(STATUS "GNU Fortran COMPILER ${CMAKE_Fortran_FLAGS};")
+endif()
+
+message(STATUS "cmake flags? ${CMAKE_Fortran_FLAGS};")
+
+#------------------------------------
+# Fortran name mangling introspection
+#------------------------------------
+include("${CMAKE_CURRENT_LIST_DIR}/cmake/CapitalizeString.cmake")
+#include(FortranCInterface)
+#CapitalizeString(${FortranCInterface_GLOBAL__CASE} fc_case)
+#message(STATUS "Name mangling capitalization: ${fc_case}")
+#message(STATUS "Name mangling fortran global suffix underscore: ${FortranCInterface_GLOBAL__SUFFIX}")
+#if(FortranCInterface_GLOBAL__SUFFIX STREQUAL "")
+# add_compile_options("-D${fc_case}Case")
+#elseif(FortranCInterface_GLOBAL__SUFFIX STREQUAL "_")
+# add_compile_options("-D${fc_case}Underscore")
+#elseif(FortranCInterface_GLOBAL__SUFFIX STREQUAL "__")
+# add_compile_options("-D${fc_case}DoubleUnderscore")
+#else()
+# message( FATAL_ERROR "Fortran name mangling suffix, \'${FortranCInterface_GLOBAL__SUFFIX}\', unknown to PSBLAS")
+#endif()
+
+
+# message(STATUS "win? ${WIN32};")
+#if(TRUE)#NOT ${WIN32})
+#previous check did not work if WIN32 is empty string
+  #----------------------------------------------
+  # Determine system endian-ness and pointer size
+  #----------------------------------------------
+# include(TestBigEndian)
+# TEST_BIG_ENDIAN(IS_BIG_ENDIAN)
+# if(IS_BIG_ENDIAN)
+# message( STATUS "System appears to be big endian.")
+# else()
+# message( STATUS "System appears to be little endian.")
+# add_compile_options(-DLittleEndian)
+# endif()
+# include(CheckTypeSize)
+# CHECK_TYPE_SIZE("void *" VOID_P_SIZE LANGUAGE C)
+# if(${VOID_P_SIZE} EQUAL 8)
+# add_compile_options(-DPtr64Bits)
+# endif()
+# message(STATUS "Have 64bit pointers")
+
+
+#endif()
+
+message(STATUS "Using compiler ${CMAKE_C_COMPILER};")
+
+
+# Set default values for IPK_SIZE and LPK_SIZE
+set(DEFAULT_IPK_SIZE 4)
+set(DEFAULT_LPK_SIZE 8)
+
+# Allow user to override with command line definitions
+if(NOT DEFINED CMAKE_PSB_IPK)
+  set(CMAKE_PSB_IPK ${DEFAULT_IPK_SIZE} CACHE STRING "Size of IPK (default: 4)")
+endif()
+
+if(NOT DEFINED CMAKE_PSB_LPK)
+  set(CMAKE_PSB_LPK ${DEFAULT_LPK_SIZE} CACHE STRING "Size of LPK (default: 8)")
+endif()
+
+# Use the passed values
+set(IPK_SIZE ${CMAKE_PSB_IPK})
+set(LPK_SIZE ${CMAKE_PSB_LPK})
+# Define IPKDEF and LPKDEF based on the sizes
+set(PSB_IPKDEF "#define PSB_IPK${IPK_SIZE}")
+set(PSB_LPKDEF "#define PSB_LPK${LPK_SIZE}")
+
+# Output the definitions for verification
+message(STATUS "Using IPKDEF: ${PSB_IPKDEF}")
+message(STATUS "Using LPKDEF: ${PSB_LPKDEF}")
+
+#add_compile_options(-DPSB_IPK${IPK_SIZE})
+#add_compile_options(-DPSB_LPK${LPK_SIZE})
+# Add PSB_IPK/LPK flag only for fortran files.
+set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_IPK${IPK_SIZE}")
+set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_LPK${LPK_SIZE}")
+
+#-----------------------------------------
+# Check for some Fortran compiler features
+#-----------------------------------------
+include(CheckFortranSourceCompiles)
+
+# psb_check_fortran_feature(<result_var> <define> <source>)
+#   Compiles <source> as a free-form (.f90) program and stores the outcome in
+#   <result_var>. When it builds, -D<define> is appended to CMAKE_Fortran_FLAGS
+#   in the caller's scope and the flag is reported with a STATUS message.
+function(psb_check_fortran_feature result_var define source)
+  check_fortran_source_compiles("${source}" ${result_var} SRC_EXT f90)
+  if(${result_var})
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -D${define}" PARENT_SCOPE)
+    message(STATUS "-D${define}")
+  endif()
+endfunction()
+
+# NOTE(review): this is the only feature macro without the PSB_ prefix -
+# confirm that the sources really test HAVE_MOVE_ALLOC.
+psb_check_fortran_feature(HAVE_MOVE_ALLOC HAVE_MOVE_ALLOC
+  "
+integer, allocatable :: a(:), b(:)
+allocate(a(5))
+a = [1,2,3,4,5]
+call move_alloc(from=a, to=b)
+end
+")
+
+psb_check_fortran_feature(HAVE_VOLATILE PSB_HAVE_VOLATILE
+  "integer, volatile :: i ; end")
+
+psb_check_fortran_feature(HAVE_ISO_FORTRAN_ENV PSB_HAVE_ISO_FORTRAN_ENV
+  "use ISO_FORTRAN_ENV ; end")
+
+psb_check_fortran_feature(HAVE_FLUSH_STMT PSB_HAVE_FLUSH_STMT
+  "flush(5); end")
+
+psb_check_fortran_feature(HAVE_FINAL PSB_HAVE_FINAL
+  "
+module conftest_mod
+  type foo
+    integer :: i
+  contains
+    final :: destroy_foo
+  end type foo
+  private destroy_foo
+contains
+  subroutine destroy_foo(a)
+    type(foo) :: a
+    ! Just a test
+  end subroutine destroy_foo
+end module conftest_mod
+program conftest
+  use conftest_mod
+  type(foo) :: foovar
+end program")
+
+psb_check_fortran_feature(HAVE_MOLD PSB_HAVE_MOLD
+  "
+program xtt
+  type foo
+    integer :: i
+  end type foo
+  type, extends(foo) :: new_foo
+    integer :: j
+  end type new_foo
+  class(foo), allocatable :: fooab
+  type(new_foo) :: nfv
+  integer :: info
+  allocate(fooab, mold=nfv, stat=info)
+end program")
+
+psb_check_fortran_feature(HAVE_EXTENDS_TYPE_OF PSB_HAVE_EXTENDS_TYPE_OF
+  "
+program conftest
+  type foo
+    integer :: i
+  end type foo
+  type, extends(foo) :: bar
+    integer j
+  end type bar
+  type(bar) :: barvar
+end program ")
+
+psb_check_fortran_feature(HAVE_SAME_TYPE_AS PSB_HAVE_SAME_TYPE_AS
+  "
+program stt
+  type foo
+    integer :: i
+  end type foo
+  type, extends(foo) :: new_foo
+    integer :: j
+  end type new_foo
+  type(foo) :: foov
+  type(new_foo) :: nfv1, nfv2
+
+
+  write(*,*) 'foov == nfv1? ', same_type_as(foov,nfv1)
+  write(*,*) 'nfv2 == nfv1? ', same_type_as(nfv2,nfv1)
+end program")
+
+#----------------------------------------------------------------------------
+# Find MPI and set some flags so that FC and CC can point to gfortran and gcc
+#----------------------------------------------------------------------------
+find_package( MPI REQUIRED Fortran )
+
+# NOTE(review): MPI is REQUIRED above, so the serial else() branch below can
+# only run if that requirement is relaxed.
+if(MPI_FOUND)
+  #-----------------------------------------------
+  # Work around an issue present on fedora systems
+  #-----------------------------------------------
+  if( (MPI_C_LINK_FLAGS MATCHES "noexecstack") OR (MPI_Fortran_LINK_FLAGS MATCHES "noexecstack") )
+    message ( WARNING
+      "The `noexecstack` linker flag was found in the MPI_<lang>_LINK_FLAGS variable. This is
+known to cause segmentation faults for some Fortran codes. See, e.g.,
+https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71729 or
+https://github.com/sourceryinstitute/OpenCoarrays/issues/317.
+
+`noexecstack` is being replaced with `execstack`"
+      )
+    # The inputs are quoted: only one of the two variables has to match, and an
+    # empty unquoted input would leave string(REPLACE) without its input argument.
+    string(REPLACE "noexecstack"
+      "execstack" MPI_C_LINK_FLAGS_FIXED "${MPI_C_LINK_FLAGS}")
+    string(REPLACE "noexecstack"
+      "execstack" MPI_Fortran_LINK_FLAGS_FIXED "${MPI_Fortran_LINK_FLAGS}")
+    set(MPI_C_LINK_FLAGS "${MPI_C_LINK_FLAGS_FIXED}" CACHE STRING
+      "MPI C linking flags" FORCE)
+    set(MPI_Fortran_LINK_FLAGS "${MPI_Fortran_LINK_FLAGS_FIXED}" CACHE STRING
+      "MPI Fortran linking flags" FORCE)
+
+  endif()
+
+  message(STATUS "Found MPI: ${MPI_C_LIBRARIES} ${MPI_Fortran_LIBRARIES}")
+
+  #----------------
+  # Setup MPI flags
+  #----------------
+  list(REMOVE_DUPLICATES MPI_Fortran_INCLUDE_PATH)
+  set(CMAKE_C_COMPILE_FLAGS ${CMAKE_C_COMPILE_FLAGS} ${MPI_C_COMPILE_FLAGS})
+  set(CMAKE_C_LINK_FLAGS ${CMAKE_C_LINK_FLAGS} ${MPI_C_LINK_FLAGS})
+  set(CMAKE_Fortran_COMPILE_FLAGS ${CMAKE_Fortran_COMPILE_FLAGS} ${MPI_Fortran_COMPILE_FLAGS})
+  set(CMAKE_Fortran_LINK_FLAGS ${CMAKE_Fortran_LINK_FLAGS} ${MPI_Fortran_LINK_FLAGS})
+  include_directories(BEFORE ${MPI_C_INCLUDE_PATH} ${MPI_Fortran_INCLUDE_PATH})
+  message(STATUS "${MPI_C_INCLUDE_PATH}; ${MPI_Fortran_INCLUDE_PATH};; ${CMAKE_Fortran_LINK_FLAGS} ;")
+  if(MPI_Fortran_HAVE_F90_MODULE OR MPI_Fortran_HAVE_F08_MODULE)
+    add_compile_options(-DPSB_MPI_MOD)
+    message(STATUS "-DPSB_MPI_MOD")
+    #add_compile_options(-DSERIAL_MPI) # Is it right??
+    #message(STATUS "-DSERIAL_MPI")
+  endif()
+  set(PSB_SERIAL_MPI OFF)
+
+else()
+  message(STATUS "MPI not found, serial ahead")
+  add_compile_options(-DPSB_SERIAL_MPI)
+  add_compile_options(-DPSB_MPI_MOD)
+  set(PSB_SERIAL_MPI ON)
+  set(CSERIALMPI "#define PSB_SERIAL_MPI")
+endif()
+
+#-------------------------------------------------------
+# Find and Use OpenCoarrays IFF gfortran AND options set
+#-------------------------------------------------------
+
+if("${PSBLAS_USE_OpenCoarrays}" AND CMAKE_Fortran_COMPILER_ID MATCHES GNU)
+  message(STATUS "Set openCoarrays")
+  find_package(OpenCoarrays)
+endif()
+
+#------------------------------
+# Find Linear Algebra Libraries
+#------------------------------
+if(NOT APPLE)
+  set(BLA_STATIC ON)
+endif()
+find_package(BLAS REQUIRED)
+find_package(LAPACK REQUIRED)
+#add_compile_options(-DPSB_HAVE_LAPACK)
+set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_HAVE_LAPACK")
+
+#--------------------------------
+# Find METIS partitioning library
+#--------------------------------
+include(${CMAKE_CURRENT_LIST_DIR}/cmake/FindMETIS.cmake)
+find_package(METIS)
+
+if(METIS_FOUND)
+  message(STATUS "METIS PATH ${METIS_INCLUDES} and metis libraries ${METIS_LIBRARIES}")
+
+  # The widths of METIS' scalar types are measured against the metis.h found
+  # in METIS_INCLUDES. The earlier hand-written probe executed a binary that
+  # try_compile() does not leave behind, and it took sizeof(float) and
+  # sizeof(double) rather than the size of a METIS type.
+  include(CheckTypeSize)
+  include(CMakePushCheckState)
+
+  # check_metis_real_type(<float|double>)
+  #   When METIS' real_t has the same size as <type_name>, sets in the caller's
+  #   scope PSB_METIS_REAL_32 (float) or PSB_METIS_REAL_64 (double) to that size
+  #   in bytes, and CREALMETIS to the matching "#define" line.
+  #   Warns and sets nothing when the sizes cannot be determined.
+  function(check_metis_real_type type_name)
+    cmake_push_check_state(RESET)
+    set(CMAKE_REQUIRED_INCLUDES ${METIS_INCLUDES})
+    check_type_size("${type_name}" PSB_SIZEOF_${type_name} LANGUAGE C)
+    set(CMAKE_EXTRA_INCLUDE_FILES metis.h)
+    check_type_size("real_t" PSB_METIS_REAL_T_SIZE LANGUAGE C)
+    cmake_pop_check_state()
+
+    if(NOT HAVE_PSB_METIS_REAL_T_SIZE OR NOT HAVE_PSB_SIZEOF_${type_name})
+      message(WARNING "Failed to compile test for type size: ${type_name}")
+      return()
+    endif()
+    if(NOT PSB_METIS_REAL_T_SIZE EQUAL PSB_SIZEOF_${type_name})
+      return()
+    endif()
+
+    if (type_name STREQUAL "float")
+      set(PSB_METIS_REAL_32 "${PSB_METIS_REAL_T_SIZE}" PARENT_SCOPE)
+      set(CREALMETIS "#define PSB_METIS_REAL_32" PARENT_SCOPE)
+    elseif (type_name STREQUAL "double")
+      set(PSB_METIS_REAL_64 "${PSB_METIS_REAL_T_SIZE}" PARENT_SCOPE)
+      set(CREALMETIS "#define PSB_METIS_REAL_64" PARENT_SCOPE)
+    endif()
+  endfunction()
+
+  # Check for both float and double
+  check_metis_real_type(float)
+  check_metis_real_type(double)
+
+  # Determine METIS_INDEX (bits) from METIS' integer type idx_t, which is the
+  # type that has to agree with the PSBLAS LPK size checked below.
+  cmake_push_check_state(RESET)
+  set(CMAKE_REQUIRED_INCLUDES ${METIS_INCLUDES})
+  set(CMAKE_EXTRA_INCLUDE_FILES metis.h)
+  check_type_size("idx_t" PSB_METIS_IDX_T_SIZE LANGUAGE C)
+  cmake_pop_check_state()
+
+  if(PSB_METIS_IDX_T_SIZE EQUAL 4)
+    set(METIS_INDEX 32)
+  elseif(PSB_METIS_IDX_T_SIZE EQUAL 8)
+    set(METIS_INDEX 64)
+  else()
+    message(WARNING "Could not determine the size of METIS idx_t; assuming 64 bits.")
+    set(METIS_INDEX 64) # Default to 64 if not determined
+  endif()
+
+  # Check conditions for LPK_SIZE and METIS_INDEX. A mismatch disables METIS
+  # and the configuration continues, hence WARNING ("FATAL" is not a
+  # message() mode and was printed as part of the text).
+  if((LPK_SIZE STREQUAL "4" AND METIS_INDEX STREQUAL "64") OR
+     (LPK_SIZE STREQUAL "8" AND METIS_INDEX STREQUAL "32"))
+    message(WARNING " Mismatch between metis ${METIS_INDEX} size and psblas LPK size ${LPK_SIZE}")
+    set(METIS_FOUND FALSE)
+  endif()
+
+  if(METIS_FOUND)
+    # Values substituted into util/psb_metis_int.h.in
+    set(METISINCFILE "metis.h") # Adjust this to your actual path
+    set(CHAVEMETIS "#define PSB_HAVE_METIS")
+    set(CINTMETIS "#define PSB_METIS_${METIS_INDEX}")
+
+    # Specify the configuration file
+    set(HEADER_TEMPLATE "${CMAKE_CURRENT_SOURCE_DIR}/util/psb_metis_int.h.in")
+    set(HEADER_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/include/psb_metis_int.h")
+
+    # Configure the header file once, after every substituted value is set
+    configure_file(${HEADER_TEMPLATE} ${HEADER_OUTPUT} @ONLY)
+
+    # Set HAVE_METIS if METIS is found and coherent with the system settings
+    #add_compile_options(-DPSB_HAVE_METIS)
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_HAVE_METIS")
+  endif()
+
+endif()
+
+#---------------------------------------------------
+# Use standardized GNU install directory conventions
+#---------------------------------------------------
+include(GNUInstallDirs)
+#set(mod_dir_tail "${${CMAKE_PROJECT_NAME}_dist_string}_${CMAKE_Fortran_COMPILER_ID}-${CMAKE_Fortran_COMPILER_VERSION}")
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/${${CMAKE_PROJECT_NAME}_dist_string}-tests")
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+#set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory")
+#set(CMAKE_INSTALL_INCLUDEDIR "include" CACHE STRING "Include directory")
+#set(CMAKE_INSTALL_MODULDIR "modules" CACHE STRING "Modules directory")
+
+# Set variables exportable for other projects
+
+message(STATUS "Initial CMAKE_INSTALL_LIBDIR: ${CMAKE_INSTALL_LIBDIR}")
+set(PSB_CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
+
+# Each install directory takes its PSB_CMAKE_INSTALL_* override when one is
+# given; otherwise the cache entry is forced to the PSBLAS default.
+# NOTE(review): FORCE also replaces a CMAKE_INSTALL_* value chosen by the user
+# or by GNUInstallDirs - confirm that this is intended.
+if(NOT PSB_CMAKE_INSTALL_LIBDIR)
+  message(STATUS "CMAKE_INSTALL_LIBDIR is set to default value lib")
+  set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory" FORCE)
+  set(PSB_CMAKE_INSTALL_LIBDIR ${CMAKE_INSTALL_LIBDIR})
+else()
+  set(CMAKE_INSTALL_LIBDIR ${PSB_CMAKE_INSTALL_LIBDIR})
+  message(STATUS "CMAKE_INSTALL_LIBDIR is set to: ${CMAKE_INSTALL_LIBDIR}")
+endif()
+
+if(NOT PSB_CMAKE_INSTALL_INCLUDEDIR)
+  message(STATUS "CMAKE_INSTALL_INCLUDEDIR is set to default value include")
+  set(CMAKE_INSTALL_INCLUDEDIR "include" CACHE STRING "Include directory" FORCE)
+  set(PSB_CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR})
+else()
+  set(CMAKE_INSTALL_INCLUDEDIR ${PSB_CMAKE_INSTALL_INCLUDEDIR})
+  message(STATUS "CMAKE_INSTALL_INCLUDEDIR is set to: ${CMAKE_INSTALL_INCLUDEDIR}")
+endif()
+
+if(NOT PSB_CMAKE_INSTALL_MODULDIR)
+  message(STATUS "CMAKE_INSTALL_MODULDIR is set to default value modules")
+  set(CMAKE_INSTALL_MODULDIR "modules" CACHE STRING "Modules directory" FORCE)
+  set(PSB_CMAKE_INSTALL_MODULDIR ${CMAKE_INSTALL_MODULDIR})
+else()
+  set(CMAKE_INSTALL_MODULDIR ${PSB_CMAKE_INSTALL_MODULDIR})
+  message(STATUS "CMAKE_INSTALL_MODULDIR is set to: ${CMAKE_INSTALL_MODULDIR}")
+endif()
+
+#-----------------------------------
+# Turn on testing/ctest capabilities
+#-----------------------------------
+enable_testing()
+
+#------------------------------------------------------------------------------
+# Add custom properties on targets for controlling number of ranks during tests
+#------------------------------------------------------------------------------
+# NOTE(review): the <...> placeholders in the MIN_RANKS docs were missing from
+# this copy and have been restored - confirm the wording.
+define_property(TARGET
+  PROPERTY MIN_RANKS
+  BRIEF_DOCS "Minimum allowable ranks for the test <number>"
+  FULL_DOCS "Property to mark executable targets run as tests that they require at least <MIN_RANKS> ranks to run"
+  )
+
+define_property(TARGET
+  PROPERTY POWER_2_RANKS
+  BRIEF_DOCS "True if test must be run with a power of 2 ranks (T/F)"
+  FULL_DOCS "Property to mark executable targets run as tests that they require 2^n ranks."
+  )
+
+#-----------------------------------------------------
+# Publicize installed location to other CMake projects
+#-----------------------------------------------------
+#install(EXPORT ${CMAKE_PROJECT_NAME}-targets
+# DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake"
+#)
+
+install(EXPORT ${CMAKE_PROJECT_NAME}-targets
+  FILE ${CMAKE_PROJECT_NAME}Config.cmake
+  NAMESPACE ${CMAKE_PROJECT_NAME}::
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/"
+)
+
+
+include(CMakePackageConfigHelpers) # standard CMake module
+write_basic_package_version_file(
+  "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}ConfigVersion.cmake"
+  VERSION "${psblas_VERSION}"
+  COMPATIBILITY SameMajorVersion
+  )
+
+configure_file("${CMAKE_SOURCE_DIR}/cmake/${CMAKE_PROJECT_NAME}Config.cmake.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CMAKE_PROJECT_NAME}Config.cmake" @ONLY)
+
+# NOTE(review): nothing visible here generates
+# ${CMAKE_PROJECT_NAME}Targets.cmake in the build directory - confirm it is
+# produced elsewhere, otherwise the install step cannot find it.
+install(
+  FILES
+  "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CMAKE_PROJECT_NAME}Config.cmake"
+  "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}ConfigVersion.cmake"
+  "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake"
+  DESTINATION
+  "${CMAKE_INSTALL_LIBDIR}/cmake/${CMAKE_PROJECT_NAME}"
+)
+
+#-------------------------------------------
+# Add portable uninstall command to makefile
+#-------------------------------------------
+# Adapted from the CMake Wiki FAQ
+configure_file ( "${CMAKE_SOURCE_DIR}/cmake/uninstall.cmake.in" "${CMAKE_BINARY_DIR}/uninstall.cmake"
+  @ONLY)
+
+add_custom_target ( uninstall
+  COMMAND ${CMAKE_COMMAND} -P "${CMAKE_BINARY_DIR}/uninstall.cmake" )
+
+add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure)
+# See JSON-Fortran's CMakeLists.txt file to find out how to get the check target to depend
+# on the test executables
+
+#----------------------------------
+# Determine if we're using Open MPI
+#----------------------------------
+if(MPI_FOUND)
+  execute_process(COMMAND ${MPIEXEC} --version
+    OUTPUT_VARIABLE mpi_version_out)
+  if (mpi_version_out MATCHES "[Oo]pen[ -][Mm][Pp][Ii]")
+    message( STATUS "OpenMPI detected")
+    set ( openmpi true )
+  endif()
+endif()
+
+# Optionally check for CUDA requirement
+option(PSB_BUILD_CUDA "Build CUDA code" OFF)
+
+
+if(IPK_SIZE EQUAL 8)
+  set(PSB_BUILD_CUDA OFF)
+  message(STATUS "IPK8 is not compatible with CUDA. Cuda is now OFF ${PSB_BUILD_CUDA}")
+endif()
+
+
+if(PSB_BUILD_CUDA)
+
+  if(NOT DEFINED PSB_CUDA_PATH)
+    set(PSB_CUDA_PATH "/opt/cuda/12.8")
+  endif()
+  # Include the CMakeLists for the cuda library
+  include(${CMAKE_CURRENT_LIST_DIR}/cuda/CMakeLists.txt)
+  include_directories("${PSB_CUDA_PATH}/include")
+  message(STATUS "${PSB_CUDA_PATH}")
+
+
+  # find_package(CUDA REQUIRED)
+
+  enable_language(CUDA)
+  message(STATUS "Enabled CUDA ${CMAKE_CUDA_COMPILER_VERSION} ${CMAKE_CUDA_ARCHITECTURES};; ${CMAKE_CUDA_HOST_COMPILER_VERSION};")
+  # REQUIRED: the version arithmetic below needs CUDAToolkit_VERSION_MAJOR and
+  # CUDAToolkit_VERSION_MINOR; without the toolkit math(EXPR) is handed an
+  # expression with empty operands and stops with a parse error.
+  find_package(CUDAToolkit REQUIRED)
+  message(STATUS "Enabled CUDA throguh find ${CUDAToolkit_VERSION_MAJOR} ${CUDAToolkit_VERSION};; ${CUDAToolkit_VERSION_MINOR};")
+  # compute cuda version for psblas: major * 1000 + minor * 10
+  math(EXPR PSB_CUDA_VERSION "${CUDAToolkit_VERSION_MAJOR} * 1000 + ${CUDAToolkit_VERSION_MINOR} * 10")
+
+  message(STATUS "cuda version called has given ${PSB_CUDA_VERSION}:")
+
+  # Check for CUDA version
+  # set(PSB_CUDA_VERSION 12800)
+  if(PSB_CUDA_VERSION)
+    message(STATUS "CUDA version: ${PSB_CUDA_VERSION}")
+
+    # Define macros for CUDA version
+    # add_definitions(-DPSB_HAVE_CUDA)
+    # add_definitions(-DPSB_CUDA_VERSION=${PSB_CUDA_VERSION})
+    # math(EXPR PSB_CUDA_SHORT_VERSION "${PSB_CUDA_VERSION} / 1000")
+    # add_definitions(-DPSB_CUDA_SHORT_VERSION=${PSB_CUDA_SHORT_VERSION})
+    set(PSB_CUDA_SHORT_VERSION ${CUDAToolkit_VERSION_MAJOR})
+    message(STATUS "cuda version called has given ${PSB_CUDA_SHORT_VERSION}:")
+
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_HAVE_CUDA")
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_CUDA_VERSION=${PSB_CUDA_VERSION}")
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_CUDA_SHORT_VERSION=${PSB_CUDA_SHORT_VERSION}")
+
+    # Values substituted into psb_config.h.in
+    set(CHAVECUDA "#define PSB_HAVE_CUDA")
+    set(CSHORTVCUDA "#define PSB_CUDA_SHORT_VERSION ${PSB_CUDA_SHORT_VERSION}")
+    set(CVERSIONCUDA "#define PSB_CUDA_VERSION ${PSB_CUDA_VERSION}")
+
+  else()
+    message(FATAL_ERROR "CUDA version not found!")
+  endif()
+endif()
+
+#------------------------------------------
+# Configure the psb_config.h file
+#------------------------------------------
+
+message(STATUS "bin dir ${CMAKE_CURRENT_BINARY_DIR}; source dir ${CMAKE_CURRENT_SOURCE_DIR};;")
+configure_file(
+  ${CMAKE_CURRENT_SOURCE_DIR}/base/modules/psb_config.h.in
+  ${CMAKE_CURRENT_BINARY_DIR}/include/psb_config.h
+  @ONLY # Replace variables only
+)
+
+#---------------------------------------
+# Add the PSBLAS libraries and utilities
+#---------------------------------------
+
+# Link order, left to right:
+# cbind.a, util.a linsolve.a prec.a base.a
+
+# psb_setup_library(<target>)
+#   Applies the settings shared by every PSBLAS library target: Fortran
+#   modules go to ${CMAKE_BINARY_DIR}/modules, position independent code,
+#   output name psb_<target>, Fortran as linker language, and the public
+#   include directories.
+#   NOTE(review): the two generator expressions had been reduced to a bare "$"
+#   in this copy; the BUILD_INTERFACE/INSTALL_INTERFACE paths are a
+#   reconstruction - confirm them.
+function(psb_setup_library target)
+  set_target_properties(${target}
+    PROPERTIES
+    Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/modules"
+    POSITION_INDEPENDENT_CODE TRUE
+    OUTPUT_NAME psb_${target}
+    LINKER_LANGUAGE Fortran
+    )
+  target_include_directories(${target} PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}>  # Path for building
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)                   # Path for installation
+endfunction()
+
+include(${CMAKE_CURRENT_LIST_DIR}/base/CMakeLists.txt)
+
+include_directories("${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}")
+
+foreach(path IN LISTS base_header_C_files)
+  # Copy the header file to the include directory
+  file(COPY "${path}" DESTINATION "${CMAKE_BINARY_DIR}/include")
+endforeach()
+
+if(WIN32)
+  add_library(psb_base_C STATIC ${base_source_C_files})
+  target_compile_definitions(psb_base_C
+    PRIVATE -DWIN32 -D_LIB -DWIN64)
+  set_target_properties(psb_base_C
+    PROPERTIES
+    LINKER_LANGUAGE C
+    POSITION_INDEPENDENT_CODE TRUE)
+  target_link_libraries(psb_base_C
+    PUBLIC kernel32 user32 shell32)
+  add_library(base ${base_source_files})
+  target_link_libraries(base
+    PUBLIC psb_base_C)
+else()
+  # The C sources are compiled once as an object library and folded into base
+  add_library(base_C OBJECT ${base_source_C_files})
+  add_library(base ${base_source_files} $<TARGET_OBJECTS:base_C>)
+endif()
+
+
+# Set the Fortran module output directory for all targets
+set(CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/modules)
+#set(CMAKE_Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/include")
+
+message(STATUS "fortran module direcotry ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}")
+
+include_directories(${MPI_Fortran_INCLUDE_PATH})
+
+message(STATUS "Using MPI include at: ${MPI_Fortran_INCLUDE_PATH}")
+
+psb_setup_library(base)
+
+message(STATUS "include dir := ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}")
+
+#target_include_directories(base PUBLIC ${CMAKE_Fortran_MODULE_DIRECTORY})
+
+target_link_libraries(base
+  PUBLIC ${LAPACK_LINKER_FLAGS} ${LAPACK_LIBRARIES} ${LAPACK95_LIBRARIES}
+  PUBLIC ${BLAS_LINKER_FLAGS} ${BLAS_LIBRARIES} ${BLAS95_LIBRARIES})
+
+#add_custom_command(
+# TARGET base POST_BUILD
+# COMMAND ${CMAKE_COMMAND} -E cmake_copy_f90_mod
+# ${MPI_Fortran_INCLUDE_PATH}mpi.mod ${CMAKE_BINARY_DIR}/include/mpi.mod
+#)
+
+include(${CMAKE_CURRENT_LIST_DIR}/prec/CMakeLists.txt)
+add_library(prec ${prec_source_files})
+psb_setup_library(prec)
+target_link_libraries(prec PUBLIC base)
+
+include(${CMAKE_CURRENT_LIST_DIR}/linsolve/CMakeLists.txt)
+add_library(linsolve ${linsolve_source_files})
+psb_setup_library(linsolve)
+target_link_libraries(linsolve PUBLIC base prec)
+
+include(${CMAKE_CURRENT_LIST_DIR}/ext/CMakeLists.txt)
+add_library(ext ${ext_source_files})
+psb_setup_library(ext)
+target_link_libraries(ext PUBLIC base prec) #TODO: check actual dependencies
+
+include(${CMAKE_CURRENT_LIST_DIR}/util/CMakeLists.txt)
+
+if(WIN32)
+  if(METIS_FOUND)
+    add_library(psb_util_C STATIC ${util_source_C_files})
+    target_compile_definitions(psb_util_C
+      PRIVATE -DWIN32 -D_LIB -DWIN64)
+    set_target_properties(psb_util_C
+      PROPERTIES
+      LINKER_LANGUAGE C
+      POSITION_INDEPENDENT_CODE TRUE)
+    target_link_libraries(psb_util_C
+      PUBLIC kernel32 user32 shell32)
+  endif()
+  add_library(util ${util_source_files})
+  if(METIS_FOUND)
+    target_link_libraries(util
+      PUBLIC psb_util_C)
+  endif()
+else()
+
+  if(METIS_FOUND)
+    # Append the METIS-only C sources themselves; the loop this replaces
+    # appended the literal word "file" once per entry.
+    list(APPEND util_source_C_files ${util_source_C_metis_files})
+  endif()
+  add_library(psb_util_C OBJECT ${util_source_C_files})
+
+  add_library(util ${util_source_files} $<TARGET_OBJECTS:psb_util_C>)
+endif()
+psb_setup_library(util)
+target_link_libraries(util PUBLIC base prec)
+
+if(METIS_FOUND)
+
+  target_include_directories(util
+    PUBLIC ${METIS_INCLUDES})
+  target_include_directories(psb_util_C
+    PUBLIC ${METIS_INCLUDES})
+  target_link_libraries(util
+    PUBLIC ${METIS_LIBRARIES})
+# target_compile_definitions(psb_util_C
+# PUBLIC PSB_HAVE_METIS) #TDDO: CHECK IF THAT _ IS CORRECT
+# target_compile_definitions(util
+# PUBLIC PSB_HAVE_METIS)
+endif()
+
+# Include headers from the 'include' directory in the current directory
+include_directories(${CMAKE_BINARY_DIR}/include)
+
+
+include(${CMAKE_CURRENT_LIST_DIR}/cbind/CMakeLists.txt)
+if(WIN32)
+  add_library(psb_cbind_C STATIC ${cbind_source_C_files})
+  target_compile_definitions(psb_cbind_C
+    PRIVATE -DWIN32 -D_LIB -DWIN64)
+  set_target_properties(psb_cbind_C
+    PROPERTIES
+    LINKER_LANGUAGE C
+    POSITION_INDEPENDENT_CODE TRUE)
+  target_link_libraries(psb_cbind_C
+    PUBLIC kernel32 user32 shell32)
+  add_library(cbind ${cbind_source_files})
+  target_link_libraries(cbind
+    PUBLIC psb_cbind_C)
+else()
+  # NOTE(review): unlike base and util, the cbind_C objects are not added to
+  # cbind here - confirm whether cbind_source_files already lists the C sources.
+  add_library(cbind_C OBJECT ${cbind_source_C_files})
+  add_library(cbind ${cbind_source_files})
+endif()
+
+
+#add_library(cbind ${cbind_source_files})
+psb_setup_library(cbind)
+
+target_link_libraries(cbind PUBLIC base prec linsolve ext util)
+
+
+
+# Custom command to copy all header files
+#add_custom_command(
+# OUTPUT ${CMAKE_BINARY_DIR}/include/ # Dummy output to represent the target directory
+# COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/include # Create the include directory if it doesn't exist
+# COMMAND ${CMAKE_COMMAND} -E copy_if_different ${cbind_header_C_files} ${CMAKE_BINARY_DIR}/include/ # Copy all headers
+# DEPENDS ${cbind_header_C_files} # Make the copy depend on the header files
+# COMMENT "Copying header files to include directory"
+#)
+
+# Create a custom target to copy headers
+#add_custom_target(copy_headers ALL DEPENDS ${CMAKE_BINARY_DIR}/include/)
+
+foreach(path IN LISTS cbind_header_C_files)
+  # Copy the header file to the include directory
+  file(COPY "${path}" DESTINATION "${CMAKE_BINARY_DIR}/include")
+endforeach()
+message(STATUS "Copied .h files to ${CMAKE_BINARY_DIR}/include")
+
+
+#target_include_directories(cbind PUBLIC
+# $
+# $)
+# Include directories for the cbind library
+#target_include_directories(cbind_C PUBLICF
+# $ # Path for building
+# $ # Path for installation
+# 
${CMAKE_BINARY_DIR}/include # Include the copied headers +#) + + + + + + +######################################### +####### BUILD CUDA LIBRARY ############## +######################################### + + +# Optionally check for CUDA requirement +#option(PSB_BUILD_CUDA "Build CUDA code" OFF) + +if(PSB_BUILD_CUDA) + + # if(NOT DEFINED PSB_CUDA_PATH) + # set(PSB_CUDA_PATH "/opt/cuda/12.8") + #endif() + # Include the CMakeLists for the cbind library + #include(${CMAKE_CURRENT_LIST_DIR}/cuda/CMakeLists.txt) + #include_directories("${PSB_CUDA_PATH}/include") + #message(STATUS "${PSB_CUDA_PATH}") + + + # find_package(CUDA REQUIRED) + + #enable_language(CUDA) + + # Check for CUDA version + #set(PSB_CUDA_VERSION 12800) + #if(PSB_CUDA_VERSION) + # message(STATUS "CUDA version: ${PSB_CUDA_VERSION}") + + # Define macros for CUDA version + # add_definitions(-DPSB_HAVE_CUDA) + # add_definitions(-DPSB_CUDA_VERSION=${PSB_CUDA_VERSION}) + #math(EXPR PSB_CUDA_SHORT_VERSION "${PSB_CUDA_VERSION} / 1000") + # add_definitions(-DPSB_CUDA_SHORT_VERSION=${PSB_CUDA_SHORT_VERSION}) + + #set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_HAVE_CUDA") + #set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_CUDA_VERSION=${PSB_CUDA_VERSION}") + #set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DPSB_CUDA_SHORT_VERSION=${PSB_CUDA_SHORT_VERSION}") + + + #set(CHAVECUDA "#define PSB_HAVE_CUDA") + #set(CSHORTVCUDA "#define PSB_CUDA_SHORT_VERSION ${PSB_CUDA_SHORT_VERSION}") + #set(CVERSIONCUDA "#define PSB_CUDA_VERSION ${PSB_CUDA_VERSION}") + + #else() + #message(FATAL_ERROR "CUDA version not found!") + #endif() + + + + # Define the CUDA library + #if(WIN32) + #add_library(psb_cuda_C STATIC ${cuda_source_files}) + #target_compile_definitions(psb_cuda_C + # PRIVATE -DWIN32 -D_LIB -DWIN64) + #set_target_properties(psb_cuda_C + # PROPERTIES + # LINKER_LANGUAGE C + # POSITION_INDEPENDENT_CODE TRUE) + #target_link_libraries(psb_cuda_C + # PUBLIC kernel32 user32 shell32) + #else() + 
#add_library(psb_cuda_C OBJECT ${cuda_source_files}) + #endif() + + foreach(path IN LISTS cuda_header_C_files) + + # Copy the header file to the include directory + file(COPY "${path}" DESTINATION "${CMAKE_BINARY_DIR}/include") + + + endforeach() + message(STATUS "Copied .h files to ${CMAKE_BINARY_DIR}/include") + foreach(path IN LISTS cuda_header_cu_files) + + # Copy the header file to the include directory + file(COPY "${path}" DESTINATION "${CMAKE_BINARY_DIR}/include") + + + endforeach() + message(STATUS "Copied .cuh files to ${CMAKE_BINARY_DIR}/include") + + + + + add_library(psb_cuda_C OBJECT ${cuda_source_C_files} ${cuda_source_cu_files}) + + + # Create the main CUDA library + add_library(cuda ${cuda_source_files}) + + # Set properties for the CUDA library + set_target_properties(cuda + PROPERTIES + POSITION_INDEPENDENT_CODE TRUE + OUTPUT_NAME psb_cuda + LINKER_LANGUAGE C) + + # Include directories for the CUDA library + target_include_directories(cuda PUBLIC + $ # Path for building + $ # Path for installation + #/opt/cuda/12.8/include + ) + + # Link with other necessary libraries + target_link_libraries(cuda PUBLIC base prec linsolve ext util) +endif() + + + + +if(MPI_FOUND) +# Copy mpi.mod from the first available path in MPI_Fortran_INCLUDE_PATH + set(MPI_MOD_COPIED FALSE) + + foreach(path IN LISTS MPI_Fortran_INCLUDE_PATH) + # Construct the full path to the mpi.mod file + set(mpi_mod_path "${path}/mpi.mod") + + # Check if the mpi.mod file exists + if(EXISTS "${mpi_mod_path}") + # Copy the mpi.mod file to the module directory + file(COPY "${mpi_mod_path}" DESTINATION "${CMAKE_Fortran_MODULE_DIRECTORY}") + message(STATUS "Copied mpi.mod from ${mpi_mod_path} to ${CMAKE_Fortran_MODULE_DIRECTORY}") + set(MPI_MOD_COPIED TRUE) + break() # Exit the loop once we've copied the file + endif() + endforeach() + + if(NOT MPI_MOD_COPIED) + message(WARNING "mpi.mod not found in any of the specified paths: ${MPI_Fortran_INCLUDE_PATH}") + endif() + + + + foreach(lib base 
prec linsolve ext util cbind) + + target_link_libraries(${lib} PUBLIC ${MPI_C_LIBRARIES} ${MPI_Fortran_LIBRARIES}) + endforeach() +endif() + +if(OpenCoarrays_FOUND) + foreach(lib base prec linsolve ext util cbind) #TODO: check if cbind goes here! + target_link_libraries(${lib} PUBLIC OpenCoarrays::caf_mpi_static) + endforeach() +endif() + + + + + +message(STATUS "\t\t ${CMAKE_INSTALL_LIBDIR}") +# Install the header files to the include directory +#install(FILES ${cbind_header_C_files} +# DESTINATION include +#) + +#install(DIRECTORY "${CMAKE_BINARY_DIR}/include" DESTINATION "include" +# FILES_MATCHING PATTERN "*.h") + +#install(DIRECTORY "${CMAKE_BINARY_DIR}/modules" DESTINATION "modules" +# FILES_MATCHING PATTERN "*.mod") + + +#install(DIRECTORY "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}" DESTINATION "include" +# FILES_MATCHING PATTERN "*.h") + +#install(DIRECTORY "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_MODULDIR}" DESTINATION "modules" +# FILES_MATCHING PATTERN "*.mod") + +# Install header files +install(DIRECTORY ${CMAKE_BINARY_DIR}/include/ + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" # This will place headers in /include + FILES_MATCHING PATTERN "*.h" +) + +# Install module files +install(DIRECTORY ${CMAKE_BINARY_DIR}/modules/ + DESTINATION "${CMAKE_INSTALL_MODULDIR}" # This will place .mod files in /modules + FILES_MATCHING PATTERN "*.mod" +) + + + +install(TARGETS base prec linsolve ext util cbind + EXPORT ${CMAKE_PROJECT_NAME}-targets + DESTINATION "${CMAKE_INSTALL_LIBDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ) + +if(PSB_BUILD_CUDA) + install(TARGETS cuda + EXPORT ${CMAKE_PROJECT_NAME}-targets + DESTINATION "${CMAKE_INSTALL_LIBDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ) + +endif() + +if(WIN32) + install(TARGETS psb_base_C + EXPORT ${CMAKE_PROJECT_NAME}-targets + DESTINATION "${CMAKE_INSTALL_LIBDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ) + if(METIS_FOUND) + install(TARGETS psb_util_C + EXPORT 
${CMAKE_PROJECT_NAME}-targets + DESTINATION "${CMAKE_INSTALL_LIBDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ) + endif() +endif() + + +#configure_package_config_file( +# INSTALL_DESTINATION "${CMAKE_INSTALL_PREFIX}/cmake/psblas" +#) + +#install(FILES +# "${CMAKE_CURRENT_BINARY_DIR}/psblasConfig.cmake" +# "${CMAKE_CURRENT_BINARY_DIR}/psblasConfigVersion.cmake" +# DESTINATION "${CMAKE_INSTALL_PREFIX}/cmake/psblas" +#) + + +export( + EXPORT ${CMAKE_PROJECT_NAME}-targets + FILE "${CMAKE_CURRENT_BINARY_DIR}/psblasTargets.cmake" + NAMESPACE ${CMAKE_PROJECT_NAME}:: +) + + + + + + + + + +message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX} - ${PSB_CMAKE_INSTALL_PREFIX};") +message(STATUS "CMAKE_INSTALL_LIBDIR: ${CMAKE_INSTALL_LIBDIR} - ${PSB_CMAKE_INSTALL_LIBDIR};") +message(STATUS "CMAKE_INSTALL_INCLUDEDIR: ${CMAKE_INSTALL_INCLUDEDIR} - ${PSB_CMAKE_INSTALL_INCLUDEDIR};") +message(STATUS "CMAKE_INSTALL_MODULDIR: ${CMAKE_INSTALL_MODULDIR} - ${PSB_CMAKE_INSTALL_MODULDIR};") + + + + +#----------------- +# Add PSBLAS tests +#----------------- + +# Unit tests targeting each function, argument, and branch of code +# add_mpi_test(initialize_mpi 2 initialize_mpi) + diff --git a/Changelog b/Changelog deleted file mode 100644 index 88684099..00000000 --- a/Changelog +++ /dev/null @@ -1,625 +0,0 @@ -Changelog. A lot less detailed than usual, at least for past - history. -2022/05/20: Merge changes for REMOTE build. Bump v 3.8 -2022/03/28: Introduce new non-blocking collectives. -2021/06/01: New CTXT object -2021/04/20: OpenMP integration -2021/04/10: Recognize MPICXX in configure -2021/02/10: Take out precset interface, only prec%set now. -2020/09/20: New getelem function to extract vector entries -2020/07/21: Fix configure for METIS sizes -2020/06/01: reworked bild internals for descriptors -2019/12/18: New internals and algorithms for FND_OWNER, faster and less - memory hungry. 
-2019/07/20: New SCAN collective; improve handling of SYMmetric -2019/07/08: Fix memory usage in conversions to/from coo -2019/07/01: Timer facility -2019/06/25: Fix memory footprint in spspmm and other borderline cases -2019/06/17: Make ADD default assembly action. -2019/05/30: new insertion routines in CSR mode -2019/05/27: Fix matrix generation. -2019/05/26: New timer facility (undocumented). -2019/05/20: Change checks on vectors -2019/05/05: Reworked linmap internals -2019/04/15: Improved sphalo and test programs printout -2019/04/09: bump version to 3.6.1 -2019/04/09: new fnd_owner_halo method -2019/03/31: New RENUM_BLK descriptor method -2019/01/16: In bldext, implement ALL_TO_ALLV by hand for certain - compilers. -2018/10/10: New ICTXT argument in prec%init -2018/07/30: Improved implementations for TRIL/TRIU -2018/04/15: Fix internals to have tmp_ovrlap in local indexing. Change - default in CDALL with VL to no global checks. -2018/03/22: Fix defaults for matrix/vector internals -2018/02/03: Accept 'GMRES' as synonim with 'RGMRES'. -2018/11/23: Reimplement hash function. -2018/10/10: new ICTXT argument to prec%init method. -2018/09/04: Modified vector class get_vect method -2018/08/10: Optional arguments in GETROW method. -2018/07/30: Improved TRIL/TRIU implementations. -2018/06/14: New FCG code. -2018/04/24: Merged changes to error handling internals. -2018/04/23: Change default for CDALL with VL. New GLOBAL argument for - reductions. -2018/04/15: Fixed pargen benchmark programs. Made MOLD mandatory. -2018/01/10: Updated docs. -2017/12/15: Fixed preconditioner build. -2017/10/31: Updated target install directories. -2017/10/15: Fold genpde into examples for readability. -2017/10/02: Merged CBinding. -2017/09/30: Fixes for README, contributors, bug reporting address. -2017/08/09: New optional args to TRIL and TRIU to produce two output - matrices at once. -2017/07/20: Fixes to genpde_impl. Fixed bug in matdist with BLOCK. 
-2017/04/28: Further development of CBIND branch. -2017/04/25: Fix configure for MKL. -2017/04/10: Makefile fixes. -2017/04/09: Remove all remaining .F files, and remove configure checks for - F77. Define version 3.5.0. Fixed INTENT in preconditioners. -2017/04/08: All PREC methods now invoked through preconditioner object. -2017/04/05: make ISO_C_BINDING and MOVE_ALLOC mandatory prereq. -2017/03/20: Changes for GPU codes: added buffers to MAPs, reduce number of - malloc/free, added new maybe_free_buffers method on vector. -2017/02/12: New stopping criterion, simplified mat_dist -2017/02/06: Fixes for --enable-long-integers and for RPM build. -2016/07/14: Fix matrix print with renumbering. -2016/06/18: New thresholds for quicksort -2016/06/03: New source dir organization -2016/05/05: Fix fakempi -2016/02/24: Fix init of hash inside cdbldext. -2016/01/06: Default COND to 0. -2015/12/17: Added clean_zeros method to spmat. -2015/12/14: Fixed mat_dist -2015/12/08: Make LOCX allocatable -2015/12/02: Reworked scatter interfaces. -2015/10/17: Fix configure for ATLAS packaging. -2015/07/17: Fix cdbldext. -2015/07/10: Begin integration of multivectors. -2015/04/25: New optional args in V%set. -2015/04/14: New absval method for vectors. - -2015/03/20: Optimizations of TO/FROM routines, use of is_by_rows in COO. - -2015/01/05: Fix silly bug in format conversion csr_from_coo. - -2014/12/21: Change error handling routines to make them more flexible for - C binding. More compact prologues/epilogues. - -2014/11/12: Fix silly bug in MMIO: cycling through rank-2 dense read/write was - transposing! -2014/10/22: Implement norm-1 and norm-infinity at base_sparse_mat relying - on srwsum/aclsum. - -2014/10/15: Merged changes into trunk. Declare version 3.3 - -2014/10/10: Fix recutions when root /= -1 - -2014/09/10: Use int32/64 kinds & friends from ISO_FORTRAN_ENV - -2014/08/30: New multivector types. Vector insertion now takes other - vectors as well as arrays. 
- -2014/07/10: CSPUT has now two specifics, with arrays or vector types. - - -2014/07/02: Matrices have host/device status. - -2014/06/11: Check for bug on multiple generic names. - -2014/05/05: Make sure CSPUT ignores (at most a warning) indices not - belonging. - -2014/04/14: Sort status in COO. Changes in error handling for CSPUT. - -2014/03/31: get_diag, rowsum & friends changed into functions. - -2014/03/26: TRIL/TRIU interfaces. Fixed mm_array_read/write. - Multiple generic names for norms and io. - -2014/03/18: New defaults for MOLD & friends. - -2014/01/27: Fixed error conditions on mmio. - -2013/12/12: New sparse-sparse matrix product, with CSR-CSR, CSC-CSC new - versions, while keeping the old version at hand. - -2013/12/04: Fix computation of output space for getrow & friends. - -2013/12/02: Reimplement coo%fix method: use more memory if available, but - faster. Fix small interfacing problems with getrow/getblk/getptn. - -2013/11/25: Limit usage of coo%fix method. - -2013/11/07: Merged integer vector changes. - -2013/10/01: New desc_type methods calling into indxmap methods. - -2013/09/30: Restructure index conversion methods, simplify. - -2013/09/26: Simplify checks in matrix-vector products - -2013/08/01: Defined new get_fmt method for vectors - -2013/06/19: Fixed type match bug in daxpby/saxpby/zaxpby. - -2013/06/05: Fix INTENT in APPEND methods. -2013/05/10: test/serial now contains an example of how to define a new - format. -2013/04/20: Fix scaling and norm methods for matrices with unit diagonal. - -2013/04/20: Fix add-by-one for unit triangular matrices. - -2013/03/31: Implement CLONE method for vectors, maps and preconditioners. - Make base_prec abstract. - -2013/03/13: doxygen docs for base_vect. Fix afmt to be len=* in genpde. - -2013/03/01: Changes in method naming scheme for matrices. - Change name of descriptor module. - -2012/12/11: Various fixes for 8-bytes integers. - -2012/12/04: Rename specifics for GLOB_TO_LOC/LOC_TO_GLOB. 
Parametrize type - identifiers for MPI calls. - -2012/11/26: Infinite loop bug in sparse AXPBY. - -2012/10/24: Split serial_mod to improve build time. - -2012/07/12: Cleanup Krylov routines. - -2012/06/26: Fixed bug in prec%apply with 'C', and usage of rotations in - GMRES. -2012/06/08: Fix silly bug in GPS renumbering. - -2012/05/25: Fixed docs for release 3.0 - -2012/05/21: Fix configure script to work around configure failure on Cray. - -2012/04/30: Change descriptor's move_alloc and free to work on - uninitialized input. - -2012/04/15: New LOCAL argument to geins/spins. New LIDX argument to CDALL - with VL to allow for user-specified local numbering. - -2012/04/05: Default implementation of MV_TO_COO and MV_FROM_COO based on - CP. - -2012/03/01: Make ISO_C_BINDING a prerequisite. - -2012/02/21: Added experimental support for 8-bytes integers. - Refactored the problem generation methods and the pargen - sample programs. - -2012/02/15: Fixed major perf problem with genblock. - -2012/01/30: Reworked norms 1 and infty, added sparse mat norm1. - -2012/01/10: Bunch of fixes and configury improvements from Cray FTN - -2012/01/03: Split preconditioners into interface/implementation. - -2011/11/27: Merged may routines from preprocessing project psblas-testpre. - -2011/11/21: Added test for ISO_C_BINDING and AMD renumbering. - -2011/11/19: Added the scratch option to the vect ASB routine. - -2011/11/11: Makefile fixes allowing for parallel make. - -2011/10/25: Major upgrade defining the encapsulated vector types, - providing further support for GPU. - -2011/10/05: Split preconditioner modules to alleviate memory pressure - on the compiler, esp. XLF. - Fixed bug in glist map. - -2011/08/01: MOLD methods and various fixes for NAG configry. - -2011/07/25: Bunch of fixes for problems uncovered by Cray FTN. - -2011/06/15: Changed get_local_rows and friends into methods. - - -2011/03/25: Added version identification constants. 
- -2011/03/10: Added support for sparse dot products. Changed intent of X in - preconditioner apply to allow for GPU extensions. - -2011/02/27: Reworked PRINT methods, for vectors as well. - -2011/02/11: Changes to accommodate Cray compiler. - -2011/01/07: Silly bugs in spgather MPI data types and in z_nubmm calling - cnumbmm. Also, don't use allocate on assignment with GNU. - -2011/01/02: Finish descriptor reform by eliminating matrix_data. Switch - all f03 extensions into f90. - -2010/12/16: Merge new descriptors with CLASS(PSB_INDX_MAP) internal maps. - Updated various descriptor-related routines. - -2010/11/29: opt subdir containing ELL and RSB. test/newfmt. - -2010/11/22: PRECBLD now takes a MOLD argument for its matrices. - -2010/10/26: Fixed configure to use MPI wrappers. Fixed example in CXX: the - MOLD actual argument should be polymorphic. - -2010/09/02: Fixed inheritance hierarchy of MOLD method. Merged into trunk. - -2010/09/01: Changed Makefiles to allow for multiple submakes. - -2010/08/31: Defined the MOLD method. Put under IFDEF in opposition to - MOLD= in allocation of CLASS variables. - -2010/07/29: Make the aux component of base_mat a static array; works - around a problem with gfortran. The library does not fully - work yet under gfortran. -2010/05/10: Fixed fakempi. Now works under XLF 13.1 - -2010/04/29: Restructure KRYLOV modules. - -2010/04/29: Take out BLACS. - -2010/04/27: Reworked inheritance chain for PREC. - -2010/04/27: Fixed private attribute. Various fixes for compilation - with gfortran. - -2010/04/21: Added shortcut for CSR in ILU_FACT - -2010/03/23: Restructured the module structure. - - -2009/12/15: Tons of bug fixes, also from testing on IBM XLF. - -2009/09/15: First working OO implementation for serial routines on sparse - matrix data structures. Only D for the time being. - -2009/08/25: New configure flag - --enable-serial - for serial-only compilation. 
-2009/06/24: Changed order of arguments in sp_scal to make it uniform with - rest of library. -2009/05/15: Changed interface to matdist. -2009/05/12: Added support for NAG Fortran compiler -2009/03/16: Release 2.3.1 - -2009/01/27: Renamed psb_transfer into psb_move_alloc. -2009/01/08: Require GNU Fortran 4.3 or later. -2008/11/04: Repackaged and streamlined linear maps. - -2008/10/16: Fixed internal structure of psb_inter_desc. - -2008/09/23: Fix borderline cases where one process does not own any - indices from the global space. - -2008/09/18: Defined psb_sizeof to be integer(8). Added support - into psb_sum, psb_amx and other reductions for long int - scalars. - -2008/09/16: Implemented new scheme for index conversion. - Changed cdall with an option to suppress global checks. - -2008/09/02: Improved psi_fnd_owner performace. - -2008/09/01: Better timings in the pargen test cases. - -2008/08/28: Changed CDALL in case of VL to handle overlapped indices. - -2008/07/28: New sorting/reordering modules. - -2008/07/24: Addded HTML version of user's guide. - -2008/07/22: Fixed I/O for Harwell-Boeing and Matrix Market examples - -2008/05/27: Merged single precision branch. - -2008/04/28: Fixed trimming space in sparse matrix conversion. - Fixed performance issue in cdins. - -2008/03/25: Fix performance bug in psi_idx_ins_cnv. Changed names of - some internal components of preconditioner data structure. - -2008/03/27: Merged the experimental branch for implementing the AVL tree - data structure in Fortran instead of relying on C and passing - functions around to perform comparisons. There seems to be - some performance advantage, although not very large. - -2008/03/25: Merged in changes from the 2.2-maint branch re: error - messages, performance bug in psi_idx_ins_cnv. - -2008/02/26: New psb_linmap_init, psb_linmap_ins, psb_linmap_asb for a - general linear operator mapping among index spaces. 
- -2008/02/18: Branched off for Version 2.2 - -2008/02/08: Merged changes from intermesh branch: we now have an - inter_desc_type object. Currently we only implement the - version needed for aggregation algorithms in the algebraic - multigrid preconditioners, but we'll define more general - (linear) maps soon enough. - -2008/01/25: Various changes to variables controlling conditional - compilation on the Fortran side: removed NETLIB_BLACS, now - HAVE_METIS HAVE_ESSL_BLACS HAVE_KSENDID. - Files impacted: Make.inc.XXX, base/modules/psb_penv_mod, - util/psb_metispart_mod - - -2008/01/18: Centralized convergence checks. Still partial for RGMRES. - -2008/01/14: Merged changes for handling of transpose vs. overlap. - -2008/01/10: Changed name of GMRESR into RGMRES for consistency. - -2007/12/21: Merged in debug infrastructure, internal and html docs. - -2007/11/14: Fix INTENT(IN) on X vector in preconditioner routines. - -2007/10/15: Repackaged the sorting routines in a submodule of their - own, adding some heap management and heapsort utilities for the - benefit of the multilevel preconditioners. - -2007/09/28: Moved gelp and csrp to serial. Changed interface to - sphalo: the new one makes more sense. - Updated documentation. - -2007/09/14: Second round of serial changes: merged into trunk, fixed - JAD regeneration and srch_upd now works. - -2007/09/10: First round of serial changes: implemented serial - psb_spcnv unifying multiple functionalities. - -2007/09/04: Implemented RGMRES for complex data. - -2007/06/04: Fixed implementation of fctint and coins: assume size - arrays caused troubles on some compilers. Documentation of - set_large_threshold. - -2007/05/22: Defined psb_precinit. - -2007/05/15: Defined psb_sizeof. - -2007/05/15: Merged in various fixes coming from tests on SP5 and - HP-Itanium. - -2007/04/08: Changed the implementation of psb_sp_getrow & friends. - -2007/03/27: Merged in changes for enabling compilation on SUN. 
- -2007/02/22: Fixed various misalignments between real and complex. - Defined new psb_sp_clip routines. - Fixed psb_rwextd. - Changed the USE statements, minimizing size of modules and - maximizing consistency checks. - -2007/02/01: Merged serial version: we provide a minimal fake mpi to - allow compiling and running without mpi and blacs. Only - tested with gnu42 so far. - -2007/01/23: Defined new field ext_index in desc_type, and - fixed long standing inconsistency in usage of overlap for - AS preconditioners. Modified halo to accept selector for - halo_index vs. ext_index. - -2007/01/11: Migrated repository to SVN. - -2007/01/11: MLD2P4 has been moved to the new org. Now tackling the - test dirs. - -2007/01/09: First try at reorganizing directories. Subdir MLD2P4 still - to be fixed. Documentation still to be updated. - -2006/12/11: Documented options in glob_to_loc. - -2006/12/06: Fixed raw aggregation. - -2006/12/05: Taken out extra interfaces; inserted use modules with ONLY - clauses where appropriate. - -2006/11/30: Fixed a bug in raw aggregation. Note: raw aggregation - gives different results from smoothed with omega=0.0, - because in the latter we have explicitly stored zero - coefficients that would be absent in the first, thus - generating different ILU factorizations. - -2006/11/28: Merged the mods for descriptors of large index spaces to - avoid having the GLOB_TO_LOC array. Took the chance to - reorganize the descriptor build routines and define some - access functions for descriptor features and entries, so - as not to use the descriptor components directly. Tested - with AS, 2- and 3- level Post smoothers. - -2006/11/09: The allocatable version works, but under gcc42 there is a - compiler bug when using -fbounds-check. - -2006/11/08: Merged the allocatable version; hope everything works! 
- -2006/11/08: Branched version psblas2-2-0-maint, and defined tag - 2.0.2.6 - -2006/11/02: Done in the allocatable branch: repackaging of cdasb and - friends, taking out AVL trees where they were not - absolutely needed, and new dcsrmv routine. - -2006/11/01: Merged changes in the handling of data exchange. - -2006/10/03: Merged in the multilevel preconditioner stuff. This is - still experimental, especially the interfaces are not - stable yet. - -2006/10/03: Declared version 2.0.2.5 for reference purposes. - -2006/10/03: Fixed a bunch of minor bugs, incuding the sorting routines - imsr and imsrx. Added a default call to blacs_exit inside - psb_exit fixed a bad termination in test/pargen/ppde90.f90 - -2006/09/02: Declared version 2.0.2, after having fixed a lot of - details in the environment routines. - -2006/07/25: Defined a new psb_wtime function. Modified precset to - have a non-optional INFO dummy argument. - -2006/07/06: Fixed bug in swaptran. Added psb_krylov generic interface. - -2006/07/04: Ooops, the GetRow mod in SMMP is a performance hit. - Need to investigate further. - -2006/06/21: Bug fix in hb_read when dealing with symmetric matrices. - -2006/06/20: Rewritten symbmm and numbmm from SMMP to be intependent of - CSR storage by using GetRow. Still need to test for - performance. - -2006/06/16: Defined GetRow. This way we may close the mat objects. - Next we will rewrite SMMP to only make use of GetRow, - not to rely on CSR storage format. - -2006/05/29: Added BLACS-like routines for data communication, - broadcasts, reductions, send/receive. - -2006/05/25: Added environment management routines. - -2006/05/03: Bug fixes, plus some change in the internals for SPINS, - preparing hooks for insertion with local numbering. - -2006/04/24: Minor changes to the interface of dense tools routines, - trying to achieve a uniform look & feel. - Rewritten documentation; it is now reasonable, though not - perfect, except for the preconditioner routines. 
- We can now declare RC3. - -2006/04/21: A bunch of fixes related to various matrix initialization - problems that were revealed while testing on SP5. - -2006/04/18: Changed interface to spasb and csdp: better handling of - regeneration. To be tested further for sophisticated uses. - -2006/03/31: We declare RC2 now. Improved I/O routines in test/Fileread. - -2006/03/24: We have a complex version now, working (not necessarily bug free). - -2006/03/15: Started move to complex version. - -2006/03/01: Complete restructure of PREC section. - -2006/02/01: New naming scheme. - -2006/01/01: New multilevel preconditioning wih smoothed aggregation. - -2005/09 : Now enabled UMFPACK complete factorization as basis for AS. - -2005/05/04: Now enabled SuperLU complete factorization as basis for AS. - -2005/04/29: First version with decoupled 2-level. - -2005/04/06: Started work on decoupling the preconditioner aggregation - for 2-level from the main factorization. - -2005/03/30: First version of new DSC/SP allocate/insert/assembly - routines. -2005/03/17: First version of RGMRES. To be refined. - -2005/03/08: dSwapTran aligned with dSwapData. Taken out SwapOverlap. - also moved onto iSwapX. - -2005/03/07: dSwapData rewritten to achieve: 1. better performance; - 2. more flexible functionality. It is now possible to - avoid SwapOvrlap entirely, relying on just SwapData. - SwapTran is still alive, since it reads the descriptors in - "transpose" mode. Also, added work areas to preconditioner - routine, to avoid excessive allocation in the halo/overlap - exchange. - -2005/03/04: Had to put in a workaround for a gfortran bug: - tolower/toupper cannot be functions. - -2005/02/09: Explicit storage choice for the smoother. This seems - to be changing a little bit the actual preconditioner. - To be evaluated further. - -2005/02/08: Renamed F90_PSPREC to PSB_PRCAPLY and Preconditioner to - PSB_PRCBLD. Changed the way PRCAPLY decides what to do. 
- Still needs a PSB_PRCSET to be called before PRCBLD. - -2005/01/28: Started moving functionalities to a SERIAL F90 layer. Also - defined a new COMM layer, to enable implementing SPMM - directly in F90. - -2005/01/20: Finally taken out a direct call to the F77 DCSDP from - SPASB. - -2005/01/18: After much work, we now have 2-level Additive Schwarz - prototype implemented and working. We now start a major - code cleanup that will take some time. Mainly we want to - move a lot of the serial F77 functionality into a new F95 - serial layer, to simplify the parallel F95 code. - -2004/11/25: Following the introduction of Additive Shwarz and - variants, we have now renamed DECOMP_ and friends as - DESC_; this makes things more readable. Sooner or later - we're going to merge this into mainline, but this version - is still very much in a state of flux. - -2004/07/18: For use with gfortran we need to declare the pointer - components with NULL() initialization. This rules out - VAST and PGI. - -2004/07/15: First development version with gfortran from the current - snapshot of gcc 3.5.0. - It is now possible in PSI_dSwapData to opt for - SEND|RECEIVE|SYNC data exchange; plan is to extend to all - data exchange functions, plus making it available as an - option from the F90 level. - -2004/07/06: Merged in a lot of stuff coming mainly from the ASM - development; full merge will have to wait a little more. - Among other things: - use of psimod - new choice parms for overlap - new data exchange for swapdata, to be extended. - multicolumn CSMM. - use psrealloc - new format for marking a matrix as suitable for update. - - -2003/12/09: Changed DSALLOC and DSASB to make sure whenever a dense - matrix is allocated it is also zeroed out. - -2003/10/13: Added call to BLACS_SET in the solvers to ensure global - heterogeneous coherence in the combine operations. - -2003/09/30: Added LOC_TO_GLOB and GLOB_TO_LOC support routines. 
- -2003/09/30: Changed interface for smart update capabilities: choose - with optional parameters in ASB routines. - -2003/09/16: IFC 7.0 had a strange behaviour in the test programs: - sometimes the declaration of PARTS dummy argument with an - INTERFACE would not work, requiring an EXTERNAL - declaration. The proper INTERFACE works now with 7.1. - -2003/03/10: Halo data exchange in F90_PSHALO can now be applied to - integer data; create appropriate support routines. - -2002/12/05: Initial version of Fileread sample programs. - -2002/11/19: Fixes for JAD preconditioner. - -2002/11/19: Methods for patterns: create a descriptor without a - matrix. - -2001/11/16: Reviewed the interfaces: in the tools section we really - need the POINTER attribute for dense vectors, but not in - the computational routines; taking it out allows more - flexibility. - -2001/09/16: Smart update capabilities. - -2001/03/16: Renumbering routines. - -2001/01/14: Added extensions to compute multiple DOTs and AMAXs at once; - diff --git a/LICENSE b/LICENSE index 5ff86d89..a05485c3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,5 @@ - Parallel Sparse BLAS version 3.8 - (C) Copyright 2006-2022 + Parallel Sparse BLAS version 3.9 + (C) Copyright 2006-2025 Salvatore Filippone Alfredo Buttari diff --git a/Make.inc.in b/Make.inc.in index 1fa3179c..ceba77b8 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -14,20 +14,22 @@ FC=@FC@ CC=@CC@ CXX=@CXX@ -FCOPT=@FCOPT@ -CCOPT=@CCOPT@ -CXXOPT=@CXXOPT@ FMFLAG=@FMFLAG@ FIFLAG=@FIFLAG@ EXTRA_OPT=@EXTRA_OPT@ +FCOPT=@FCOPT@ $(EXTRA_OPT) +CCOPT=@CCOPT@ $(EXTRA_OPT) +CXXOPT=@CXXOPT@ $(EXTRA_OPT) # These three should be always set! MPFC=@MPIFC@ MPCC=@MPICC@ FLINK=@FLINK@ +CLINK=@CLINK@ LIBS=@LIBS@ +FLIBS=@FLIBS@ # BLAS, BLACS and METIS libraries. 
BLAS=@BLAS_LIBS@ @@ -67,6 +69,37 @@ UTILMODNAME=@UTILMODNAME@ CBINDLIBNAME=libpsb_cbind.a +OACCD=@OACCD@ +OACCLD=@OACCLD@ +FCOPENACC=@FCOPENACC@ +CCOPENACC=@CCOPENACC@ +CXXOPENACC=@CXXOPENACC@ + +CUDAD=@CUDAD@ +CUDALD=@CUDALD@ +LCUDA=@LCUDA@ + +SPGPU_LIBS=@SPGPU_LIBS@ + +CUDA_DIR=@CUDA_DIR@ +CUDA_INCLUDES=@CUDA_INCLUDES@ +CUDA_LIBS=@CUDA_LIBS@ +PSB_CUDA_VERSION=@PSB_CUDA_VERSION@ +PSB_CUDA_SHORT_VERSION=@PSB_CUDA_SHORT_VERSION@ +CUDA_DEFINES=@CUDA_DEFINES@ +FCUDEFINES=@FCUDEFINES@ +CCUDEFINES=@CCUDEFINES@ +CXXCUDEFINES=@CXXCUDEFINES@ +EXTRA_NVCC=@EXTRA_NVCC@ +NVCC=@CUDA_NVCC@ $(EXTRA_NVCC) +CUDEFINES=@CUDEFINES@ + + +.SUFFIXES: .cu +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $< + @PSBLASRULES@ +PSBGPULDLIBS=$(LCUDA) $(SPGPU_LIBS) $(CUDA_LIBS) $(PSBLDLIBS) $(LIBS) \ No newline at end of file diff --git a/Makefile b/Makefile index a0f5ec3e..fd18a38c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include Make.inc -all: dirs based precd kryld utild cbindd libd +all: dirs based precd linslvd utild cbindd extd $(CUDAD) $(OACCD) libd @echo "=====================================" @echo "PSBLAS libraries Compilation Successful." 
@@ -11,27 +11,41 @@ dirs: precd: based utild: based -kryld: precd +linslvd: precd +extd: based +cudad: extd +oaccd: extd +cbindd: based precd linslvd utild -cbindd: based precd kryld utild - -libd: based precd kryld utild cbindd +libd: based precd linslvd utild cbindd extd $(CUDALD) $(OACCLD) $(MAKE) -C base lib $(MAKE) -C prec lib - $(MAKE) -C krylov lib + $(MAKE) -C linsolve lib $(MAKE) -C util lib $(MAKE) -C cbind lib + $(MAKE) -C ext lib +cudald: cudad + $(MAKE) -C cuda lib +oaccld: oaccd + $(MAKE) -C openacc lib + based: $(MAKE) -C base objs precd: $(MAKE) -C prec objs -kryld: - $(MAKE) -C krylov objs +linslvd: + $(MAKE) -C linsolve objs utild: $(MAKE) -C util objs cbindd: $(MAKE) -C cbind objs +extd: + $(MAKE) -C ext objs +cudad: + $(MAKE) -C cuda objs +oaccd: + $(MAKE) -C openacc objs install: all @@ -48,30 +62,31 @@ install: all mkdir -p $(INSTALL_DOCSDIR) && \ $(INSTALL_DATA) README.md LICENSE $(INSTALL_DOCSDIR) mkdir -p $(INSTALL_SAMPLESDIR) && \ - /bin/cp -fr test/pargen test/fileread $(INSTALL_SAMPLESDIR) && \ - mkdir -p $(INSTALL_SAMPLESDIR)/cbind && /bin/cp -fr cbind/test/pargen/* $(INSTALL_SAMPLESDIR)/cbind -clean: - $(MAKE) -C base clean - $(MAKE) -C prec clean - $(MAKE) -C krylov clean - $(MAKE) -C util clean - $(MAKE) -C cbind clean - -check: all - make check -C test/serial + /bin/cp -fr test/pdegen test/fileread $(INSTALL_SAMPLESDIR) && \ + mkdir -p $(INSTALL_SAMPLESDIR)/cbind && /bin/cp -fr cbind/test/pdegen/* $(INSTALL_SAMPLESDIR)/cbind +clean: cleanlib + $(MAKE) -C base veryclean + $(MAKE) -C prec veryclean + $(MAKE) -C linsolve veryclean + $(MAKE) -C util veryclean + $(MAKE) -C cbind veryclean + $(MAKE) -C ext veryclean + $(MAKE) -C cuda veryclean + $(MAKE) -C openacc veryclean +cleantest: + cd test/fileread && $(MAKE) clean + cd test/pdegen && $(MAKE) clean + cd test/util && $(MAKE) clean cleanlib: (cd lib; /bin/rm -f *.a *$(.mod) *$(.fh) *.h) (cd include; /bin/rm -f *.a *$(.mod) *$(.fh) *.h) - (cd modules; /bin/rm -f *.a *$(.mod) *$(.fh) 
*.h) + (cd modules; /bin/rm -f *.a *$(.mod) *$(.fh) *.h) + +distclean: clean + /bin/rm -f Make.inc util/psb_metis_int.h base/modules/psb_config.h + +check: all + make check -C test/serial -veryclean: cleanlib - cd base && $(MAKE) veryclean - cd prec && $(MAKE) veryclean - cd krylov && $(MAKE) veryclean - cd util && $(MAKE) veryclean - cd cbind && $(MAKE) veryclean - cd test/fileread && $(MAKE) clean - cd test/pargen && $(MAKE) clean - cd test/util && $(MAKE) clean diff --git a/README.md b/README.md index a9813f5e..34b2aff8 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,56 @@ -PSBLAS library, version 3.8 -=========================== +# PSBLAS library, version 3.9 -The architecture of the Fortran 2003 sparse BLAS is described in: ->S. Filippone, A. Buttari. Object-Oriented Techniques for Sparse Matrix ->Computations in Fortran 2003, ACM Trans. on Math. Software, vol. 38, No. -4, 2012. -The ideas are explored further with the paper: ->V. Cardellini, S. Filippone and D. Rouson. Design Patterns for ->sparse-matrix computations on hybrid CPU/GPU platforms, Scientific ->Programming, 22(2014), pp.1-19. +The PSBLAS library, developed with the aim to facilitate the parallelization of computationally intensive scientific applications, is designed to address parallel implementation of iterative solvers for sparse linear systems through the distributed memory paradigm. It includes routines for multiplying sparse matrices by dense matrices, solving block diagonal systems with triangular diagonal entries, preprocessing sparse matrices, and contains additional routines for dense matrix operations. The current implementation of PSBLAS addresses a distributed memory execution model operating with message passing. -Version 1.0 of the library is described in: ->S. Filippone, M. Colajanni. PSBLAS: A library for parallel linear ->algebra computation on sparse matrices, ACM Trans. on Math. Software, ->26(4), Dec. 2000, pp. 527-550. 
+The PSBLAS library version 3 is implemented in the Fortran 2008 programming language, with reuse and/or adaptation of existing Fortran 77 and Fortran 95 software, plus a handful of C routines. +## References -UTILITIES ---------- -The `test/util` directory contains some utilities to convert to/from -Harwell-Boeing and MatrixMarket file formats. +The architecture, philosophy and implementation details of the library are contained in the following papers: -DOCUMENTATION -------------- -See docs/psblas-3.8.pdf; an HTML version of the same document is -available in docs/html. Please consult the sample programs, especially -test/pargen/psb_[sd]_pde[23]d.f90 +- The architecture of the Fortran 2003 sparse BLAS is described in: + >S. Filippone, A. Buttari. Object-Oriented Techniques for Sparse Matrix + >Computations in Fortran 2003, ACM Trans. on Math. Software, vol. 38, No. + 4, 2012. + +- The software engineering ideas are further detailed in the paper: + >V. Cardellini, S. Filippone and D. Rouson. Design Patterns for + >sparse-matrix computations on hybrid CPU/GPU platforms, Scientific + >Programming, 22(2014), pp.1-19. + +- The GPU support is explored in + > S. Filippone, V. Cardellini, D. Barbieri and A. Fanfarillo: + > Sparse Matrix-Vector Multiplication on GPGPUs ACM Transactions on Mathematical Software (TOMS), Volume 43 Issue 4, December 2016. +- Version 1.0 of the library is described in: + >S. Filippone, M. Colajanni. PSBLAS: A library for parallel linear + >algebra computation on sparse matrices, ACM Trans. on Math. Software, + >26(4), Dec. 2000, pp. 527-550. +- The software infrastructure changes required to accommodate the implementation of the + Additive-Schwarz preconditioners available in [AMG4PSBLAS](https://github.com/sfilippone/amg4psblas/) are detailed in: + > A. Buttari, P. D'Ambra, D. di Serafino, S. Filippone, Extending PSBLAS to build parallel Schwarz preconditioners, Applied Parallel Computing. 
State of the Art in Scientific Computing: 7th International Workshop, PARA 2004, LNCS 3732, 2006, pp. 593-602. + + > A. Buttari, P. D'Ambra, D. Di Serafino, S. Filippone, 2LEV-D2P4: A package of high-performance preconditioners for scientific and engineering applications, Applicable Algebra in Engineering, Communications and Computing, 2007, 18(3), pp. 223-239. + + > P. D'Ambra, D. Di Serafino, S. Filippone, MLD2P4: A package of parallel algebraic multilevel domain decomposition preconditioners in Fortran 95, ACM Transactions on Mathematical Software, 2010, 37(3), 30 + +PSBLAS is the backbone of the Parallel Sparse Computation Toolkit ([PSCToolkit](https://psctoolkit.github.io/)) suite of libraries. See the paper: + > D’Ambra, P., Durastante, F., & Filippone, S. (2023). Parallel Sparse Computation Toolkit. Software Impacts, 15, 100463. + +### Other Software credits -OTHER SOFTWARE CREDITS ----------------------- We originally included a modified implementation of some of the Sparker (serial sparse BLAS) material; this has been completely rewritten, way beyond the intention(s) and responsibilities of the original developers. The main reference for the serial sparse BLAS is: ->Duff, I., Marrone, M., Radicati, G., and Vittoli, C. Level 3 basic ->linear algebra subprograms for sparse matrices: a user level interface, ->ACM Trans. Math. Softw., 23(3), 379-401, 1997. + >Duff, I., Marrone, M., Radicati, G., and Vittoli, C. Level 3 basic + >linear algebra subprograms for sparse matrices: a user level interface, + >ACM Trans. Math. Softw., 23(3), 379-401, 1997. +## Installing -INSTALLING ----------- To compile and run our software you will need the following prerequisites (see also SERIAL below): @@ -53,33 +61,45 @@ prerequisites (see also SERIAL below): http://math-atlas.sourceforge.net/ 3. We have had good results with the METIS library, from - http://www-users.cs.umn.edu/~karypis/metis/metis/main.html. + https://github.com/KarypisLab/METIS. 
This is optional; it is used in the util and test/fileread directories but only if you specify `--with-metis`. -4. If you have the AMD package of Davis, Duff and Amestoy, you can +5. If you have the AMD package of Davis, Duff and Amestoy, you can specify `--with-amd` (see `./configure --help` for more details). We use the C interface to AMD. +6. If you have CUDA available, use + --enable-cuda to compile CUDA-enabled methods + --with-cudadir= to specify the CUDA toolkit location + --with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute + capabilities) to compile the CUDA code for. + The configure script will generate a Make.inc file suitable for building the library. The script is capable of recognizing the needed libraries with their default names; if they are in unusual places consider adding the paths with `--with-libs`, or explicitly specifying the names in -`--with-blas`, etc. Please note that a common way for the configure script -to fail is to specify inconsistent MPI vs. plain compilers, either -directly or indirectly via environment variables; e.g. specifying the -Intel compiler with `FC=ifort` while at the same time having an -`MPIFC=mpif90` which points to GNU Fortran. The best way to avoid this -situation is (in our opinion) to use the environment modules package -(see http://modules.sourceforge.net/), and load the relevant -variables with (e.g.) -``` -module load gnu46 openmpi -``` -This will delegate to the modules setup to make sure that the version of -openmpi in use is the one compiled with the gnu46 compilers. After the -configure script has completed you can always tweak the Make.inc file -yourself. +`--with-blas`, etc. + +>[!CAUTION] +> Please note that a common way for the configure script +> to fail is to specify inconsistent MPI vs. plain compilers, either +> directly or indirectly via environment variables; e.g. 
specifying the +> Intel compiler with `FC=ifort` while at the same time having an +> `MPIFC=mpif90` which points to GNU Fortran. + +>[!TIP] +> The best way to avoid this +> situation is (in our opinion) to use the environment modules package +> (see [http://modules.sourceforge.net/](http://modules.sourceforge.net/)), and load the relevant +> variables with (e.g.) +> ``` +> module load gcc/13.2.0 openmpi/4.1.6 +> ``` +> This will delegate to the modules setup to make sure that the version of +> openmpi in use is the one compiled with the gcc/13.2.0 compilers. After the +> configure script has completed you can always tweak the Make.inc file +> yourself. After you have Make.inc fixed, run ``` @@ -91,58 +111,126 @@ install and the libraries will be installed under `/path/lib`, while the module files will be installed under `/path/modules`. The regular and experimental C interface header files are under `/path/include`. -SERIAL ------- +### Packaging changes, CUDA and GPU support + +This version of PSBLAS incorporates into a single package three +entities that were previously separated: +| Library | | +|---------|--------------------| +| PSBLAS | the base library | +| PSBLAS-EXT | a library providing additional storage formats for matrices and vectors | +| SPGPU | a package of kernels for NVIDIA GPUs originally written by Davide Barbieri and Salvatore Filippone; see the license file [cuda/License-spgpu.md](cuda/License-spgpu.md) | + +Moreover, the module and library previously called psb_krylov are now called +psb_linsolve, but their usage is otherwise unchanged. + +### OpenACC +There is a highly experimental version of an OpenACC interface; +you can access it by specifying +```bash +--enable-openacc --with-extraopenacc="-foffload=nvptx-none=-march=sm_70" +``` +where the argument to the extraopenacc option depends on the compiler +you are using (the example shown here is relevant for the GNU +compiler). 
+ +### Serial + Configuring with `--enable-serial` will provide a fake MPI stub library that enables running in pure serial mode; no MPI installation is needed in this case (but note that the fake MPI stubs are only guaranteed to cover what we use internally, it's not a complete replacement). -INTEGER SIZES -------------- +### Integers + We have two kind of integers: IPK for local indices, and LPK for global indices. They can be specified independently at configure time, e.g. +```bash --with-ipk=4 --with-lpk=8 +``` which is asking for 4-bytes local indices, and 8-bytes global indices (this is the default). - +## CMAKE +There is initial support for building with CMAKE. As of this time, it does not compile the CUDA part. -TODO ----- -Fix all reamining bugs. Bugs? We dont' have any ! ;-) +## LLVM +The library has been successfully compiled and tested with LLVM version 20.1.0-rc2. + +## Documentation +Further information on installation and configuration can be found in the documentation. +See [docs/psblas-3.9.pdf](docs/psblas-3.9.pdf); an HTML version of the same document is +available in docs/html. 
Please consult the sample programs, especially +- [test/pdegen/psb_s_pde2d.F90](test/pdegen/psb_s_pde2d.F90) [test/pdegen/psb_d_pde2d.F90](test/pdegen/psb_d_pde2d.F90) +- [test/pdegen/psb_s_pde3d.F90](test/pdegen/psb_s_pde3d.F90) [test/pdegen/psb_d_pde3d.F90](test/pdegen/psb_d_pde3d.F90) + +which contain examples for the solution of linear systems obtained by the discretization of a generic second-order differential equation in two: +```math +- a_1 \frac{\partial^2 u}{\partial x^2} +- a_2 \frac{\partial^2 u}{\partial y^2} ++ b_1 \frac{\partial u}{\partial x} ++ b_2 \frac{\partial u}{\partial y} ++ c u = f +``` +or three +```math +- a_1 \frac{\partial^2 u}{\partial x^2} +- a_2 \frac{\partial^2 u}{\partial y^2} +- a_3 \frac{\partial^2 u}{\partial z^2} ++ b_1 \frac{\partial u}{\partial x} ++ b_2 \frac{\partial u}{\partial y} ++ b_3 \frac{\partial u}{\partial z} ++ c u = f +``` +dimensions on the unit square/cube with Dirichlet boundary conditions. -The PSBLAS team. ---------------- -Project lead: -Salvatore Filippone +### Utilities + +The [test/util](test/util) directory contains some utilities to convert to/from +Harwell-Boeing and MatrixMarket file formats. -Contributors (roughly reverse cronological order): +## TODO and bugs -Dimitri Walther -Andea Di Iorio -Stefano Petrilli -Soren Rasmussen -Zaak Beekman -Ambra Abdullahi Hassan -Pasqua D'Ambra -Alfredo Buttari -Daniela di Serafino -Michele Martone -Michele Colajanni -Fabio Cerioni -Stefano Maiolatesi -Dario Pascucci +- [ ] Improving OpenACC support +- [ ] Improving OpenMP support +- [X] Fix all remaining bugs. Bugs? We don't have any! 🤓 +> [!NOTE] +> To report bugs 🛠 or issues ❓ please use the [GitHub issue system](https://github.com/sfilippone/psblas3/issues). -RELATED SOFTWARE ----------------- + +## The PSBLAS team. 
+**Project lead:** +Salvatore Filippone + +**Contributors** (_roughly reverse chronological order_): + +- Luca Pepè Sciarria +- Theophane Loloum +- Fabio Durastante +- Dimitri Walther +- Andea Di Iorio +- Stefano Petrilli +- Soren Rasmussen +- Zaak Beekman +- Ambra Abdullahi Hassan +- Pasqua D'Ambra +- Alfredo Buttari +- Daniela di Serafino +- Michele Martone +- Michele Colajanni +- Fabio Cerioni +- Stefano Maiolatesi +- Dario Pascucci + + +## RELATED SOFTWARE If you are looking for more sophisticated preconditioners, you may be interested in the package AMG4PSBLAS from - + and the whole [PSCToolkit suite](https://psctoolkit.github.io/). Contact: diff --git a/ReleaseNews b/ReleaseNews index 5a6664c2..e1d7a2b8 100644 --- a/ReleaseNews +++ b/ReleaseNews @@ -1,5 +1,12 @@ WHAT'S NEW - +Version 3.9 + 1. PSBLAS3-EXT has been folded into the main library + 2. Renamed GPU into CUDA. + 3. Highly experimental OpenACC support. + 4. The iterative solvers are now defined in psb_linsolve_mod + and implemented in libpsb_linsolve.a; existing code using + Krylov methods will work with no changes. + Version 3.8.0-2 1. CTXT is now an opaque object. 2. OpenMP is now better integrated. 
diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt new file mode 100644 index 00000000..c2b1fe8f --- /dev/null +++ b/base/CMakeLists.txt @@ -0,0 +1,643 @@ +set(PSB_base_source_files + comm/psb_dovrl_a.f90 + comm/psb_dovrl.f90 +# comm/psb_i2halo_a.f90 + comm/internals/psi_zswaptran.F90 +# comm/internals/psi_i2ovrl_upd_a.f90 + comm/internals/psi_lovrl_save.f90 + comm/internals/psi_movrl_save_a.f90 + comm/internals/psi_sovrl_restr_a.f90 + comm/internals/psi_sovrl_upd_a.f90 + comm/internals/psi_zswaptran_a.F90 + comm/internals/psi_lovrl_restr.f90 + comm/internals/psi_iswapdata.F90 + comm/internals/psi_covrl_upd_a.f90 + comm/internals/psi_dswaptran_a.F90 + comm/internals/psi_lovrl_upd.f90 + comm/internals/psi_dswapdata_a.F90 + comm/internals/psi_movrl_upd_a.f90 +# comm/internals/psi_i2swaptran_a.F90 + comm/internals/psi_dswaptran.F90 + comm/internals/psi_covrl_save_a.f90 + comm/internals/psi_eovrl_restr_a.f90 + comm/internals/psi_sswaptran_a.F90 + comm/internals/psi_dovrl_save_a.f90 + comm/internals/psi_lswapdata.F90 + comm/internals/psi_cswapdata.F90 + comm/internals/psi_dswapdata.F90 + comm/internals/psi_sovrl_save.f90 + comm/internals/psi_iswaptran.F90 + comm/internals/psi_sswapdata_a.F90 + comm/internals/psi_sswaptran.F90 + comm/internals/psi_lswaptran.F90 + comm/internals/psi_mswaptran_a.F90 +# comm/internals/psi_i2ovrl_restr_a.f90 + comm/internals/psi_covrl_restr.f90 + comm/internals/psi_mswapdata_a.F90 + comm/internals/psi_zovrl_restr_a.f90 + comm/internals/psi_dovrl_restr_a.f90 + comm/internals/psi_covrl_restr_a.f90 + comm/internals/psi_sswapdata.F90 + comm/internals/psi_sovrl_save_a.f90 + comm/internals/psi_iovrl_upd.f90 + comm/internals/psi_eswaptran_a.F90 + comm/internals/psi_iovrl_save.f90 + comm/internals/psi_zovrl_restr.f90 + comm/internals/psi_zovrl_upd.f90 + comm/internals/psi_dovrl_upd_a.f90 + comm/internals/psi_dovrl_restr.f90 + comm/internals/psi_zswapdata_a.F90 + comm/internals/psi_dovrl_save.f90 + comm/internals/psi_covrl_save.f90 +# 
comm/internals/psi_i2swapdata_a.F90 + comm/internals/psi_dovrl_upd.f90 + comm/internals/psi_eovrl_save_a.f90 + comm/internals/psi_zovrl_upd_a.f90 + comm/internals/psi_zswapdata.F90 + comm/internals/psi_covrl_upd.f90 + comm/internals/psi_cswaptran.F90 +# comm/internals/psi_i2ovrl_save_a.f90 + comm/internals/psi_sovrl_upd.f90 + comm/internals/psi_eswapdata_a.F90 + comm/internals/psi_movrl_restr_a.f90 + comm/internals/psi_iovrl_restr.f90 + comm/internals/psi_cswapdata_a.F90 + comm/internals/psi_zovrl_save.f90 + comm/internals/psi_eovrl_upd_a.f90 + comm/internals/psi_zovrl_save_a.f90 + comm/internals/psi_cswaptran_a.F90 + comm/internals/psi_sovrl_restr.f90 + comm/psb_dhalo.f90 + comm/psb_zgather_a.f90 + comm/psb_zovrl.f90 + comm/psb_mhalo_a.f90 + comm/psb_zscatter_a.F90 + comm/psb_chalo.f90 + comm/psb_zscatter.F90 + comm/psb_cscatter_a.F90 + comm/psb_cspgather.F90 + comm/psb_cscatter.F90 + comm/psb_shalo_a.f90 + comm/psb_cgather.f90 + comm/psb_zhalo.f90 + comm/psb_movrl_a.f90 + comm/psb_chalo_a.f90 +# comm/psb_i2scatter_a.F90 + comm/psb_sgather_a.f90 +# comm/psb_i2ovrl_a.f90 + comm/psb_zovrl_a.f90 + comm/psb_covrl.f90 + comm/psb_shalo.f90 + comm/psb_dscatter_a.F90 + comm/psb_lgather.f90 + comm/psb_iscatter.F90 + comm/psb_sovrl_a.f90 + comm/psb_dscatter.F90 + comm/psb_eovrl_a.f90 + comm/psb_lovrl.f90 +## comm/psb_lspgather.F90 +## comm/psb_ispgather.F90 + comm/psb_zhalo_a.f90 + comm/psb_sscatter_a.F90 + comm/psb_lscatter.F90 +# comm/psb_i2gather_a.f90 + comm/psb_ihalo.f90 + comm/psb_iovrl.f90 + comm/psb_zspgather.F90 + comm/psb_escatter_a.F90 + comm/psb_mscatter_a.F90 + comm/psb_egather_a.f90 + comm/psb_covrl_a.f90 + comm/psb_sgather.f90 + comm/psb_dhalo_a.f90 + comm/psb_zgather.f90 + comm/psb_igather.f90 + comm/psb_sovrl.f90 + comm/psb_sspgather.F90 + comm/psb_cgather_a.f90 + comm/psb_ehalo_a.f90 + comm/psb_dgather_a.f90 + comm/psb_dspgather.F90 + comm/psb_sscatter.F90 + comm/psb_mgather_a.f90 + comm/psb_dgather.f90 + comm/psb_lhalo.f90 + 
internals/psi_bld_glb_dep_list.F90 + internals/psi_graph_fnd_owner.F90 + internals/psi_sort_dl.f90 + internals/psi_indx_map_fnd_owner.F90 + internals/psi_fnd_owner.F90 + internals/psi_bld_tmpovrl.f90 + internals/psi_symm_dep_list.F90 + internals/psi_desc_impl.f90 +### internals/psi_compute_size.f90 + internals/psi_hash_impl.f90 + internals/psi_crea_ovr_elem.f90 + internals/psi_a2a_fnd_owner.F90 + internals/psi_bld_tmphalo.f90 + internals/psi_crea_bnd_elem.f90 + internals/psi_desc_index.F90 + internals/psi_xtr_loc_dl.F90 + internals/psi_crea_index.f90 + internals/psi_srtlist.f90 + internals/psi_adjcncy_fnd_owner.F90 + tools/psb_sins.f90 + tools/psb_zspasb.f90 + tools/psb_zspalloc.f90 +# tools/psb_i2_remote_vect.F90 + tools/psb_sfree_a.f90 + tools/psb_cdprt.f90 + tools/psb_c_glob_transpose.F90 + tools/psb_ssphalo.F90 + tools/psb_sallc.f90 + tools/psb_sspasb.f90 + tools/psb_zasb.f90 + tools/psb_z_par_csr_spspmm.f90 + tools/psb_iasb.f90 + tools/psb_cdalv.f90 + tools/psb_sspfree.f90 + tools/psb_icdasb.F90 + tools/psb_zallc_a.f90 + tools/psb_d_map.f90 + tools/psb_lfree.f90 +# tools/psb_i2ins_a.f90 + tools/psb_s_remap.F90 + tools/psb_cspalloc.f90 + tools/psb_glob_to_loc.f90 + tools/psb_cdrep.f90 + tools/psb_mins_a.f90 + tools/psb_dallc_a.f90 + tools/psb_d_remote_vect.F90 + tools/psb_cfree.f90 + tools/psb_scdbldext.F90 + tools/psb_cspins.F90 + tools/psb_z_remote_vect.F90 + tools/psb_ssprn.f90 + tools/psb_cdals.f90 + tools/psb_sgetelem.f90 + tools/psb_cspfree.f90 + tools/psb_cins.f90 +# tools/psb_i2free_a.f90 + tools/psb_dspins.F90 +# tools/psb_i2asb_a.f90 + tools/psb_dsphalo.F90 + tools/psb_d_glob_transpose.F90 + tools/psb_c_par_csr_spspmm.f90 + tools/psb_callc_a.f90 + tools/psb_masb_a.f90 + tools/psb_ccdbldext.F90 + tools/psb_dfree_a.f90 + tools/psb_dspasb.f90 + tools/psb_sasb_a.f90 + tools/psb_z_remote_mat.F90 + tools/psb_c_remote_vect.F90 + tools/psb_cd_switch_ovl_indxmap.f90 + tools/psb_dfree.f90 + tools/psb_dasb.f90 + tools/psb_cd_inloc.f90 + tools/psb_mfree_a.f90 + 
tools/psb_zspfree.f90 + tools/psb_s_glob_transpose.F90 + tools/psb_sfree.f90 + tools/psb_dcdbldext.F90 + tools/psb_eins_a.f90 + tools/psb_s_map.f90 + tools/psb_dsprn.f90 + tools/psb_d_remap.F90 + tools/psb_iins.f90 + tools/psb_sasb.f90 + tools/psb_zgetelem.f90 + tools/psb_z_map.f90 + tools/psb_dins_a.f90 + tools/psb_loc_to_glob.f90 + tools/psb_cgetelem.f90 + tools/psb_zcdbldext.F90 + tools/psb_d_remote_mat.F90 + tools/psb_cd_set_bld.f90 + tools/psb_zfree.f90 + tools/psb_zallc.f90 + tools/psb_lallc.f90 + tools/psb_cd_reinit.f90 + tools/psb_csphalo.F90 + tools/psb_cfree_a.f90 + tools/psb_cd_lstext.f90 + tools/psb_zfree_a.f90 + tools/psb_s_par_csr_spspmm.f90 + tools/psb_dgetelem.f90 + tools/psb_callc.f90 + tools/psb_d_par_csr_spspmm.f90 + tools/psb_sspins.F90 + tools/psb_sallc_a.f90 + tools/psb_c_remote_mat.F90 + tools/psb_zins.f90 + tools/psb_e_remote_vect.F90 + tools/psb_zsphalo.F90 + tools/psb_cdren.f90 + tools/psb_casb_a.f90 + tools/psb_dins.f90 + tools/psb_ifree.f90 + tools/psb_mallc_a.f90 + tools/psb_s_remote_vect.F90 + tools/psb_c_remap.F90 + tools/psb_efree_a.f90 + tools/psb_sins_a.f90 + tools/psb_cdins.F90 + tools/psb_cdall.f90 + tools/psb_lasb.f90 + tools/psb_csprn.f90 + tools/psb_casb.f90 + tools/psb_c_map.f90 + tools/psb_lins.f90 + tools/psb_cspasb.f90 + tools/psb_dspfree.f90 + tools/psb_sspalloc.f90 + tools/psb_z_remap.F90 + tools/psb_z_glob_transpose.F90 + tools/psb_easb_a.f90 + tools/psb_cins_a.f90 + tools/psb_iallc.f90 + tools/psb_m_remote_vect.F90 + tools/psb_eallc_a.f90 + tools/psb_dspalloc.f90 + tools/psb_zasb_a.f90 + tools/psb_s_remote_mat.F90 + tools/psb_cd_remap.F90 + tools/psb_zspins.F90 + tools/psb_zins_a.f90 + tools/psb_cdcpy.F90 +# tools/psb_i2allc_a.f90 + tools/psb_dallc.f90 + tools/psb_cd_renum_block.F90 + tools/psb_dasb_a.f90 + tools/psb_zsprn.f90 + tools/psb_get_overlap.f90 + serial/psb_crwextd.f90 + serial/psb_zspspmm.f90 + serial/psb_drwextd.f90 + serial/psb_dnumbmm.f90 + serial/psb_damax_s.f90 + serial/psb_zgeprt.f90 + 
serial/impl/psb_c_coo_impl.F90 + serial/impl/psb_d_coo_impl.F90 + serial/impl/psb_d_csc_impl.F90 + serial/impl/psb_s_coo_impl.F90 + serial/impl/psb_c_csc_impl.F90 + serial/impl/psb_c_rb_idx_tree_impl.F90 + serial/impl/psb_z_csc_impl.F90 + serial/impl/psb_d_mat_impl.F90 + serial/impl/psb_s_csr_impl.F90 + serial/impl/psb_c_mat_impl.F90 + serial/impl/psb_c_csr_impl.F90 + serial/impl/psb_z_mat_impl.F90 + serial/impl/psb_s_rb_idx_tree_impl.F90 + serial/impl/psb_d_csr_impl.F90 + serial/impl/psb_s_mat_impl.F90 + serial/impl/psb_s_base_mat_impl.F90 + serial/impl/psb_base_mat_impl.f90 + serial/impl/psb_d_rb_idx_tree_impl.F90 + serial/impl/psb_z_rb_idx_tree_impl.F90 + serial/impl/psb_z_csr_impl.F90 + serial/impl/psb_z_coo_impl.F90 + serial/impl/psb_c_base_mat_impl.F90 + serial/impl/psb_z_base_mat_impl.F90 + serial/impl/psb_d_base_mat_impl.F90 + serial/impl/psb_s_csc_impl.F90 + serial/smmp.f90 + serial/psi_m_serial_impl.F90 + serial/psb_spdot_srtd.f90 + serial/psb_sasum_s.f90 + serial/psb_snumbmm.f90 + serial/psb_camax_s.f90 + serial/lsmmp.f90 + serial/psb_csymbmm.f90 + serial/psb_dgeprt.f90 + serial/psb_zrwextd.f90 + serial/psb_srwextd.f90 + serial/psb_znumbmm.f90 + serial/sort/psb_c_msort_impl.f90 + serial/sort/psb_c_hsort_impl.f90 + serial/sort/psb_m_isort_impl.f90 + serial/sort/psb_m_msort_impl.f90 + serial/sort/psb_s_hsort_impl.f90 + serial/sort/psb_e_isort_impl.f90 + serial/sort/psb_m_qsort_impl.f90 + serial/sort/psb_z_hsort_impl.f90 + serial/sort/psb_s_qsort_impl.f90 + serial/sort/psb_z_qsort_impl.f90 + serial/sort/psb_c_isort_impl.f90 + serial/sort/psb_e_msort_impl.f90 + serial/sort/psb_d_msort_impl.f90 + serial/sort/psb_d_qsort_impl.f90 + serial/sort/psb_s_isort_impl.f90 + serial/sort/psb_z_isort_impl.f90 + serial/sort/psb_e_hsort_impl.f90 + serial/sort/psb_z_msort_impl.f90 + serial/sort/psb_s_msort_impl.f90 + serial/sort/psb_m_hsort_impl.f90 + serial/sort/psb_d_hsort_impl.f90 + serial/sort/psb_e_qsort_impl.f90 + serial/sort/psb_d_isort_impl.f90 + 
serial/sort/psb_c_qsort_impl.f90 + serial/psb_dasum_s.f90 + serial/psi_z_serial_impl.F90 + serial/psb_dsymbmm.f90 + serial/psb_samax_s.f90 + serial/psb_lsame.f90 + serial/psb_dspspmm.f90 + serial/psb_ssymbmm.f90 + serial/psb_cgeprt.f90 + serial/psb_sgeprt.f90 +# serial/psi_i2_serial_impl.F90 + serial/psi_e_serial_impl.F90 + serial/psb_zsymbmm.f90 + serial/psb_cspspmm.f90 + serial/psb_aspxpby.f90 + serial/psi_s_serial_impl.F90 + serial/psb_zamax_s.f90 + serial/psb_spge_dot.f90 + serial/psb_zasum_s.f90 + serial/psb_casum_s.f90 + serial/psi_d_serial_impl.F90 + serial/psi_c_serial_impl.F90 + serial/psb_sspspmm.f90 + serial/psb_cnumbmm.f90 + psblas/psb_damax.f90 + psblas/psb_dspmm.f90 + psblas/psb_dasum.f90 + psblas/psb_sgetmatinfo.F90 + psblas/psb_dspnrm1.f90 +### psblas/psb_zvmlt.f90 + psblas/psb_daxpby.f90 + psblas/psb_smlt_vect.f90 + psblas/psb_dspsm.f90 + psblas/psb_zabs_vect.f90 + psblas/psb_zspmm.f90 + psblas/psb_sinv_vect.f90 + psblas/psb_zinv_vect.f90 + psblas/psb_dmlt_vect.f90 + psblas/psb_sabs_vect.f90 + psblas/psb_ddot.f90 + psblas/psb_camax.f90 + psblas/psb_cdiv_vect.f90 + psblas/psb_ddiv_vect.f90 + psblas/psb_dabs_vect.f90 + psblas/psb_zmlt_vect.f90 + psblas/psb_caxpby.f90 + psblas/psb_zaxpby.f90 + psblas/psb_cspsm.f90 + psblas/psb_sspnrm1.f90 + psblas/psb_cabs_vect.f90 +### psblas/psb_dvmlt.f90 + psblas/psb_zdot.f90 + psblas/psb_zgetmatinfo.F90 + psblas/psb_znrm2.f90 + psblas/psb_sspmm.f90 + psblas/psb_cspmm.f90 + psblas/psb_cnrmi.f90 + psblas/psb_ccmp_vect.f90 + psblas/psb_casum.f90 + psblas/psb_scmp_vect.f90 +### psblas/psb_svmlt.f90 + psblas/psb_sdot.f90 + psblas/psb_cmlt_vect.f90 + psblas/psb_dnrmi.f90 + psblas/psb_dcmp_vect.f90 + psblas/psb_cnrm2.f90 + psblas/psb_cgetmatinfo.F90 +### psblas/psb_cvmlt.f90 + psblas/psb_zamax.f90 + psblas/psb_dinv_vect.f90 + psblas/psb_dnrm2.f90 + psblas/psb_zspsm.f90 + psblas/psb_snrm2.f90 + psblas/psb_sdiv_vect.f90 + psblas/psb_zdiv_vect.f90 + psblas/psb_znrmi.f90 + psblas/psb_saxpby.f90 + psblas/psb_zspnrm1.f90 + 
psblas/psb_dgetmatinfo.F90 + psblas/psb_sasum.f90 + psblas/psb_zcmp_vect.f90 + psblas/psb_samax.f90 + psblas/psb_snrmi.f90 + psblas/psb_cdot.f90 + psblas/psb_cspnrm1.f90 + psblas/psb_sspsm.f90 + psblas/psb_cinv_vect.f90 + psblas/psb_zasum.f90 + modules/comm/psi_z_comm_v_mod.f90 +# modules/comm/psb_i2_comm_a_mod.f90 + modules/comm/psb_m_comm_a_mod.f90 + modules/comm/psb_z_linmap_mod.f90 + modules/comm/psi_s_comm_a_mod.f90 +# modules/comm/psi_i2_comm_a_mod.f90 + modules/comm/psi_m_comm_a_mod.f90 + modules/comm/psi_l_comm_v_mod.f90 + modules/comm/psb_comm_mod.f90 + modules/comm/psb_l_comm_mod.f90 + modules/comm/psb_d_linmap_mod.f90 + modules/comm/psi_d_comm_v_mod.f90 + modules/comm/psb_c_linmap_mod.f90 + modules/comm/psb_s_comm_mod.f90 + modules/comm/psb_base_linmap_mod.f90 + modules/comm/psi_d_comm_a_mod.f90 + modules/comm/psb_s_linmap_mod.f90 + modules/comm/psi_s_comm_v_mod.f90 + modules/comm/psb_s_comm_a_mod.f90 + modules/comm/psb_c_comm_mod.f90 + modules/comm/psb_i_comm_mod.f90 + modules/comm/psi_c_comm_v_mod.f90 + modules/comm/psb_d_comm_a_mod.f90 + modules/comm/psi_z_comm_a_mod.f90 + modules/comm/psb_z_comm_mod.f90 + modules/comm/psi_i_comm_v_mod.f90 + modules/comm/psb_e_comm_a_mod.f90 + modules/comm/psb_d_comm_mod.f90 + modules/comm/psi_e_comm_a_mod.f90 + modules/comm/psb_c_comm_a_mod.f90 + modules/comm/psb_linmap_mod.f90 + modules/comm/psb_z_comm_a_mod.f90 + modules/comm/psi_c_comm_a_mod.f90 +# modules/auxil/psb_i2_isort_mod.f90 + modules/auxil/psb_z_ip_reord_mod.F90 + modules/auxil/psi_s_serial_mod.f90 + modules/auxil/psb_s_hsort_x_mod.f90 + modules/auxil/psb_s_qsort_mod.f90 + modules/auxil/psb_d_hsort_mod.f90 + modules/auxil/psi_alcx_mod.f90 + modules/auxil/psb_e_ip_reord_mod.F90 +# modules/auxil/psb_i2_msort_mod.f90 + modules/auxil/psb_rb_idx_tree_mod.f90 + modules/auxil/psb_m_isort_mod.f90 + modules/auxil/psb_e_msort_mod.f90 + modules/auxil/psb_c_msort_mod.f90 + modules/auxil/psb_e_isort_mod.f90 + modules/auxil/psb_c_rb_idx_tree_mod.f90 + 
modules/auxil/psb_c_realloc_mod.F90 + modules/auxil/psb_ip_reord_mod.F90 + modules/auxil/psb_e_qsort_mod.f90 + modules/auxil/psi_e_serial_mod.f90 + modules/auxil/psi_serial_mod.f90 + modules/auxil/psb_l_hsort_x_mod.f90 + modules/auxil/psi_lcx_mod.f90 + modules/auxil/psb_d_rb_idx_tree_mod.f90 + modules/auxil/psb_m_realloc_mod.F90 + modules/auxil/psb_z_isort_mod.f90 + modules/auxil/psb_e_hsort_mod.f90 + modules/auxil/psi_m_serial_mod.f90 +# modules/auxil/psi_i2_serial_mod.f90 + modules/auxil/psb_s_isort_mod.f90 + modules/auxil/psb_e_realloc_mod.F90 + modules/auxil/psb_c_hsort_mod.f90 + modules/auxil/psb_z_msort_mod.f90 + modules/auxil/psi_d_serial_mod.f90 + modules/auxil/psb_z_qsort_mod.f90 +# modules/auxil/psb_i2_hsort_mod.f90 + modules/auxil/psb_m_msort_mod.f90 + modules/auxil/psb_m_ip_reord_mod.F90 + modules/auxil/psb_string_mod.f90 + modules/auxil/psb_c_isort_mod.f90 + modules/auxil/psb_d_hsort_x_mod.f90 + modules/auxil/psb_s_hsort_mod.f90 + modules/auxil/psb_i_hsort_x_mod.f90 + modules/auxil/psb_d_qsort_mod.f90 + modules/auxil/psb_s_realloc_mod.F90 + modules/auxil/psb_m_hsort_mod.f90 + modules/auxil/psb_z_realloc_mod.F90 + modules/auxil/psb_z_rb_idx_tree_mod.f90 +# modules/auxil/psb_i2_ip_reord_mod.F90 +# modules/auxil/psb_i2_realloc_mod.F90 + modules/auxil/psb_s_rb_idx_tree_mod.f90 + modules/auxil/psb_c_hsort_x_mod.f90 + modules/auxil/psb_s_ip_reord_mod.F90 + modules/auxil/psb_d_isort_mod.f90 + modules/auxil/psi_z_serial_mod.f90 +# modules/auxil/psb_i2_qsort_mod.f90 + modules/auxil/psb_d_msort_mod.f90 + modules/auxil/psb_c_qsort_mod.f90 + modules/auxil/psb_z_hsort_x_mod.f90 + modules/auxil/psb_c_ip_reord_mod.F90 + modules/auxil/psb_sort_mod.f90 + modules/auxil/psi_acx_mod.f90 + modules/auxil/psb_d_realloc_mod.F90 + modules/auxil/psb_m_qsort_mod.f90 + modules/auxil/psb_s_msort_mod.f90 + modules/auxil/psi_c_serial_mod.f90 + modules/auxil/psb_d_ip_reord_mod.F90 + modules/auxil/psb_z_hsort_mod.f90 + modules/psi_d_mod.F90 + modules/psi_l_mod.F90 + 
modules/penv/psi_d_collective_mod.F90 + modules/penv/psi_m_p2p_mod.F90 +# modules/penv/psi_i2_collective_mod.F90 + modules/penv/psi_s_p2p_mod.F90 + modules/penv/psi_e_p2p_mod.F90 + modules/penv/psi_m_collective_mod.F90 + modules/penv/psi_d_p2p_mod.F90 + modules/penv/psi_p2p_mod.F90 + modules/penv/psi_penv_mod.F90 + modules/penv/psi_z_p2p_mod.F90 + modules/penv/psi_c_collective_mod.F90 + modules/penv/psi_collective_mod.F90 +# modules/penv/psi_i2_p2p_mod.F90 + modules/penv/psi_c_p2p_mod.F90 + modules/penv/psi_e_collective_mod.F90 + modules/penv/psi_z_collective_mod.F90 + modules/penv/psi_s_collective_mod.F90 + modules/psb_cbind_const_mod.F90 + modules/psi_s_mod.F90 + modules/psi_c_mod.F90 + modules/tools/psb_s_tools_a_mod.f90 + modules/tools/psb_d_tools_a_mod.f90 + modules/tools/psb_z_tools_a_mod.f90 + modules/tools/psb_i_tools_mod.F90 + modules/tools/psb_s_tools_mod.F90 + modules/tools/psb_tools_mod.f90 + modules/tools/psb_m_tools_a_mod.f90 + modules/tools/psb_cd_tools_mod.F90 + modules/tools/psb_d_tools_mod.F90 + modules/tools/psb_c_tools_mod.F90 + modules/tools/psb_e_tools_a_mod.f90 +# modules/tools/psb_i2_tools_a_mod.f90 + modules/tools/psb_c_tools_a_mod.f90 + modules/tools/psb_z_tools_mod.F90 + modules/tools/psb_l_tools_mod.F90 + modules/psb_realloc_mod.F90 + modules/psb_check_mod.f90 + modules/serial/psb_mat_mod.f90 + modules/serial/psb_s_csr_mat_mod.f90 + modules/serial/psb_z_mat_mod.F90 + modules/serial/psb_z_vect_mod.F90 + modules/serial/psb_l_base_vect_mod.F90 + modules/serial/psb_c_serial_mod.f90 + modules/serial/psb_z_csc_mat_mod.f90 + modules/serial/psb_d_csc_mat_mod.f90 + modules/serial/psb_z_serial_mod.f90 + modules/serial/psb_c_base_mat_mod.F90 + modules/serial/psb_z_base_mat_mod.F90 + modules/serial/psb_z_csr_mat_mod.f90 + modules/serial/psb_c_csc_mat_mod.f90 + modules/serial/psb_z_base_vect_mod.F90 + modules/serial/psb_l_vect_mod.F90 + modules/serial/psb_d_csr_mat_mod.f90 + modules/serial/psb_c_csr_mat_mod.f90 + modules/serial/psb_s_base_mat_mod.F90 
+ modules/serial/psb_base_mat_mod.F90 + modules/serial/psb_i_base_vect_mod.F90 + modules/serial/psb_s_vect_mod.F90 + modules/serial/psb_s_base_vect_mod.F90 + modules/serial/psb_d_base_vect_mod.F90 + modules/serial/psb_c_mat_mod.F90 + modules/serial/psb_d_base_mat_mod.F90 + modules/serial/psb_c_vect_mod.F90 + modules/serial/psb_d_mat_mod.F90 + modules/serial/psb_s_mat_mod.F90 + modules/serial/psb_i_vect_mod.F90 + modules/serial/psb_d_vect_mod.F90 + modules/serial/psb_c_base_vect_mod.F90 + modules/serial/psb_vect_mod.f90 + modules/serial/psb_d_serial_mod.f90 + modules/serial/psb_s_csc_mat_mod.f90 + modules/serial/psb_s_serial_mod.f90 + modules/serial/psb_serial_mod.f90 + modules/psi_mod.f90 + modules/error.f90 + modules/psb_const_mod.F90 + modules/psblas/psb_c_psblas_mod.F90 + modules/psblas/psb_s_psblas_mod.F90 + modules/psblas/psb_d_psblas_mod.F90 + modules/psblas/psb_z_psblas_mod.F90 + modules/psblas/psb_psblas_mod.f90 + modules/psb_error_impl.F90 + modules/psb_penv_mod.F90 + modules/psb_error_mod.F90 + modules/psb_timers_mod.f90 + modules/psi_i_mod.F90 + modules/psi_z_mod.F90 + modules/desc/psb_desc_const_mod.f90 + modules/desc/psb_indx_map_mod.F90 + modules/desc/psb_hash_mod.F90 + modules/desc/psb_desc_mod.F90 + modules/desc/psb_gen_block_map_mod.F90 + modules/desc/psb_list_map_mod.F90 + modules/desc/psb_repl_map_mod.F90 + modules/desc/psb_hash_map_mod.F90 + modules/desc/psb_glist_map_mod.F90 + modules/psb_base_mod.f90 +) +foreach(file IN LISTS PSB_base_source_files) + list(APPEND base_source_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() + +list(APPEND PSB_base_source_C_files modules/cutil.c) +list(APPEND PSB_base_source_C_files modules/desc/psb_hashval.c) +if (PSB_SERIAL_MPI) + list(APPEND PSB_base_source_C_files modules/psb_fakempi.c) + list(APPEND base_header_C_files ${CMAKE_CURRENT_LIST_DIR}/modules/psb_fakempi.h) +endif() +list(APPEND base_header_C_files ${CMAKE_CURRENT_LIST_DIR}/modules/psb_types.h) + +foreach(file IN LISTS 
PSB_base_source_C_files) + list(APPEND base_source_C_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() + diff --git a/base/comm/internals/psi_covrl_restr.f90 b/base/comm/internals/psi_covrl_restr.f90 index c0276bfd..9a0ecbed 100644 --- a/base/comm/internals/psi_covrl_restr.f90 +++ b/base/comm/internals/psi_covrl_restr.f90 @@ -48,7 +48,8 @@ subroutine psi_covrl_restr_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_covrl_restr_vect' @@ -91,7 +92,8 @@ subroutine psi_covrl_restr_multivect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz,nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_covrl_restr_mv' diff --git a/base/comm/internals/psi_covrl_save.f90 b/base/comm/internals/psi_covrl_save.f90 index 8ee6dc9c..42f2ae3a 100644 --- a/base/comm/internals/psi_covrl_save.f90 +++ b/base/comm/internals/psi_covrl_save.f90 @@ -48,7 +48,8 @@ subroutine psi_covrl_save_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' @@ -97,7 +98,8 @@ subroutine psi_covrl_save_multivect(x,xs,desc_a,info) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz, nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' diff --git a/base/comm/internals/psi_covrl_upd.f90 b/base/comm/internals/psi_covrl_upd.f90 index c829e570..8212895c 100644 --- a/base/comm/internals/psi_covrl_upd.f90 +++ b/base/comm/internals/psi_covrl_upd.f90 @@ -51,7 +51,8 @@ subroutine psi_covrl_upd_vect(x,desc_a,update,info) ! locals complex(psb_spk_), allocatable :: xs(:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx + integer(psb_mpk_) :: np, me, isz, nx, ndm + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -133,7 +134,8 @@ subroutine psi_covrl_upd_multivect(x,desc_a,update,info) ! locals complex(psb_spk_), allocatable :: xs(:,:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx, nc + integer(psb_mpk_) :: np, me, isz, ndm, nx, nc + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err diff --git a/base/comm/internals/psi_cswapdata.F90 b/base/comm/internals/psi_cswapdata.F90 index 5f0b8bdc..db76d16e 100644 --- a/base/comm/internals/psi_cswapdata.F90 +++ b/base/comm/internals/psi_cswapdata.F90 @@ -96,11 +96,11 @@ subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -184,11 +184,11 @@ subroutine psi_cswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_c_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -203,11 +203,11 @@ subroutine psi_cswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: 
totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me - integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_mpk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),& + & iret, nesd, nerv integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti, n logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv @@ -434,11 +434,11 @@ subroutine psi_cswapdata_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -522,11 +522,11 @@ subroutine psi_cswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_c_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -541,12 +541,11 @@ subroutine psi_cswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_cswapdata_a.F90 b/base/comm/internals/psi_cswapdata_a.F90 index 715b674e..8b137397 100644 --- a/base/comm/internals/psi_cswapdata_a.F90 +++ b/base/comm/internals/psi_cswapdata_a.F90 @@ -90,15 +90,16 @@ subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:,:), beta complex(psb_spk_), target :: work(:) @@ -108,7 +109,8 @@ subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -161,17 +163,18 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:,:), beta complex(psb_spk_), target :: work(:) @@ -179,19 +182,20 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -565,11 +569,11 @@ subroutine psi_cswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -583,7 +587,8 @@ subroutine psi_cswapdatav(flag,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -647,11 +652,11 @@ subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -664,19 +669,20 @@ subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_cswaptran.F90 b/base/comm/internals/psi_cswaptran.F90 index aefb6b01..28b356c8 100644 --- a/base/comm/internals/psi_cswaptran.F90 +++ b/base/comm/internals/psi_cswaptran.F90 @@ -98,11 +98,11 @@ subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -185,11 +185,11 @@ subroutine psi_ctran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_c_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -204,12 +204,11 @@ subroutine psi_ctran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. @@ -445,11 +444,11 @@ subroutine psi_cswaptran_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -533,11 +532,11 @@ subroutine psi_ctran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_c_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -552,12 +551,11 @@ subroutine psi_ctran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_cswaptran_a.F90 b/base/comm/internals/psi_cswaptran_a.F90 index a7f2c687..3fa61d94 100644 --- a/base/comm/internals/psi_cswaptran_a.F90 +++ b/base/comm/internals/psi_cswaptran_a.F90 @@ -94,15 +94,16 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:,:), beta complex(psb_spk_), target :: work(:) @@ -112,7 +113,8 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, err_act, totxch, data_ + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -166,36 +168,38 @@ subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:,:), beta complex(psb_spk_), target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -577,11 +581,11 @@ subroutine psi_cswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -659,11 +663,11 @@ subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -676,19 +680,20 @@ subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. 
complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dovrl_restr.f90 b/base/comm/internals/psi_dovrl_restr.f90 index 22a77328..bbcab4f3 100644 --- a/base/comm/internals/psi_dovrl_restr.f90 +++ b/base/comm/internals/psi_dovrl_restr.f90 @@ -48,7 +48,8 @@ subroutine psi_dovrl_restr_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_restr_vect' @@ -91,7 +92,8 @@ subroutine psi_dovrl_restr_multivect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz,nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_restr_mv' diff --git a/base/comm/internals/psi_dovrl_save.f90 b/base/comm/internals/psi_dovrl_save.f90 index 38a83d2d..f7bc3dd1 100644 --- a/base/comm/internals/psi_dovrl_save.f90 +++ b/base/comm/internals/psi_dovrl_save.f90 @@ -48,7 +48,8 @@ subroutine psi_dovrl_save_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' @@ -97,7 +98,8 @@ subroutine psi_dovrl_save_multivect(x,xs,desc_a,info) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz, nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' diff --git a/base/comm/internals/psi_dovrl_upd.f90 b/base/comm/internals/psi_dovrl_upd.f90 index 261971ba..4ca995d9 100644 --- a/base/comm/internals/psi_dovrl_upd.f90 +++ b/base/comm/internals/psi_dovrl_upd.f90 @@ -51,7 +51,8 @@ subroutine psi_dovrl_upd_vect(x,desc_a,update,info) ! locals real(psb_dpk_), allocatable :: xs(:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx + integer(psb_mpk_) :: np, me, isz, nx, ndm + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -133,7 +134,8 @@ subroutine psi_dovrl_upd_multivect(x,desc_a,update,info) ! locals real(psb_dpk_), allocatable :: xs(:,:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx, nc + integer(psb_mpk_) :: np, me, isz, ndm, nx, nc + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90 index fe529706..fb1924be 100644 --- a/base/comm/internals/psi_dswapdata.F90 +++ b/base/comm/internals/psi_dswapdata.F90 @@ -96,11 +96,11 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -184,11 +184,11 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_d_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -203,11 +203,11 @@ subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: 
totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me - integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_mpk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),& + & iret, nesd, nerv integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti, n logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv @@ -434,11 +434,11 @@ subroutine psi_dswapdata_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -522,11 +522,11 @@ subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_d_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -541,12 +541,11 @@ subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90 index aff32517..6f1d4a10 100644 --- a/base/comm/internals/psi_dswapdata_a.F90 +++ b/base/comm/internals/psi_dswapdata_a.F90 @@ -90,15 +90,16 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:,:), beta real(psb_dpk_), target :: work(:) @@ -108,7 +109,8 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -161,17 +163,18 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:,:), beta real(psb_dpk_), target :: work(:) @@ -179,19 +182,20 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -565,11 +569,11 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -583,7 +587,8 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -647,11 +652,11 @@ subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -664,19 +669,20 @@ subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswaptran.F90 b/base/comm/internals/psi_dswaptran.F90 index df98e1ae..25cd8276 100644 --- a/base/comm/internals/psi_dswaptran.F90 +++ b/base/comm/internals/psi_dswaptran.F90 @@ -98,11 +98,11 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -185,11 +185,11 @@ subroutine psi_dtran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_d_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -204,12 +204,11 @@ subroutine psi_dtran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. @@ -445,11 +444,11 @@ subroutine psi_dswaptran_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -533,11 +532,11 @@ subroutine psi_dtran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_d_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -552,12 +551,11 @@ subroutine psi_dtran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_dswaptran_a.F90 b/base/comm/internals/psi_dswaptran_a.F90 index ed13df40..df04c391 100644 --- a/base/comm/internals/psi_dswaptran_a.F90 +++ b/base/comm/internals/psi_dswaptran_a.F90 @@ -94,15 +94,16 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:,:), beta real(psb_dpk_), target :: work(:) @@ -112,7 +113,8 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, err_act, totxch, data_ + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -166,36 +168,38 @@ subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:,:), beta real(psb_dpk_), target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -577,11 +581,11 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -659,11 +663,11 @@ subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -676,19 +680,20 @@ subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. 
real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswapdata_a.F90 b/base/comm/internals/psi_eswapdata_a.F90 index 6a644563..6e2d9557 100644 --- a/base/comm/internals/psi_eswapdata_a.F90 +++ b/base/comm/internals/psi_eswapdata_a.F90 @@ -90,15 +90,16 @@ subroutine psi_eswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:,:), beta integer(psb_epk_), target :: work(:) @@ -108,7 +109,8 @@ subroutine psi_eswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -161,17 +163,18 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:,:), beta integer(psb_epk_), target :: work(:) @@ -179,19 +182,20 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -565,11 +569,11 @@ subroutine psi_eswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -583,7 +587,8 @@ subroutine psi_eswapdatav(flag,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -647,11 +652,11 @@ subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -664,19 +669,20 @@ subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswaptran_a.F90 b/base/comm/internals/psi_eswaptran_a.F90 index 78ed7d8b..e105c88b 100644 --- a/base/comm/internals/psi_eswaptran_a.F90 +++ b/base/comm/internals/psi_eswaptran_a.F90 @@ -94,15 +94,16 @@ subroutine psi_eswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:,:), beta integer(psb_epk_), target :: work(:) @@ -112,7 +113,8 @@ subroutine psi_eswaptranm(flag,n,beta,y,desc_a,work,info,data) ! 
locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, err_act, totxch, data_ + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -166,36 +168,38 @@ subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:,:), beta integer(psb_epk_), target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. 
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -577,11 +581,11 @@ subroutine psi_eswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -659,11 +663,11 @@ subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -676,19 +680,20 @@ subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. 
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swapdata_a.F90 b/base/comm/internals/psi_i2swapdata_a.F90 index 42b4498e..4acdbc9e 100644 --- a/base/comm/internals/psi_i2swapdata_a.F90 +++ b/base/comm/internals/psi_i2swapdata_a.F90 @@ -90,15 +90,16 @@ subroutine psi_i2swapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:,:), beta integer(psb_i2pk_), target :: work(:) @@ -108,7 +109,8 @@ subroutine psi_i2swapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -161,17 +163,18 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:,:), beta integer(psb_i2pk_), target :: work(:) @@ -179,19 +182,20 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -565,11 +569,11 @@ subroutine psi_i2swapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -583,7 +587,8 @@ subroutine psi_i2swapdatav(flag,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -647,11 +652,11 @@ subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -664,19 +669,20 @@ subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swaptran_a.F90 b/base/comm/internals/psi_i2swaptran_a.F90 index f94bf29e..f879702c 100644 --- a/base/comm/internals/psi_i2swaptran_a.F90 +++ b/base/comm/internals/psi_i2swaptran_a.F90 @@ -94,15 +94,16 @@ subroutine psi_i2swaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:,:), beta integer(psb_i2pk_), target :: work(:) @@ -112,7 +113,8 @@ subroutine psi_i2swaptranm(flag,n,beta,y,desc_a,work,info,data) ! 
locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, err_act, totxch, data_ + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -166,36 +168,38 @@ subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:,:), beta integer(psb_i2pk_), target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. 
integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -577,11 +581,11 @@ subroutine psi_i2swaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -659,11 +663,11 @@ subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -676,19 +680,20 @@ subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_iovrl_restr.f90 b/base/comm/internals/psi_iovrl_restr.f90 index 4059f508..599a986e 100644 --- a/base/comm/internals/psi_iovrl_restr.f90 +++ b/base/comm/internals/psi_iovrl_restr.f90 @@ -48,7 +48,8 @@ subroutine psi_iovrl_restr_vect(x,xs,desc_a,info) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_iovrl_restr_vect' @@ -91,7 +92,8 @@ subroutine psi_iovrl_restr_multivect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz,nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_iovrl_restr_mv' diff --git a/base/comm/internals/psi_iovrl_save.f90 b/base/comm/internals/psi_iovrl_save.f90 index 0a9b13fd..eb7a7ffb 100644 --- a/base/comm/internals/psi_iovrl_save.f90 +++ b/base/comm/internals/psi_iovrl_save.f90 @@ -48,7 +48,8 @@ subroutine psi_iovrl_save_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' @@ -97,7 +98,8 @@ subroutine psi_iovrl_save_multivect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz, nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' diff --git a/base/comm/internals/psi_iovrl_upd.f90 b/base/comm/internals/psi_iovrl_upd.f90 index 4eefe131..cf3c201b 100644 --- a/base/comm/internals/psi_iovrl_upd.f90 +++ b/base/comm/internals/psi_iovrl_upd.f90 @@ -51,7 +51,8 @@ subroutine psi_iovrl_upd_vect(x,desc_a,update,info) ! locals integer(psb_ipk_), allocatable :: xs(:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx + integer(psb_mpk_) :: np, me, isz, nx, ndm + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -133,7 +134,8 @@ subroutine psi_iovrl_upd_multivect(x,desc_a,update,info) ! 
locals integer(psb_ipk_), allocatable :: xs(:,:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx, nc + integer(psb_mpk_) :: np, me, isz, ndm, nx, nc + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err diff --git a/base/comm/internals/psi_iswapdata.F90 b/base/comm/internals/psi_iswapdata.F90 index ff4bd074..d73277ef 100644 --- a/base/comm/internals/psi_iswapdata.F90 +++ b/base/comm/internals/psi_iswapdata.F90 @@ -96,11 +96,11 @@ subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -184,11 +184,11 @@ subroutine psi_iswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_i_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -203,11 +203,11 @@ subroutine psi_iswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me - integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_mpk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),& + & iret, nesd, nerv integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti, n logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv @@ -434,11 +434,11 @@ subroutine psi_iswapdata_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -522,11 +522,11 @@ subroutine psi_iswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_i_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -541,12 +541,11 @@ subroutine psi_iswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_iswaptran.F90 b/base/comm/internals/psi_iswaptran.F90 index 75a0a185..9f58455a 100644 --- a/base/comm/internals/psi_iswaptran.F90 +++ b/base/comm/internals/psi_iswaptran.F90 @@ -98,11 +98,11 @@ subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -185,11 +185,11 @@ subroutine psi_itran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_i_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -204,12 +204,11 @@ subroutine psi_itran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
@@ -445,11 +444,11 @@ subroutine psi_iswaptran_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -533,11 +532,11 @@ subroutine psi_itran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_i_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -552,12 +551,11 @@ subroutine psi_itran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. diff --git a/base/comm/internals/psi_lovrl_restr.f90 b/base/comm/internals/psi_lovrl_restr.f90 index 71871e70..d3f6c913 100644 --- a/base/comm/internals/psi_lovrl_restr.f90 +++ b/base/comm/internals/psi_lovrl_restr.f90 @@ -48,7 +48,8 @@ subroutine psi_lovrl_restr_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_lovrl_restr_vect' @@ -91,7 +92,8 @@ subroutine psi_lovrl_restr_multivect(x,xs,desc_a,info) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz,nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_lovrl_restr_mv' diff --git a/base/comm/internals/psi_lovrl_save.f90 b/base/comm/internals/psi_lovrl_save.f90 index 29d3b0ad..0eb623da 100644 --- a/base/comm/internals/psi_lovrl_save.f90 +++ b/base/comm/internals/psi_lovrl_save.f90 @@ -48,7 +48,8 @@ subroutine psi_lovrl_save_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' @@ -97,7 +98,8 @@ subroutine psi_lovrl_save_multivect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz, nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' diff --git a/base/comm/internals/psi_lovrl_upd.f90 b/base/comm/internals/psi_lovrl_upd.f90 index d8b4bb5a..1371e02b 100644 --- a/base/comm/internals/psi_lovrl_upd.f90 +++ b/base/comm/internals/psi_lovrl_upd.f90 @@ -51,7 +51,8 @@ subroutine psi_lovrl_upd_vect(x,desc_a,update,info) ! locals integer(psb_lpk_), allocatable :: xs(:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx + integer(psb_mpk_) :: np, me, isz, nx, ndm + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -133,7 +134,8 @@ subroutine psi_lovrl_upd_multivect(x,desc_a,update,info) ! 
locals integer(psb_lpk_), allocatable :: xs(:,:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx, nc + integer(psb_mpk_) :: np, me, isz, ndm, nx, nc + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err diff --git a/base/comm/internals/psi_lswapdata.F90 b/base/comm/internals/psi_lswapdata.F90 index 9201ebfa..2d819ae9 100644 --- a/base/comm/internals/psi_lswapdata.F90 +++ b/base/comm/internals/psi_lswapdata.F90 @@ -96,11 +96,11 @@ subroutine psi_lswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -184,11 +184,11 @@ subroutine psi_lswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_l_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -203,11 +203,11 @@ subroutine psi_lswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me - integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_mpk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),& + & iret, nesd, nerv integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti, n logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv @@ -434,11 +434,11 @@ subroutine psi_lswapdata_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -522,11 +522,11 @@ subroutine psi_lswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_l_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -541,12 +541,11 @@ subroutine psi_lswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_lswaptran.F90 b/base/comm/internals/psi_lswaptran.F90 index b2b9536c..3bf0eacd 100644 --- a/base/comm/internals/psi_lswaptran.F90 +++ b/base/comm/internals/psi_lswaptran.F90 @@ -98,11 +98,11 @@ subroutine psi_lswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -185,11 +185,11 @@ subroutine psi_ltran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_l_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -204,12 +204,11 @@ subroutine psi_ltran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
@@ -445,11 +444,11 @@ subroutine psi_lswaptran_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -533,11 +532,11 @@ subroutine psi_ltran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_l_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -552,12 +551,11 @@ subroutine psi_ltran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. diff --git a/base/comm/internals/psi_mswapdata_a.F90 b/base/comm/internals/psi_mswapdata_a.F90 index e71f3a52..0a1a3a61 100644 --- a/base/comm/internals/psi_mswapdata_a.F90 +++ b/base/comm/internals/psi_mswapdata_a.F90 @@ -90,15 +90,16 @@ subroutine psi_mswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:,:), beta integer(psb_mpk_), target :: work(:) @@ -108,7 +109,8 @@ subroutine psi_mswapdatam(flag,n,beta,y,desc_a,work,info,data) ! 
locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -161,17 +163,18 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:,:), beta integer(psb_mpk_), target :: work(:) @@ -179,19 +182,20 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -565,11 +569,11 @@ subroutine psi_mswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -583,7 +587,8 @@ subroutine psi_mswapdatav(flag,beta,y,desc_a,work,info,data) ! 
locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -647,11 +652,11 @@ subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -664,19 +669,20 @@ subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. 
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswaptran_a.F90 b/base/comm/internals/psi_mswaptran_a.F90 index 3a780142..8d6e0b52 100644 --- a/base/comm/internals/psi_mswaptran_a.F90 +++ b/base/comm/internals/psi_mswaptran_a.F90 @@ -94,15 +94,16 @@ subroutine psi_mswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:,:), beta integer(psb_mpk_), target :: work(:) @@ -112,7 +113,8 @@ subroutine psi_mswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, err_act, totxch, data_ + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -166,36 +168,38 @@ subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:,:), beta integer(psb_mpk_), target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -577,11 +581,11 @@ subroutine psi_mswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -659,11 +663,11 @@ subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -676,19 +680,20 @@ subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. 
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sovrl_restr.f90 b/base/comm/internals/psi_sovrl_restr.f90 index f51d98e2..86361fba 100644 --- a/base/comm/internals/psi_sovrl_restr.f90 +++ b/base/comm/internals/psi_sovrl_restr.f90 @@ -48,7 +48,8 @@ subroutine psi_sovrl_restr_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_sovrl_restr_vect' @@ -91,7 +92,8 @@ subroutine psi_sovrl_restr_multivect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz,nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_sovrl_restr_mv' diff --git a/base/comm/internals/psi_sovrl_save.f90 b/base/comm/internals/psi_sovrl_save.f90 index 04fc3350..cb058fe4 100644 --- a/base/comm/internals/psi_sovrl_save.f90 +++ b/base/comm/internals/psi_sovrl_save.f90 @@ -48,7 +48,8 @@ subroutine psi_sovrl_save_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' @@ -97,7 +98,8 @@ subroutine psi_sovrl_save_multivect(x,xs,desc_a,info) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz, nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' diff --git a/base/comm/internals/psi_sovrl_upd.f90 b/base/comm/internals/psi_sovrl_upd.f90 index 046524ff..ba3a9f41 100644 --- a/base/comm/internals/psi_sovrl_upd.f90 +++ b/base/comm/internals/psi_sovrl_upd.f90 @@ -51,7 +51,8 @@ subroutine psi_sovrl_upd_vect(x,desc_a,update,info) ! locals real(psb_spk_), allocatable :: xs(:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx + integer(psb_mpk_) :: np, me, isz, nx, ndm + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -133,7 +134,8 @@ subroutine psi_sovrl_upd_multivect(x,desc_a,update,info) ! locals real(psb_spk_), allocatable :: xs(:,:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx, nc + integer(psb_mpk_) :: np, me, isz, ndm, nx, nc + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err diff --git a/base/comm/internals/psi_sswapdata.F90 b/base/comm/internals/psi_sswapdata.F90 index e4f11bd0..e3b49e34 100644 --- a/base/comm/internals/psi_sswapdata.F90 +++ b/base/comm/internals/psi_sswapdata.F90 @@ -96,11 +96,11 @@ subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -184,11 +184,11 @@ subroutine psi_sswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_s_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -203,11 +203,11 @@ subroutine psi_sswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: 
totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me - integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_mpk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),& + & iret, nesd, nerv integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti, n logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv @@ -434,11 +434,11 @@ subroutine psi_sswapdata_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -522,11 +522,11 @@ subroutine psi_sswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_s_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -541,12 +541,11 @@ subroutine psi_sswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_sswapdata_a.F90 b/base/comm/internals/psi_sswapdata_a.F90 index 044dc141..0f1f26da 100644 --- a/base/comm/internals/psi_sswapdata_a.F90 +++ b/base/comm/internals/psi_sswapdata_a.F90 @@ -90,15 +90,16 @@ subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:,:), beta real(psb_spk_), target :: work(:) @@ -108,7 +109,8 @@ subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -161,17 +163,18 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:,:), beta real(psb_spk_), target :: work(:) @@ -179,19 +182,20 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -565,11 +569,11 @@ subroutine psi_sswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -583,7 +587,8 @@ subroutine psi_sswapdatav(flag,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -647,11 +652,11 @@ subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -664,19 +669,20 @@ subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswaptran.F90 b/base/comm/internals/psi_sswaptran.F90 index 90c4b275..abb0ebed 100644 --- a/base/comm/internals/psi_sswaptran.F90 +++ b/base/comm/internals/psi_sswaptran.F90 @@ -98,11 +98,11 @@ subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -185,11 +185,11 @@ subroutine psi_stran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_s_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -204,12 +204,11 @@ subroutine psi_stran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. @@ -445,11 +444,11 @@ subroutine psi_sswaptran_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -533,11 +532,11 @@ subroutine psi_stran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_s_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -552,12 +551,11 @@ subroutine psi_stran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_sswaptran_a.F90 b/base/comm/internals/psi_sswaptran_a.F90 index 434cec4c..10e741dd 100644 --- a/base/comm/internals/psi_sswaptran_a.F90 +++ b/base/comm/internals/psi_sswaptran_a.F90 @@ -94,15 +94,16 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:,:), beta real(psb_spk_), target :: work(:) @@ -112,7 +113,8 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, err_act, totxch, data_ + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -166,36 +168,38 @@ subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:,:), beta real(psb_spk_), target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -577,11 +581,11 @@ subroutine psi_sswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -659,11 +663,11 @@ subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -676,19 +680,20 @@ subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. 
real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zovrl_restr.f90 b/base/comm/internals/psi_zovrl_restr.f90 index 0b127c3e..7fe94aa6 100644 --- a/base/comm/internals/psi_zovrl_restr.f90 +++ b/base/comm/internals/psi_zovrl_restr.f90 @@ -48,7 +48,8 @@ subroutine psi_zovrl_restr_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_zovrl_restr_vect' @@ -91,7 +92,8 @@ subroutine psi_zovrl_restr_multivect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz,nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_zovrl_restr_mv' diff --git a/base/comm/internals/psi_zovrl_save.f90 b/base/comm/internals/psi_zovrl_save.f90 index 830479fe..841dec1d 100644 --- a/base/comm/internals/psi_zovrl_save.f90 +++ b/base/comm/internals/psi_zovrl_save.f90 @@ -48,7 +48,8 @@ subroutine psi_zovrl_save_vect(x,xs,desc_a,info) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz + integer(psb_mpk_) :: np, me, isz + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' @@ -97,7 +98,8 @@ subroutine psi_zovrl_save_multivect(x,xs,desc_a,info) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, isz, nc + integer(psb_mpk_) :: np, me, isz, nc + integer(psb_ipk_) :: err_act, i, idx character(len=20) :: name, ch_err name='psi_dovrl_saver1' diff --git a/base/comm/internals/psi_zovrl_upd.f90 b/base/comm/internals/psi_zovrl_upd.f90 index f71862f7..7a3bccf2 100644 --- a/base/comm/internals/psi_zovrl_upd.f90 +++ b/base/comm/internals/psi_zovrl_upd.f90 @@ -51,7 +51,8 @@ subroutine psi_zovrl_upd_vect(x,desc_a,update,info) ! locals complex(psb_dpk_), allocatable :: xs(:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx + integer(psb_mpk_) :: np, me, isz, nx, ndm + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err @@ -133,7 +134,8 @@ subroutine psi_zovrl_upd_multivect(x,desc_a,update,info) ! locals complex(psb_dpk_), allocatable :: xs(:,:) type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me, err_act, i, idx, ndm, nx, nc + integer(psb_mpk_) :: np, me, isz, ndm, nx, nc + integer(psb_ipk_) :: err_act, i, idx integer(psb_ipk_) :: ierr(5) character(len=20) :: name, ch_err diff --git a/base/comm/internals/psi_zswapdata.F90 b/base/comm/internals/psi_zswapdata.F90 index 991d6e40..53147c84 100644 --- a/base/comm/internals/psi_zswapdata.F90 +++ b/base/comm/internals/psi_zswapdata.F90 @@ -96,11 +96,11 @@ subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -184,11 +184,11 @@ subroutine psi_zswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_z_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -203,11 +203,11 @@ subroutine psi_zswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: 
totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me - integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_mpk_) :: np, me + integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size),& + & iret, nesd, nerv integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti, n logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv @@ -434,11 +434,11 @@ subroutine psi_zswapdata_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -522,11 +522,11 @@ subroutine psi_zswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & use psb_desc_mod use psb_penv_mod use psb_z_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -541,12 +541,11 @@ subroutine psi_zswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_zswapdata_a.F90 b/base/comm/internals/psi_zswapdata_a.F90 index 2d265c76..f37dc1c7 100644 --- a/base/comm/internals/psi_zswapdata_a.F90 +++ b/base/comm/internals/psi_zswapdata_a.F90 @@ -90,15 +90,16 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:,:), beta complex(psb_dpk_), target :: work(:) @@ -108,7 +109,8 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -161,17 +163,18 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:,:), beta complex(psb_dpk_), target :: work(:) @@ -179,19 +182,20 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, & ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -565,11 +569,11 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -583,7 +587,8 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, totxch, data_, err_act + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -647,11 +652,11 @@ subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, & use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -664,19 +669,20 @@ subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, & integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswaptran.F90 b/base/comm/internals/psi_zswaptran.F90 index f027519f..367dbd33 100644 --- a/base/comm/internals/psi_zswaptran.F90 +++ b/base/comm/internals/psi_zswaptran.F90 @@ -98,11 +98,11 @@ subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -185,11 +185,11 @@ subroutine psi_ztran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_z_base_vect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -204,12 +204,11 @@ subroutine psi_ztran_vidx_vect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. @@ -445,11 +444,11 @@ subroutine psi_zswaptran_multivect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -533,11 +532,11 @@ subroutine psi_ztran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& use psb_desc_mod use psb_penv_mod use psb_z_base_multivect_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -552,12 +551,11 @@ subroutine psi_ztran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable :: prcid(:) - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false., debug=.false. 
diff --git a/base/comm/internals/psi_zswaptran_a.F90 b/base/comm/internals/psi_zswaptran_a.F90 index 508d4045..8b4e4268 100644 --- a/base/comm/internals/psi_zswaptran_a.F90 +++ b/base/comm/internals/psi_zswaptran_a.F90 @@ -94,15 +94,16 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:,:), beta complex(psb_dpk_), target :: work(:) @@ -112,7 +113,8 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals type(psb_ctxt_type) :: ctxt integer(psb_mpk_) :: icomm - integer(psb_ipk_) :: np, me, idxs, idxr, err_act, totxch, data_ + integer(psb_mpk_) :: np, me + integer(psb_ipk_) :: idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name @@ -166,36 +168,38 @@ subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag,n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:,:), beta complex(psb_dpk_), target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! 
locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -577,11 +581,11 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -659,11 +663,11 @@ subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,& use psb_error_mod use psb_desc_mod use psb_penv_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -676,19 +680,20 @@ subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,& integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv ! locals - integer(psb_ipk_) :: np, me + integer(psb_mpk_) :: np, me, nesd, nerv, n integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,& & sdsz, rvsz, prcid, rvhd, sdhd - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,& - & snd_pt, rcv_pt, pnti, n + integer(psb_ipk_) :: err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti logical :: swap_mpi, swap_sync, swap_send, swap_recv,& & albf,do_send,do_recv logical, parameter :: usersend=.false. 
complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/psb_cgather.f90 b/base/comm/psb_cgather.f90 index 7893d7c3..bc5302f5 100644 --- a/base/comm/psb_cgather.f90 +++ b/base/comm/psb_cgather.f90 @@ -58,10 +58,11 @@ subroutine psb_cgather_vect(globx, locx, desc_a, info, iroot) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank + integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx complex(psb_spk_), allocatable :: llocx(:) + integer(psb_mpk_), allocatable :: szs(:) character(len=20) :: name, ch_err name='psb_cgatherv' @@ -125,32 +126,34 @@ subroutine psb_cgather_vect(globx, locx, desc_a, info, iroot) goto 9999 end if - call psb_realloc(m,globx,info) - if (info /= psb_success_) then - info=psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - globx(:) = czero - llocx = locx%get_vect() - do i=1,desc_a%get_local_rows() - call psb_loc_to_glob(i,idx,desc_a,info) - globx(idx) = llocx(i) - end do - + llocx = locx%get_vect() ! 
adjust overlapped elements do i=1, size(desc_a%ovrlap_elem,1) if (me /= desc_a%ovrlap_elem(i,3)) then idx = desc_a%ovrlap_elem(i,1) - call psb_loc_to_glob(idx,desc_a,info) - globx(idx) = czero + llocx(idx) = czero end if end do - - call psb_sum(ctxt,globx(1:m),root=root) + allocate(szs(np)) + loc_rows = desc_a%get_local_rows() + call psb_gather(ctxt,loc_rows,szs,root=root) + if ((me == root).or.(root == -1)) then + if (sum(szs) /= m) then + info=psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + call psb_realloc(m,globx,info) + if (info /= psb_success_) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + end if + call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root) + call psb_erractionrestore(err_act) return diff --git a/base/comm/psb_chalo_a.f90 b/base/comm/psb_chalo_a.f90 index b27ffe56..30d47ba0 100644 --- a/base/comm/psb_chalo_a.f90 +++ b/base/comm/psb_chalo_a.f90 @@ -66,8 +66,8 @@ subroutine psb_chalom(x,desc_a,info,jx,ik,work,tran,mode,data) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, k, maxk, nrow, imode, i,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,& & liwork,data_, ldx integer(psb_lpk_) :: m, n, ix, ijx complex(psb_spk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_covrl_a.f90 b/base/comm/psb_covrl_a.f90 index d0f079ae..2d389438 100644 --- a/base/comm/psb_covrl_a.f90 +++ b/base/comm/psb_covrl_a.f90 @@ -77,8 +77,8 @@ subroutine psb_covrlm(x,desc_a,info,jx,ik,work,update,mode) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, k, maxk, update_,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, maxk, update_,& & mode_, liwork, ldx integer(psb_lpk_) :: m, n, ix, ijx complex(psb_spk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_cscatter_a.F90 b/base/comm/psb_cscatter_a.F90 index 38d922e2..f351b0b0 100644 --- a/base/comm/psb_cscatter_a.F90 +++ b/base/comm/psb_cscatter_a.F90 @@ -46,11 +46,11 @@ subroutine psb_cscatterm(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_cscatterm -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -63,7 +63,8 @@ subroutine psb_cscatterm(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam, nlr + integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam,& + & nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, lock, globk, k, maxk, & & col,pos @@ -162,13 +163,13 @@ subroutine psb_cscatterm(globx, locx, desc_a, info, root) rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if (iam == iroot) then displ(1)=0 do i=2,np @@ -195,8 +196,8 @@ subroutine psb_cscatterm(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo do col=1, k ! 
prepare vector to scatter if(iam == iroot) then @@ -211,9 +212,9 @@ subroutine psb_cscatterm(globx, locx, desc_a, info, root) ! scatter call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_c_spk_,locx(1,col),nrow,& - & psb_mpi_c_spk_,rootrank,icomm,info) - + & psb_mpi_c_spk_,locx(1,col),nlr,& + & psb_mpi_c_spk_,rootrank,icomm,minfo) + info = minfo end do deallocate(l_t_g_all, scatterv,stat=info) @@ -291,11 +292,11 @@ end subroutine psb_cscatterm ! subroutine psb_cscatterv(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_cscatterv -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -308,7 +309,7 @@ subroutine psb_cscatterv(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr + integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, k, pos, ilx, jlx integer(psb_lpk_) :: m, n, i, j, idx, iglobx, jglobx @@ -398,13 +399,13 @@ subroutine psb_cscatterv(globx, locx, desc_a, info, root) else rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if(iam == iroot) then displ(1)=0 do i=2,np @@ -436,8 +437,8 @@ subroutine psb_cscatterv(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo ! 
prepare vector to scatter if (iam == iroot) then do i=1,np @@ -451,9 +452,9 @@ subroutine psb_cscatterv(globx, locx, desc_a, info, root) end if call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_c_spk_,locx,nrow,& - & psb_mpi_c_spk_,rootrank,icomm,info) - + & psb_mpi_c_spk_,locx,nlr,& + & psb_mpi_c_spk_,rootrank,icomm,minfo) + info = minfo deallocate(l_t_g_all, scatterv,stat=info) if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/base/comm/psb_cspgather.F90 b/base/comm/psb_cspgather.F90 index 9d50ef56..d1bde487 100644 --- a/base/comm/psb_cspgather.F90 +++ b/base/comm/psb_cspgather.F90 @@ -33,8 +33,8 @@ ! ! Gathers a sparse matrix onto a single process. ! Two variants: -! 1. Gathers to PSB_c_SPARSE_MAT (i.e. to matrix with IPK_ indices) -! 2. Gathers to PSB_lc_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! 1. Gathers to PSB_c_SPARSE_MAT (i.e. to matrix with PSB_IPK_ indices) +! 2. Gathers to PSB_lc_SPARSE_MAT (i.e. to matrix with PSB_LPK_ indices) ! ! Note: this function uses MPI_ALLGATHERV. At this time, the size of the ! 
resulting matrix must be within the range of 4 bytes because of the @@ -48,11 +48,12 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_cspmat_type), intent(inout) :: loca @@ -62,7 +63,7 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_c_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_c_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_ipk_) :: nrg, ncg, nzg, nzl integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k @@ -156,27 +157,27 @@ subroutine psb_csp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_c_spk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_c_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_spk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_c_spk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_c_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_spk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & 
psb_mpi_lpk_,root_,icomm,minfo) @@ -231,11 +232,12 @@ subroutine psb_lcsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_cspmat_type), intent(inout) :: loca @@ -245,7 +247,7 @@ subroutine psb_lcsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_lc_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_lc_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -337,27 +339,27 @@ subroutine psb_lcsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_c_spk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_c_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_spk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_c_spk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_c_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_spk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & 
glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if @@ -369,7 +371,7 @@ subroutine psb_lcsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee call loc_coo%free() ! ! Is the code below safe? For very large cases - ! the indices in glob_coo will overflow. But then, + ! the indices in glob_coo will overflow. But then, ! for very large cases it does not make sense to ! gather the matrix on a single procecss anyway... ! @@ -403,11 +405,12 @@ subroutine psb_lclcsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_lcspmat_type), intent(inout) :: loca @@ -417,7 +420,7 @@ subroutine psb_lclcsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_lc_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_lc_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_lpk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -507,27 +510,27 @@ subroutine psb_lclcsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_c_spk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_c_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_spk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_c_spk_,& + call
mpi_gatherv((loc_coo%val),ndx,psb_mpi_c_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_spk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if diff --git a/base/comm/psb_dgather.f90 b/base/comm/psb_dgather.f90 index c1619b1b..ed0591e8 100644 --- a/base/comm/psb_dgather.f90 +++ b/base/comm/psb_dgather.f90 @@ -58,10 +58,11 @@ subroutine psb_dgather_vect(globx, locx, desc_a, info, iroot) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank + integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx real(psb_dpk_), allocatable :: llocx(:) + integer(psb_mpk_), allocatable :: szs(:) character(len=20) :: name, ch_err name='psb_dgatherv' @@ -125,32 +126,34 @@ subroutine psb_dgather_vect(globx, locx, desc_a, info, iroot) goto 9999 end if - call psb_realloc(m,globx,info) - if (info /= psb_success_) then - info=psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - globx(:) = dzero - llocx = locx%get_vect() - do i=1,desc_a%get_local_rows() - call psb_loc_to_glob(i,idx,desc_a,info) - globx(idx) = llocx(i) - end do - + llocx = locx%get_vect() ! 
adjust overlapped elements do i=1, size(desc_a%ovrlap_elem,1) if (me /= desc_a%ovrlap_elem(i,3)) then idx = desc_a%ovrlap_elem(i,1) - call psb_loc_to_glob(idx,desc_a,info) - globx(idx) = dzero + llocx(idx) = dzero end if end do - - call psb_sum(ctxt,globx(1:m),root=root) + allocate(szs(np)) + loc_rows = desc_a%get_local_rows() + call psb_gather(ctxt,loc_rows,szs,root=root) + if ((me == root).or.(root == -1)) then + if (sum(szs) /= m) then + info=psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + call psb_realloc(m,globx,info) + if (info /= psb_success_) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + end if + call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root) + call psb_erractionrestore(err_act) return diff --git a/base/comm/psb_dhalo_a.f90 b/base/comm/psb_dhalo_a.f90 index ccbc169d..d802ead5 100644 --- a/base/comm/psb_dhalo_a.f90 +++ b/base/comm/psb_dhalo_a.f90 @@ -66,8 +66,8 @@ subroutine psb_dhalom(x,desc_a,info,jx,ik,work,tran,mode,data) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, k, maxk, nrow, imode, i,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,& & liwork,data_, ldx integer(psb_lpk_) :: m, n, ix, ijx real(psb_dpk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_dovrl_a.f90 b/base/comm/psb_dovrl_a.f90 index e005a393..464b8e31 100644 --- a/base/comm/psb_dovrl_a.f90 +++ b/base/comm/psb_dovrl_a.f90 @@ -77,8 +77,8 @@ subroutine psb_dovrlm(x,desc_a,info,jx,ik,work,update,mode) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, k, maxk, update_,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, maxk, update_,& & mode_, liwork, ldx integer(psb_lpk_) :: m, n, ix, ijx real(psb_dpk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_dscatter_a.F90 b/base/comm/psb_dscatter_a.F90 index 0f3be5aa..8864cca8 100644 --- a/base/comm/psb_dscatter_a.F90 +++ b/base/comm/psb_dscatter_a.F90 @@ -46,11 +46,11 @@ subroutine psb_dscatterm(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_dscatterm -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -63,7 +63,8 @@ subroutine psb_dscatterm(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam, nlr + integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam,& + & nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, lock, globk, k, maxk, & & col,pos @@ -162,13 +163,13 @@ subroutine psb_dscatterm(globx, locx, desc_a, info, root) rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if (iam == iroot) then displ(1)=0 do i=2,np @@ -195,8 +196,8 @@ subroutine psb_dscatterm(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo do col=1, k ! 
prepare vector to scatter if(iam == iroot) then @@ -211,9 +212,9 @@ subroutine psb_dscatterm(globx, locx, desc_a, info, root) ! scatter call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_r_dpk_,locx(1,col),nrow,& - & psb_mpi_r_dpk_,rootrank,icomm,info) - + & psb_mpi_r_dpk_,locx(1,col),nlr,& + & psb_mpi_r_dpk_,rootrank,icomm,minfo) + info = minfo end do deallocate(l_t_g_all, scatterv,stat=info) @@ -291,11 +292,11 @@ end subroutine psb_dscatterm ! subroutine psb_dscatterv(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_dscatterv -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -308,7 +309,7 @@ subroutine psb_dscatterv(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr + integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, k, pos, ilx, jlx integer(psb_lpk_) :: m, n, i, j, idx, iglobx, jglobx @@ -398,13 +399,13 @@ subroutine psb_dscatterv(globx, locx, desc_a, info, root) else rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if(iam == iroot) then displ(1)=0 do i=2,np @@ -436,8 +437,8 @@ subroutine psb_dscatterv(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo ! 
prepare vector to scatter if (iam == iroot) then do i=1,np @@ -451,9 +452,9 @@ subroutine psb_dscatterv(globx, locx, desc_a, info, root) end if call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_r_dpk_,locx,nrow,& - & psb_mpi_r_dpk_,rootrank,icomm,info) - + & psb_mpi_r_dpk_,locx,nlr,& + & psb_mpi_r_dpk_,rootrank,icomm,minfo) + info = minfo deallocate(l_t_g_all, scatterv,stat=info) if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/base/comm/psb_dspgather.F90 b/base/comm/psb_dspgather.F90 index 13d04d7b..98a9bc91 100644 --- a/base/comm/psb_dspgather.F90 +++ b/base/comm/psb_dspgather.F90 @@ -33,8 +33,8 @@ ! ! Gathers a sparse matrix onto a single process. ! Two variants: -! 1. Gathers to PSB_d_SPARSE_MAT (i.e. to matrix with IPK_ indices) -! 2. Gathers to PSB_ld_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! 1. Gathers to PSB_d_SPARSE_MAT (i.e. to matrix with PSB_IPK_ indices) +! 2. Gathers to PSB_ld_SPARSE_MAT (i.e. to matrix with PSB_LPK_ indices) ! ! Note: this function uses MPI_ALLGATHERV. At this time, the size of the ! 
resulting matrix must be within the range of 4 bytes because of the @@ -48,11 +48,12 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_dspmat_type), intent(inout) :: loca @@ -62,7 +63,7 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_d_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_d_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_ipk_) :: nrg, ncg, nzg, nzl integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k @@ -156,27 +157,27 @@ subroutine psb_dsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_r_dpk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_r_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_dpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_r_dpk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_r_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_dpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & 
psb_mpi_lpk_,root_,icomm,minfo) @@ -231,11 +232,12 @@ subroutine psb_ldsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_dspmat_type), intent(inout) :: loca @@ -245,7 +247,7 @@ subroutine psb_ldsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_ld_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_ld_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -337,27 +339,27 @@ subroutine psb_ldsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_r_dpk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_r_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_dpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_r_dpk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_r_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_dpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & 
glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if @@ -369,7 +371,7 @@ subroutine psb_ldsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee call loc_coo%free() ! ! Is the code below safe? For very large cases - ! the indices in glob_coo will overflow. But then, + ! the indices in glob_coo will overflow. But then, ! for very large cases it does not make sense to ! gather the matrix on a single procecss anyway... ! @@ -403,11 +405,12 @@ subroutine psb_ldldsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ldspmat_type), intent(inout) :: loca @@ -417,7 +420,7 @@ subroutine psb_ldldsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_ld_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_ld_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_lpk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -507,27 +510,27 @@ subroutine psb_ldldsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_r_dpk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_r_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_dpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_r_dpk_,& + call
mpi_gatherv((loc_coo%val),ndx,psb_mpi_r_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_dpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if diff --git a/base/comm/psb_ehalo_a.f90 b/base/comm/psb_ehalo_a.f90 index 03aa1e3f..d5431e69 100644 --- a/base/comm/psb_ehalo_a.f90 +++ b/base/comm/psb_ehalo_a.f90 @@ -66,8 +66,8 @@ subroutine psb_ehalom(x,desc_a,info,jx,ik,work,tran,mode,data) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, k, maxk, nrow, imode, i,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,& & liwork,data_, ldx integer(psb_lpk_) :: m, n, ix, ijx integer(psb_epk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_eovrl_a.f90 b/base/comm/psb_eovrl_a.f90 index fc6a868d..b24e5ab8 100644 --- a/base/comm/psb_eovrl_a.f90 +++ b/base/comm/psb_eovrl_a.f90 @@ -77,8 +77,8 @@ subroutine psb_eovrlm(x,desc_a,info,jx,ik,work,update,mode) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, k, maxk, update_,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, maxk, update_,& & mode_, liwork, ldx integer(psb_lpk_) :: m, n, ix, ijx integer(psb_epk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_escatter_a.F90 b/base/comm/psb_escatter_a.F90 index e2b45f5c..9c5ed19d 100644 --- a/base/comm/psb_escatter_a.F90 +++ b/base/comm/psb_escatter_a.F90 @@ -46,11 +46,11 @@ subroutine psb_escatterm(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_escatterm -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -63,7 +63,8 @@ subroutine psb_escatterm(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam, nlr + integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam,& + & nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, lock, globk, k, maxk, & & col,pos @@ -162,13 +163,13 @@ subroutine psb_escatterm(globx, locx, desc_a, info, root) rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if (iam == iroot) then displ(1)=0 do i=2,np @@ -195,8 +196,8 @@ subroutine psb_escatterm(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo do col=1, k ! 
prepare vector to scatter if(iam == iroot) then @@ -211,9 +212,9 @@ subroutine psb_escatterm(globx, locx, desc_a, info, root) ! scatter call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_epk_,locx(1,col),nrow,& - & psb_mpi_epk_,rootrank,icomm,info) - + & psb_mpi_epk_,locx(1,col),nlr,& + & psb_mpi_epk_,rootrank,icomm,minfo) + info = minfo end do deallocate(l_t_g_all, scatterv,stat=info) @@ -291,11 +292,11 @@ end subroutine psb_escatterm ! subroutine psb_escatterv(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_escatterv -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -308,7 +309,7 @@ subroutine psb_escatterv(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr + integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, k, pos, ilx, jlx integer(psb_lpk_) :: m, n, i, j, idx, iglobx, jglobx @@ -398,13 +399,13 @@ subroutine psb_escatterv(globx, locx, desc_a, info, root) else rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if(iam == iroot) then displ(1)=0 do i=2,np @@ -436,8 +437,8 @@ subroutine psb_escatterv(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo ! 
prepare vector to scatter if (iam == iroot) then do i=1,np @@ -451,9 +452,9 @@ subroutine psb_escatterv(globx, locx, desc_a, info, root) end if call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_epk_,locx,nrow,& - & psb_mpi_epk_,rootrank,icomm,info) - + & psb_mpi_epk_,locx,nlr,& + & psb_mpi_epk_,rootrank,icomm,minfo) + info = minfo deallocate(l_t_g_all, scatterv,stat=info) if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/base/comm/psb_i2halo_a.f90 b/base/comm/psb_i2halo_a.f90 index d49d71c6..054b61be 100644 --- a/base/comm/psb_i2halo_a.f90 +++ b/base/comm/psb_i2halo_a.f90 @@ -66,8 +66,8 @@ subroutine psb_i2halom(x,desc_a,info,jx,ik,work,tran,mode,data) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, k, maxk, nrow, imode, i,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,& & liwork,data_, ldx integer(psb_lpk_) :: m, n, ix, ijx integer(psb_i2pk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_i2ovrl_a.f90 b/base/comm/psb_i2ovrl_a.f90 index f7ccd7a6..09cc3b5d 100644 --- a/base/comm/psb_i2ovrl_a.f90 +++ b/base/comm/psb_i2ovrl_a.f90 @@ -77,8 +77,8 @@ subroutine psb_i2ovrlm(x,desc_a,info,jx,ik,work,update,mode) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, k, maxk, update_,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, maxk, update_,& & mode_, liwork, ldx integer(psb_lpk_) :: m, n, ix, ijx integer(psb_i2pk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_i2scatter_a.F90 b/base/comm/psb_i2scatter_a.F90 index 960e48b0..1a07587f 100644 --- a/base/comm/psb_i2scatter_a.F90 +++ b/base/comm/psb_i2scatter_a.F90 @@ -46,11 +46,11 @@ subroutine psb_i2scatterm(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_i2scatterm -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -63,7 +63,8 @@ subroutine psb_i2scatterm(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam, nlr + integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam,& + & nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, lock, globk, k, maxk, & & col,pos @@ -162,13 +163,13 @@ subroutine psb_i2scatterm(globx, locx, desc_a, info, root) rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if (iam == iroot) then displ(1)=0 do i=2,np @@ -195,8 +196,8 @@ subroutine psb_i2scatterm(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo do col=1, k ! 
prepare vector to scatter if(iam == iroot) then @@ -211,9 +212,9 @@ subroutine psb_i2scatterm(globx, locx, desc_a, info, root) ! scatter call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_i2pk_,locx(1,col),nrow,& - & psb_mpi_i2pk_,rootrank,icomm,info) - + & psb_mpi_i2pk_,locx(1,col),nlr,& + & psb_mpi_i2pk_,rootrank,icomm,minfo) + info = minfo end do deallocate(l_t_g_all, scatterv,stat=info) @@ -291,11 +292,11 @@ end subroutine psb_i2scatterm ! subroutine psb_i2scatterv(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_i2scatterv -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -308,7 +309,7 @@ subroutine psb_i2scatterv(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr + integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, k, pos, ilx, jlx integer(psb_lpk_) :: m, n, i, j, idx, iglobx, jglobx @@ -398,13 +399,13 @@ subroutine psb_i2scatterv(globx, locx, desc_a, info, root) else rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if(iam == iroot) then displ(1)=0 do i=2,np @@ -436,8 +437,8 @@ subroutine psb_i2scatterv(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo ! 
prepare vector to scatter if (iam == iroot) then do i=1,np @@ -451,9 +452,9 @@ subroutine psb_i2scatterv(globx, locx, desc_a, info, root) end if call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_i2pk_,locx,nrow,& - & psb_mpi_i2pk_,rootrank,icomm,info) - + & psb_mpi_i2pk_,locx,nlr,& + & psb_mpi_i2pk_,rootrank,icomm,minfo) + info = minfo deallocate(l_t_g_all, scatterv,stat=info) if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/base/comm/psb_igather.f90 b/base/comm/psb_igather.f90 index afa794eb..acfdf52a 100644 --- a/base/comm/psb_igather.f90 +++ b/base/comm/psb_igather.f90 @@ -58,10 +58,11 @@ subroutine psb_igather_vect(globx, locx, desc_a, info, iroot) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank + integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx integer(psb_ipk_), allocatable :: llocx(:) + integer(psb_mpk_), allocatable :: szs(:) character(len=20) :: name, ch_err name='psb_igatherv' @@ -125,32 +126,34 @@ subroutine psb_igather_vect(globx, locx, desc_a, info, iroot) goto 9999 end if - call psb_realloc(m,globx,info) - if (info /= psb_success_) then - info=psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - globx(:) = izero - llocx = locx%get_vect() - do i=1,desc_a%get_local_rows() - call psb_loc_to_glob(i,idx,desc_a,info) - globx(idx) = llocx(i) - end do - + llocx = locx%get_vect() ! 
adjust overlapped elements do i=1, size(desc_a%ovrlap_elem,1) if (me /= desc_a%ovrlap_elem(i,3)) then idx = desc_a%ovrlap_elem(i,1) - call psb_loc_to_glob(idx,desc_a,info) - globx(idx) = izero + llocx(idx) = izero end if end do - - call psb_sum(ctxt,globx(1:m),root=root) + allocate(szs(np)) + loc_rows = desc_a%get_local_rows() + call psb_gather(ctxt,loc_rows,szs,root=root) + if ((me == root).or.(root == -1)) then + if (sum(szs) /= m) then + info=psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + call psb_realloc(m,globx,info) + if (info /= psb_success_) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + end if + call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root) + call psb_erractionrestore(err_act) return diff --git a/base/comm/psb_ispgather.F90 b/base/comm/psb_ispgather.F90 index e45f0f5d..773c5864 100644 --- a/base/comm/psb_ispgather.F90 +++ b/base/comm/psb_ispgather.F90 @@ -33,8 +33,8 @@ ! ! Gathers a sparse matrix onto a single process. ! Two variants: -! 1. Gathers to PSB_i_SPARSE_MAT (i.e. to matrix with IPK_ indices) -! 2. Gathers to PSB_@LX@_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! 1. Gathers to PSB_i_SPARSE_MAT (i.e. to matrix with PSB_IPK_ indices) +! 2. Gathers to PSB_@LX@_SPARSE_MAT (i.e. to matrix with PSB_LPK_ indices) ! ! Note: this function uses MPI_ALLGATHERV. At this time, the size of the ! 
resulting matrix must be within the range of 4 bytes because of the @@ -48,11 +48,12 @@ subroutine psb_isp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ispmat_type), intent(inout) :: loca @@ -62,7 +63,7 @@ subroutine psb_isp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_i_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_i_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_ipk_) :: nrg, ncg, nzg, nzl integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k @@ -156,27 +157,27 @@ subroutine psb_isp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_ipk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_ipk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_ipk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_ipk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_ipk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_ipk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & 
psb_mpi_lpk_,root_,icomm,minfo) @@ -231,11 +232,12 @@ subroutine psb_@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ispmat_type), intent(inout) :: loca @@ -245,7 +247,7 @@ subroutine psb_@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_@LX@_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_@LX@_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -337,27 +339,27 @@ subroutine psb_@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_ipk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_ipk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_ipk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_ipk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_ipk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_ipk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & 
glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if @@ -369,7 +371,7 @@ subroutine psb_@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k call loc_coo%free() ! ! Is the code below safe? For very large cases - ! the indices in glob_coo will overflow. But then, + ! the indices in glob_coo will overflow. But then, ! for very large cases it does not make sense to ! gather the matrix on a single procecss anyway... ! @@ -403,11 +405,12 @@ subroutine psb_@LX@@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepn use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_@LX@spmat_type), intent(inout) :: loca @@ -417,7 +420,7 @@ subroutine psb_@LX@@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepn integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_@LX@_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_@LX@_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_lpk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -507,27 +510,27 @@ subroutine psb_@LX@@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepn ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_ipk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_ipk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_ipk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_ipk_,& + call
mpi_gatherv((loc_coo%val),ndx,psb_mpi_ipk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_ipk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if diff --git a/base/comm/psb_lgather.f90 b/base/comm/psb_lgather.f90 index 00af3cd1..17359bce 100644 --- a/base/comm/psb_lgather.f90 +++ b/base/comm/psb_lgather.f90 @@ -58,10 +58,11 @@ subroutine psb_lgather_vect(globx, locx, desc_a, info, iroot) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank + integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx integer(psb_lpk_), allocatable :: llocx(:) + integer(psb_mpk_), allocatable :: szs(:) character(len=20) :: name, ch_err name='psb_lgatherv' @@ -125,32 +126,34 @@ subroutine psb_lgather_vect(globx, locx, desc_a, info, iroot) goto 9999 end if - call psb_realloc(m,globx,info) - if (info /= psb_success_) then - info=psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - globx(:) = lzero - llocx = locx%get_vect() - do i=1,desc_a%get_local_rows() - call psb_loc_to_glob(i,idx,desc_a,info) - globx(idx) = llocx(i) - end do - + llocx = locx%get_vect() ! 
adjust overlapped elements do i=1, size(desc_a%ovrlap_elem,1) if (me /= desc_a%ovrlap_elem(i,3)) then idx = desc_a%ovrlap_elem(i,1) - call psb_loc_to_glob(idx,desc_a,info) - globx(idx) = lzero + llocx(idx) = lzero end if end do - - call psb_sum(ctxt,globx(1:m),root=root) + allocate(szs(np)) + loc_rows = desc_a%get_local_rows() + call psb_gather(ctxt,loc_rows,szs,root=root) + if ((me == root).or.(root == -1)) then + if (sum(szs) /= m) then + info=psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + call psb_realloc(m,globx,info) + if (info /= psb_success_) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + end if + call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root) + call psb_erractionrestore(err_act) return diff --git a/base/comm/psb_lspgather.F90 b/base/comm/psb_lspgather.F90 index aa7b8fcc..8cc009bf 100644 --- a/base/comm/psb_lspgather.F90 +++ b/base/comm/psb_lspgather.F90 @@ -33,8 +33,8 @@ ! ! Gathers a sparse matrix onto a single process. ! Two variants: -! 1. Gathers to PSB_l_SPARSE_MAT (i.e. to matrix with IPK_ indices) -! 2. Gathers to PSB_@LX@_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! 1. Gathers to PSB_l_SPARSE_MAT (i.e. to matrix with PSB_IPK_ indices) +! 2. Gathers to PSB_@LX@_SPARSE_MAT (i.e. to matrix with PSB_LPK_ indices) ! ! Note: this function uses MPI_ALLGATHERV. At this time, the size of the ! 
resulting matrix must be within the range of 4 bytes because of the @@ -48,11 +48,12 @@ subroutine psb_lsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_lspmat_type), intent(inout) :: loca @@ -62,7 +63,7 @@ subroutine psb_lsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_l_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_l_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_ipk_) :: nrg, ncg, nzg, nzl integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k @@ -156,27 +157,27 @@ subroutine psb_lsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_lpk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_lpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_lpk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_lpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & 
psb_mpi_lpk_,root_,icomm,minfo) @@ -231,11 +232,12 @@ subroutine psb_@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_lspmat_type), intent(inout) :: loca @@ -245,7 +247,7 @@ subroutine psb_@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_@LX@_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_@LX@_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -337,27 +339,27 @@ subroutine psb_@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_lpk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_lpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_lpk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_lpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & 
glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if @@ -369,7 +371,7 @@ subroutine psb_@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k call loc_coo%free() ! ! Is the code below safe? For very large cases - ! the indices in glob_coo will overflow. But then, + ! the indices in glob_coo will overflow. But then, ! for very large cases it does not make sense to ! gather the matrix on a single procecss anyway... ! @@ -403,11 +405,12 @@ subroutine psb_@LX@@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepn use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_@LX@spmat_type), intent(inout) :: loca @@ -417,7 +420,7 @@ subroutine psb_@LX@@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepn integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_@LX@_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_@LX@_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_lpk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -507,27 +510,27 @@ subroutine psb_@LX@@LX@sp_allgather(globa, loca, desc_a, info, root, dupl,keepn ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_lpk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_lpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_lpk_,& + call
mpi_gatherv((loc_coo%val),ndx,psb_mpi_lpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if diff --git a/base/comm/psb_mhalo_a.f90 b/base/comm/psb_mhalo_a.f90 index cb9ffec1..c3f6a688 100644 --- a/base/comm/psb_mhalo_a.f90 +++ b/base/comm/psb_mhalo_a.f90 @@ -66,8 +66,8 @@ subroutine psb_mhalom(x,desc_a,info,jx,ik,work,tran,mode,data) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, k, maxk, nrow, imode, i,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,& & liwork,data_, ldx integer(psb_lpk_) :: m, n, ix, ijx integer(psb_mpk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_movrl_a.f90 b/base/comm/psb_movrl_a.f90 index 42d7d82d..983bcbf8 100644 --- a/base/comm/psb_movrl_a.f90 +++ b/base/comm/psb_movrl_a.f90 @@ -77,8 +77,8 @@ subroutine psb_movrlm(x,desc_a,info,jx,ik,work,update,mode) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, k, maxk, update_,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, maxk, update_,& & mode_, liwork, ldx integer(psb_lpk_) :: m, n, ix, ijx integer(psb_mpk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_mscatter_a.F90 b/base/comm/psb_mscatter_a.F90 index 2c6d9fbb..628fcf19 100644 --- a/base/comm/psb_mscatter_a.F90 +++ b/base/comm/psb_mscatter_a.F90 @@ -46,11 +46,11 @@ subroutine psb_mscatterm(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_mscatterm -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -63,7 +63,8 @@ subroutine psb_mscatterm(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam, nlr + integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam,& + & nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, lock, globk, k, maxk, & & col,pos @@ -162,13 +163,13 @@ subroutine psb_mscatterm(globx, locx, desc_a, info, root) rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if (iam == iroot) then displ(1)=0 do i=2,np @@ -195,8 +196,8 @@ subroutine psb_mscatterm(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo do col=1, k ! 
prepare vector to scatter if(iam == iroot) then @@ -211,9 +212,9 @@ subroutine psb_mscatterm(globx, locx, desc_a, info, root) ! scatter call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_mpk_,locx(1,col),nrow,& - & psb_mpi_mpk_,rootrank,icomm,info) - + & psb_mpi_mpk_,locx(1,col),nlr,& + & psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo end do deallocate(l_t_g_all, scatterv,stat=info) @@ -291,11 +292,11 @@ end subroutine psb_mscatterm ! subroutine psb_mscatterv(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_mscatterv -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -308,7 +309,7 @@ subroutine psb_mscatterv(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr + integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, k, pos, ilx, jlx integer(psb_lpk_) :: m, n, i, j, idx, iglobx, jglobx @@ -398,13 +399,13 @@ subroutine psb_mscatterv(globx, locx, desc_a, info, root) else rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if(iam == iroot) then displ(1)=0 do i=2,np @@ -436,8 +437,8 @@ subroutine psb_mscatterv(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo ! 
prepare vector to scatter if (iam == iroot) then do i=1,np @@ -451,9 +452,9 @@ subroutine psb_mscatterv(globx, locx, desc_a, info, root) end if call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_mpk_,locx,nrow,& - & psb_mpi_mpk_,rootrank,icomm,info) - + & psb_mpi_mpk_,locx,nlr,& + & psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo deallocate(l_t_g_all, scatterv,stat=info) if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/base/comm/psb_sgather.f90 b/base/comm/psb_sgather.f90 index 21ce1408..59cecc17 100644 --- a/base/comm/psb_sgather.f90 +++ b/base/comm/psb_sgather.f90 @@ -58,10 +58,11 @@ subroutine psb_sgather_vect(globx, locx, desc_a, info, iroot) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank + integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx real(psb_spk_), allocatable :: llocx(:) + integer(psb_mpk_), allocatable :: szs(:) character(len=20) :: name, ch_err name='psb_sgatherv' @@ -125,32 +126,34 @@ subroutine psb_sgather_vect(globx, locx, desc_a, info, iroot) goto 9999 end if - call psb_realloc(m,globx,info) - if (info /= psb_success_) then - info=psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - globx(:) = szero - llocx = locx%get_vect() - do i=1,desc_a%get_local_rows() - call psb_loc_to_glob(i,idx,desc_a,info) - globx(idx) = llocx(i) - end do - + llocx = locx%get_vect() ! 
adjust overlapped elements do i=1, size(desc_a%ovrlap_elem,1) if (me /= desc_a%ovrlap_elem(i,3)) then idx = desc_a%ovrlap_elem(i,1) - call psb_loc_to_glob(idx,desc_a,info) - globx(idx) = szero + llocx(idx) = szero end if end do - - call psb_sum(ctxt,globx(1:m),root=root) + allocate(szs(np)) + loc_rows = desc_a%get_local_rows() + call psb_gather(ctxt,loc_rows,szs,root=root) + if ((me == root).or.(root == -1)) then + if (sum(szs) /= m) then + info=psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + call psb_realloc(m,globx,info) + if (info /= psb_success_) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + end if + call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root) + call psb_erractionrestore(err_act) return diff --git a/base/comm/psb_shalo_a.f90 b/base/comm/psb_shalo_a.f90 index 0030d5c9..23cc464d 100644 --- a/base/comm/psb_shalo_a.f90 +++ b/base/comm/psb_shalo_a.f90 @@ -66,8 +66,8 @@ subroutine psb_shalom(x,desc_a,info,jx,ik,work,tran,mode,data) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, k, maxk, nrow, imode, i,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,& & liwork,data_, ldx integer(psb_lpk_) :: m, n, ix, ijx real(psb_spk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_sovrl_a.f90 b/base/comm/psb_sovrl_a.f90 index 9944036d..6ced0fd5 100644 --- a/base/comm/psb_sovrl_a.f90 +++ b/base/comm/psb_sovrl_a.f90 @@ -77,8 +77,8 @@ subroutine psb_sovrlm(x,desc_a,info,jx,ik,work,update,mode) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, k, maxk, update_,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, maxk, update_,& & mode_, liwork, ldx integer(psb_lpk_) :: m, n, ix, ijx real(psb_spk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_sscatter_a.F90 b/base/comm/psb_sscatter_a.F90 index 08026536..e060bd1c 100644 --- a/base/comm/psb_sscatter_a.F90 +++ b/base/comm/psb_sscatter_a.F90 @@ -46,11 +46,11 @@ subroutine psb_sscatterm(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_sscatterm -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -63,7 +63,8 @@ subroutine psb_sscatterm(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam, nlr + integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam,& + & nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, lock, globk, k, maxk, & & col,pos @@ -162,13 +163,13 @@ subroutine psb_sscatterm(globx, locx, desc_a, info, root) rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if (iam == iroot) then displ(1)=0 do i=2,np @@ -195,8 +196,8 @@ subroutine psb_sscatterm(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo do col=1, k ! 
prepare vector to scatter if(iam == iroot) then @@ -211,9 +212,9 @@ subroutine psb_sscatterm(globx, locx, desc_a, info, root) ! scatter call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_r_spk_,locx(1,col),nrow,& - & psb_mpi_r_spk_,rootrank,icomm,info) - + & psb_mpi_r_spk_,locx(1,col),nlr,& + & psb_mpi_r_spk_,rootrank,icomm,minfo) + info = minfo end do deallocate(l_t_g_all, scatterv,stat=info) @@ -291,11 +292,11 @@ end subroutine psb_sscatterm ! subroutine psb_sscatterv(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_sscatterv -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -308,7 +309,7 @@ subroutine psb_sscatterv(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr + integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, k, pos, ilx, jlx integer(psb_lpk_) :: m, n, i, j, idx, iglobx, jglobx @@ -398,13 +399,13 @@ subroutine psb_sscatterv(globx, locx, desc_a, info, root) else rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if(iam == iroot) then displ(1)=0 do i=2,np @@ -436,8 +437,8 @@ subroutine psb_sscatterv(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo ! 
prepare vector to scatter if (iam == iroot) then do i=1,np @@ -451,9 +452,9 @@ subroutine psb_sscatterv(globx, locx, desc_a, info, root) end if call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_r_spk_,locx,nrow,& - & psb_mpi_r_spk_,rootrank,icomm,info) - + & psb_mpi_r_spk_,locx,nlr,& + & psb_mpi_r_spk_,rootrank,icomm,minfo) + info = minfo deallocate(l_t_g_all, scatterv,stat=info) if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/base/comm/psb_sspgather.F90 b/base/comm/psb_sspgather.F90 index 5678b676..7e822c87 100644 --- a/base/comm/psb_sspgather.F90 +++ b/base/comm/psb_sspgather.F90 @@ -33,8 +33,8 @@ ! ! Gathers a sparse matrix onto a single process. ! Two variants: -! 1. Gathers to PSB_s_SPARSE_MAT (i.e. to matrix with IPK_ indices) -! 2. Gathers to PSB_ls_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! 1. Gathers to PSB_s_SPARSE_MAT (i.e. to matrix with PSB_IPK_ indices) +! 2. Gathers to PSB_ls_SPARSE_MAT (i.e. to matrix with PSB_LPK_ indices) ! ! Note: this function uses MPI_ALLGATHERV. At this time, the size of the ! 
resulting matrix must be within the range of 4 bytes because of the @@ -48,11 +48,12 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_sspmat_type), intent(inout) :: loca @@ -62,7 +63,7 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_s_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_s_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_ipk_) :: nrg, ncg, nzg, nzl integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k @@ -156,27 +157,27 @@ subroutine psb_ssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_r_spk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_r_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_spk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_r_spk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_r_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_spk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & 
psb_mpi_lpk_,root_,icomm,minfo) @@ -231,11 +232,12 @@ subroutine psb_lssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_sspmat_type), intent(inout) :: loca @@ -245,7 +247,7 @@ subroutine psb_lssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_ls_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_ls_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -337,27 +339,27 @@ subroutine psb_lssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_r_spk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_r_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_spk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_r_spk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_r_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_spk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & 
glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if @@ -369,7 +371,7 @@ subroutine psb_lssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee call loc_coo%free() ! ! Is the code below safe? For very large cases - ! the indices in glob_coo will overflow. But then, + ! the indices in glob_coo will overflow. But then, ! for very large cases it does not make sense to ! gather the matrix on a single procecss anyway... ! @@ -403,11 +405,12 @@ subroutine psb_lslssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_lsspmat_type), intent(inout) :: loca @@ -417,7 +420,7 @@ subroutine psb_lslssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_ls_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_ls_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_lpk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -507,27 +510,27 @@ subroutine psb_lslssp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_r_spk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_r_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_spk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_r_spk_,& + call
mpi_gatherv((loc_coo%val),ndx,psb_mpi_r_spk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_r_spk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if diff --git a/base/comm/psb_zgather.f90 b/base/comm/psb_zgather.f90 index 53cba210..5cf445a9 100644 --- a/base/comm/psb_zgather.f90 +++ b/base/comm/psb_zgather.f90 @@ -58,10 +58,11 @@ subroutine psb_zgather_vect(globx, locx, desc_a, info, iroot) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank + integer(psb_mpk_) :: np, me, root, iiroot, icomm, myrank, rootrank, loc_rows integer(psb_ipk_) :: ierr(5), err_act, jlx, ilx, lda_locx, lda_globx, i integer(psb_lpk_) :: m, n, k, ilocx, jlocx, idx, iglobx, jglobx complex(psb_dpk_), allocatable :: llocx(:) + integer(psb_mpk_), allocatable :: szs(:) character(len=20) :: name, ch_err name='psb_zgatherv' @@ -125,32 +126,34 @@ subroutine psb_zgather_vect(globx, locx, desc_a, info, iroot) goto 9999 end if - call psb_realloc(m,globx,info) - if (info /= psb_success_) then - info=psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - globx(:) = zzero - llocx = locx%get_vect() - do i=1,desc_a%get_local_rows() - call psb_loc_to_glob(i,idx,desc_a,info) - globx(idx) = llocx(i) - end do - + llocx = locx%get_vect() ! 
adjust overlapped elements do i=1, size(desc_a%ovrlap_elem,1) if (me /= desc_a%ovrlap_elem(i,3)) then idx = desc_a%ovrlap_elem(i,1) - call psb_loc_to_glob(idx,desc_a,info) - globx(idx) = zzero + llocx(idx) = zzero end if end do - - call psb_sum(ctxt,globx(1:m),root=root) + allocate(szs(np)) + loc_rows = desc_a%get_local_rows() + call psb_gather(ctxt,loc_rows,szs,root=root) + if ((me == root).or.(root == -1)) then + if (sum(szs) /= m) then + info=psb_err_internal_error_ + call psb_errpush(info,name) + goto 9999 + end if + call psb_realloc(m,globx,info) + if (info /= psb_success_) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + end if + call psb_gatherv(ctxt,llocx(1:loc_rows),globx,szs,root=root) + call psb_erractionrestore(err_act) return diff --git a/base/comm/psb_zhalo_a.f90 b/base/comm/psb_zhalo_a.f90 index 4855592a..2acc2463 100644 --- a/base/comm/psb_zhalo_a.f90 +++ b/base/comm/psb_zhalo_a.f90 @@ -66,8 +66,8 @@ subroutine psb_zhalom(x,desc_a,info,jx,ik,work,tran,mode,data) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, k, maxk, nrow, imode, i,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, maxk, nrow, imode, i,& & liwork,data_, ldx integer(psb_lpk_) :: m, n, ix, ijx complex(psb_dpk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_zovrl_a.f90 b/base/comm/psb_zovrl_a.f90 index 6af46069..e7a87cef 100644 --- a/base/comm/psb_zovrl_a.f90 +++ b/base/comm/psb_zovrl_a.f90 @@ -77,8 +77,8 @@ subroutine psb_zovrlm(x,desc_a,info,jx,ik,work,update,mode) ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me - integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, k, maxk, update_,& + integer(psb_mpk_) :: np, me, k + integer(psb_ipk_) :: err_act, iix, jjx, nrow, ncol, maxk, update_,& & mode_, liwork, ldx integer(psb_lpk_) :: m, n, ix, ijx complex(psb_dpk_),pointer :: iwork(:), xp(:,:) diff --git a/base/comm/psb_zscatter_a.F90 b/base/comm/psb_zscatter_a.F90 index aaa684b6..d51dc82a 100644 --- a/base/comm/psb_zscatter_a.F90 +++ b/base/comm/psb_zscatter_a.F90 @@ -46,11 +46,11 @@ subroutine psb_zscatterm(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_zscatterm -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -63,7 +63,8 @@ subroutine psb_zscatterm(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam, nlr + integer(psb_mpk_) :: np, me, iroot, icomm, myrank, rootrank, iam,& + & nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, lock, globk, k, maxk, & & col,pos @@ -162,13 +163,13 @@ subroutine psb_zscatterm(globx, locx, desc_a, info, root) rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if (iam == iroot) then displ(1)=0 do i=2,np @@ -195,8 +196,8 @@ subroutine psb_zscatterm(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo do col=1, k ! 
prepare vector to scatter if(iam == iroot) then @@ -211,9 +212,9 @@ subroutine psb_zscatterm(globx, locx, desc_a, info, root) ! scatter call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_c_dpk_,locx(1,col),nrow,& - & psb_mpi_c_dpk_,rootrank,icomm,info) - + & psb_mpi_c_dpk_,locx(1,col),nlr,& + & psb_mpi_c_dpk_,rootrank,icomm,minfo) + info = minfo end do deallocate(l_t_g_all, scatterv,stat=info) @@ -291,11 +292,11 @@ end subroutine psb_zscatterm ! subroutine psb_zscatterv(globx, locx, desc_a, info, root) use psb_base_mod, psb_protect_name => psb_zscatterv -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -308,7 +309,7 @@ subroutine psb_zscatterv(globx, locx, desc_a, info, root) ! locals type(psb_ctxt_type) :: ctxt - integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr + integer(psb_mpk_) :: np, iam, iroot, iiroot, icomm, myrank, rootrank, nlr, minfo integer(psb_ipk_) :: ierr(5), err_act, nrow,& & ilocx, jlocx, lda_locx, lda_globx, k, pos, ilx, jlx integer(psb_lpk_) :: m, n, i, j, idx, iglobx, jglobx @@ -398,13 +399,13 @@ subroutine psb_zscatterv(globx, locx, desc_a, info, root) else rootrank = psb_get_mpi_rank(ctxt,iroot) ! - ! This is potentially unsafe when IPK=8 - ! But then, IPK=8 is highly experimental anyway. + ! This is potentially unsafe when PSB_IPK=8 + ! But then, PSB_IPK=8 is highly experimental anyway. ! nlr = nrow call mpi_gather(nlr,1,psb_mpi_mpk_,all_dim,& - & 1,psb_mpi_mpk_,rootrank,icomm,info) - + & 1,psb_mpi_mpk_,rootrank,icomm,minfo) + info = minfo if(iam == iroot) then displ(1)=0 do i=2,np @@ -436,8 +437,8 @@ subroutine psb_zscatterv(globx, locx, desc_a, info, root) call mpi_gatherv(ltg,nlr,& & psb_mpi_lpk_,l_t_g_all,all_dim,& - & displ,psb_mpi_lpk_,rootrank,icomm,info) - + & displ,psb_mpi_lpk_,rootrank,icomm,minfo) + info = minfo ! 
prepare vector to scatter if (iam == iroot) then do i=1,np @@ -451,9 +452,9 @@ subroutine psb_zscatterv(globx, locx, desc_a, info, root) end if call mpi_scatterv(scatterv,all_dim,displ,& - & psb_mpi_c_dpk_,locx,nrow,& - & psb_mpi_c_dpk_,rootrank,icomm,info) - + & psb_mpi_c_dpk_,locx,nlr,& + & psb_mpi_c_dpk_,rootrank,icomm,minfo) + info = minfo deallocate(l_t_g_all, scatterv,stat=info) if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/base/comm/psb_zspgather.F90 b/base/comm/psb_zspgather.F90 index 6b59caa8..1630f904 100644 --- a/base/comm/psb_zspgather.F90 +++ b/base/comm/psb_zspgather.F90 @@ -33,8 +33,8 @@ ! ! Gathers a sparse matrix onto a single process. ! Two variants: -! 1. Gathers to PSB_z_SPARSE_MAT (i.e. to matrix with IPK_ indices) -! 2. Gathers to PSB_lz_SPARSE_MAT (i.e. to matrix with LPK_ indices) +! 1. Gathers to PSB_z_SPARSE_MAT (i.e. to matrix with PSB_IPK_ indices) +! 2. Gathers to PSB_lz_SPARSE_MAT (i.e. to matrix with PSB_LPK_ indices) ! ! Note: this function uses MPI_ALLGATHERV. At this time, the size of the ! 
resulting matrix must be within the range of 4 bytes because of the @@ -48,11 +48,12 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_zspmat_type), intent(inout) :: loca @@ -62,7 +63,7 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_z_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_z_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_ipk_) :: nrg, ncg, nzg, nzl integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k @@ -156,27 +157,27 @@ subroutine psb_zsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,keep enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_c_dpk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_c_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_dpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_c_dpk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_c_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_dpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locia),ndx,psb_mpi_lpk_,& & glbia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(locja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((locja),ndx,psb_mpi_lpk_,& & glbja,nzbr,idisp,& & 
psb_mpi_lpk_,root_,icomm,minfo) @@ -231,11 +232,12 @@ subroutine psb_lzsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_zspmat_type), intent(inout) :: loca @@ -245,7 +247,7 @@ subroutine psb_lzsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_lz_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_lz_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_ipk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -337,27 +339,27 @@ subroutine psb_lzsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee enddo ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_c_dpk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_c_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_dpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_c_dpk_,& + call mpi_gatherv((loc_coo%val),ndx,psb_mpi_c_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_dpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & 
glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if @@ -369,7 +371,7 @@ subroutine psb_lzsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,kee call loc_coo%free() ! ! Is the code below safe? For very large cases - ! the indices in glob_coo will overflow. But then, + ! the indices in glob_coo will overflow. But then, ! for very large cases it does not make sense to ! gather the matrix on a single procecss anyway... ! @@ -403,11 +405,12 @@ subroutine psb_lzlzsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k use psb_penv_mod use psb_mat_mod use psb_tools_mod -#ifdef MPI_MOD + use iso_c_binding +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_lzspmat_type), intent(inout) :: loca @@ -417,7 +420,7 @@ subroutine psb_lzlzsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k integer(psb_ipk_), intent(in), optional :: root, dupl logical, intent(in), optional :: keepnum,keeploc - type(psb_lz_coo_sparse_mat) :: loc_coo, glob_coo + type(psb_lz_coo_sparse_mat), target :: loc_coo, glob_coo integer(psb_lpk_) :: nrg, ncg, nzg integer(psb_ipk_) :: err_act, dupl_ integer(psb_lpk_) :: ip,naggrm1,naggrp1, i, j, k, nzl @@ -507,27 +510,27 @@ subroutine psb_lzlzsp_allgather(globa, loca, desc_a, info, root, dupl,keepnum,k ndx = nzbr(me+1) if (root_ == -1) then - call mpi_allgatherv(loc_coo%val,ndx,psb_mpi_c_dpk_,& + call mpi_allgatherv((loc_coo%val),ndx,psb_mpi_c_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_dpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_allgatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_allgatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,icomm,minfo) else - call mpi_gatherv(loc_coo%val,ndx,psb_mpi_c_dpk_,& + call
mpi_gatherv((loc_coo%val),ndx,psb_mpi_c_dpk_,& & glob_coo%val,nzbr,idisp,& & psb_mpi_c_dpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ia,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ia),ndx,psb_mpi_lpk_,& & glob_coo%ia,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) if (minfo == psb_success_) call & - & mpi_gatherv(loc_coo%ja,ndx,psb_mpi_lpk_,& + & mpi_gatherv((loc_coo%ja),ndx,psb_mpi_lpk_,& & glob_coo%ja,nzbr,idisp,& & psb_mpi_lpk_,root_,icomm,minfo) end if diff --git a/base/internals/psi_a2a_fnd_owner.F90 b/base/internals/psi_a2a_fnd_owner.F90 index edc160d0..9c7a2e65 100644 --- a/base/internals/psi_a2a_fnd_owner.F90 +++ b/base/internals/psi_a2a_fnd_owner.F90 @@ -55,12 +55,12 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info,samesize) use psb_penv_mod use psb_realloc_mod use psb_indx_map_mod, psb_protect_name => psi_a2a_fnd_owner -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_lpk_), intent(in) :: idx(:) @@ -138,7 +138,7 @@ subroutine psi_a2a_fnd_owner(idx,iprc,idxmap,info,samesize) call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') goto 9999 end if -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) iprc(:) = 0 #else call mpi_allgather(idx,nv,psb_mpi_lpk_,rmtidx,nv,psb_mpi_lpk_,icomm,minfo) diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index 639cdb5d..4af37493 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -61,12 +61,12 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) use psb_realloc_mod use psb_timers_mod use psb_indx_map_mod, psb_protect_name => psi_adjcncy_fnd_owner -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_lpk_), intent(in) :: idx(:) @@ -81,13 +81,13 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) 
integer(psb_mpk_), allocatable :: hsz(:),hidx(:), sdidx(:), rvidx(:),& & sdsz(:), rvsz(:), sdhd(:), rvhd(:), p2pstat(:,:) integer(psb_mpk_) :: prc, p2ptag, iret - integer(psb_mpk_) :: icomm, minfo - integer(psb_ipk_) :: i,n_row,n_col,err_act,hsize,ip,isz,j, k,& - & last_ih, last_j, nidx, nrecv, nadj + integer(psb_mpk_) :: icomm, minfo, ip,nidx + integer(psb_ipk_) :: n_row,n_col,err_act,hsize,isz,j, k,& + & last_ih, last_j, nrecv, nadj integer(psb_lpk_) :: mglob, ih type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np,me - logical, parameter :: gettime=.true., debug=.false. + integer(psb_mpk_) :: np,me + logical, parameter :: debug=.false. integer(psb_mpk_) :: xchg_alg logical, parameter :: do_timings=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 @@ -132,10 +132,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) goto 9999 end if - if (gettime) then - t0 = psb_wtime() - end if - nadj = size(adj) nidx = size(idx) call psb_realloc(nidx,iprc,info) @@ -143,7 +139,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) call psb_errpush(psb_err_from_subroutine_,name,a_err='psb_realloc') goto 9999 end if -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) iprc(:) = 0 #else iprc = -1 @@ -180,8 +176,8 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) if (do_timings) call psb_toc(idx_phase11) if (do_timings) call psb_tic(idx_phase12) rvidx(0) = 0 - do i=0, np-1 - rvidx(i+1) = rvidx(i) + rvsz(i) + do ip=0, np-1 + rvidx(ip+1) = rvidx(ip) + rvsz(ip) end do hsize = rvidx(np) @@ -208,9 +204,9 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! Third, compute local answers ! call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) - do i=1, hsize - tproc(i) = -1 - if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + do ip=1, hsize + tproc(ip) = -1 + if ((0 < lclidx(ip)).and. 
(lclidx(ip) <= n_row)) tproc(ip) = me end do if (do_timings) call psb_toc(idx_phase2) if (do_timings) call psb_tic(idx_phase3) @@ -219,8 +215,8 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! Fourth, exchange the answers ! ! Adjust sdidx for reuse in receiving lclidx array - do i=0,np-1 - sdidx(i+1) = sdidx(i) + sdsz(i) + do ip=0,np-1 + sdidx(ip+1) = sdidx(ip) + sdsz(ip) end do call mpi_alltoallv(tproc,rvsz,rvidx,psb_mpi_ipk_,& & lclidx,sdsz,sdidx,psb_mpi_ipk_,icomm,iret) @@ -229,10 +225,10 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! Because IPRC has been initialized to -1, the MAX operation selects ! the answers. ! - do i=0, np-1 - if (sdsz(i)>0) then + do ip=0, np-1 + if (sdsz(ip)>0) then ! Must be nidx == sdsz(i) - iprc(1:nidx) = max(iprc(1:nidx), lclidx(sdidx(i)+1:sdidx(i)+sdsz(i))) + iprc(1:nidx) = max(iprc(1:nidx), lclidx(sdidx(ip)+1:sdidx(ip)+sdsz(ip))) end if end do if (do_timings) call psb_toc(idx_phase3) @@ -266,8 +262,8 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& & rvsz,1,psb_mpi_mpk_,icomm,minfo) hidx(0) = 0 - do i=0, np-1 - hidx(i+1) = hidx(i) + rvsz(i) + do ip=0, np-1 + hidx(ip+1) = hidx(ip) + rvsz(ip) end do hsize = hidx(np) ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) @@ -280,22 +276,23 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') goto 9999 end if - do i = 0, np-1 - if (rvsz(i)>0) then + do ip = 0, np-1 + if (rvsz(ip)>0) then ! 
write(0,*) me, ' First receive from ',i,rvsz(i) - prc = psb_get_mpi_rank(ctxt,i) + prc = psb_get_mpi_rank(ctxt,ip) p2ptag = psb_long_swap_tag !write(0,*) me, ' Posting first receive from ',i,rvsz(i),prc - call mpi_irecv(rmtidx(hidx(i)+1),rvsz(i),& + call mpi_irecv(rmtidx(hidx(ip)+1),rvsz(ip),& & psb_mpi_lpk_,prc,& - & p2ptag, icomm,rvhd(i),iret) + & p2ptag, icomm,rvhd(ip),iret) end if end do if (do_timings) call psb_toc(idx_phase11) if (do_timings) call psb_tic(idx_phase12) do j=1, nadj if (nidx > 0) then - prc = psb_get_mpi_rank(ctxt,adj(j)) + ip = adj(j) + prc = psb_get_mpi_rank(ctxt,ip) p2ptag = psb_long_swap_tag !write(0,*) me, ' First send to ',adj(j),nidx, prc call mpi_send(idx,nidx,& @@ -314,9 +311,9 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! Third, compute local answers ! call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) - do i=1, hsize - tproc(i) = -1 - if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + do ip=1, hsize + tproc(ip) = -1 + if ((0 < lclidx(ip)).and. (lclidx(ip) <= n_row)) tproc(ip) = me end do if (do_timings) call psb_toc(idx_phase2) if (do_timings) call psb_tic(idx_phase3) @@ -327,7 +324,8 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) do j=1, nadj !write(0,*) me, ' First send to ',adj(j),nidx if (nidx > 0) then - prc = psb_get_mpi_rank(ctxt,adj(j)) + ip = adj(j) + prc = psb_get_mpi_rank(ctxt,ip) p2ptag = psb_int_swap_tag !write(0,*) me, ' Posting second receive from ',adj(j),nidx, prc call mpi_irecv(lclidx((j-1)*nidx+1),nidx, & @@ -339,12 +337,12 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! ! Fourth, send data back; ! 
- do i = 0, np-1 - if (rvsz(i)>0) then - prc = psb_get_mpi_rank(ctxt,i) + do ip = 0, np-1 + if (rvsz(ip)>0) then + prc = psb_get_mpi_rank(ctxt,ip) p2ptag = psb_int_swap_tag !write(0,*) me, ' Second send to ',i,rvsz(i), prc - call mpi_send(tproc(hidx(i)+1),rvsz(i),& + call mpi_send(tproc(hidx(ip)+1),rvsz(ip),& & psb_mpi_ipk_,prc,& & p2ptag, icomm,iret) end if @@ -376,8 +374,8 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) call mpi_alltoall(sdsz,1,psb_mpi_mpk_,& & rvsz,1,psb_mpi_mpk_,icomm,minfo) hidx(0) = 0 - do i=0, np-1 - hidx(i+1) = hidx(i) + rvsz(i) + do ip=0, np-1 + hidx(ip+1) = hidx(ip) + rvsz(ip) end do hsize = hidx(np) ! write(0,*)me,' Check on sizes from a2a:',hsize,rvsz(:) @@ -392,12 +390,13 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) end if do j=1, nadj !write(0,*) me, ' First send to ',adj(j),nidx - if (nidx > 0) call psb_snd(ctxt,idx(1:nidx),adj(j)) + ip = adj(j) + if (nidx > 0) call psb_snd(ctxt,idx(1:nidx),ip) end do - do i = 0, np-1 - if (rvsz(i)>0) then + do ip = 0, np-1 + if (rvsz(ip)>0) then ! write(0,*) me, ' First receive from ',i,rvsz(i) - call psb_rcv(ctxt,rmtidx(hidx(i)+1:hidx(i)+rvsz(i)),i) + call psb_rcv(ctxt,rmtidx(hidx(ip)+1:hidx(ip)+rvsz(ip)),ip) end if end do @@ -405,18 +404,18 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! Third, compute local answers ! call idxmap%g2l(rmtidx(1:hsize),lclidx(1:hsize),info,owned=.true.) - do i=1, hsize - tproc(i) = -1 - if ((0 < lclidx(i)).and. (lclidx(i) <= n_row)) tproc(i) = me + do ip=1, hsize + tproc(ip) = -1 + if ((0 < lclidx(ip)).and. (lclidx(ip) <= n_row)) tproc(ip) = me end do ! ! Fourth, send data back; ! - do i = 0, np-1 - if (rvsz(i)>0) then + do ip = 0, np-1 + if (rvsz(ip)>0) then !write(0,*) me, ' Second send to ',i,rvsz(i) - call psb_snd(ctxt,tproc(hidx(i)+1:hidx(i)+rvsz(i)),i) + call psb_snd(ctxt,tproc(hidx(ip)+1:hidx(ip)+rvsz(ip)),ip) end if end do ! @@ -424,8 +423,9 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) ! 
answer is -1. Reuse tproc ! do j = 1, nadj - !write(0,*) me, ' Second receive from ',adj(j), nidx - if (nidx > 0) call psb_rcv(ctxt,tproc(1:nidx),adj(j)) + !write(0,*) me, ' Second receive from ',adj(j), nidx + ip = adj(j) + if (nidx > 0) call psb_rcv(ctxt,tproc(1:nidx),ip) iprc(1:nidx) = max(iprc(1:nidx), tproc(1:nidx)) end do case default diff --git a/base/internals/psi_bld_glb_dep_list.F90 b/base/internals/psi_bld_glb_dep_list.F90 index f43e5f17..ea5349c4 100644 --- a/base/internals/psi_bld_glb_dep_list.F90 +++ b/base/internals/psi_bld_glb_dep_list.F90 @@ -31,7 +31,7 @@ ! subroutine psi_i_bld_glb_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) use psi_mod, psb_protect_name => psi_i_bld_glb_dep_list -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_penv_mod @@ -40,13 +40,15 @@ subroutine psi_i_bld_glb_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) use psb_desc_mod use psb_sort_mod implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif ! ....scalar parameters... type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_ipk_), intent(in) :: loc_dl(:), length_dl(0:) - integer(psb_ipk_), allocatable, intent(out) :: c_dep_list(:), dl_ptr(:) + integer(psb_ipk_), intent(in) :: loc_dl(:) + integer(psb_mpk_), intent(in) :: length_dl(0:) + integer(psb_mpk_), allocatable, intent(out) :: dl_ptr(:) + integer(psb_ipk_), allocatable, intent(out) :: c_dep_list(:) integer(psb_ipk_), intent(out) :: info @@ -54,10 +56,11 @@ subroutine psi_i_bld_glb_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) integer(psb_ipk_) :: int_err(5) ! .....local scalars... - integer(psb_ipk_) :: i, proc,j,err_act, length, myld + integer(psb_mpk_) :: myld + integer(psb_ipk_) :: i, proc,j,err_act, length integer(psb_ipk_) :: err integer(psb_ipk_) :: debug_level, debug_unit - integer(psb_ipk_) :: me, np + integer(psb_mpk_) :: me, np integer(psb_mpk_) :: icomm, minfo logical, parameter :: dist_symm_list=.false., print_dl=.false. 
character name*20 diff --git a/base/internals/psi_crea_index.f90 b/base/internals/psi_crea_index.f90 index 7e749288..bbef054e 100644 --- a/base/internals/psi_crea_index.f90 +++ b/base/internals/psi_crea_index.f90 @@ -67,8 +67,8 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: me, np, mode, err_act, dl_lda, ldl ! ...parameters... - integer(psb_ipk_), allocatable :: length_dl(:), loc_dl(:),& - & c_dep_list(:), dl_ptr(:) + integer(psb_mpk_), allocatable :: length_dl(:), dl_ptr(:) + integer(psb_ipk_), allocatable :: loc_dl(:), c_dep_list(:) integer(psb_ipk_) :: dlmax, dlavg integer(psb_ipk_),parameter :: root=psb_root_,no_comm=-1 integer(psb_ipk_) :: debug_level, debug_unit @@ -132,7 +132,7 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) if (do_timings) call psb_toc(idx_phase21) if (do_timings) call psb_tic(idx_phase22) - call psi_sort_dl(dl_ptr,c_dep_list,length_dl,ctxt,info) + call psi_i_csr_sort_dl(dl_ptr,c_dep_list,length_dl,ctxt,info) if (info /= 0) then write(0,*) me,trim(name),' From sort_dl ',info end if diff --git a/base/internals/psi_desc_index.F90 b/base/internals/psi_desc_index.F90 index 35c8d921..ec16afbe 100644 --- a/base/internals/psi_desc_index.F90 +++ b/base/internals/psi_desc_index.F90 @@ -101,14 +101,14 @@ subroutine psi_i_desc_index(desc,index_in,dep_list,& use psb_realloc_mod use psb_error_mod use psb_const_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_penv_mod use psb_timers_mod use psi_mod, psb_protect_name => psi_i_desc_index implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -119,7 +119,8 @@ subroutine psi_i_desc_index(desc,index_in,dep_list,& integer(psb_ipk_),allocatable :: desc_index(:) integer(psb_ipk_) :: length_dl,nsnd,nrcv,info ! ....local scalars... - integer(psb_ipk_) :: j,me,np,i,proc + integer(psb_mpk_) :: me,np,proc + integer(psb_ipk_) :: j,i ! ...parameters... 
type(psb_ctxt_type) :: ctxt integer(psb_ipk_), parameter :: no_comm=-1 @@ -137,7 +138,7 @@ subroutine psi_i_desc_index(desc,index_in,dep_list,& & idxr, idxs, iszs, iszr, nesd, nerv, ixp, idx integer(psb_mpk_) :: icomm, minfo - logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false. + logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1, idx_phase4=-1 logical, parameter :: usempi=.false. integer(psb_ipk_) :: debug_level, debug_unit diff --git a/base/internals/psi_fnd_owner.F90 b/base/internals/psi_fnd_owner.F90 index 7f111a03..f19b4b1f 100644 --- a/base/internals/psi_fnd_owner.F90 +++ b/base/internals/psi_fnd_owner.F90 @@ -54,12 +54,12 @@ subroutine psi_fnd_owner(nv,idx,iprc,desc,info) use psb_penv_mod use psb_realloc_mod use psi_mod, psb_protect_name => psi_fnd_owner -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_ipk_), intent(in) :: nv diff --git a/base/internals/psi_graph_fnd_owner.F90 b/base/internals/psi_graph_fnd_owner.F90 index e5eed8c6..de5d5915 100644 --- a/base/internals/psi_graph_fnd_owner.F90 +++ b/base/internals/psi_graph_fnd_owner.F90 @@ -86,12 +86,12 @@ subroutine psi_graph_fnd_owner(idx,iprc,ladj,idxmap,info) use psb_realloc_mod use psb_timers_mod use psb_desc_mod, psb_protect_name => psi_graph_fnd_owner -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_lpk_), intent(in) :: idx(:) @@ -152,7 +152,7 @@ subroutine psi_graph_fnd_owner(idx,iprc,ladj,idxmap,info) ! nv = size(idx) call psb_realloc(nv,iprc,info) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) iprc(:) = 0 #else if (info == psb_success_) call psb_realloc(nv,tidx,info) @@ -237,7 +237,7 @@ subroutine psi_graph_fnd_owner(idx,iprc,ladj,idxmap,info) ! Choose a sample, should it be done in this simplistic way? ! 
Note: nsampl_in is a hint, not an absolute, hence nsampl_out ! - call psi_get_sample(1,idx,iprc,tidx,tsmpl,iend,nsampl_in,nsampl_out) + call psi_get_sample(ione,idx,iprc,tidx,tsmpl,iend,nsampl_in,nsampl_out) nsampl = min(nsampl_out,nsampl_in) if (debugsz) write(0,*) me,' From first sampling ',nsampl_in ! @@ -291,7 +291,7 @@ subroutine psi_graph_fnd_owner(idx,iprc,ladj,idxmap,info) 9999 call psb_error_handler(ctxt,err_act) return -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) contains diff --git a/base/internals/psi_indx_map_fnd_owner.F90 b/base/internals/psi_indx_map_fnd_owner.F90 index 157b73a1..30777cdf 100644 --- a/base/internals/psi_indx_map_fnd_owner.F90 +++ b/base/internals/psi_indx_map_fnd_owner.F90 @@ -58,12 +58,12 @@ subroutine psi_indx_map_fnd_owner(idx,iprc,idxmap,info,adj) use psb_penv_mod use psb_realloc_mod use psb_indx_map_mod, psb_protect_name => psi_indx_map_fnd_owner -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_lpk_), intent(in) :: idx(:) diff --git a/base/internals/psi_sort_dl.f90 b/base/internals/psi_sort_dl.f90 index ef3ac74d..a5c0b374 100644 --- a/base/internals/psi_sort_dl.f90 +++ b/base/internals/psi_sort_dl.f90 @@ -84,8 +84,8 @@ subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,ctxt,info) use psb_sort_mod implicit none - integer(psb_ipk_), intent(in) :: dl_ptr(0:) - integer(psb_ipk_), intent(inout) :: c_dep_list(:), l_dep_list(0:) + integer(psb_mpk_), intent(in) :: dl_ptr(0:), l_dep_list(0:) + integer(psb_ipk_), intent(inout) :: c_dep_list(:) type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info ! 
Local variables diff --git a/base/internals/psi_symm_dep_list.F90 b/base/internals/psi_symm_dep_list.F90 index 728ee832..f88eda91 100644 --- a/base/internals/psi_symm_dep_list.F90 +++ b/base/internals/psi_symm_dep_list.F90 @@ -44,12 +44,12 @@ subroutine psi_symm_dep_list_inrv(rvsz,adj,ctxt,info) use psb_penv_mod use psb_realloc_mod use psb_indx_map_mod, psb_protect_name => psi_symm_dep_list_inrv -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_mpk_), intent(inout) :: rvsz(0:) @@ -123,12 +123,12 @@ subroutine psi_symm_dep_list_norv(adj,ctxt,info) use psb_penv_mod use psb_realloc_mod use psb_indx_map_mod, psb_protect_name => psi_symm_dep_list_norv -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_ipk_), allocatable, intent(inout) :: adj(:) diff --git a/base/internals/psi_xtr_loc_dl.F90 b/base/internals/psi_xtr_loc_dl.F90 index c920ade1..26751dc9 100644 --- a/base/internals/psi_xtr_loc_dl.F90 +++ b/base/internals/psi_xtr_loc_dl.F90 @@ -109,7 +109,7 @@ subroutine psi_i_xtr_loc_dl(ctxt,is_bld,is_upd,desc_str,loc_dl,length_dl,info) ! dependence list of current process ! use psi_mod, psb_protect_name => psi_i_xtr_loc_dl -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_penv_mod @@ -118,14 +118,15 @@ subroutine psi_i_xtr_loc_dl(ctxt,is_bld,is_upd,desc_str,loc_dl,length_dl,info) use psb_desc_mod use psb_sort_mod implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif ! ....scalar parameters... logical, intent(in) :: is_bld, is_upd type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(in) :: desc_str(:) - integer(psb_ipk_), allocatable, intent(out) :: loc_dl(:), length_dl(:) + integer(psb_ipk_), allocatable, intent(out) :: loc_dl(:) + integer(psb_mpk_), allocatable, intent(out) :: length_dl(:) integer(psb_ipk_), intent(out) :: info ! .....local arrays.... 
integer(psb_ipk_) :: int_err(5) diff --git a/base/modules/Makefile b/base/modules/Makefile index 9c14dab8..dd13cb2b 100644 --- a/base/modules/Makefile +++ b/base/modules/Makefile @@ -10,13 +10,14 @@ BASIC_MODS= psb_const_mod.o psb_cbind_const_mod.o psb_error_mod.o psb_realloc_mo auxil/psb_z_realloc_mod.o COMMINT= penv/psi_penv_mod.o \ - penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o \ + penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_i2_p2p_mod.o \ penv/psi_e_p2p_mod.o \ penv/psi_s_p2p_mod.o \ penv/psi_d_p2p_mod.o \ penv/psi_c_p2p_mod.o \ penv/psi_z_p2p_mod.o \ penv/psi_collective_mod.o \ + penv/psi_i2_collective_mod.o \ penv/psi_e_collective_mod.o \ penv/psi_m_collective_mod.o \ penv/psi_s_collective_mod.o \ @@ -115,6 +116,7 @@ UTIL_MODS = desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\ MODULES=$(BASIC_MODS) $(SERIAL_MODS) $(UTIL_MODS) OBJS = error.o psb_base_mod.o $(EXTRA_COBJS) cutil.o MODDIR=../../modules +INCDIR=../../include LIBDIR=../ CINCLUDES=-I. FINCLUDES=$(FMFLAG)$(LIBDIR) $(FMFLAG). $(FIFLAG). @@ -122,6 +124,7 @@ FINCLUDES=$(FMFLAG)$(LIBDIR) $(FMFLAG). $(FIFLAG). 
objs: $(MODULES) $(OBJS) $(MPFOBJS) /bin/cp -p $(CPUPDFLAG) *$(.mod) $(MODDIR) + /bin/cp -p $(CPUPDFLAG) psb_config.h psb_types.h $(INCDIR) lib: objs $(LIBDIR)/$(LIBNAME) @@ -149,23 +152,25 @@ psb_realloc_mod.o: auxil/psb_m_realloc_mod.o \ auxil/psb_c_realloc_mod.o \ auxil/psb_z_realloc_mod.o -penv/psi_p2p_mod.o: penv/psi_m_p2p_mod.o \ - penv/psi_e_p2p_mod.o \ - penv/psi_s_p2p_mod.o \ - penv/psi_d_p2p_mod.o \ - penv/psi_c_p2p_mod.o \ - penv/psi_z_p2p_mod.o -penv/psi_collective_mod.o: penv/psi_e_collective_mod.o \ - penv/psi_m_collective_mod.o \ - penv/psi_s_collective_mod.o \ - penv/psi_d_collective_mod.o \ - penv/psi_c_collective_mod.o \ - penv/psi_z_collective_mod.o - -penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \ +penv/psi_p2p_mod.o: penv/psi_i2_p2p_mod.o \ + penv/psi_m_p2p_mod.o \ + penv/psi_e_p2p_mod.o \ + penv/psi_s_p2p_mod.o \ + penv/psi_d_p2p_mod.o \ + penv/psi_c_p2p_mod.o \ + penv/psi_z_p2p_mod.o +penv/psi_collective_mod.o: penv/psi_i2_collective_mod.o \ + penv/psi_e_collective_mod.o \ + penv/psi_m_collective_mod.o \ + penv/psi_s_collective_mod.o \ + penv/psi_d_collective_mod.o \ + penv/psi_c_collective_mod.o \ + penv/psi_z_collective_mod.o + +penv/psi_i2_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \ penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o penv/psi_z_p2p_mod.o: penv/psi_penv_mod.o -penv/psi_e_collective_mod.o penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \ +penv/psi_i2_collective_mod.o penv/psi_e_collective_mod.o penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \ penv/psi_d_collective_mod.o penv/psi_c_collective_mod.o penv/psi_z_collective_mod.o: penv/psi_penv_mod.o \ penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \ penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o penv/psi_z_p2p_mod.o @@ -319,7 +324,7 @@ desc/psb_hash_map_mod.o desc/psb_list_map_mod.o desc/psb_repl_map_mod.o desc/psb desc/psb_indx_map_mod.o desc/psb_desc_const_mod.o \ auxil/psb_sort_mod.o psb_penv_mod.o 
desc/psb_glist_map_mod.o: desc/psb_list_map_mod.o -desc/psb_hash_map_mod.o: desc/psb_hash_mod.o auxil/psb_sort_mod.o +desc/psb_hash_map_mod.o: desc/psb_hash_mod.o auxil/psb_sort_mod.o psb_timers_mod.o desc/psb_gen_block_map_mod.o: desc/psb_hash_mod.o desc/psb_hash_mod.o: psb_cbind_const_mod.o psb_cbind_const_mod.o: psb_const_mod.o @@ -412,7 +417,8 @@ penv/psi_collective_mod.o: penv/psi_collective_mod.F90 $(BASIC_MODS) $(FC) $(FINCLUDES) $(FDEFINES) $(FCOPT) $(EXTRA_OPT) -c $< -o $@ clean: - /bin/rm -f $(MODULES) $(OBJS) $(MPFOBJS) *$(.mod) + /bin/rm -f $(MODULES) $(OBJS) $(MPFOBJS) *$(.mod) veryclean: clean + /bin/rm -f *.h diff --git a/base/modules/auxil/psb_c_realloc_mod.F90 b/base/modules/auxil/psb_c_realloc_mod.F90 index 9b22bee7..16be3183 100644 --- a/base/modules/auxil/psb_c_realloc_mod.F90 +++ b/base/modules/auxil/psb_c_realloc_mod.F90 @@ -154,30 +154,32 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_m_c_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='complex(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='complex(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_c_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & + & a_err='complex(psb_spk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(dim,len) do 
i=lb_-1+dim+1,lb_-1+len @@ -239,7 +241,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_m_c_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -248,27 +252,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='complex(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='complex(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_c_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & + & a_err='complex(psb_spk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(lb1_,dim,len1) do i=lb1_-1+dim+1,lb1_-1+len1 @@ -325,30 +328,33 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_e_c_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='complex(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - 
tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='complex(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_c_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len/), & + & a_err='complex(psb_spk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb_-1+dim+1:lb_-1+len) = pad endif @@ -407,7 +413,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_e_c_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -416,27 +424,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='complex(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='complex(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_c_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/(len1*len2)/), & + & a_err='complex(psb_spk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -498,7 +505,9 @@ 
Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_me_c_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -507,27 +516,28 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='complex(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name,e_err=(/len1*len2/),& - & a_err='complex(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_me_c_rk2) +#endif + + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='complex(psb_spk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -589,7 +599,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_em_c_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -598,27 +610,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='complex(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & 
rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='complex(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_em_c_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='complex(psb_spk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -715,8 +726,6 @@ Contains End Subroutine psb_r_e_2_c_rk1 - - subroutine psb_ab_cpy_c_s(vin,vout,info) use psb_error_mod @@ -999,8 +1008,9 @@ Contains isz = psb_size(v) If (len > isz) Then -#if defined(OPENMP) - !$OMP CRITICAL +#if defined(PSB_OPENMP) + !$omp critical(m_sz_c_rk1) + isz = psb_size(v) if (len > isz) then if (present(newsz)) then isz = max(len+1,1,newsz) @@ -1012,7 +1022,9 @@ Contains call psb_realloc(isz,v,info,pad=pad) end if - !$OMP END CRITICAL + if (info /= psb_success_) & + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(m_sz_c_rk1) if (info /= psb_success_) then info=psb_err_from_subroutine_ @@ -1028,7 +1040,6 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) end if @@ -1075,6 +1086,28 @@ Contains end if isz = psb_size(v) If (len > isz) Then +#if defined(PSB_OPENMP) + !$omp critical(e_sz_c_rk1) + isz = psb_size(v) + If (len > isz) Then + if (present(newsz)) then + isz = max(len+1,1,newsz) + else if (present(addsz)) then + isz = max(len,1,isz+addsz) + else + isz = max(len,1,int(1.25*isz)) + endif + call psb_realloc(isz,v,info,pad=pad) + end If + if (info /= 
psb_success_)& + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(e_sz_c_rk1) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_realloc') + goto 9999 + End If +#else if (present(newsz)) then isz = max(len+1,1,newsz) else if (present(addsz)) then @@ -1082,13 +1115,13 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_realloc') goto 9999 End If +#endif end If call psb_erractionrestore(err_act) diff --git a/base/modules/auxil/psb_d_realloc_mod.F90 b/base/modules/auxil/psb_d_realloc_mod.F90 index ca85e0ec..ae093a4a 100644 --- a/base/modules/auxil/psb_d_realloc_mod.F90 +++ b/base/modules/auxil/psb_d_realloc_mod.F90 @@ -154,30 +154,32 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_m_d_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='real(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='real(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_d_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & + & a_err='real(psb_dpk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(dim,len) do i=lb_-1+dim+1,lb_-1+len @@ -239,7 +241,9 @@ 
Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_m_d_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -248,27 +252,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='real(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='real(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_d_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & + & a_err='real(psb_dpk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(lb1_,dim,len1) do i=lb1_-1+dim+1,lb1_-1+len1 @@ -325,30 +328,33 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_e_d_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='real(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call 
psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='real(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_d_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len/), & + & a_err='real(psb_dpk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb_-1+dim+1:lb_-1+len) = pad endif @@ -407,7 +413,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_e_d_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -416,27 +424,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='real(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='real(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_d_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/(len1*len2)/), & + & a_err='real(psb_dpk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -498,7 +505,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_me_d_rk2) 
+#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -507,27 +516,28 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='real(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name,e_err=(/len1*len2/),& - & a_err='real(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_me_d_rk2) +#endif + + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='real(psb_dpk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -589,7 +599,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_em_d_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -598,27 +610,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='real(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - 
tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='real(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_em_d_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='real(psb_dpk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -715,8 +726,6 @@ Contains End Subroutine psb_r_e_2_d_rk1 - - subroutine psb_ab_cpy_d_s(vin,vout,info) use psb_error_mod @@ -999,8 +1008,9 @@ Contains isz = psb_size(v) If (len > isz) Then -#if defined(OPENMP) - !$OMP CRITICAL +#if defined(PSB_OPENMP) + !$omp critical(m_sz_d_rk1) + isz = psb_size(v) if (len > isz) then if (present(newsz)) then isz = max(len+1,1,newsz) @@ -1012,7 +1022,9 @@ Contains call psb_realloc(isz,v,info,pad=pad) end if - !$OMP END CRITICAL + if (info /= psb_success_) & + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(m_sz_d_rk1) if (info /= psb_success_) then info=psb_err_from_subroutine_ @@ -1028,7 +1040,6 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) end if @@ -1075,6 +1086,28 @@ Contains end if isz = psb_size(v) If (len > isz) Then +#if defined(PSB_OPENMP) + !$omp critical(e_sz_d_rk1) + isz = psb_size(v) + If (len > isz) Then + if (present(newsz)) then + isz = max(len+1,1,newsz) + else if (present(addsz)) then + isz = max(len,1,isz+addsz) + else + isz = max(len,1,int(1.25*isz)) + endif + call psb_realloc(isz,v,info,pad=pad) + end If + if (info /= psb_success_)& + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(e_sz_d_rk1) + if (info /= 
psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_realloc') + goto 9999 + End If +#else if (present(newsz)) then isz = max(len+1,1,newsz) else if (present(addsz)) then @@ -1082,13 +1115,13 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_realloc') goto 9999 End If +#endif end If call psb_erractionrestore(err_act) diff --git a/base/modules/auxil/psb_e_realloc_mod.F90 b/base/modules/auxil/psb_e_realloc_mod.F90 index 06a6d034..688753d8 100644 --- a/base/modules/auxil/psb_e_realloc_mod.F90 +++ b/base/modules/auxil/psb_e_realloc_mod.F90 @@ -154,30 +154,32 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_m_e_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='integer(psb_epk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='integer(psb_epk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_e_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & + & a_err='integer(psb_epk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(dim,len) do i=lb_-1+dim+1,lb_-1+len @@ -239,7 +241,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_m_e_rk2) +#endif if 
(allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -248,27 +252,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='integer(psb_epk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='integer(psb_epk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_e_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & + & a_err='integer(psb_epk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(lb1_,dim,len1) do i=lb1_-1+dim+1,lb1_-1+len1 @@ -325,30 +328,33 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_e_e_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='integer(psb_epk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= 
psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='integer(psb_epk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_e_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len/), & + & a_err='integer(psb_epk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb_-1+dim+1:lb_-1+len) = pad endif @@ -407,7 +413,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_e_e_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -416,27 +424,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='integer(psb_epk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='integer(psb_epk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_e_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/(len1*len2)/), & + & a_err='integer(psb_epk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -498,7 +505,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_me_e_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -507,27 
+516,28 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='integer(psb_epk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name,e_err=(/len1*len2/),& - & a_err='integer(psb_epk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_me_e_rk2) +#endif + + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='integer(psb_epk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -589,7 +599,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_em_e_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -598,27 +610,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='integer(psb_epk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & 
rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='integer(psb_epk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_em_e_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='integer(psb_epk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -715,8 +726,6 @@ Contains End Subroutine psb_r_e_2_e_rk1 - - subroutine psb_ab_cpy_e_s(vin,vout,info) use psb_error_mod @@ -999,8 +1008,9 @@ Contains isz = psb_size(v) If (len > isz) Then -#if defined(OPENMP) - !$OMP CRITICAL +#if defined(PSB_OPENMP) + !$omp critical(m_sz_e_rk1) + isz = psb_size(v) if (len > isz) then if (present(newsz)) then isz = max(len+1,1,newsz) @@ -1012,7 +1022,9 @@ Contains call psb_realloc(isz,v,info,pad=pad) end if - !$OMP END CRITICAL + if (info /= psb_success_) & + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(m_sz_e_rk1) if (info /= psb_success_) then info=psb_err_from_subroutine_ @@ -1028,7 +1040,6 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) end if @@ -1075,6 +1086,28 @@ Contains end if isz = psb_size(v) If (len > isz) Then +#if defined(PSB_OPENMP) + !$omp critical(e_sz_e_rk1) + isz = psb_size(v) + If (len > isz) Then + if (present(newsz)) then + isz = max(len+1,1,newsz) + else if (present(addsz)) then + isz = max(len,1,isz+addsz) + else + isz = max(len,1,int(1.25*isz)) + endif + call psb_realloc(isz,v,info,pad=pad) + end If + if (info /= psb_success_)& + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(e_sz_e_rk1) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call 
psb_errpush(info,name,a_err='psb_realloc') + goto 9999 + End If +#else if (present(newsz)) then isz = max(len+1,1,newsz) else if (present(addsz)) then @@ -1082,13 +1115,13 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_realloc') goto 9999 End If +#endif end If call psb_erractionrestore(err_act) diff --git a/base/modules/auxil/psb_i2_realloc_mod.F90 b/base/modules/auxil/psb_i2_realloc_mod.F90 index 146bdf7e..8d14f563 100644 --- a/base/modules/auxil/psb_i2_realloc_mod.F90 +++ b/base/modules/auxil/psb_i2_realloc_mod.F90 @@ -154,30 +154,32 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_m_i2_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='integer(psb_i2pk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='integer(psb_i2pk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_i2_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & + & a_err='integer(psb_i2pk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(dim,len) do i=lb_-1+dim+1,lb_-1+len @@ -239,7 +241,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_m_i2_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = 
lbound(rrax,1) @@ -248,27 +252,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='integer(psb_i2pk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='integer(psb_i2pk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_i2_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & + & a_err='integer(psb_i2pk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(lb1_,dim,len1) do i=lb1_-1+dim+1,lb1_-1+len1 @@ -325,30 +328,33 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_e_i2_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='integer(psb_i2pk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call 
psb_errpush(err,name, e_err=(/len/), & - & a_err='integer(psb_i2pk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_i2_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len/), & + & a_err='integer(psb_i2pk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb_-1+dim+1:lb_-1+len) = pad endif @@ -407,7 +413,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_e_i2_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -416,27 +424,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='integer(psb_i2pk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='integer(psb_i2pk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_i2_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/(len1*len2)/), & + & a_err='integer(psb_i2pk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -498,7 +505,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_me_i2_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -507,27 +516,28 @@ Contains If ((dim 
/= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='integer(psb_i2pk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name,e_err=(/len1*len2/),& - & a_err='integer(psb_i2pk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_me_i2_rk2) +#endif + + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='integer(psb_i2pk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -589,7 +599,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_em_i2_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -598,27 +610,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='integer(psb_i2pk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - 
call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='integer(psb_i2pk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_em_i2_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='integer(psb_i2pk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -715,8 +726,6 @@ Contains End Subroutine psb_r_e_2_i2_rk1 - - subroutine psb_ab_cpy_i2_s(vin,vout,info) use psb_error_mod @@ -999,8 +1008,9 @@ Contains isz = psb_size(v) If (len > isz) Then -#if defined(OPENMP) - !$OMP CRITICAL +#if defined(PSB_OPENMP) + !$omp critical(m_sz_i2_rk1) + isz = psb_size(v) if (len > isz) then if (present(newsz)) then isz = max(len+1,1,newsz) @@ -1012,7 +1022,9 @@ Contains call psb_realloc(isz,v,info,pad=pad) end if - !$OMP END CRITICAL + if (info /= psb_success_) & + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(m_sz_i2_rk1) if (info /= psb_success_) then info=psb_err_from_subroutine_ @@ -1028,7 +1040,6 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) end if @@ -1075,6 +1086,28 @@ Contains end if isz = psb_size(v) If (len > isz) Then +#if defined(PSB_OPENMP) + !$omp critical(e_sz_i2_rk1) + isz = psb_size(v) + If (len > isz) Then + if (present(newsz)) then + isz = max(len+1,1,newsz) + else if (present(addsz)) then + isz = max(len,1,isz+addsz) + else + isz = max(len,1,int(1.25*isz)) + endif + call psb_realloc(isz,v,info,pad=pad) + end If + if (info /= psb_success_)& + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(e_sz_i2_rk1) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_realloc') + goto 9999 + End If 
+#else if (present(newsz)) then isz = max(len+1,1,newsz) else if (present(addsz)) then @@ -1082,13 +1115,13 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_realloc') goto 9999 End If +#endif end If call psb_erractionrestore(err_act) diff --git a/base/modules/auxil/psb_m_realloc_mod.F90 b/base/modules/auxil/psb_m_realloc_mod.F90 index 4d2f9316..865aa47b 100644 --- a/base/modules/auxil/psb_m_realloc_mod.F90 +++ b/base/modules/auxil/psb_m_realloc_mod.F90 @@ -154,30 +154,32 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_m_m_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='integer(psb_mpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='integer(psb_mpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_m_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & + & a_err='integer(psb_mpk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(dim,len) do i=lb_-1+dim+1,lb_-1+len @@ -239,7 +241,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_m_m_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -248,27 +252,26 @@ Contains If ((dim /= len1).or.(dim2 /= 
len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='integer(psb_mpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='integer(psb_mpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_m_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & + & a_err='integer(psb_mpk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(lb1_,dim,len1) do i=lb1_-1+dim+1,lb1_-1+len1 @@ -325,30 +328,33 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_e_m_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='integer(psb_mpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='integer(psb_mpk_)') - goto 9999 - end if 
endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_m_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len/), & + & a_err='integer(psb_mpk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb_-1+dim+1:lb_-1+len) = pad endif @@ -407,7 +413,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_e_m_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -416,27 +424,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='integer(psb_mpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='integer(psb_mpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_m_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/(len1*len2)/), & + & a_err='integer(psb_mpk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -498,7 +505,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_me_m_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -507,27 +516,28 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then 
Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='integer(psb_mpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name,e_err=(/len1*len2/),& - & a_err='integer(psb_mpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_me_m_rk2) +#endif + + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='integer(psb_mpk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -589,7 +599,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_em_m_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -598,27 +610,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='integer(psb_mpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 
Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='integer(psb_mpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_em_m_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='integer(psb_mpk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -715,8 +726,6 @@ Contains End Subroutine psb_r_e_2_m_rk1 - - subroutine psb_ab_cpy_m_s(vin,vout,info) use psb_error_mod @@ -999,8 +1008,9 @@ Contains isz = psb_size(v) If (len > isz) Then -#if defined(OPENMP) - !$OMP CRITICAL +#if defined(PSB_OPENMP) + !$omp critical(m_sz_m_rk1) + isz = psb_size(v) if (len > isz) then if (present(newsz)) then isz = max(len+1,1,newsz) @@ -1012,7 +1022,9 @@ Contains call psb_realloc(isz,v,info,pad=pad) end if - !$OMP END CRITICAL + if (info /= psb_success_) & + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(m_sz_m_rk1) if (info /= psb_success_) then info=psb_err_from_subroutine_ @@ -1028,7 +1040,6 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) end if @@ -1075,6 +1086,28 @@ Contains end if isz = psb_size(v) If (len > isz) Then +#if defined(PSB_OPENMP) + !$omp critical(e_sz_m_rk1) + isz = psb_size(v) + If (len > isz) Then + if (present(newsz)) then + isz = max(len+1,1,newsz) + else if (present(addsz)) then + isz = max(len,1,isz+addsz) + else + isz = max(len,1,int(1.25*isz)) + endif + call psb_realloc(isz,v,info,pad=pad) + end If + if (info /= psb_success_)& + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(e_sz_m_rk1) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_realloc') + goto 9999 + End If +#else if (present(newsz)) then isz = max(len+1,1,newsz) else if 
(present(addsz)) then @@ -1082,13 +1115,13 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_realloc') goto 9999 End If +#endif end If call psb_erractionrestore(err_act) diff --git a/base/modules/auxil/psb_s_realloc_mod.F90 b/base/modules/auxil/psb_s_realloc_mod.F90 index f064e606..fb2820cc 100644 --- a/base/modules/auxil/psb_s_realloc_mod.F90 +++ b/base/modules/auxil/psb_s_realloc_mod.F90 @@ -154,30 +154,32 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_m_s_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='real(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='real(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_s_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & + & a_err='real(psb_spk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(dim,len) do i=lb_-1+dim+1,lb_-1+len @@ -239,7 +241,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_m_s_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -248,27 +252,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then 
Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='real(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='real(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_s_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & + & a_err='real(psb_spk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(lb1_,dim,len1) do i=lb1_-1+dim+1,lb1_-1+len1 @@ -325,30 +328,33 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_e_s_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='real(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='real(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_s_rk1) 
+#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len/), & + & a_err='real(psb_spk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb_-1+dim+1:lb_-1+len) = pad endif @@ -407,7 +413,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_e_s_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -416,27 +424,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='real(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='real(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_s_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/(len1*len2)/), & + & a_err='real(psb_spk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -498,7 +505,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_me_s_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -507,27 +516,28 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call 
psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='real(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name,e_err=(/len1*len2/),& - & a_err='real(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_me_s_rk2) +#endif + + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='real(psb_spk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -589,7 +599,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_em_s_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -598,27 +610,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='real(psb_spk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, 
e_err=(/len1*len2/), & - & a_err='real(psb_spk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_em_s_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='real(psb_spk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -715,8 +726,6 @@ Contains End Subroutine psb_r_e_2_s_rk1 - - subroutine psb_ab_cpy_s_s(vin,vout,info) use psb_error_mod @@ -999,8 +1008,9 @@ Contains isz = psb_size(v) If (len > isz) Then -#if defined(OPENMP) - !$OMP CRITICAL +#if defined(PSB_OPENMP) + !$omp critical(m_sz_s_rk1) + isz = psb_size(v) if (len > isz) then if (present(newsz)) then isz = max(len+1,1,newsz) @@ -1012,7 +1022,9 @@ Contains call psb_realloc(isz,v,info,pad=pad) end if - !$OMP END CRITICAL + if (info /= psb_success_) & + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(m_sz_s_rk1) if (info /= psb_success_) then info=psb_err_from_subroutine_ @@ -1028,7 +1040,6 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) end if @@ -1075,6 +1086,28 @@ Contains end if isz = psb_size(v) If (len > isz) Then +#if defined(PSB_OPENMP) + !$omp critical(e_sz_s_rk1) + isz = psb_size(v) + If (len > isz) Then + if (present(newsz)) then + isz = max(len+1,1,newsz) + else if (present(addsz)) then + isz = max(len,1,isz+addsz) + else + isz = max(len,1,int(1.25*isz)) + endif + call psb_realloc(isz,v,info,pad=pad) + end If + if (info /= psb_success_)& + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(e_sz_s_rk1) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_realloc') + goto 9999 + End If +#else if (present(newsz)) then isz = max(len+1,1,newsz) else if (present(addsz)) then @@ -1082,13 +1115,13 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) 
if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='psb_realloc') goto 9999 End If +#endif end If call psb_erractionrestore(err_act) diff --git a/base/modules/auxil/psb_z_realloc_mod.F90 b/base/modules/auxil/psb_z_realloc_mod.F90 index e9eb26d3..cf33e731 100644 --- a/base/modules/auxil/psb_z_realloc_mod.F90 +++ b/base/modules/auxil/psb_z_realloc_mod.F90 @@ -154,30 +154,32 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_m_z_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='complex(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & - & a_err='complex(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_z_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len*1_psb_lpk_/), & + & a_err='complex(psb_dpk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(dim,len) do i=lb_-1+dim+1,lb_-1+len @@ -239,7 +241,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_m_z_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -248,27 +252,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & 
a_err='complex(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & - & a_err='complex(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_m_z_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, l_err=(/len1*1_psb_lpk_*len2/), & + & a_err='complex(psb_dpk_)') + goto 9999 + end if if (present(pad)) then !$omp parallel do private(i) shared(lb1_,dim,len1) do i=lb1_-1+dim+1,lb1_-1+len1 @@ -325,30 +328,33 @@ Contains end if ub_ = lb_ + len-1 +#if defined(PSB_OPENMP) + !$omp critical(r_e_z_rk1) +#endif if (allocated(rrax)) then dim = size(rrax) lbi = lbound(rrax,1) If ((dim /= len).or.(lbi /= lb_)) Then Allocate(tmp(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='complex(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb_:lb_-1+min(len,dim))=rrax(lbi:lbi-1+min(len,dim)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 Allocate(rrax(lb_:ub_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len/), & - & a_err='complex(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_z_rk1) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len/), & + & a_err='complex(psb_dpk_)') + goto 9999 + end 
if + if (present(pad)) then rrax(lb_-1+dim+1:lb_-1+len) = pad endif @@ -407,7 +413,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_e_z_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -416,27 +424,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='complex(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/(len1*len2)/), & - & a_err='complex(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_e_z_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/(len1*len2)/), & + & a_err='complex(psb_dpk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -498,7 +505,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_me_z_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -507,27 +516,28 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='complex(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + 
tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name,e_err=(/len1*len2/),& - & a_err='complex(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp end critical(r_me_z_rk2) +#endif + + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='complex(psb_dpk_)') + goto 9999 + end if + if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -589,7 +599,9 @@ Contains goto 9999 end if - +#if defined(PSB_OPENMP) + !$omp critical(r_em_z_rk2) +#endif if (allocated(rrax)) then dim = size(rrax,1) lbi1 = lbound(rrax,1) @@ -598,27 +610,26 @@ Contains If ((dim /= len1).or.(dim2 /= len2).or.(lbi1 /= lb1_)& & .or.(lbi2 /= lb2_)) Then Allocate(tmp(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='complex(psb_dpk_)') - goto 9999 + if (info == psb_success_) then + tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & + & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) + call psb_move_alloc(tmp,rrax,info) end if - tmp(lb1_:lb1_-1+min(len1,dim),lb2_:lb2_-1+min(len2,dim2)) = & - & rrax(lbi1:lbi1-1+min(len1,dim),lbi2:lbi2-1+min(len2,dim2)) - call psb_move_alloc(tmp,rrax,info) End If else dim = 0 dim2 = 0 Allocate(rrax(lb1_:ub1_,lb2_:ub2_),stat=info) - if (info /= psb_success_) then - err=4025 - call psb_errpush(err,name, e_err=(/len1*len2/), & - & a_err='complex(psb_dpk_)') - goto 9999 - end if endif +#if defined(PSB_OPENMP) + !$omp 
end critical(r_em_z_rk2) +#endif + if (info /= psb_success_) then + err=4025 + call psb_errpush(err,name, e_err=(/len1*len2/), & + & a_err='complex(psb_dpk_)') + goto 9999 + end if if (present(pad)) then rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad @@ -715,8 +726,6 @@ Contains End Subroutine psb_r_e_2_z_rk1 - - subroutine psb_ab_cpy_z_s(vin,vout,info) use psb_error_mod @@ -999,8 +1008,9 @@ Contains isz = psb_size(v) If (len > isz) Then -#if defined(OPENMP) - !$OMP CRITICAL +#if defined(PSB_OPENMP) + !$omp critical(m_sz_z_rk1) + isz = psb_size(v) if (len > isz) then if (present(newsz)) then isz = max(len+1,1,newsz) @@ -1012,7 +1022,9 @@ Contains call psb_realloc(isz,v,info,pad=pad) end if - !$OMP END CRITICAL + if (info /= psb_success_) & + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(m_sz_z_rk1) if (info /= psb_success_) then info=psb_err_from_subroutine_ @@ -1028,7 +1040,6 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) end if @@ -1075,6 +1086,28 @@ Contains end if isz = psb_size(v) If (len > isz) Then +#if defined(PSB_OPENMP) + !$omp critical(e_sz_z_rk1) + isz = psb_size(v) + If (len > isz) Then + if (present(newsz)) then + isz = max(len+1,1,newsz) + else if (present(addsz)) then + isz = max(len,1,isz+addsz) + else + isz = max(len,1,int(1.25*isz)) + endif + call psb_realloc(isz,v,info,pad=pad) + end If + if (info /= psb_success_)& + & write(0,*) 'Error from realloc ',info,len,isz + !$omp end critical(e_sz_z_rk1) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='psb_realloc') + goto 9999 + End If +#else if (present(newsz)) then isz = max(len+1,1,newsz) else if (present(addsz)) then @@ -1082,13 +1115,13 @@ Contains else isz = max(len,1,int(1.25*isz)) endif - call psb_realloc(isz,v,info,pad=pad) if (info /= psb_success_) then info=psb_err_from_subroutine_ call 
psb_errpush(info,name,a_err='psb_realloc') goto 9999 End If +#endif end If call psb_erractionrestore(err_act) diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 0fdff04b..1d30df10 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -99,66 +99,101 @@ module psi_c_serial_mod end subroutine psi_caxpbyv2 end interface psb_geaxpby + interface psi_upd_xyz + subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_c_upd_xyz + end interface psi_upd_xyz + + interface psi_xyzw + subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (inout) :: w(:) + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_cxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:,:), y(:),alpha,beta end subroutine psi_cgthmv subroutine psi_cgthv(n,idx,alpha,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:), y(:),alpha,beta end subroutine psi_cgthv subroutine 
psi_cgthzmv(n,k,idx,x,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:,:), y(:) end subroutine psi_cgthzmv subroutine psi_cgthzmm(n,k,idx,x,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:,:), y(:,:) end subroutine psi_cgthzmm subroutine psi_cgthzv(n,idx,x,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:), y(:) end subroutine psi_cgthzv end interface psi_gth interface psi_sct subroutine psi_csctmm(n,k,idx,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:,:), y(:,:) end subroutine psi_csctmm subroutine psi_csctmv(n,k,idx,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:), y(:,:) end subroutine psi_csctmv subroutine psi_csctv(n,idx,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:), y(:) end subroutine psi_csctv end interface psi_sct interface psi_exscan subroutine psi_c_exscanv(n,x,info,shift) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: n complex(psb_spk_), intent (inout) :: x(:) diff --git a/base/modules/auxil/psi_d_serial_mod.f90 
b/base/modules/auxil/psi_d_serial_mod.f90 index 0ce14dbb..4115d89a 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -99,66 +99,101 @@ module psi_d_serial_mod end subroutine psi_daxpbyv2 end interface psb_geaxpby + interface psi_upd_xyz + subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_d_upd_xyz + end interface psi_upd_xyz + + interface psi_xyzw + subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (inout) :: w(:) + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_dxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: x(:,:), y(:),alpha,beta end subroutine psi_dgthmv subroutine psi_dgthv(n,idx,alpha,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: x(:), y(:),alpha,beta end subroutine psi_dgthv subroutine psi_dgthzmv(n,k,idx,x,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) 
real(psb_dpk_) :: x(:,:), y(:) end subroutine psi_dgthzmv subroutine psi_dgthzmm(n,k,idx,x,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: x(:,:), y(:,:) end subroutine psi_dgthzmm subroutine psi_dgthzv(n,idx,x,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: x(:), y(:) end subroutine psi_dgthzv end interface psi_gth interface psi_sct subroutine psi_dsctmm(n,k,idx,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:,:), y(:,:) end subroutine psi_dsctmm subroutine psi_dsctmv(n,k,idx,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:), y(:,:) end subroutine psi_dsctmv subroutine psi_dsctv(n,idx,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:), y(:) end subroutine psi_dsctv end interface psi_sct interface psi_exscan subroutine psi_d_exscanv(n,x,info,shift) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: n real(psb_dpk_), intent (inout) :: x(:) diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index f0372e01..6ebc3a54 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -99,37 +99,69 @@ module psi_e_serial_mod end subroutine 
psi_eaxpbyv2 end interface psb_geaxpby + interface psi_upd_xyz + subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_e_upd_xyz + end interface psi_upd_xyz + + interface psi_xyzw + subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (inout) :: w(:) + integer(psb_epk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_exyzw + end interface psi_xyzw + interface psi_gth subroutine psi_egthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:,:), y(:),alpha,beta end subroutine psi_egthmv subroutine psi_egthv(n,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:), y(:),alpha,beta end subroutine psi_egthv subroutine psi_egthzmv(n,k,idx,x,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:,:), y(:) end subroutine psi_egthzmv subroutine psi_egthzmm(n,k,idx,x,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + 
integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:,:), y(:,:) end subroutine psi_egthzmm subroutine psi_egthzv(n,idx,x,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:), y(:) end subroutine psi_egthzv end interface psi_gth @@ -138,20 +170,23 @@ module psi_e_serial_mod subroutine psi_esctmm(n,k,idx,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: beta, x(:,:), y(:,:) end subroutine psi_esctmm subroutine psi_esctmv(n,k,idx,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: beta, x(:), y(:,:) end subroutine psi_esctmv subroutine psi_esctv(n,idx,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: beta, x(:), y(:) end subroutine psi_esctv end interface psi_sct diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index 70dd95e1..57712a66 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -99,37 +99,69 @@ module psi_i2_serial_mod end subroutine psi_i2axpbyv2 end interface psb_geaxpby + interface psi_upd_xyz + subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i2_upd_xyz + end interface 
psi_upd_xyz + + interface psi_xyzw + subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (inout) :: w(:) + integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i2xyzw + end interface psi_xyzw + interface psi_gth subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:,:), y(:),alpha,beta end subroutine psi_i2gthmv subroutine psi_i2gthv(n,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:), y(:),alpha,beta end subroutine psi_i2gthv subroutine psi_i2gthzmv(n,k,idx,x,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:,:), y(:) end subroutine psi_i2gthzmv subroutine psi_i2gthzmm(n,k,idx,x,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:,:), y(:,:) end subroutine psi_i2gthzmm subroutine psi_i2gthzv(n,idx,x,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:), y(:) end subroutine psi_i2gthzv end interface psi_gth @@ -138,20 +170,23 @@ module psi_i2_serial_mod subroutine psi_i2sctmm(n,k,idx,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit 
none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: beta, x(:,:), y(:,:) end subroutine psi_i2sctmm subroutine psi_i2sctmv(n,k,idx,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: beta, x(:), y(:,:) end subroutine psi_i2sctmv subroutine psi_i2sctv(n,idx,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: beta, x(:), y(:) end subroutine psi_i2sctv end interface psi_sct diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index cfd1348e..05a75bde 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -99,37 +99,69 @@ module psi_m_serial_mod end subroutine psi_maxpbyv2 end interface psb_geaxpby + interface psi_upd_xyz + subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_m_upd_xyz + end interface psi_upd_xyz + + interface psi_xyzw + subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (inout) :: w(:) + integer(psb_mpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_mxyzw + end interface 
psi_xyzw + interface psi_gth subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:,:), y(:),alpha,beta end subroutine psi_mgthmv subroutine psi_mgthv(n,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:), y(:),alpha,beta end subroutine psi_mgthv subroutine psi_mgthzmv(n,k,idx,x,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:,:), y(:) end subroutine psi_mgthzmv subroutine psi_mgthzmm(n,k,idx,x,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:,:), y(:,:) end subroutine psi_mgthzmm subroutine psi_mgthzv(n,idx,x,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:), y(:) end subroutine psi_mgthzv end interface psi_gth @@ -138,20 +170,23 @@ module psi_m_serial_mod subroutine psi_msctmm(n,k,idx,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: beta, x(:,:), y(:,:) end subroutine psi_msctmm subroutine psi_msctmv(n,k,idx,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: beta, x(:), y(:,:) end subroutine psi_msctmv subroutine psi_msctv(n,idx,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit 
none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: beta, x(:), y(:) end subroutine psi_msctv end interface psi_sct diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 25c4a7ef..95f536f3 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -99,66 +99,101 @@ module psi_s_serial_mod end subroutine psi_saxpbyv2 end interface psb_geaxpby + interface psi_upd_xyz + subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_s_upd_xyz + end interface psi_upd_xyz + + interface psi_xyzw + subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (inout) :: w(:) + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_sxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:,:), y(:),alpha,beta end subroutine psi_sgthmv subroutine psi_sgthv(n,idx,alpha,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:), y(:),alpha,beta end subroutine 
psi_sgthv subroutine psi_sgthzmv(n,k,idx,x,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:,:), y(:) end subroutine psi_sgthzmv subroutine psi_sgthzmm(n,k,idx,x,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:,:), y(:,:) end subroutine psi_sgthzmm subroutine psi_sgthzv(n,idx,x,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:), y(:) end subroutine psi_sgthzv end interface psi_gth interface psi_sct subroutine psi_ssctmm(n,k,idx,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:,:), y(:,:) end subroutine psi_ssctmm subroutine psi_ssctmv(n,k,idx,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:), y(:,:) end subroutine psi_ssctmv subroutine psi_ssctv(n,idx,x,beta,y) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:), y(:) end subroutine psi_ssctv end interface psi_sct interface psi_exscan subroutine psi_s_exscanv(n,x,info,shift) - import :: psb_ipk_, psb_spk_ + import :: psb_ipk_, psb_mpk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: n real(psb_spk_), intent (inout) :: x(:) diff --git a/base/modules/auxil/psi_z_serial_mod.f90 
b/base/modules/auxil/psi_z_serial_mod.f90 index b40cf05a..c08a0fec 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -99,66 +99,101 @@ module psi_z_serial_mod end subroutine psi_zaxpbyv2 end interface psb_geaxpby + interface psi_upd_xyz + subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_z_upd_xyz + end interface psi_upd_xyz + + interface psi_xyzw + subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (inout) :: w(:) + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_zxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:,:), y(:),alpha,beta end subroutine psi_zgthmv subroutine psi_zgthv(n,idx,alpha,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:), y(:),alpha,beta end subroutine psi_zgthv subroutine psi_zgthzmv(n,k,idx,x,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + 
integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:,:), y(:) end subroutine psi_zgthzmv subroutine psi_zgthzmm(n,k,idx,x,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:,:), y(:,:) end subroutine psi_zgthzmm subroutine psi_zgthzv(n,idx,x,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:), y(:) end subroutine psi_zgthzv end interface psi_gth interface psi_sct subroutine psi_zsctmm(n,k,idx,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:,:), y(:,:) end subroutine psi_zsctmm subroutine psi_zsctmv(n,k,idx,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:), y(:,:) end subroutine psi_zsctmv subroutine psi_zsctv(n,idx,x,beta,y) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:), y(:) end subroutine psi_zsctv end interface psi_sct interface psi_exscan subroutine psi_z_exscanv(n,x,info,shift) - import :: psb_ipk_, psb_dpk_ + import :: psb_ipk_, psb_mpk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: n complex(psb_dpk_), intent (inout) :: x(:) diff --git a/base/modules/comm/psi_c_comm_a_mod.f90 b/base/modules/comm/psi_c_comm_a_mod.f90 index 1277efdf..ce2da78d 100644 --- a/base/modules/comm/psi_c_comm_a_mod.f90 +++ b/base/modules/comm/psi_c_comm_a_mod.f90 @@ -36,7 +36,8 
@@ module psi_c_comm_a_mod interface psi_swapdata subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:,:), beta complex(psb_spk_),target :: work(:) @@ -57,7 +58,8 @@ module psi_c_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:,:), beta complex(psb_spk_),target :: work(:) @@ -80,7 +82,8 @@ module psi_c_comm_a_mod interface psi_swaptran subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_ipk_), intent(in) :: flag + integer(psb_Mpk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:,:), beta complex(psb_spk_),target :: work(:) @@ -101,7 +104,8 @@ module psi_c_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_spk_) :: y(:,:), beta complex(psb_spk_),target :: work(:) diff --git a/base/modules/comm/psi_d_comm_a_mod.f90 b/base/modules/comm/psi_d_comm_a_mod.f90 index e2b0aa87..b1dda3f8 100644 --- a/base/modules/comm/psi_d_comm_a_mod.f90 +++ b/base/modules/comm/psi_d_comm_a_mod.f90 @@ -36,7 +36,8 @@ module psi_d_comm_a_mod interface psi_swapdata subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:,:), beta real(psb_dpk_),target :: 
work(:) @@ -57,7 +58,8 @@ module psi_d_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:,:), beta real(psb_dpk_),target :: work(:) @@ -80,7 +82,8 @@ module psi_d_comm_a_mod interface psi_swaptran subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_ipk_), intent(in) :: flag + integer(psb_Mpk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:,:), beta real(psb_dpk_),target :: work(:) @@ -101,7 +104,8 @@ module psi_d_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_dpk_) :: y(:,:), beta real(psb_dpk_),target :: work(:) diff --git a/base/modules/comm/psi_e_comm_a_mod.f90 b/base/modules/comm/psi_e_comm_a_mod.f90 index 8c0d48ff..4b6c5104 100644 --- a/base/modules/comm/psi_e_comm_a_mod.f90 +++ b/base/modules/comm/psi_e_comm_a_mod.f90 @@ -36,7 +36,8 @@ module psi_e_comm_a_mod interface psi_swapdata subroutine psi_eswapdatam(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:,:), beta integer(psb_epk_),target :: work(:) @@ -57,7 +58,8 @@ module psi_e_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:,:), beta integer(psb_epk_),target :: 
work(:) @@ -80,7 +82,8 @@ module psi_e_comm_a_mod interface psi_swaptran subroutine psi_eswaptranm(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_ipk_), intent(in) :: flag + integer(psb_Mpk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:,:), beta integer(psb_epk_),target :: work(:) @@ -101,7 +104,8 @@ module psi_e_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_epk_) :: y(:,:), beta integer(psb_epk_),target :: work(:) diff --git a/base/modules/comm/psi_i2_comm_a_mod.f90 b/base/modules/comm/psi_i2_comm_a_mod.f90 index 49f1af71..484c9824 100644 --- a/base/modules/comm/psi_i2_comm_a_mod.f90 +++ b/base/modules/comm/psi_i2_comm_a_mod.f90 @@ -36,7 +36,8 @@ module psi_i2_comm_a_mod interface psi_swapdata subroutine psi_i2swapdatam(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:,:), beta integer(psb_i2pk_),target :: work(:) @@ -57,7 +58,8 @@ module psi_i2_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:,:), beta integer(psb_i2pk_),target :: work(:) @@ -80,7 +82,8 @@ module psi_i2_comm_a_mod interface psi_swaptran subroutine psi_i2swaptranm(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_ipk_), intent(in) :: flag + integer(psb_Mpk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: 
y(:,:), beta integer(psb_i2pk_),target :: work(:) @@ -101,7 +104,8 @@ module psi_i2_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_i2pk_) :: y(:,:), beta integer(psb_i2pk_),target :: work(:) diff --git a/base/modules/comm/psi_m_comm_a_mod.f90 b/base/modules/comm/psi_m_comm_a_mod.f90 index ca49efa5..825e1579 100644 --- a/base/modules/comm/psi_m_comm_a_mod.f90 +++ b/base/modules/comm/psi_m_comm_a_mod.f90 @@ -36,7 +36,8 @@ module psi_m_comm_a_mod interface psi_swapdata subroutine psi_mswapdatam(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:,:), beta integer(psb_mpk_),target :: work(:) @@ -57,7 +58,8 @@ module psi_m_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:,:), beta integer(psb_mpk_),target :: work(:) @@ -80,7 +82,8 @@ module psi_m_comm_a_mod interface psi_swaptran subroutine psi_mswaptranm(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_ipk_), intent(in) :: flag + integer(psb_Mpk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: y(:,:), beta integer(psb_mpk_),target :: work(:) @@ -101,7 +104,8 @@ module psi_m_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: 
info integer(psb_mpk_) :: y(:,:), beta integer(psb_mpk_),target :: work(:) diff --git a/base/modules/comm/psi_s_comm_a_mod.f90 b/base/modules/comm/psi_s_comm_a_mod.f90 index f2d3ae79..10369b51 100644 --- a/base/modules/comm/psi_s_comm_a_mod.f90 +++ b/base/modules/comm/psi_s_comm_a_mod.f90 @@ -36,7 +36,8 @@ module psi_s_comm_a_mod interface psi_swapdata subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:,:), beta real(psb_spk_),target :: work(:) @@ -57,7 +58,8 @@ module psi_s_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:,:), beta real(psb_spk_),target :: work(:) @@ -80,7 +82,8 @@ module psi_s_comm_a_mod interface psi_swaptran subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_ipk_), intent(in) :: flag + integer(psb_Mpk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:,:), beta real(psb_spk_),target :: work(:) @@ -101,7 +104,8 @@ module psi_s_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info real(psb_spk_) :: y(:,:), beta real(psb_spk_),target :: work(:) diff --git a/base/modules/comm/psi_z_comm_a_mod.f90 b/base/modules/comm/psi_z_comm_a_mod.f90 index 16872677..9f7477a1 100644 --- a/base/modules/comm/psi_z_comm_a_mod.f90 +++ b/base/modules/comm/psi_z_comm_a_mod.f90 @@ -36,7 +36,8 @@ module psi_z_comm_a_mod interface psi_swapdata 
subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:,:), beta complex(psb_dpk_),target :: work(:) @@ -57,7 +58,8 @@ module psi_z_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:,:), beta complex(psb_dpk_),target :: work(:) @@ -80,7 +82,8 @@ module psi_z_comm_a_mod interface psi_swaptran subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data) import - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_ipk_), intent(in) :: flag + integer(psb_Mpk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:,:), beta complex(psb_dpk_),target :: work(:) @@ -101,7 +104,8 @@ module psi_z_comm_a_mod import type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(in) :: icomm - integer(psb_ipk_), intent(in) :: flag, n + integer(psb_mpk_), intent(in) :: n + integer(psb_ipk_), intent(in) :: flag integer(psb_ipk_), intent(out) :: info complex(psb_dpk_) :: y(:,:), beta complex(psb_dpk_),target :: work(:) diff --git a/base/modules/cutil.c b/base/modules/cutil.c index 503e14a5..7748d147 100644 --- a/base/modules/cutil.c +++ b/base/modules/cutil.c @@ -1,8 +1,9 @@ #include #include #include +#include -void psi_c_diffadd(void *p1, void *p2, int *ret) +void psi_c_diffadd(void *p1, void *p2, int64_t *ret) { *ret = (int)((char *)p2-(char *)p1); return; diff --git a/base/modules/desc/psb_desc_const_mod.f90 b/base/modules/desc/psb_desc_const_mod.f90 index 8953aafc..173031e9 100644 --- a/base/modules/desc/psb_desc_const_mod.f90 +++ b/base/modules/desc/psb_desc_const_mod.f90 @@ -35,7 +35,7 @@ ! 
Auxiliary module for descriptor: constant values. ! module psb_desc_const_mod - use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ + use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_i2pk_ ! ! Communication, prolongation & restriction ! @@ -108,7 +108,7 @@ module psb_desc_const_mod integer(psb_ipk_), parameter :: psb_max_hash_bits = 22 integer(psb_ipk_), parameter :: psb_hash_size = 2**psb_hash_bits, psb_hash_mask=psb_hash_size-1 integer(psb_ipk_), parameter :: psb_hpnt_nentries_ = 7 - integer(psb_ipk_), parameter :: psb_default_large_threshold=1*1024*1024 + integer(psb_ipk_), parameter :: psb_default_hash_threshold=1*1024*1024 ! ! Choice of algorithm for sparse matrix A2AV ! diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90 index 226d9d2b..716e222c 100644 --- a/base/modules/desc/psb_desc_mod.F90 +++ b/base/modules/desc/psb_desc_mod.F90 @@ -285,14 +285,14 @@ module psb_desc_mod module procedure psb_cdfree end interface psb_free - interface psb_cd_set_large_threshold - module procedure psb_i_cd_set_large_threshold - end interface psb_cd_set_large_threshold - -#if defined(IPK4) && defined(LPK8) - interface psb_cd_set_large_threshold - module procedure psb_l_cd_set_large_threshold - end interface psb_cd_set_large_threshold + interface psb_cd_set_hash_threshold + module procedure psb_i_cd_set_hash_threshold + end interface psb_cd_set_hash_threshold + +#if defined(PSB_IPK4) && defined(PSB_LPK8) + interface psb_cd_set_hash_threshold + module procedure psb_l_cd_set_hash_threshold + end interface psb_cd_set_hash_threshold #endif interface psb_set_sp_a2av_alg @@ -309,7 +309,7 @@ module psb_desc_mod & cd_g2ls2_ins, cd_g2lv1_ins, cd_g2lv2_ins, cd_fnd_owner - integer(psb_lpk_), private, save :: cd_large_threshold = psb_default_large_threshold + integer(psb_lpk_), private, save :: cd_hash_threshold = psb_default_hash_threshold integer(psb_ipk_), private, save :: sp_a2av_alg = psb_sp_a2av_smpl_triad_ contains 
@@ -363,27 +363,27 @@ contains - subroutine psb_i_cd_set_large_threshold(ith) + subroutine psb_i_cd_set_hash_threshold(ith) implicit none integer(psb_ipk_), intent(in) :: ith if (ith > 0) then - cd_large_threshold = ith + cd_hash_threshold = ith end if - end subroutine psb_i_cd_set_large_threshold + end subroutine psb_i_cd_set_hash_threshold - subroutine psb_l_cd_set_large_threshold(ith) + subroutine psb_l_cd_set_hash_threshold(ith) implicit none integer(psb_lpk_), intent(in) :: ith if (ith > 0) then - cd_large_threshold = ith + cd_hash_threshold = ith end if - end subroutine psb_l_cd_set_large_threshold + end subroutine psb_l_cd_set_hash_threshold - function psb_cd_get_large_threshold() result(val) + function psb_cd_get_hash_threshold() result(val) implicit none integer(psb_lpk_) :: val - val = cd_large_threshold - end function psb_cd_get_large_threshold + val = cd_hash_threshold + end function psb_cd_get_hash_threshold function psb_cd_is_large_size(m) result(val) use psb_penv_mod @@ -392,7 +392,7 @@ contains integer(psb_lpk_), intent(in) :: m logical :: val !locals - val = (m > psb_cd_get_large_threshold()) + val = (m > psb_cd_get_hash_threshold()) end function psb_cd_is_large_size function psb_cd_choose_large_state(ctxt,m) result(val) @@ -409,7 +409,7 @@ contains ! ! Since the hashed lists take up (somewhat) more than 2*N_COL integers, ! it makes no sense to use them if you don't have at least - ! 3 processes, no matter what the size of the process. + ! 3 processes, no matter what the size of the index space. ! val = psb_cd_is_large_size(m) .and. 
(np > 2) end function psb_cd_choose_large_state diff --git a/base/modules/desc/psb_gen_block_map_mod.F90 b/base/modules/desc/psb_gen_block_map_mod.F90 index 82a4cc15..d87f1226 100644 --- a/base/modules/desc/psb_gen_block_map_mod.F90 +++ b/base/modules/desc/psb_gen_block_map_mod.F90 @@ -109,13 +109,13 @@ module psb_gen_block_map_mod & block_lg2ls1_ins, block_lg2ls2_ins, block_lg2lv1_ins, block_lg2lv2_ins, & & block_clone, block_reinit,& & block_get_fmt, i_gen_block_search -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) private :: l_gen_block_search #endif interface gen_block_search module procedure i_gen_block_search -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) module procedure l_gen_block_search #endif end interface gen_block_search @@ -215,9 +215,15 @@ contains end if if (present(mask)) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idx,idxmap,owned_,info) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,idxmap,owned_,info) & + ! $ o m p private(i) +>>>>>>> development do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -231,11 +237,19 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idx,idxmap,owned_,info) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (.not.present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idx,idxmap,owned_,info) & + ! $ o m p private(i) +>>>>>>> development do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%min_glob_row + idx(i) - 1 @@ -247,7 +261,11 @@ contains info = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! 
$ o m p end parallel do +>>>>>>> development end if end subroutine block_ll2gv1 @@ -281,9 +299,15 @@ contains end if if (present(mask)) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idxin,idxout,idxmap,owned_,info,im) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,idxmap,owned_,info,im) & + ! $ o m p private(i) +>>>>>>> development do i=1, im if (mask(i)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -297,11 +321,19 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idxin,idxout,idxmap,owned_,info,im) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (.not.present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,idxmap,owned_,info,im) & + ! $ o m p private(i) +>>>>>>> development do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then idxout(i) = idxmap%min_glob_row + idxin(i) - 1 @@ -313,7 +345,11 @@ contains info = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development end if if (is > im) then @@ -400,9 +436,15 @@ contains if (present(mask)) then if (idxmap%is_asb()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,is,idx,idxmap,owned_) & !$omp private(i,nv,tidx) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,idxmap,owned_) & + ! $ o m p private(i,nv,tidx) +>>>>>>> development do i=1, is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and. 
& @@ -419,11 +461,19 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,is,idx,idxmap,owned_) & !$omp private(i,ip,lip,tidx,info) +======= + ! $ o m p end parallel do + else if (idxmap%is_valid()) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,idxmap,owned_) & + ! $ o m p private(i,ip,lip,tidx,info) +>>>>>>> development do i=1,is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and.& @@ -439,7 +489,11 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development else idx(1:is) = -1 info = -1 @@ -448,9 +502,15 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(is,idx,idxmap,owned_) & !$omp private(i,nv,tidx) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idx,idxmap,owned_) & + ! $ o m p private(i,nv,tidx) +>>>>>>> development do i=1, is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -465,11 +525,19 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(is,idx,idxmap,owned_) & !$omp private(i,ip,lip,tidx,info) +======= + ! $ o m p end parallel do + else if (idxmap%is_valid()) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idx,idxmap,owned_) & + ! $ o m p private(i,ip,lip,tidx,info) +>>>>>>> development do i=1,is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -483,7 +551,11 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! 
$ o m p end parallel do +>>>>>>> development else idx(1:is) = -1 info = -1 @@ -1382,7 +1454,7 @@ contains return end function i_gen_block_search -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) function l_gen_block_search(key,n,v) result(ipos) implicit none diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index c3d833c6..e9dc38d7 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -57,6 +57,10 @@ module psb_hash_map_mod use psb_desc_const_mod use psb_indx_map_mod use psb_hash_mod + use psb_penv_mod + use psb_sort_mod + use psb_realloc_mod + use psb_error_mod type, extends(psb_indx_map) :: psb_hash_map @@ -106,13 +110,22 @@ module psb_hash_map_mod & hash_bld_g2l_map, hash_inner_cnvs2, hash_inner_cnvs1, & & hash_inner_cnv2, hash_inner_cnv1, hash_row_extendable - integer(psb_ipk_), private :: laddsz=500 + integer(psb_ipk_), private :: psb_laddsz=500 interface hash_inner_cnv module procedure hash_inner_cnvs2, hash_inner_cnv2,& & hash_inner_cnvs1, hash_inner_cnv1 end interface hash_inner_cnv private :: hash_inner_cnv + interface hash_srch +#if defined(PSB_IPK4) && defined(PSB_LPK8) + module procedure hash_srch_ipk, hash_srch_lpk +#else + module procedure hash_srch_ipk +#endif + end interface hash_srch + private :: hash_srch + integer, parameter, private :: seqsrchmax=6 contains @@ -221,9 +234,15 @@ contains if (present(mask)) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idx,idxmap,owned_) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,idxmap,owned_) & + ! 
$ o m p private(i) +>>>>>>> development do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -236,12 +255,21 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idx,idxmap,owned_) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (.not.present(mask)) then + + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idx,idxmap,owned_) & + ! $ o m p private(i) +>>>>>>> development do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -252,7 +280,11 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development end if end subroutine hash_l2gv1 @@ -321,8 +353,6 @@ contains subroutine hash_g2lv1(idx,idxmap,info,mask,owned) - use psb_penv_mod - use psb_sort_mod implicit none class(psb_hash_map), intent(in) :: idxmap integer(psb_lpk_), intent(inout) :: idx(:) @@ -369,6 +399,7 @@ contains else if (idxmap%is_valid()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & !$omp private(i,ip,lip,tlip,info) @@ -390,14 +421,68 @@ contains if (lip<=nrow) then idx(i) = lip else +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + ! 
$ o m p private(i,ip,lip,tlip,info) + do i = 1, is + if (mask(i)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob)) then +>>>>>>> development idx(i) = -1 + cycle endif +<<<<<<< HEAD else idx(i) = lip endif end if enddo !$omp end parallel do +======= + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,nrm) + if (lip < 0) then + call psb_hash_searchkey(ip,tlip,idxmap%hash,info) + lip = tlip + info = 0 + end if + if (owned_) then + if (lip<=nrow) then + idx(i) = lip + else + idx(i) = -1 + endif + else + idx(i) = lip + endif + end if + enddo + ! $ o m p end parallel do + +!!$ call hash_inner_cnv(is,idx,idxmap%hashvmask,idxmap%hashv,& +!!$ & idxmap%glb_lc,nrm=nrm,mask=mask) +!!$ +!!$ do i = 1, is +!!$ lip = idx(i) +!!$ if (lip < 0) then +!!$ call psb_hash_searchkey(ip,tlip,idxmap%hash,info) +!!$ lip = tlip +!!$ info = 0 +!!$ if (owned_) then +!!$ if (lip<=nrow) then +!!$ idx(i) = lip +!!$ else +!!$ idx(i) = -1 +!!$ endif +!!$ else +!!$ idx(i) = lip +!!$ endif +!!$ end if +!!$ enddo +!!$ +>>>>>>> development else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 @@ -413,9 +498,15 @@ contains else if (idxmap%is_valid()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & !$omp private(i,ip,lip,tlip,info) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + ! $ o m p private(i,ip,lip,tlip,info) +>>>>>>> development do i = 1, is ip = idx(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -439,7 +530,11 @@ contains idx(i) = lip endif enddo +<<<<<<< HEAD !$omp end parallel do +======= + ! 
$ o m p end parallel do +>>>>>>> development else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 @@ -449,9 +544,6 @@ contains end subroutine hash_g2lv1 subroutine hash_g2lv2(idxin,idxout,idxmap,info,mask,owned) - use psb_penv_mod - use psb_sort_mod - use psb_realloc_mod implicit none class(psb_hash_map), intent(in) :: idxmap integer(psb_lpk_), intent(in) :: idxin(:) @@ -502,36 +594,46 @@ contains & idxmap%hashv,idxmap%glb_lc,mask=mask, nrm=nrm) else if (idxmap%is_valid()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & !$omp private(i,ip,lip,tlip,info) +======= + call hash_inner_cnv(is,idxin,idxout,idxmap%hashvmask,& + & idxmap%hashv,idxmap%glb_lc,nrm=nrm,mask=mask) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + ! $ o m p private(i,ip,lip,tlip,info) +>>>>>>> development do i = 1, is - if (mask(i)) then + if (mask(i).and.(idxout(i)<0)) then ip = idxin(i) if ((ip < 1 ).or.(ip>mglob)) then idxout(i) = -1 cycle endif - call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& - & idxmap%glb_lc,nrm) - if (lip < 0) then + if (idxout(i) < 0) then call psb_hash_searchkey(ip,tlip,idxmap%hash,info) lip = tlip info = 0 - end if - if (owned_) then - if (lip<=nrow) then + if (owned_) then + if (lip<=nrow) then + idxout(i) = lip + else + idxout(i) = -1 + endif + else idxout(i) = lip - else - idxout(i) = -1 endif - else - idxout(i) = lip - endif + end if end if enddo +<<<<<<< HEAD !$omp end parallel do +======= + ! 
$ o m p end parallel do +>>>>>>> development else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 @@ -546,34 +648,44 @@ contains & idxmap%hashv,idxmap%glb_lc,nrm=nrm) else if (idxmap%is_valid()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & !$omp private(i,ip,lip,tlip,info) +======= + call hash_inner_cnv(is,idxin,idxout,idxmap%hashvmask,& + & idxmap%hashv,idxmap%glb_lc,nrm=nrm) + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + ! $ o m p private(i,ip,lip,tlip,info) +>>>>>>> development do i = 1, is ip = idxin(i) if ((ip < 1 ).or.(ip>mglob)) then idxout(i) = -1 cycle endif - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,nrm) - if (lip < 0) then + if (idxout(i) < 0) then call psb_hash_searchkey(ip,tlip,idxmap%hash,info) lip = tlip info = 0 - end if - if (owned_) then - if (lip<=nrow) then + if (owned_) then + if (lip<=nrow) then + idxout(i) = lip + else + idxout(i) = -1 + endif + else idxout(i) = lip - else - idxout(i) = -1 endif - else - idxout(i) = lip - endif + end if enddo +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 @@ -585,8 +697,6 @@ contains subroutine hash_g2ls1_ins(idx,idxmap,info,mask,lidx) - use psb_realloc_mod - use psb_sort_mod implicit none class(psb_hash_map), intent(inout) :: idxmap integer(psb_lpk_), intent(inout) :: idx @@ -642,16 +752,11 @@ contains end subroutine hash_g2ls2_ins ! 
#################### THESIS #################### - subroutine hash_g2lv1_ins(idx,idxmap,info,mask,lidx) - use psb_error_mod - use psb_realloc_mod - use psb_sort_mod - use psb_penv_mod -#ifdef OPENMP + use psb_timers_mod +#ifdef PSB_OPENMP use omp_lib #endif - implicit none class(psb_hash_map), intent(inout) :: idxmap @@ -666,37 +771,65 @@ contains type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: me, np,ith character(len=20) :: name,ch_err - logical, allocatable :: mask_(:) + integer(psb_ipk_), allocatable :: tidx(:) !!$ logical :: use_openmp = .true. -#ifdef OPENMP +#ifdef PSB_OPENMP integer(kind = OMP_lock_kind) :: ins_lck #endif logical, volatile :: isLoopValid + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: ins_phase1=-1, ins_phase2=-1, ins_phase3=-1, ins_phase4=-1 + integer(psb_ipk_), save :: ins_phase11=-1, ins_phase12=-1 + info = psb_success_ name = 'hash_g2lv1_ins' call psb_erractionsave(err_act) ctxt = idxmap%get_ctxt() call psb_info(ctxt, me, np) - + if ((do_timings).and.(ins_phase1==-1)) & + & ins_phase1 = psb_get_timer_idx("HSHINS: inner_cnv ") + if ((do_timings).and.(ins_phase2==-1)) & + & ins_phase2 = psb_get_timer_idx("HSINS: srchins_lp") +!!$ if ((do_timings).and.(ins_phase3==-1)) & +!!$ & ins_phase3 = psb_get_timer_idx("HSHINS: csput") +!!$ if ((do_timings).and.(ins_phase4==-1)) & +!!$ & ins_phase4 = psb_get_timer_idx("HSHINS: rmt%csput") is = size(idx) + call psb_realloc(is,tidx,info) + call idxmap%lg2lv2_ins(idx,tidx,info,mask=mask,lidx=lidx) + idx(1:is) = tidx(1:is) + end subroutine hash_g2lv1_ins + + subroutine hash_g2lv2_ins(idxin,idxout,idxmap,info,mask,lidx) + use psb_timers_mod + implicit none + class(psb_hash_map), intent(inout) :: idxmap + integer(psb_lpk_), intent(in) :: idxin(:) + integer(psb_ipk_), intent(out) :: idxout(:) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: mask(:) + integer(psb_ipk_), intent(in), optional :: lidx(:) + integer(psb_ipk_) :: is, im + integer(psb_ipk_) :: i, 
lip, nrow, ncol + integer(psb_lpk_) :: mglob, ip, nxt, tlip + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: me, np, ith, err_act + character(len=20) :: name,ch_err + logical, volatile :: isLoopValid + logical, parameter :: do_timings=.false. - if (present(mask)) then - if (size(mask) < size(idx)) then - info = -1 - return - end if - end if - - if (present(lidx)) then - if (size(lidx) < size(idx)) then - info = -1 - return - end if - end if + info = psb_success_ + name = 'hash_g2lv2_ins' + call psb_erractionsave(err_act) + ctxt = idxmap%get_ctxt() + call psb_info(ctxt, me, np) + is = size(idxin) + is = min(is,size(idxout)) mglob = idxmap%get_gr() nrow = idxmap%get_lr() +<<<<<<< HEAD !write(0,*) me,name,' before loop ',psb_errstatus_fatal() #if defined(OPENMP) isLoopValid = .true. @@ -951,17 +1084,24 @@ contains if (.not. isLoopValid) goto 9999 #else !!$ else if (.not.use_openmp) then +======= + !write(0,*)me, name, ':', present(lidx),present(mask),idxmap%is_bld() +>>>>>>> development isLoopValid = .true. if (idxmap%is_bld()) then if (present(lidx)) then if (present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(lidx,mask,name,me,is,idxin,idxout,ins_lck,mglob,idxmap,ncol,nrow,psb_laddsz) & + ! $ o m p private(i,ip,lip,tlip,nxt,info) & + ! 
$ o m p reduction(.AND.:isLoopValid) do i = 1, is ncol = idxmap%get_lc() if (mask(i)) then - ip = idx(i) + ip = idxin(i) if ((ip < 1 ).or.(ip>mglob) ) then - idx(i) = -1 + idxout(i) = -1 cycle endif call hash_inner_cnv(ip,lip,idxmap%hashvmask,& @@ -969,7 +1109,7 @@ contains if (lip < 0) then nxt = lidx(i) if (nxt <= nrow) then - idx(i) = -1 + idxout(i) = -1 cycle endif call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) @@ -978,9 +1118,11 @@ contains if (nxt == tlip) then ncol = max(ncol,nxt) call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) + & pad=-1_psb_lpk_) if (info /= psb_success_) then - !write(0,*) 'Error spot' + !write(0,*) 'Error spot' + write(0,*)'Problem 5:',info,lip,size(idxmap%loc_to_glob) + info = lip call psb_errpush(psb_err_from_subroutine_ai_,name,& &a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. @@ -995,20 +1137,25 @@ contains isLoopValid = .false. end if end if - idx(i) = lip + idxout(i) = lip info = psb_success_ else - idx(i) = -1 + idxout(i) = -1 end if enddo + ! $ o m p end parallel do else if (.not.present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(lidx,name,me,is,idxin,idxout,ins_lck,mglob,idxmap,ncol,nrow,psb_laddsz) & + ! $ o m p private(i,ip,lip,tlip,nxt,info) & + ! 
$ o m p reduction(.AND.:isLoopValid) do i = 1, is ncol = idxmap%get_lc() - ip = idx(i) + ip = idxin(i) if ((ip < 1 ).or.(ip>mglob)) then - idx(i) = -1 + idxout(i) = -1 cycle endif call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& @@ -1016,7 +1163,7 @@ contains if (lip < 0) then nxt = lidx(i) if (nxt <= nrow) then - idx(i) = -1 + idxout(i) = -1 cycle endif call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) @@ -1026,10 +1173,11 @@ contains if (nxt == lip) then ncol = max(nxt,ncol) call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) + & pad=-1_psb_lpk_) if (info /= psb_success_) then - info=1 !write(0,*) 'Error spot' + write(0,*)'Problem 6:',info,lip,size(idxmap%loc_to_glob) + info = lip call psb_errpush(psb_err_from_subroutine_ai_,name,& &a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. @@ -1044,39 +1192,37 @@ contains isLoopValid = .false. end if end if - idx(i) = lip + idxout(i) = lip info = psb_success_ enddo + ! $ o m p end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then + ncol = idxmap%get_lc() + call hash_inner_cnv(is,idxin,idxout,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,nrm=ncol, mask=mask) + ! 
write(0,*) me,' v2 after hash_inner_cnv ',idx(1:is) do i = 1, is - if (mask(i)) then - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then - idx(i) = -1 - cycle - endif + if (mask(i).and.(idxout(i)<0)) then ncol = idxmap%get_lc() nxt = ncol + 1 - call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& - & idxmap%glb_lc,ncol) - if (lip < 0) then - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - lip = tlip - end if + ip = idxin(i) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip + !if (i==1) write(0,*) me,' v2 isrchins:',i,lip if (info >=0) then if (nxt == lip) then ncol = nxt call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) + & pad=-1_psb_lpk_) if (info /= psb_success_) then - info=1 - write(0,*) 'Error spot 5' + write(0,*)'Problem 7:',info,lip,size(idxmap%loc_to_glob) + info = lip call psb_errpush(psb_err_from_subroutine_ai_,name,& & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. @@ -1090,67 +1236,69 @@ contains & a_err='SearchInsKeyVal',i_err=(/info/)) isLoopValid = .false. end if - idx(i) = lip + idxout(i) = lip info = psb_success_ - else - idx(i) = -1 + else if (.not.mask(i)) then + idxout(i) = -1 end if enddo + ! 
write(0,*) me,' v2 after cleanup ',idx(1:is) else if (.not.present(mask)) then do i = 1, is ncol = idxmap%get_lc() - ip = idx(i) + ip = idxin(i) if ((ip < 1 ).or.(ip>mglob)) then - idx(i) = -1 + idxout(i) = -1 cycle endif nxt = ncol + 1 call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& & idxmap%glb_lc,ncol) - if (lip < 0) then + if (lip > 0) then + idxout(i) = lip + info = psb_success_ + else call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) lip = tlip - end if - if (info >=0) then - if (nxt == lip) then - ncol = nxt - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) - if (info /= psb_success_) then - info=1 - write(0,*) 'Error spot 6' - ch_err='psb_ensure_size' - call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err=ch_err,i_err=(/info,izero,izero,izero,izero/)) - isLoopValid = .false. + if (info >=0) then + if (nxt == lip) then + ncol = nxt + call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& + & pad=-1_psb_lpk_) + if (info /= psb_success_) then + write(0,*)'Problem 8:',info,lip,size(idxmap%loc_to_glob) + info = lip + ch_err='psb_ensure_size' + call psb_errpush(psb_err_from_subroutine_ai_,name,& + &a_err=ch_err,i_err=(/info,izero,izero,izero,izero/)) + isLoopValid = .false. - end if - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) - endif + end if + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(ncol) + endif + info = psb_success_ + else + ch_err='SearchInsKeyVal' + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err=ch_err,i_err=(/info,izero,izero,izero,izero/)) + isLoopValid = .false. + end if + idxout(i) = lip info = psb_success_ - else - ch_err='SearchInsKeyVal' - call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err=ch_err,i_err=(/info,izero,izero,izero,izero/)) - isLoopValid = .false. end if - idx(i) = lip - info = psb_success_ enddo end if end if else ! Wrong state - idx = -1 + idxout(:) = -1 info = -1 end if if (.not. 
isLoopValid) goto 9999 -#endif - !write(0,*) me,name,' after loop ',psb_errstatus_fatal() call psb_erractionrestore(err_act) return @@ -1158,46 +1306,13 @@ contains return - end subroutine hash_g2lv1_ins - - ! ################## END THESIS ######################### - - subroutine hash_g2lv2_ins(idxin,idxout,idxmap,info,mask,lidx) - use psb_realloc_mod - use psb_error_mod - implicit none - class(psb_hash_map), intent(inout) :: idxmap - integer(psb_lpk_), intent(in) :: idxin(:) - integer(psb_ipk_), intent(out) :: idxout(:) - integer(psb_ipk_), intent(out) :: info - logical, intent(in), optional :: mask(:) - integer(psb_ipk_), intent(in), optional :: lidx(:) - integer(psb_lpk_), allocatable :: tidx(:) - integer(psb_ipk_) :: is, im - - is = size(idxin) - im = min(is,size(idxout)) - !write(0,*) 'g2lv2_ins before realloc ',psb_errstatus_fatal() - call psb_realloc(im,tidx,info) - !write(0,*) 'g2lv2_ins after realloc ',psb_errstatus_fatal() - tidx(1:im) = idxin(1:im) - call idxmap%g2lip_ins(tidx(1:im),info,mask=mask,lidx=lidx) - idxout(1:im) = tidx(1:im) - if (is > im) then - write(0,*) 'g2lv2_ins err -3' - info = -3 - end if - end subroutine hash_g2lv2_ins + ! ################## END THESIS ######################### ! ! init from VL, with checks on input. ! subroutine hash_init_vl(idxmap,ctxt,vl,info) - use psb_penv_mod - use psb_error_mod - use psb_sort_mod - use psb_realloc_mod implicit none class(psb_hash_map), intent(inout) :: idxmap type(psb_ctxt_type), intent(in) :: ctxt @@ -1206,11 +1321,9 @@ contains ! 
To be implemented integer(psb_ipk_) :: iam, np integer(psb_ipk_) :: i, nlu, nl, int_err(5) - integer(psb_lpk_) :: m, nrt - integer(psb_lpk_), allocatable :: vlu(:) - integer(psb_lpk_), allocatable :: ix(:) + integer(psb_lpk_) :: m, nrt character(len=20), parameter :: name='hash_map_init_vl' - + real(psb_dpk_) :: t0, t1, t2,t3, t4, t5 info = 0 call psb_info(ctxt,iam,np) if (np < 0) then @@ -1218,7 +1331,6 @@ contains info = -1 return end if - nl = size(vl) m = maxval(vl(1:nl)) @@ -1226,51 +1338,18 @@ contains call psb_sum(ctxt,nrt) call psb_max(ctxt,m) - allocate(vlu(nl), ix(nl), stat=info) - if (info /= 0) then - info = -1 - return - end if - - do i=1,nl - if ((vl(i)<1).or.(vl(i)>m)) then - info = psb_err_entry_out_of_bounds_ - int_err(1) = i - int_err(2) = vl(i) - int_err(3) = nl - int_err(4) = m - exit - endif - vlu(i) = vl(i) - end do - if ((m /= nrt).and.(iam == psb_root_)) then write(psb_err_unit,*) trim(name),& - & ' Warning: globalcheck=.false., but there is a mismatch' + & ' Warning: we got to hash_init_vl but there is a mismatch' write(psb_err_unit,*) trim(name),& & ' : in the global sizes!',m,nrt end if - - call psb_msort(vlu,ix) - nlu = 1 - do i=2,nl - if (vlu(i) /= vlu(nlu)) then - nlu = nlu + 1 - vlu(nlu) = vlu(i) - ix(nlu) = ix(i) - end if - end do - call psb_msort(ix(1:nlu),vlu(1:nlu),flag=psb_sort_keep_idx_) - - nlu = nl - call hash_init_vlu(idxmap,ctxt,m,nlu,vlu,info) + call hash_init_vlu(idxmap,ctxt,m,nl,vl,info) end subroutine hash_init_vl subroutine hash_init_vg(idxmap,ctxt,vg,info) - use psb_penv_mod - use psb_error_mod implicit none class(psb_hash_map), intent(inout) :: idxmap type(psb_ctxt_type), intent(in) :: ctxt @@ -1327,10 +1406,6 @@ contains ! init from VL, with no checks on input ! 
subroutine hash_init_vlu(idxmap,ctxt,ntot,nl,vlu,info) - use psb_penv_mod - use psb_error_mod - use psb_sort_mod - use psb_realloc_mod implicit none class(psb_hash_map), intent(inout) :: idxmap type(psb_ctxt_type), intent(in) :: ctxt @@ -1354,7 +1429,7 @@ contains idxmap%global_cols = ntot idxmap%local_rows = nl idxmap%local_cols = nl - idxmap%ctxt = ctxt + idxmap%ctxt = ctxt idxmap%state = psb_desc_bld_ idxmap%mpic = psb_get_mpi_comm(ctxt) @@ -1383,10 +1458,6 @@ contains subroutine hash_bld_g2l_map(idxmap,info) - use psb_penv_mod - use psb_error_mod - use psb_sort_mod - use psb_realloc_mod implicit none class(psb_hash_map), intent(inout) :: idxmap integer(psb_ipk_), intent(out) :: info @@ -1488,10 +1559,6 @@ contains subroutine hash_asb(idxmap,info) - use psb_penv_mod - use psb_error_mod - use psb_realloc_mod - use psb_sort_mod implicit none class(psb_hash_map), intent(inout) :: idxmap integer(psb_ipk_), intent(out) :: info @@ -1514,7 +1581,7 @@ contains call psb_free(idxmap%hash,info) - + if (info /= 0) then write(0,*) 'Error from hash free', info return @@ -1537,12 +1604,12 @@ contains integer(psb_ipk_), intent(in) :: hashv(0:) integer(psb_lpk_), intent(inout) :: x integer(psb_ipk_), intent(in) :: nrm - integer(psb_ipk_) :: idx,nh,tmp,lb,ub,lm + integer(psb_ipk_) :: idx,nh,tmp integer(psb_lpk_) :: key, ih ! ! When a large descriptor is assembled the indices ! are kept in a (hashed) list of ordered lists. - ! Thus we first hash the index, then we do a binary search on the + ! Thus we first hash the index, then we do a search on the ! ordered sublist. The hashing is based on the low-order bits ! for a width of psb_hash_bits ! 
@@ -1551,25 +1618,7 @@ contains ih = iand(key,hashmask) idx = hashv(ih) nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then x = glb_lc(tmp,2) if (x > nrm) then @@ -1586,12 +1635,12 @@ contains integer(psb_lpk_), intent(in) :: hashmask, x, glb_lc(:,:) integer(psb_ipk_), intent(out) :: y integer(psb_ipk_), intent(in) :: nrm - integer(psb_ipk_) :: idx,nh,tmp,lb,ub,lm + integer(psb_ipk_) :: idx,nh,tmp integer(psb_lpk_) :: ih, key ! ! When a large descriptor is assembled the indices ! are kept in a (hashed) list of ordered lists. - ! Thus we first hash the index, then we do a binary search on the + ! Thus we first hash the index, then we do a search on the ! ordered sublist. The hashing is based on the low-order bits ! for a width of psb_hash_bits ! @@ -1600,25 +1649,7 @@ contains ih = iand(key,hashmask) idx = hashv(ih) nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then y = glb_lc(tmp,2) if (y > nrm) then @@ -1633,49 +1664,37 @@ contains subroutine hash_inner_cnv1(n,x,hashmask,hashv,glb_lc,mask,nrm) implicit none integer(psb_ipk_), intent(in) :: n, hashv(0:) + integer(psb_lpk_), intent(inout) :: x(:) integer(psb_lpk_), intent(in) :: glb_lc(:,:),hashmask logical, intent(in), optional :: mask(:) integer(psb_ipk_), intent(in), optional :: nrm - integer(psb_lpk_), intent(inout) :: x(:) - integer(psb_ipk_) :: i, nh,tmp,lb,ub,lm + integer(psb_ipk_) :: i, nh,tmp integer(psb_lpk_) :: ih, key, idx ! ! When a large descriptor is assembled the indices ! are kept in a (hashed) list of ordered lists. - ! Thus we first hash the index, then we do a binary search on the + ! Thus we first hash the index, then we do a search on the ! ordered sublist. The hashing is based on the low-order bits ! 
for a width of psb_hash_bits ! if (present(mask)) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(n,hashv,hashmask,x,glb_lc,nrm,mask) & !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(n,hashv,hashmask,x,glb_lc,nrm,mask) & + ! $ o m p private(i,key,idx,ih,nh,tmp,lb,ub,lm) +>>>>>>> development do i=1, n if (mask(i)) then key = x(i) ih = iand(key,hashmask) idx = hashv(ih) - nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then x(i) = glb_lc(tmp,2) if (present(nrm)) then @@ -1688,35 +1707,25 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else !$omp parallel do default(none) schedule(dynamic) & !$omp shared(n,hashv,hashmask,x,glb_lc,nrm) & !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) +======= + ! $ o m p end parallel do + else + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(n,hashv,hashmask,x,glb_lc,nrm) & + ! $ o m p private(i,key,idx,ih,nh,tmp,lb,ub,lm) +>>>>>>> development do i=1, n key = x(i) ih = iand(key,hashmask) idx = hashv(ih) - nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then x(i) = glb_lc(tmp,2) if (present(nrm)) then @@ -1728,7 +1737,11 @@ contains x(i) = tmp end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development end if end subroutine hash_inner_cnv1 @@ -1741,19 +1754,25 @@ contains integer(psb_lpk_), intent(in) :: x(:) integer(psb_ipk_), intent(out) :: y(:) - integer(psb_ipk_) :: i, idx,nh,tmp,lb,ub,lm + integer(psb_ipk_) :: i, idx,nh,tmp integer(psb_lpk_) :: ih, key ! ! When a large descriptor is assembled the indices ! 
are kept in a (hashed) list of ordered lists. - ! Thus we first hash the index, then we do a binary search on the + ! Thus we first hash the index, then we do a search on the ! ordered sublist. The hashing is based on the low-order bits ! for a width of psb_hash_bits ! if (present(mask)) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) & !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) & + ! $ o m p private(i,key,idx,ih,nh,tmp,lb,ub,lm) +>>>>>>> development do i=1, n if (mask(i)) then key = x(i) @@ -1762,26 +1781,8 @@ contains write(psb_err_unit,*) ' In inner cnv: ',ih,ubound(hashv) end if idx = hashv(ih) - nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then y(i) = glb_lc(tmp,2) if (present(nrm)) then @@ -1794,12 +1795,21 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else !$omp parallel do default(none) schedule(dynamic) & !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) & !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) +======= + ! $ o m p end parallel do + else + + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) & + ! 
$ o m p private(i,key,idx,ih,nh,tmp,lb,ub,lm) +>>>>>>> development do i=1, n key = x(i) ih = iand(key,hashmask) @@ -1808,25 +1818,7 @@ contains end if idx = hashv(ih) nh = hashv(ih+1) - hashv(ih) - if (nh > 0) then - tmp = -1 - lb = idx - ub = idx+nh-1 - do - if (lb>ub) exit - lm = (lb+ub)/2 - if (key == glb_lc(lm,1)) then - tmp = lm - exit - else if (key 0) then y(i) = glb_lc(tmp,2) if (present(nrm)) then @@ -1838,10 +1830,99 @@ contains y(i) = tmp end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development end if end subroutine hash_inner_cnv2 + function hash_srch_ipk(key,idx,nh,glb_lc) result(res) + integer(psb_lpk_), intent(in) :: key + integer(psb_lpk_), intent(in) :: glb_lc(:) + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_), intent(in) :: nh + integer(psb_ipk_) :: res + ! + integer(psb_ipk_) :: lb,ub,lm + res = -1 + if (nh > 0) then + if (nh <= seqsrchmax) then + ! + ! If the list is short, a sequential search is enough + ! + do lm=idx,idx+nh-1 + if (key == glb_lc(lm)) then + res = lm + exit + end if + end do + else + ! + ! Otherwise use binary + ! + lb = idx + ub = idx+nh-1 + do + if (lb>ub) exit + lm = (lb+ub)/2 + if (key == glb_lc(lm)) then + res = lm + exit + else if (key 0) then + if (nh <= seqsrchmax) then + ! + ! If the list is short, a sequential search is enough + ! + do lm=idx,idx+nh-1 + if (key == glb_lc(lm)) then + res = lm + exit + end if + end do + else + ! + ! Otherwise use binary + ! 
+ lb = idx + ub = idx+nh-1 + do + if (lb>ub) exit + lm = (lb+ub)/2 + if (key == glb_lc(lm)) then + res = lm + exit + else if (keynr) then @@ -1963,5 +2044,5 @@ contains return end subroutine hash_reinit - + end module psb_hash_map_mod diff --git a/base/modules/desc/psb_hash_mod.F90 b/base/modules/desc/psb_hash_mod.F90 index 18b1142d..c2839cf4 100644 --- a/base/modules/desc/psb_hash_mod.F90 +++ b/base/modules/desc/psb_hash_mod.F90 @@ -71,7 +71,7 @@ module psb_hash_mod integer(psb_ipk_), parameter :: HashNotFound = -256 interface psb_hashval -#if defined(IPK4) +#if defined(PSB_IPK4) function psb_c_hashval_32(key) bind(c) result(res) import psb_c_ipk_ implicit none @@ -79,7 +79,7 @@ module psb_hash_mod integer(psb_c_ipk_) :: res end function psb_c_hashval_32 #endif -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) function psb_c_hashval_64_32(key) bind(c) result(res) import psb_c_ipk_, psb_c_lpk_ implicit none @@ -87,7 +87,7 @@ module psb_hash_mod integer(psb_c_ipk_) :: res end function psb_c_hashval_64_32 #endif -#if defined(IPK8) +#if defined(PSB_IPK8) function psb_c_hashval_64(key) bind(c) result(res) import psb_c_ipk_ implicit none @@ -115,7 +115,7 @@ module psb_hash_mod module procedure psb_hash_lsearchkey end interface psb_hash_searchkey -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) interface psb_hash_init module procedure psb_hash_init_v, psb_hash_init_n end interface diff --git a/base/modules/desc/psb_indx_map_mod.F90 b/base/modules/desc/psb_indx_map_mod.F90 index fa3e5a69..11795373 100644 --- a/base/modules/desc/psb_indx_map_mod.F90 +++ b/base/modules/desc/psb_indx_map_mod.F90 @@ -153,7 +153,11 @@ module psb_indx_map_mod procedure, pass(idxmap) :: set_gci => base_set_gci procedure, pass(idxmap) :: set_grl => base_set_grl procedure, pass(idxmap) :: set_gcl => base_set_gcl +<<<<<<< HEAD #if defined(IPK4) && defined(LPK8) +======= +#if defined(PSB_IPK4) && defined(PSB_LPK8) +>>>>>>> development 
generic, public :: set_gr => set_grl, set_gri generic, public :: set_gc => set_gcl, set_gci #else @@ -165,7 +169,11 @@ module psb_indx_map_mod procedure, pass(idxmap) :: set_lci => base_set_lci procedure, pass(idxmap) :: set_lcl => base_set_lcl procedure, pass(idxmap) :: inc_lc => base_inc_lc +<<<<<<< HEAD #if defined(IPK4) && defined(LPK8) +======= +#if defined(PSB_IPK4) && defined(PSB_LPK8) +>>>>>>> development generic, public :: set_lr => set_lrl, set_lri generic, public :: set_lc => set_lcl, set_lci #else @@ -742,9 +750,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -768,9 +775,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) end subroutine base_l2gs2 @@ -792,9 +798,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return end subroutine base_l2gv1 @@ -816,9 +821,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. 
- call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -843,9 +847,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -869,9 +872,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) end subroutine base_ll2gs2 @@ -893,9 +895,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return end subroutine base_ll2gv1 @@ -917,9 +918,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -942,9 +942,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. 
- call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -968,9 +967,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -993,9 +991,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1019,13 +1016,11 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return - end subroutine base_g2lv2 subroutine base_lg2ls1(idx,idxmap,info,mask,owned) @@ -1044,9 +1039,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1070,9 +1064,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. 
- call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1095,9 +1088,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1121,9 +1113,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1146,9 +1137,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1172,9 +1162,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1198,9 +1187,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. 
- call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1224,9 +1212,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1249,9 +1236,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1275,9 +1261,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1300,10 +1285,9 @@ contains call psb_get_erraction(err_act) ! This is the base version. If we get here ! it means the derived class is incomplete, - ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + ! so we throw an error + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1327,9 +1311,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. 
- call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return @@ -1349,7 +1332,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& + info = psb_err_missing_override_method_ + call psb_errpush(info,& & name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) @@ -1394,9 +1378,9 @@ contains type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info + info = psb_success_ call idxmap%set_null() idxmap%ctxt = ctxt - info = 0 return end subroutine base_init_null @@ -1415,11 +1399,10 @@ contains call psb_get_erraction(err_act) ! This is the base version. If we get here ! it means the derived class is incomplete, - ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - - call psb_error_handler(err_act) + ! so we throw an error + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) + call psb_error_handler(err_act) return end subroutine base_init_vl @@ -1440,9 +1423,8 @@ contains ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return end subroutine base_clone @@ -1505,15 +1487,12 @@ contains character(len=20) :: name='base_reinit' logical, parameter :: debug=.false. - info = psb_success_ - call psb_get_erraction(err_act) ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. 
- call psb_errpush(psb_err_missing_override_method_,& - & name,a_err=idxmap%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=idxmap%get_fmt()) call psb_error_handler(err_act) return end subroutine base_reinit @@ -1530,6 +1509,7 @@ contains integer(psb_ipk_) :: me, np integer(psb_ipk_) :: i, j, nr, nc, nh + info = psb_success_ call psb_info(idxmap%ctxt,me,np) ! The idea here is to store only the halo part nr = idxmap%local_rows @@ -1552,6 +1532,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh + info = psb_success_ nh = size(idxmap%halo_owner) !v = idxmap%halo_owner(1:nh) call psb_safe_ab_cpy(idxmap%halo_owner,v,info) @@ -1568,6 +1549,8 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, j, nr, nc, nh + + info = psb_success_ nr = idxmap%local_rows nc = idxmap%local_cols nc = min(idxmap%local_cols, (nr+psb_size(idxmap%halo_owner))) @@ -1597,6 +1580,8 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, j, nr, nc, nh, sz + + info = psb_success_ nr = idxmap%local_rows nc = min(idxmap%local_cols, (nr+psb_size(idxmap%halo_owner))) sz = min(size(xin),size(xout)) diff --git a/base/modules/desc/psb_list_map_mod.F90 b/base/modules/desc/psb_list_map_mod.F90 index 913145da..ba3ebb33 100644 --- a/base/modules/desc/psb_list_map_mod.F90 +++ b/base/modules/desc/psb_list_map_mod.F90 @@ -179,9 +179,15 @@ contains if (present(mask)) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idx,idxmap,owned_) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,idxmap,owned_) & + ! 
$ o m p private(i) +>>>>>>> development do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then @@ -194,12 +200,21 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idx,idxmap,owned_) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (.not.present(mask)) then + + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idx,idxmap,owned_) & + ! $ o m p private(i) +>>>>>>> development do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -210,7 +225,11 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development end if @@ -305,9 +324,15 @@ contains if (present(mask)) then if (idxmap%is_valid()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,is,idx,idxmap,owned_) & !$omp private(i,ix) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,idxmap,owned_) & + ! $ o m p private(i,ix) +>>>>>>> development do i=1,is if (mask(i)) then if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -319,7 +344,11 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development else idx(1:is) = -1 info = -1 @@ -328,9 +357,15 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(is,idx,idxmap,owned_) & !$omp private(i,ix) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idx,idxmap,owned_) & + ! 
$ o m p private(i,ix) +>>>>>>> development do i=1, is if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idx(i)) @@ -340,7 +375,11 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development else idx(1:is) = -1 info = -1 @@ -380,9 +419,15 @@ contains if (present(mask)) then if (idxmap%is_valid()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,is,idxin,idxout,idxmap,owned_) & !$omp private(i,ix) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idxin,idxout,idxmap,owned_) & + ! $ o m p private(i,ix) +>>>>>>> development do i=1,is if (mask(i)) then if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -394,7 +439,11 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development else idxout(1:is) = -1 info = -1 @@ -403,9 +452,15 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(is,idxin,idxout,idxmap,owned_) & !$omp private(i,ix) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(is,idxin,idxout,idxmap,owned_) & + ! $ o m p private(i,ix) +>>>>>>> development do i=1, is if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idxin(i)) @@ -415,7 +470,11 @@ contains idxout(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! 
$ o m p end parallel do +>>>>>>> development else idxout(1:is) = -1 info = -1 @@ -528,7 +587,7 @@ contains if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idx(i)) if (ix < 0) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP CRITICAL(LISTINS) ix = idxmap%glob_to_loc(idx(i)) if (ix < 0) then @@ -564,16 +623,22 @@ contains else if (.not.present(mask)) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,is,idx,idxmap,laddsz,lidx) & !$omp private(i,ix,info) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,is,idx,idxmap,laddsz,lidx) & + ! $ o m p private(i,ix,info) +>>>>>>> development ! $ o m p reduction(.AND.:isLoopValid) do i=1, is if (info /= 0) cycle if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idx(i)) if (ix < 0) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP CRITICAL(LISTINS) ix = idxmap%glob_to_loc(idx(i)) if (ix < 0) then @@ -606,7 +671,11 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! 
$ o m p end parallel do +>>>>>>> development end if else if (.not.present(lidx)) then @@ -617,7 +686,7 @@ contains if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idx(i)) if (ix < 0) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP CRITICAL(LISTINS) ix = idxmap%glob_to_loc(idx(i)) if (ix < 0) then @@ -658,7 +727,7 @@ contains if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idx(i)) if (ix < 0) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP CRITICAL(LISTINS) ix = idxmap%glob_to_loc(idx(i)) if (ix < 0) then @@ -745,7 +814,7 @@ contains if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idxin(i)) if (ix < 0) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP CRITICAL(LISTINS) ix = idxmap%glob_to_loc(idxin(i)) if (ix < 0) then @@ -786,7 +855,7 @@ contains if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idxin(i)) if (ix < 0) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP CRITICAL(LISTINS) ix = idxmap%glob_to_loc(idxin(i)) if (ix < 0) then @@ -829,7 +898,7 @@ contains if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idxin(i)) if (ix < 0) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP CRITICAL(LISTINS) ix = idxmap%glob_to_loc(idxin(i)) if (ix < 0) then @@ -870,7 +939,7 @@ contains if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idxin(i)) if (ix < 0) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP CRITICAL(LISTINS) ix = idxmap%glob_to_loc(idxin(i)) if (ix < 0) then diff --git a/base/modules/desc/psb_repl_map_mod.F90 b/base/modules/desc/psb_repl_map_mod.F90 index f68ae3b8..32d856d0 100644 --- a/base/modules/desc/psb_repl_map_mod.F90 +++ b/base/modules/desc/psb_repl_map_mod.F90 @@ -332,9 +332,15 @@ contains if (present(mask)) then if (idxmap%is_asb()) then +<<<<<<< HEAD !$omp parallel do default(none) 
schedule(dynamic) & !$omp shared(mask,idx,idxmap,is) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(static) & + ! $ o m p shared(mask,idx,idxmap,is) & + ! $ o m p private(i) +>>>>>>> development do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -344,11 +350,19 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idx,idxmap,is) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (idxmap%is_valid()) then + ! $ o m p parallel do default(none) schedule(static) & + ! $ o m p shared(mask,idx,idxmap,is) & + ! $ o m p private(i) +>>>>>>> development do i=1,is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -359,7 +373,11 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development else idx(1:is) = -1 info = -1 @@ -368,9 +386,15 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idx,idxmap,is) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(static) & + ! $ o m p shared(idx,idxmap,is) & + ! $ o m p private(i) +>>>>>>> development do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -378,11 +402,19 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idx,idxmap,is) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (idxmap%is_valid()) then + ! $ o m p parallel do default(none) schedule(static) & + ! $ o m p shared(idx,idxmap,is) & + ! $ o m p private(i) +>>>>>>> development do i=1,is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! 
do nothing @@ -390,7 +422,11 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development else idx(1:is) = -1 info = -1 @@ -433,9 +469,15 @@ contains if (present(mask)) then if (idxmap%is_asb()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idxin,idxout,idxmap,im) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,idxmap,im) & + ! $ o m p private(i) +>>>>>>> development do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -445,11 +487,19 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idxin,idxout,idxmap,im) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (idxmap%is_valid()) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,idxmap,im) & + ! $ o m p private(i) +>>>>>>> development do i=1,im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -459,7 +509,11 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development else idxout(1:im) = -1 info = -1 @@ -468,9 +522,15 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idxin,idxout,idxmap,im) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,idxmap,im) & + ! 
$ o m p private(i) +>>>>>>> development do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -478,11 +538,19 @@ contains idxout(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idxin,idxout,idxmap,im) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (idxmap%is_valid()) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,idxmap,im) & + ! $ o m p private(i) +>>>>>>> development do i=1,im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -490,7 +558,11 @@ contains idxout(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development else idxout(1:im) = -1 info = -1 @@ -597,9 +669,15 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idx,lidx,is,idxmap) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,lidx,is,idxmap) & + ! $ o m p private(i) +>>>>>>> development do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -609,11 +687,19 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idx,lidx,is,idxmap) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (.not.present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,lidx,is,idxmap) & + ! $ o m p private(i) +>>>>>>> development do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! 
do nothing @@ -621,6 +707,7 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do end if else if (.not.present(lidx)) then @@ -628,6 +715,15 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idx,is,idxmap) & !$omp private(i) +======= + ! $ o m p end parallel do + end if + else if (.not.present(lidx)) then + if (present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idx,is,idxmap) & + ! $ o m p private(i) +>>>>>>> development do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -637,11 +733,19 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idx,is,idxmap) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (.not.present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idx,is,idxmap) & + ! $ o m p private(i) +>>>>>>> development do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -649,7 +753,11 @@ contains idx(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! $ o m p end parallel do +>>>>>>> development end if end if else @@ -697,9 +805,15 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then +<<<<<<< HEAD !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idxin,idxout,im,idxmap) & !$omp private(i) +======= + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,im,idxmap) & + ! 
$ o m p private(i) +>>>>>>> development do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -709,11 +823,19 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idxin,idxout,im,idxmap) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (.not.present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,im,idxmap) & + ! $ o m p private(i) +>>>>>>> development do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -721,6 +843,7 @@ contains idxout(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do end if else if (.not.present(lidx)) then @@ -728,6 +851,15 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,idxin,idxout,im,idxmap) & !$omp private(i) +======= + ! $ o m p end parallel do + end if + else if (.not.present(lidx)) then + if (present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(mask,idxin,idxout,im,idxmap) & + ! $ o m p private(i) +>>>>>>> development do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -737,11 +869,19 @@ contains end if end if end do +<<<<<<< HEAD !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(idxin,idxout,im,idxmap) & !$omp private(i) +======= + ! $ o m p end parallel do + else if (.not.present(mask)) then + ! $ o m p parallel do default(none) schedule(dynamic) & + ! $ o m p shared(idxin,idxout,im,idxmap) & + ! $ o m p private(i) +>>>>>>> development do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -749,7 +889,11 @@ contains idxout(i) = -1 end if end do +<<<<<<< HEAD !$omp end parallel do +======= + ! 
$ o m p end parallel do +>>>>>>> development end if end if else diff --git a/base/modules/fakempi.c b/base/modules/fakempi.c deleted file mode 100644 index 7d56938f..00000000 --- a/base/modules/fakempi.c +++ /dev/null @@ -1,393 +0,0 @@ -#include -#include -#include -#include "psb_internals.h" - - -#ifdef LowerUnderscore -#define mpi_wtime mpi_wtime_ -#define mpi_send mpi_send_ -#define mpi_isend mpi_isend_ -#define mpi_irecv mpi_irecv_ -#define mpi_wait mpi_wait_ -#define mpi_alltoall mpi_alltoall_ -#define mpi_alltoallv mpi_alltoallv_ -#define mpi_gather mpi_gather_ -#define mpi_gatherv mpi_gatherv_ -#define mpi_allgather mpi_allgather_ -#define mpi_allgatherv mpi_allgatherv_ -#define mpi_scatterv mpi_scatterv_ -#define mpi_scatter mpi_scatter_ -#endif -#ifdef LowerDoubleUnderscore -#define mpi_wtime mpi_wtime__ -#define mpi_send mpi_send__ -#define mpi_isend mpi_isend__ -#define mpi_irecv mpi_irecv__ -#define mpi_wait mpi_wait__ -#define mpi_alltoall mpi_alltoall__ -#define mpi_alltoallv mpi_alltoallv__ -#define mpi_gather mpi_gather__ -#define mpi_gatherv mpi_gatherv__ -#define mpi_allgather mpi_allgather__ -#define mpi_allgatherv mpi_allgatherv__ -#define mpi_scatterv mpi_scatterv__ -#define mpi_scatter mpi_scatter__ -#endif -#ifdef LowerCase -#define mpi_wtime mpi_wtime -#define mpi_send mpi_send -#define mpi_isend mpi_isend -#define mpi_irecv mpi_irecv -#define mpi_wait mpi_wait -#define mpi_alltoall mpi_alltoall -#define mpi_alltoallv mpi_alltoallv -#define mpi_gather mpi_gather -#define mpi_gatherv mpi_gatherv -#define mpi_allgather mpi_allgather -#define mpi_allgatherv mpi_allgatherv -#define mpi_scatterv mpi_scatterv -#define mpi_scatter mpi_scatter -#endif -#ifdef UpperUnderscore -#define mpi_wtime MPI_WTIME_ -#define mpi_send MPI_SEND_ -#define mpi_isend MPI_ISEND_ -#define mpi_irecv MPI_IRECV_ -#define mpi_wait MPI_WAIT_ -#define mpi_alltoall MPI_ALLTOALL_ -#define mpi_alltoallv MPI_ALLTOALLV_ -#define mpi_gather MPI_GATHER_ -#define mpi_gatherv 
MPI_GATHERV_ -#define mpi_allgather MPI_ALLGATHER_ -#define mpi_allgatherv MPI_ALLGATHERV_ -#define mpi_scatterv MPI_SCATTERV_ -#define mpi_scatter MPI_SCATTER_ -#endif -#ifdef UpperDoubleUnderscore -#define mpi_wtime MPI_WTIME__ -#define mpi_send MPI_SEND__ -#define mpi_isend MPI_ISEND__ -#define mpi_irecv MPI_IRECV__ -#define mpi_wait MPI_WAIT__ -#define mpi_alltoall MPI_ALLTOALL__ -#define mpi_alltoallv MPI_ALLTOALLV__ -#define mpi_gather MPI_GATHER__ -#define mpi_gatherv MPI_GATHERV__ -#define mpi_allgather MPI_ALLGATHER__ -#define mpi_allgatherv MPI_ALLGATHERV__ -#define mpi_scatterv MPI_SCATTERV__ -#define mpi_scatter MPI_SCATTER__ -#endif -#ifdef UpperCase -#define mpi_wtime MPI_WTIME -#define mpi_send MPI_SEND -#define mpi_isend MPI_ISEND -#define mpi_irecv MPI_IRECV -#define mpi_wait MPI_WAIT -#define mpi_alltoall MPI_ALLTOALL -#define mpi_alltoallv MPI_ALLTOALLV -#define mpi_gather MPI_GATHER -#define mpi_gatherv MPI_GATHERV -#define mpi_allgather MPI_ALLGATHER -#define mpi_allgatherv MPI_ALLGATHERV -#define mpi_scatterv MPI_SCATTERV -#define mpi_scatter MPI_SCATTER -#endif - -#define mpi_integer 1 -#define mpi_integer8 2 -#define mpi_real 3 -#define mpi_double 4 -#define mpi_complex 5 -#define mpi_double_complex 6 - -double mpi_wtime() -{ - struct timeval tt; - struct timezone tz; - double temp; - if (gettimeofday(&tt,&tz) != 0) { - fprintf(stderr,"Fatal error for gettimeofday ??? 
\n"); - temp=0.0; - } else { - temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; - } - return(temp); -} - - -void mpi_wait(int *request, int* status, int *ierr) - -{ - *ierr = 0; - return; -} -void mpi_send(void* buf, int* count, int* datatype, - int *dest, int *tag, int *comm, int *ierr) -{ - *ierr = 0; - return; -} -void mpi_isend(void* buf, int* count, int* datatype, - int *dest, int *tag, int *comm, int *request, - int *ierr) -{ - *ierr = 0; - return; -} -void mpi_irecv(void* buf, int* count, int* datatype, - int *src, int *tag, int *comm, int *request, - int *ierr) -{ - *ierr = 0; - return; -} - - -void mpi_alltoall(void* sdb, int* sdc, int* sdt, - void* rvb, int* rvc, int* rvt, int* comm, int* ierr) -{ - int i,j,k; - - if (*sdt == mpi_integer) { - memcpy(rvb,sdb, (*sdc)*sizeof(int)); - } - if (*sdt == mpi_integer8) { - memcpy(rvb,sdb, (*sdc)*sizeof(long long)); - } - if (*sdt == mpi_real) { - memcpy(rvb,sdb, (*sdc)*sizeof(float)); - } - if (*sdt == mpi_double) { - memcpy(rvb,sdb, (*sdc)*sizeof(double)); - } - if (*sdt == mpi_complex) { - memcpy(rvb,sdb, (*sdc)*2*sizeof(float)); - } - if (*sdt == mpi_double_complex) { - memcpy(rvb,sdb, (*sdc)*2*sizeof(double)); - } - *ierr = 0; -} - -void mpi_alltoallv(void* sdb, int* sdc, int* sdspl, int* sdt, - void* rvb, int* rvc, int* rdspl, int* rvt, int* comm, int* ierr) -{ - int i,j,k; - - - if (*sdt == mpi_integer) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(int)), - (void *)((char *)sdb+sdspl[0]*sizeof(int)),(*sdc)*sizeof(int)); - } - if (*sdt == mpi_integer8) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(long long)), - (void *)((char *)sdb+sdspl[0]*sizeof(long long)),(*sdc)*sizeof(long long)); - } - if (*sdt == mpi_real) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(float)), - (void *)((char *)sdb+sdspl[0]*sizeof(float)),(*sdc)*sizeof(float)); - } - if (*sdt == mpi_double) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(double)), - (void *)((char *)sdb+sdspl[0]*sizeof(double)),(*sdc)*sizeof(double)); 
- } - if (*sdt == mpi_complex) { - memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(float)), - (void *)((char *)sdb+sdspl[0]*2*sizeof(float)),(*sdc)*2*sizeof(float)); - } - if (*sdt == mpi_double_complex) { - memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(double)), - (void *)((char *)sdb+sdspl[0]*2*sizeof(double)),(*sdc)*2*sizeof(double)); - } - *ierr = 0; -} - - -void mpi_gather(void* sdb, int* sdc, int* sdt, - void* rvb, int* rvc, int* rvt, int *root, int* comm, int* ierr) -{ - int i,j,k; - - if (*sdt == mpi_integer) { - memcpy(rvb,sdb, (*sdc)*sizeof(int)); - } - if (*sdt == mpi_integer8) { - memcpy(rvb,sdb, (*sdc)*sizeof(long long)); - } - if (*sdt == mpi_real) { - memcpy(rvb,sdb, (*sdc)*sizeof(float)); - } - if (*sdt == mpi_double) { - memcpy(rvb,sdb, (*sdc)*sizeof(double)); - } - if (*sdt == mpi_complex) { - memcpy(rvb,sdb, (*sdc)*2*sizeof(float)); - } - if (*sdt == mpi_double_complex) { - memcpy(rvb,sdb, (*sdc)*2*sizeof(double)); - } - *ierr = 0; -} - - -void mpi_gatherv(void* sdb, int* sdc, int* sdt, - void* rvb, int* rvc, int* rdspl, - int* rvt, int* comm, int *root, int* ierr) -{ - int i,j,k; - - if (*sdt == mpi_integer) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(int)), - (void *)((char *)sdb),(*sdc)*sizeof(int)); - } - if (*sdt == mpi_integer8) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(long long)), - (void *)((char *)sdb),(*sdc)*sizeof(long long)); - } - if (*sdt == mpi_real) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(float)), - (void *)((char *)sdb),(*sdc)*sizeof(float)); - } - if (*sdt == mpi_double) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(double)), - (void *)((char *)sdb),(*sdc)*sizeof(double)); - } - if (*sdt == mpi_complex) { - memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(float)), - (void *)((char *)sdb),(*sdc)*2*sizeof(float)); - } - if (*sdt == mpi_double_complex) { - memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(double)), - (void *)((char *)sdb),(*sdc)*2*sizeof(double)); - } - - - *ierr = 0; -} - - -void mpi_scatter(void* sdb, 
int* sdc, int* sdt, - void* rvb, int* rvc, int* rvt, int *root, int* comm, int* ierr) -{ - int i,j,k; - - if (*sdt == mpi_integer) { - memcpy(rvb,sdb, (*sdc)*sizeof(int)); - } - if (*sdt == mpi_integer8) { - memcpy(rvb,sdb, (*sdc)*sizeof(long long)); - } - if (*sdt == mpi_real) { - memcpy(rvb,sdb, (*sdc)*sizeof(float)); - } - if (*sdt == mpi_double) { - memcpy(rvb,sdb, (*sdc)*sizeof(double)); - } - if (*sdt == mpi_complex) { - memcpy(rvb,sdb, (*sdc)*2*sizeof(float)); - } - if (*sdt == mpi_double_complex) { - memcpy(rvb,sdb, (*sdc)*2*sizeof(double)); - } - *ierr = 0; -} - - -void mpi_scatterv(void* sdb, int* sdc, int* sdt, - void* rvb, int* rvc, int* rdspl, - int* rvt, int* comm, int *root, int* ierr) -{ - int i,j,k; - - if (*sdt == mpi_integer) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(int)), - (void *)((char *)sdb),(*sdc)*sizeof(int)); - } - if (*sdt == mpi_integer8) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(long long)), - (void *)((char *)sdb),(*sdc)*sizeof(long long)); - } - if (*sdt == mpi_real) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(float)), - (void *)((char *)sdb),(*sdc)*sizeof(float)); - } - if (*sdt == mpi_double) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(double)), - (void *)((char *)sdb),(*sdc)*sizeof(double)); - } - if (*sdt == mpi_complex) { - memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(float)), - (void *)((char *)sdb),(*sdc)*2*sizeof(float)); - } - if (*sdt == mpi_double_complex) { - memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(double)), - (void *)((char *)sdb),(*sdc)*2*sizeof(double)); - } - - - *ierr = 0; -} - - -void mpi_allgather(void* sdb, int* sdc, int* sdt, - void* rvb, int* rvc, int* rvt, int* comm, int* ierr) -{ - int i,j,k; - - if (*sdt == mpi_integer) { - memcpy(rvb,sdb, (*sdc)*sizeof(int)); - } - if (*sdt == mpi_integer8) { - memcpy(rvb,sdb, (*sdc)*sizeof(long long)); - } - if (*sdt == mpi_real) { - memcpy(rvb,sdb, (*sdc)*sizeof(float)); - } - if (*sdt == mpi_double) { - memcpy(rvb,sdb, (*sdc)*sizeof(double)); - 
} - if (*sdt == mpi_complex) { - memcpy(rvb,sdb, (*sdc)*2*sizeof(float)); - } - if (*sdt == mpi_double_complex) { - memcpy(rvb,sdb, (*sdc)*2*sizeof(double)); - } - *ierr = 0; -} - -void mpi_allgatherv(void* sdb, int* sdc, int* sdt, - void* rvb, int* rvc, int* rdspl, - int* rvt, int* comm, int* ierr) -{ - int i,j,k; - - if (*sdt == mpi_integer) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(int)), - (void *)((char *)sdb),(*sdc)*sizeof(int)); - } - if (*sdt == mpi_integer8) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(long long)), - (void *)((char *)sdb),(*sdc)*sizeof(long long)); - } - if (*sdt == mpi_real) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(float)), - (void *)((char *)sdb),(*sdc)*sizeof(float)); - } - if (*sdt == mpi_double) { - memcpy((void *)((char *)rvb+rdspl[0]*sizeof(double)), - (void *)((char *)sdb),(*sdc)*sizeof(double)); - } - if (*sdt == mpi_complex) { - memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(float)), - (void *)((char *)sdb),(*sdc)*2*sizeof(float)); - } - if (*sdt == mpi_double_complex) { - memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(double)), - (void *)((char *)sdb),(*sdc)*2*sizeof(double)); - } - - - *ierr = 0; -} diff --git a/base/modules/penv/psi_c_collective_mod.F90 b/base/modules/penv/psi_c_collective_mod.F90 index 6da00176..0e04fd5e 100644 --- a/base/modules/penv/psi_c_collective_mod.F90 +++ b/base/modules/penv/psi_c_collective_mod.F90 @@ -32,8 +32,17 @@ module psi_c_collective_mod use psi_penv_mod use psb_desc_const_mod + use iso_c_binding + interface psb_gather + module procedure psb_cgather_s, psb_cgather_v + end interface psb_gather + + interface psb_gatherv + module procedure psb_cgatherv_v + end interface + interface psb_sum module procedure psb_csums, psb_csumv, psb_csumm end interface @@ -76,16 +85,260 @@ contains + ! + ! gather + ! 
+ subroutine psb_cgather_s(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + complex(psb_spk_), intent(inout) :: dat, resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(1) = dat +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,1,psb_mpi_c_spk_,& + & resv,1,psb_mpi_c_spk_,icomm,info) + else + call mpi_gather(dat,1,psb_mpi_c_spk_,& + & resv,1,psb_mpi_c_spk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,1,psb_mpi_c_spk_,& + & resv,1,psb_mpi_c_spk_,icomm,request,info) + else + call mpi_igather(dat,1,psb_mpi_c_spk_,& + & resv,1,psb_mpi_c_spk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_cgather_s + + subroutine psb_cgather_v(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + complex(psb_spk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,size(dat),psb_mpi_c_spk_,& + & resv,size(dat),psb_mpi_c_spk_,icomm,info) + else + call mpi_gather(dat,size(dat),psb_mpi_c_spk_,& + & resv,size(dat),psb_mpi_c_spk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,size(dat),psb_mpi_c_spk_,& + & resv,size(dat),psb_mpi_c_spk_,icomm,request,info) + else + call mpi_igather(dat,size(dat),psb_mpi_c_spk_,& + & resv,size(dat),psb_mpi_c_spk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_cgather_v + + subroutine psb_cgatherv_v(ctxt,dat,resv,szs,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + complex(psb_spk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_mpk_), intent(in), optional :: szs(:) + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info,i + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + integer(psb_mpk_), allocatable :: displs(:) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_allgatherv(dat,size(dat),psb_mpi_c_spk_,& + & resv,szs,displs,psb_mpi_c_spk_,icomm,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_gatherv(dat,size(dat),psb_mpi_c_spk_,& + & resv,szs,displs,psb_mpi_c_spk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_iallgatherv(dat,size(dat),psb_mpi_c_spk_,& + & resv,szs,displs,psb_mpi_c_spk_,icomm,request,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_igatherv(dat,size(dat),psb_mpi_c_spk_,& + & resv,szs,displs,psb_mpi_c_spk_,root_,icomm,request,info) + endif + + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_cgatherv_v + + + ! ! SUM ! 
subroutine psb_csums(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -99,7 +352,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -124,20 +377,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_c_spk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_spk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_c_spk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_c_spk_,mpi_sum,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_c_spk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -147,11 +410,11 @@ contains end subroutine psb_csums subroutine psb_csumv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -165,7 +428,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call 
psb_info(ctxt,iam,np) if (present(root)) then @@ -190,20 +453,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_spk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_spk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -214,11 +487,11 @@ contains end subroutine psb_csumv subroutine psb_csumm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -232,7 +505,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -258,20 +531,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_spk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call 
mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_spk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_spk_,mpi_sum,root_, icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_spk_,mpi_sum,root_, icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_sum,root_, icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -285,11 +568,11 @@ contains ! subroutine psb_camxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -303,7 +586,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -328,20 +611,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_c_spk_,mpi_camx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_c_spk_,mpi_camx_op,icomm,request,info) else - 
call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -352,11 +645,11 @@ contains end subroutine psb_camxs subroutine psb_camxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -370,7 +663,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -395,20 +688,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& psb_mpi_c_spk_,mpi_camx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_camx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call 
mpi_wait(request,status,info) @@ -419,11 +722,11 @@ contains end subroutine psb_camxv subroutine psb_camxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -437,7 +740,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -463,20 +766,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_camx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_camx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& & psb_mpi_c_spk_,mpi_camx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -489,11 +802,11 @@ contains ! AMN: Minimum Absolute Value ! 
subroutine psb_camns(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -507,7 +820,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -532,20 +845,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_c_spk_,mpi_camn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_c_spk_,mpi_camn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -556,11 +879,11 @@ contains end subroutine psb_camns subroutine psb_camnv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -574,7 +897,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if 
!defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -599,20 +922,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_camn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_camn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -623,11 +956,11 @@ contains end subroutine psb_camnv subroutine psb_camnm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -641,7 +974,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -667,20 +1000,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_camn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & 
psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_spk_,mpi_camn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_spk_,mpi_camn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -694,11 +1037,11 @@ contains ! BCAST Broadcast ! subroutine psb_cbcasts(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -713,7 +1056,7 @@ contains logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -749,11 +1092,11 @@ contains end subroutine psb_cbcasts subroutine psb_cbcastv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -767,7 +1110,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -804,11 +1147,11 @@ contains end subroutine psb_cbcastv subroutine psb_cbcastm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD 
+#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -822,7 +1165,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -866,11 +1209,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_cscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -878,13 +1221,13 @@ contains integer(psb_mpk_), intent(inout), optional :: request complex(psb_spk_), intent(inout) :: dat complex(psb_spk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -901,26 +1244,28 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,1,& + call mpi_scan(dat_,dat,1,& & psb_mpi_c_spk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,1,& + call mpi_iscan(dat_,dat,1,& & psb_mpi_c_spk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_cscan_sums subroutine psb_cexscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -928,14 +1273,14 @@ contains integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request complex(psb_spk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -952,41 +1297,44 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,1,& + call mpi_exscan(dat_,dat,1,& & psb_mpi_c_spk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,1,& + call mpi_iexscan(dat_,dat,1,& & psb_mpi_c_spk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #else dat = czero #endif end subroutine psb_cexscan_sums subroutine psb_cscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt complex(psb_spk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync - -#if !defined(SERIAL_MPI) + complex(psb_spk_), allocatable :: dat_(:) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1003,40 +1351,43 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,size(dat),& + call mpi_scan(dat_,dat,size(dat),& & psb_mpi_c_spk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_spk_,mpi_sum,icomm,request,info) + call mpi_iscan(dat_,dat,size(dat),& + & psb_mpi_c_spk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_cscan_sumv subroutine psb_cexscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt complex(psb_spk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - complex(psb_spk_), allocatable :: dat_(:) - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + complex(psb_spk_), allocatable :: dat_(:) -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1053,18 +1404,19 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,size(dat),& + call mpi_exscan(dat_,dat,size(dat),& & psb_mpi_c_spk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_spk_,mpi_sum,icomm,request,info) + call mpi_iexscan(dat_,dat,size(dat),& + & psb_mpi_c_spk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if - + info = minfo #else dat = czero #endif @@ -1079,7 +1431,9 @@ contains integer(psb_mpk_), intent(in) :: bsdindx(:), brvindx(:), sdsz(:), rvsz(:) type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz + integer(psb_ipk_) :: i,j,k, ipx, idx + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1108,11 +1462,11 @@ contains subroutine psb_c_m_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif complex(psb_spk_), intent(in) :: valsnd(:) @@ -1124,9 +1478,11 @@ contains integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1191,11 +1547,11 @@ contains subroutine psb_c_e_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif complex(psb_spk_), intent(in) :: valsnd(:) @@ -1207,9 +1563,11 @@ contains 
integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1271,6 +1629,5 @@ contains Enddo end subroutine psb_c_e_simple_triad_a2av - end module psi_c_collective_mod diff --git a/base/modules/penv/psi_c_p2p_mod.F90 b/base/modules/penv/psi_c_p2p_mod.F90 index 245a98b6..183584ff 100644 --- a/base/modules/penv/psi_c_p2p_mod.F90 +++ b/base/modules/penv/psi_c_p2p_mod.F90 @@ -44,11 +44,11 @@ module psi_c_p2p_mod contains subroutine psb_csnds(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -56,7 +56,7 @@ contains integer(psb_mpk_), intent(in) :: dst complex(psb_spk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else allocate(dat_(1), stat=info) @@ -67,11 +67,11 @@ contains subroutine psb_csndv(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -80,7 +80,7 @@ contains complex(psb_spk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else allocate(dat_(size(dat)), stat=info) dat_(:) = dat(:) @@ -91,11 +91,11 @@ contains subroutine psb_csndm(ctxt,dat,dst,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -106,7 +106,7 @@ contains integer(psb_ipk_) :: i,j,k,m_,n_ integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else if (present(m)) then m_ = m @@ -127,11 +127,11 @@ contains end subroutine psb_csndm subroutine psb_crcvs(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -139,7 +139,7 @@ contains integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else icomm = psb_get_mpi_comm(ctxt) @@ -150,11 +150,11 @@ contains subroutine psb_crcvv(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -163,7 +163,7 @@ contains complex(psb_spk_), allocatable :: dat_(:) integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else icomm = psb_get_mpi_comm(ctxt) call mpi_recv(dat,size(dat),psb_mpi_c_spk_,src,psb_complex_tag,icomm,status,info) @@ -174,11 +174,11 @@ contains subroutine psb_crcvm(ctxt,dat,src,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -189,7 +189,7 @@ contains integer(psb_mpk_) :: info ,m_,n_, ld, mp_rcv_type integer(psb_mpk_) :: i,j,k integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! What should we do here?? 
#else if (present(m)) then diff --git a/base/modules/penv/psi_collective_mod.F90 b/base/modules/penv/psi_collective_mod.F90 index 2a669c53..4ca131d4 100644 --- a/base/modules/penv/psi_collective_mod.F90 +++ b/base/modules/penv/psi_collective_mod.F90 @@ -53,11 +53,11 @@ module psi_collective_mod contains subroutine psb_hbcasts(ctxt,dat,root,length) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -66,7 +66,7 @@ contains integer(psb_mpk_) :: iam, np, root_,length_,info, icomm -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) if (present(root)) then root_ = root else @@ -86,11 +86,11 @@ contains end subroutine psb_hbcasts subroutine psb_hbcastv(ctxt,dat,root) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -100,7 +100,7 @@ contains integer(psb_mpk_) :: iam, np, root_, icomm integer(psb_mpk_) :: length_,info, size_ -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) if (present(root)) then root_ = root else @@ -117,11 +117,11 @@ contains end subroutine psb_hbcastv subroutine psb_lbcasts(ctxt,dat,root) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -130,7 +130,7 @@ contains integer(psb_mpk_) :: iam, np, root_,info, icomm -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) if (present(root)) then root_ = root else @@ -145,11 +145,11 @@ contains end subroutine psb_lbcasts subroutine psb_lallreduceand(ctxt,dat,rec) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -158,7 +158,7 @@ contains integer(psb_mpk_) :: iam, np, info, icomm -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call 
psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(rec)) then @@ -172,11 +172,11 @@ end subroutine psb_lallreduceand subroutine psb_lbcastv(ctxt,dat,root) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -185,7 +185,7 @@ end subroutine psb_lallreduceand integer(psb_mpk_) :: iam, np, root_,info, icomm -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) if (present(root)) then root_ = root else @@ -201,11 +201,11 @@ end subroutine psb_lallreduceand #if defined(SHORT_INTEGERS) subroutine psb_i2sums(ctxt,dat,root) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_mpk_), intent(in) :: ctxt @@ -216,7 +216,7 @@ end subroutine psb_lallreduceand integer(psb_mpk_) :: iam, np, info, icomm integer(psb_ipk_) :: iinfo -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -239,11 +239,11 @@ end subroutine psb_lallreduceand subroutine psb_i2sumv(ctxt,dat,root) use psb_realloc_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_mpk_), intent(in) :: ctxt @@ -254,7 +254,7 @@ end subroutine psb_lallreduceand integer(psb_mpk_) :: iam, np, info, icomm integer(psb_ipk_) :: iinfo -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -283,11 +283,11 @@ end subroutine psb_lallreduceand subroutine psb_i2summ(ctxt,dat,root) use psb_realloc_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_mpk_), intent(in) :: ctxt @@ -299,7 +299,7 @@ end subroutine psb_lallreduceand integer(psb_ipk_) :: iinfo -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then diff --git 
a/base/modules/penv/psi_d_collective_mod.F90 b/base/modules/penv/psi_d_collective_mod.F90 index 1f91c69e..bf66dc47 100644 --- a/base/modules/penv/psi_d_collective_mod.F90 +++ b/base/modules/penv/psi_d_collective_mod.F90 @@ -32,6 +32,7 @@ module psi_d_collective_mod use psi_penv_mod use psb_desc_const_mod + use iso_c_binding interface psb_max module procedure psb_dmaxs, psb_dmaxv, psb_dmaxm @@ -45,6 +46,14 @@ module psi_d_collective_mod module procedure psb_d_nrm2s, psb_d_nrm2v end interface psb_nrm2 + interface psb_gather + module procedure psb_dgather_s, psb_dgather_v + end interface psb_gather + + interface psb_gatherv + module procedure psb_dgatherv_v + end interface + interface psb_sum module procedure psb_dsums, psb_dsumv, psb_dsumm end interface @@ -93,11 +102,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_dmaxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -110,8 +119,9 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + real(psb_dpk_) :: dat_ -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -134,20 +144,29 @@ contains collective_start = .false. collective_end = .false. 
end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_r_dpk_,mpi_max,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_r_dpk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_r_dpk_,mpi_max,root_,icomm,info) + if (iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_r_dpk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_r_dpk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_dpk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info) + if (iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -157,11 +176,11 @@ contains end subroutine psb_dmaxs subroutine psb_dmaxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -174,9 +193,10 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + real(psb_dpk_) :: dat_(1) ! This is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -200,21 +220,31 @@ contains collective_end = .false. 
end if if (collective_sync) then - if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + if (root_ == -1) then + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_max,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -225,11 +255,11 @@ contains end subroutine psb_dmaxv subroutine psb_dmaxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -242,9 +272,10 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + real(psb_dpk_) :: dat_(1,1) ! this is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -268,28 +299,37 @@ contains collective_start = .false. collective_end = .false. 
end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_max,root_,icomm,info) - endif + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,info) + endif + end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) end if end if - #endif end subroutine psb_dmaxm @@ -297,11 +337,11 @@ contains ! MIN: Minimum Value ! 
subroutine psb_dmins(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -315,7 +355,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -340,18 +380,27 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_r_dpk_,mpi_min,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_r_dpk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_r_dpk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_r_dpk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_r_dpk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_dpk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -362,11 +411,11 @@ contains end subroutine psb_dmins subroutine psb_dminv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -380,7 +429,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call 
psb_info(ctxt,iam,np) if (present(root)) then @@ -405,20 +454,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -429,11 +488,11 @@ contains end subroutine psb_dminv subroutine psb_dminm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -447,7 +506,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -473,20 +532,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_min,root_,icomm,info) 
+ if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -502,11 +571,11 @@ contains ! ! !!!!!!!!!!!! subroutine psb_d_nrm2s(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -520,7 +589,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -545,20 +614,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_r_dpk_,mpi_dnrm2_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & 
psb_mpi_r_dpk_,mpi_dnrm2_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -569,11 +648,11 @@ contains end subroutine psb_d_nrm2s subroutine psb_d_nrm2v(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -587,7 +666,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -612,20 +691,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),psb_mpi_r_dpk_,& + call mpi_allreduce(mpi_in_place,dat,size(dat),psb_mpi_r_dpk_,& & mpi_dnrm2_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),psb_mpi_r_dpk_,& - & mpi_dnrm2_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),psb_mpi_r_dpk_,& + & mpi_dnrm2_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),psb_mpi_r_dpk_,& + & mpi_dnrm2_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_dnrm2_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info) + else + call 
mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_dnrm2_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -636,16 +725,260 @@ contains end subroutine psb_d_nrm2v + ! + ! gather + ! + subroutine psb_dgather_s(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + real(psb_dpk_), intent(inout) :: dat, resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(1) = dat +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,1,psb_mpi_r_dpk_,& + & resv,1,psb_mpi_r_dpk_,icomm,info) + else + call mpi_gather(dat,1,psb_mpi_r_dpk_,& + & resv,1,psb_mpi_r_dpk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,1,psb_mpi_r_dpk_,& + & resv,1,psb_mpi_r_dpk_,icomm,request,info) + else + call mpi_igather(dat,1,psb_mpi_r_dpk_,& + & resv,1,psb_mpi_r_dpk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_dgather_s + + subroutine psb_dgather_v(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + real(psb_dpk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,size(dat),psb_mpi_r_dpk_,& + & resv,size(dat),psb_mpi_r_dpk_,icomm,info) + else + call mpi_gather(dat,size(dat),psb_mpi_r_dpk_,& + & resv,size(dat),psb_mpi_r_dpk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,size(dat),psb_mpi_r_dpk_,& + & resv,size(dat),psb_mpi_r_dpk_,icomm,request,info) + else + call mpi_igather(dat,size(dat),psb_mpi_r_dpk_,& + & resv,size(dat),psb_mpi_r_dpk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_dgather_v + + subroutine psb_dgatherv_v(ctxt,dat,resv,szs,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + real(psb_dpk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_mpk_), intent(in), optional :: szs(:) + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info,i + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + integer(psb_mpk_), allocatable :: displs(:) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_allgatherv(dat,size(dat),psb_mpi_r_dpk_,& + & resv,szs,displs,psb_mpi_r_dpk_,icomm,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_gatherv(dat,size(dat),psb_mpi_r_dpk_,& + & resv,szs,displs,psb_mpi_r_dpk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_iallgatherv(dat,size(dat),psb_mpi_r_dpk_,& + & resv,szs,displs,psb_mpi_r_dpk_,icomm,request,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_igatherv(dat,size(dat),psb_mpi_r_dpk_,& + & resv,szs,displs,psb_mpi_r_dpk_,root_,icomm,request,info) + endif + + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_dgatherv_v + + + ! ! SUM ! 
subroutine psb_dsums(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -659,7 +992,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -684,20 +1017,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_r_dpk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_sum,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_dpk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -707,11 +1050,11 @@ contains end subroutine psb_dsums subroutine psb_dsumv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -725,7 +1068,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call 
psb_info(ctxt,iam,np) if (present(root)) then @@ -750,20 +1093,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -774,11 +1127,11 @@ contains end subroutine psb_dsumv subroutine psb_dsumm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -792,7 +1145,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -818,20 +1171,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call 
mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_sum,root_, icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_sum,root_, icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_sum,root_, icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -845,11 +1208,11 @@ contains ! subroutine psb_damxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -863,7 +1226,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -888,20 +1251,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_r_dpk_,mpi_damx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_dpk_,mpi_damx_op,icomm,request,info) 
else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -912,11 +1285,11 @@ contains end subroutine psb_damxs subroutine psb_damxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -930,7 +1303,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -955,20 +1328,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& psb_mpi_r_dpk_,mpi_damx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info) + end if end if else if (collective_end) 
then call mpi_wait(request,status,info) @@ -979,11 +1362,11 @@ contains end subroutine psb_damxv subroutine psb_damxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -997,7 +1380,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1023,20 +1406,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1049,11 +1442,11 @@ contains ! AMN: Minimum Absolute Value ! 
subroutine psb_damns(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1067,7 +1460,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1092,20 +1485,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_r_dpk_,mpi_damn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_dpk_,mpi_damn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1116,11 +1519,11 @@ contains end subroutine psb_damns subroutine psb_damnv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1134,7 +1537,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if 
!defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1159,20 +1562,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1183,11 +1596,11 @@ contains end subroutine psb_damnv subroutine psb_damnm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1201,7 +1614,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1227,20 +1640,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & 
psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_dpk_,mpi_damn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_damn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1254,11 +1677,11 @@ contains ! BCAST Broadcast ! subroutine psb_dbcasts(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1273,7 +1696,7 @@ contains logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1309,11 +1732,11 @@ contains end subroutine psb_dbcasts subroutine psb_dbcastv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1327,7 +1750,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1364,11 +1787,11 @@ contains end subroutine psb_dbcastv subroutine psb_dbcastm(ctxt,dat,root,mode,request) -#ifdef 
MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1382,7 +1805,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1426,11 +1849,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_dscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1438,13 +1861,13 @@ contains integer(psb_mpk_), intent(inout), optional :: request real(psb_dpk_), intent(inout) :: dat real(psb_dpk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1461,26 +1884,28 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,1,& + call mpi_scan(dat_,dat,1,& & psb_mpi_r_dpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,1,& + call mpi_iscan(dat_,dat,1,& & psb_mpi_r_dpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_dscan_sums subroutine psb_dexscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1488,14 +1913,14 @@ contains integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request real(psb_dpk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1512,41 +1937,44 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,1,& + call mpi_exscan(dat_,dat,1,& & psb_mpi_r_dpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,1,& + call mpi_iexscan(dat_,dat,1,& & psb_mpi_r_dpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #else dat = dzero #endif end subroutine psb_dexscan_sums subroutine psb_dscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt real(psb_dpk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync - -#if !defined(SERIAL_MPI) + real(psb_dpk_), allocatable :: dat_(:) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1563,40 +1991,43 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,size(dat),& + call mpi_scan(dat_,dat,size(dat),& & psb_mpi_r_dpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_sum,icomm,request,info) + call mpi_iscan(dat_,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_dscan_sumv subroutine psb_dexscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt real(psb_dpk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - real(psb_dpk_), allocatable :: dat_(:) - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + real(psb_dpk_), allocatable :: dat_(:) -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1613,18 +2044,19 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,size(dat),& + call mpi_exscan(dat_,dat,size(dat),& & psb_mpi_r_dpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_dpk_,mpi_sum,icomm,request,info) + call mpi_iexscan(dat_,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if - + info = minfo #else dat = dzero #endif @@ -1639,7 +2071,9 @@ contains integer(psb_mpk_), intent(in) :: bsdindx(:), brvindx(:), sdsz(:), rvsz(:) type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz + integer(psb_ipk_) :: i,j,k, ipx, idx + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1668,11 +2102,11 @@ contains subroutine psb_d_m_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif real(psb_dpk_), intent(in) :: valsnd(:) @@ -1684,9 +2118,11 @@ contains integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1751,11 +2187,11 @@ contains subroutine psb_d_e_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif real(psb_dpk_), intent(in) :: valsnd(:) @@ -1767,9 +2203,11 @@ contains 
integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1831,6 +2269,5 @@ contains Enddo end subroutine psb_d_e_simple_triad_a2av - end module psi_d_collective_mod diff --git a/base/modules/penv/psi_d_p2p_mod.F90 b/base/modules/penv/psi_d_p2p_mod.F90 index 614c6802..280f328d 100644 --- a/base/modules/penv/psi_d_p2p_mod.F90 +++ b/base/modules/penv/psi_d_p2p_mod.F90 @@ -44,11 +44,11 @@ module psi_d_p2p_mod contains subroutine psb_dsnds(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -56,7 +56,7 @@ contains integer(psb_mpk_), intent(in) :: dst real(psb_dpk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else allocate(dat_(1), stat=info) @@ -67,11 +67,11 @@ contains subroutine psb_dsndv(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -80,7 +80,7 @@ contains real(psb_dpk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else allocate(dat_(size(dat)), stat=info) dat_(:) = dat(:) @@ -91,11 +91,11 @@ contains subroutine psb_dsndm(ctxt,dat,dst,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -106,7 +106,7 @@ contains integer(psb_ipk_) :: i,j,k,m_,n_ integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else if (present(m)) then m_ = m @@ -127,11 +127,11 @@ contains end subroutine psb_dsndm subroutine psb_drcvs(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -139,7 +139,7 @@ contains integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else icomm = psb_get_mpi_comm(ctxt) @@ -150,11 +150,11 @@ contains subroutine psb_drcvv(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -163,7 +163,7 @@ contains real(psb_dpk_), allocatable :: dat_(:) integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else icomm = psb_get_mpi_comm(ctxt) call mpi_recv(dat,size(dat),psb_mpi_r_dpk_,src,psb_double_tag,icomm,status,info) @@ -174,11 +174,11 @@ contains subroutine psb_drcvm(ctxt,dat,src,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -189,7 +189,7 @@ contains integer(psb_mpk_) :: info ,m_,n_, ld, mp_rcv_type integer(psb_mpk_) :: i,j,k integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! What should we do here?? #else if (present(m)) then diff --git a/base/modules/penv/psi_e_collective_mod.F90 b/base/modules/penv/psi_e_collective_mod.F90 index 7f57f2a6..15c69864 100644 --- a/base/modules/penv/psi_e_collective_mod.F90 +++ b/base/modules/penv/psi_e_collective_mod.F90 @@ -32,6 +32,7 @@ module psi_e_collective_mod use psi_penv_mod use psb_desc_const_mod + use iso_c_binding interface psb_max module procedure psb_emaxs, psb_emaxv, psb_emaxm @@ -42,6 +43,14 @@ module psi_e_collective_mod end interface psb_min + interface psb_gather + module procedure psb_egather_s, psb_egather_v + end interface psb_gather + + interface psb_gatherv + module procedure psb_egatherv_v + end interface + interface psb_sum module procedure psb_esums, psb_esumv, psb_esumm end interface @@ -90,11 +99,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
subroutine psb_emaxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -107,8 +116,9 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_epk_) :: dat_ -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -131,20 +141,29 @@ contains collective_start = .false. collective_end = .false. end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_epk_,mpi_max,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_epk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_epk_,mpi_max,root_,icomm,info) + if (iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_epk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_epk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_epk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_epk_,mpi_max,root_,icomm,request,info) + if (iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_epk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_epk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -154,11 +173,11 @@ contains end subroutine psb_emaxs subroutine psb_emaxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -171,9 +190,10 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: 
status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_epk_) :: dat_(1) ! This is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -197,21 +217,31 @@ contains collective_end = .false. end if if (collective_sync) then - if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + if (root_ == -1) then + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_max,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -222,11 +252,11 @@ contains end subroutine psb_emaxv subroutine psb_emaxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -239,9 +269,10 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_epk_) :: dat_(1,1) ! 
this is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -265,28 +296,37 @@ contains collective_start = .false. collective_end = .false. end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_max,root_,icomm,info) - endif + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_max,root_,icomm,info) + endif + end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) end if end if - #endif end subroutine psb_emaxm @@ -294,11 +334,11 @@ contains ! MIN: Minimum Value ! 
subroutine psb_emins(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -312,7 +352,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -337,18 +377,27 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_epk_,mpi_min,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_epk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_epk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_epk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_epk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_epk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_epk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_epk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_epk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -359,11 +408,11 @@ contains end subroutine psb_emins subroutine psb_eminv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -377,7 +426,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) 
if (present(root)) then @@ -402,20 +451,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -426,11 +485,11 @@ contains end subroutine psb_eminv subroutine psb_eminm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -444,7 +503,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -470,20 +529,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call 
mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_min,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -494,16 +563,260 @@ contains + ! + ! gather + ! + subroutine psb_egather_s(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_epk_), intent(inout) :: dat, resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(1) = dat +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. 
+ collective_end = .false. + end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,1,psb_mpi_epk_,& + & resv,1,psb_mpi_epk_,icomm,info) + else + call mpi_gather(dat,1,psb_mpi_epk_,& + & resv,1,psb_mpi_epk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,1,psb_mpi_epk_,& + & resv,1,psb_mpi_epk_,icomm,request,info) + else + call mpi_igather(dat,1,psb_mpi_epk_,& + & resv,1,psb_mpi_epk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_egather_s + + subroutine psb_egather_v(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_epk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,size(dat),psb_mpi_epk_,& + & resv,size(dat),psb_mpi_epk_,icomm,info) + else + call mpi_gather(dat,size(dat),psb_mpi_epk_,& + & resv,size(dat),psb_mpi_epk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,size(dat),psb_mpi_epk_,& + & resv,size(dat),psb_mpi_epk_,icomm,request,info) + else + call mpi_igather(dat,size(dat),psb_mpi_epk_,& + & resv,size(dat),psb_mpi_epk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_egather_v + + subroutine psb_egatherv_v(ctxt,dat,resv,szs,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_epk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_mpk_), intent(in), optional :: szs(:) + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info,i + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + integer(psb_mpk_), allocatable :: displs(:) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_allgatherv(dat,size(dat),psb_mpi_epk_,& + & resv,szs,displs,psb_mpi_epk_,icomm,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_gatherv(dat,size(dat),psb_mpi_epk_,& + & resv,szs,displs,psb_mpi_epk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_iallgatherv(dat,size(dat),psb_mpi_epk_,& + & resv,szs,displs,psb_mpi_epk_,icomm,request,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_igatherv(dat,size(dat),psb_mpi_epk_,& + & resv,szs,displs,psb_mpi_epk_,root_,icomm,request,info) + endif + + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_egatherv_v + + + ! ! SUM ! 
subroutine psb_esums(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -517,7 +830,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -542,20 +855,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_epk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_epk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_epk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_epk_,mpi_sum,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_epk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_epk_,mpi_sum,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_epk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_epk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -565,11 +888,11 @@ contains end subroutine psb_esums subroutine psb_esumv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -583,7 +906,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if 
(present(root)) then @@ -608,20 +931,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -632,11 +965,11 @@ contains end subroutine psb_esumv subroutine psb_esumm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -650,7 +983,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -676,20 +1009,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & 
psb_mpi_epk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_sum,root_, icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_sum,root_, icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_sum,root_, icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -703,11 +1046,11 @@ contains ! subroutine psb_eamxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -721,7 +1064,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -746,20 +1089,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_epk_,mpi_eamx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_epk_,mpi_eamx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_epk_,mpi_eamx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & 
psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -770,11 +1123,11 @@ contains end subroutine psb_eamxs subroutine psb_eamxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -788,7 +1141,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -813,20 +1166,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& psb_mpi_epk_,mpi_eamx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_eamx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_eamx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -837,11 +1200,11 @@ contains 
end subroutine psb_eamxv subroutine psb_eamxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -855,7 +1218,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -881,20 +1244,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_eamx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_eamx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_eamx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& & psb_mpi_epk_,mpi_eamx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -907,11 +1280,11 @@ contains ! AMN: Minimum Absolute Value ! 
subroutine psb_eamns(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -925,7 +1298,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -950,20 +1323,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_epk_,mpi_eamn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_epk_,mpi_eamn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_epk_,mpi_eamn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -974,11 +1357,11 @@ contains end subroutine psb_eamns subroutine psb_eamnv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -992,7 +1375,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) 
call psb_info(ctxt,iam,np) if (present(root)) then @@ -1017,20 +1400,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_eamn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_eamn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_eamn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1041,11 +1434,11 @@ contains end subroutine psb_eamnv subroutine psb_eamnm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1059,7 +1452,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1085,20 +1478,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_eamn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_eamn_op,root_,icomm,info) + if(iam==root_) 
then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_epk_,mpi_eamn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_epk_,mpi_eamn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1112,11 +1515,11 @@ contains ! BCAST Broadcast ! subroutine psb_ebcasts(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1131,7 +1534,7 @@ contains logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1167,11 +1570,11 @@ contains end subroutine psb_ebcasts subroutine psb_ebcastv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1185,7 +1588,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1222,11 +1625,11 @@ contains end subroutine psb_ebcastv subroutine psb_ebcastm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H 
+#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1240,7 +1643,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1284,11 +1687,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_escan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1296,13 +1699,13 @@ contains integer(psb_mpk_), intent(inout), optional :: request integer(psb_epk_), intent(inout) :: dat integer(psb_epk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1319,26 +1722,28 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,1,& + call mpi_scan(dat_,dat,1,& & psb_mpi_epk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,1,& + call mpi_iscan(dat_,dat,1,& & psb_mpi_epk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_escan_sums subroutine psb_eexscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1346,14 +1751,14 @@ contains integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request integer(psb_epk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1370,41 +1775,44 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,1,& + call mpi_exscan(dat_,dat,1,& & psb_mpi_epk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,1,& + call mpi_iexscan(dat_,dat,1,& & psb_mpi_epk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #else dat = ezero #endif end subroutine psb_eexscan_sums subroutine psb_escan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_epk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync - -#if !defined(SERIAL_MPI) + integer(psb_epk_), allocatable :: dat_(:) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1421,40 +1829,43 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,size(dat),& + call mpi_scan(dat_,dat,size(dat),& & psb_mpi_epk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_sum,icomm,request,info) + call mpi_iscan(dat_,dat,size(dat),& + & psb_mpi_epk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_escan_sumv subroutine psb_eexscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_epk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_epk_), allocatable :: dat_(:) - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_epk_), allocatable :: dat_(:) -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1471,18 +1882,19 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,size(dat),& + call mpi_exscan(dat_,dat,size(dat),& & psb_mpi_epk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_epk_,mpi_sum,icomm,request,info) + call mpi_iexscan(dat_,dat,size(dat),& + & psb_mpi_epk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if - + info = minfo #else dat = ezero #endif @@ -1497,7 +1909,9 @@ contains integer(psb_mpk_), intent(in) :: bsdindx(:), brvindx(:), sdsz(:), rvsz(:) type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz + integer(psb_ipk_) :: i,j,k, ipx, idx + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1526,11 +1940,11 @@ contains subroutine psb_e_m_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_epk_), intent(in) :: valsnd(:) @@ -1542,9 +1956,11 @@ contains integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1609,11 +2025,11 @@ contains subroutine psb_e_e_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_epk_), intent(in) :: valsnd(:) @@ -1625,9 +2041,11 @@ contains 
integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1689,6 +2107,5 @@ contains Enddo end subroutine psb_e_e_simple_triad_a2av - end module psi_e_collective_mod diff --git a/base/modules/penv/psi_e_p2p_mod.F90 b/base/modules/penv/psi_e_p2p_mod.F90 index 7c54bbf9..f6c37d8a 100644 --- a/base/modules/penv/psi_e_p2p_mod.F90 +++ b/base/modules/penv/psi_e_p2p_mod.F90 @@ -44,11 +44,11 @@ module psi_e_p2p_mod contains subroutine psb_esnds(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -56,7 +56,7 @@ contains integer(psb_mpk_), intent(in) :: dst integer(psb_epk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else allocate(dat_(1), stat=info) @@ -67,11 +67,11 @@ contains subroutine psb_esndv(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -80,7 +80,7 @@ contains integer(psb_epk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else allocate(dat_(size(dat)), stat=info) dat_(:) = dat(:) @@ -91,11 +91,11 @@ contains subroutine psb_esndm(ctxt,dat,dst,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -106,7 +106,7 @@ contains integer(psb_ipk_) :: i,j,k,m_,n_ integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else if (present(m)) then m_ = m @@ -127,11 +127,11 @@ contains end subroutine psb_esndm subroutine psb_ercvs(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -139,7 +139,7 @@ contains integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else icomm = psb_get_mpi_comm(ctxt) @@ -150,11 +150,11 @@ contains subroutine psb_ercvv(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -163,7 +163,7 @@ contains integer(psb_epk_), allocatable :: dat_(:) integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else icomm = psb_get_mpi_comm(ctxt) call mpi_recv(dat,size(dat),psb_mpi_epk_,src,psb_int8_tag,icomm,status,info) @@ -174,11 +174,11 @@ contains subroutine psb_ercvm(ctxt,dat,src,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -189,7 +189,7 @@ contains integer(psb_mpk_) :: info ,m_,n_, ld, mp_rcv_type integer(psb_mpk_) :: i,j,k integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! What should we do here?? #else if (present(m)) then diff --git a/base/modules/penv/psi_i2_collective_mod.F90 b/base/modules/penv/psi_i2_collective_mod.F90 index bfe3bf35..7ca2de15 100644 --- a/base/modules/penv/psi_i2_collective_mod.F90 +++ b/base/modules/penv/psi_i2_collective_mod.F90 @@ -32,6 +32,7 @@ module psi_i2_collective_mod use psi_penv_mod use psb_desc_const_mod + use iso_c_binding interface psb_max module procedure psb_i2maxs, psb_i2maxv, psb_i2maxm @@ -42,6 +43,14 @@ module psi_i2_collective_mod end interface psb_min + interface psb_gather + module procedure psb_i2gather_s, psb_i2gather_v + end interface psb_gather + + interface psb_gatherv + module procedure psb_i2gatherv_v + end interface + interface psb_sum module procedure psb_i2sums, psb_i2sumv, psb_i2summ end interface @@ -90,11 +99,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
subroutine psb_i2maxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -107,8 +116,9 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_i2pk_) :: dat_ -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -131,20 +141,29 @@ contains collective_start = .false. collective_end = .false. end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_i2pk_,mpi_max,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_i2pk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_i2pk_,mpi_max,root_,icomm,info) + if (iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_i2pk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_i2pk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_i2pk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_i2pk_,mpi_max,root_,icomm,request,info) + if (iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_i2pk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_i2pk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -154,11 +173,11 @@ contains end subroutine psb_i2maxs subroutine psb_i2maxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -171,9 +190,10 @@ contains integer(psb_mpk_) :: icomm 
integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_i2pk_) :: dat_(1) ! This is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -197,21 +217,31 @@ contains collective_end = .false. end if if (collective_sync) then - if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + if (root_ == -1) then + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_max,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -222,11 +252,11 @@ contains end subroutine psb_i2maxv subroutine psb_i2maxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -239,9 +269,10 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_i2pk_) :: dat_(1,1) ! 
this is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -265,28 +296,37 @@ contains collective_start = .false. collective_end = .false. end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_max,root_,icomm,info) - endif + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_max,root_,icomm,info) + endif + end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) end if end if - #endif end subroutine psb_i2maxm @@ -294,11 +334,11 @@ contains ! MIN: Minimum Value ! 
subroutine psb_i2mins(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -312,7 +352,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -337,18 +377,27 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_i2pk_,mpi_min,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_i2pk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_i2pk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_i2pk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_i2pk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_i2pk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_i2pk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_i2pk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_i2pk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -359,11 +408,11 @@ contains end subroutine psb_i2mins subroutine psb_i2minv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -377,7 +426,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call 
psb_info(ctxt,iam,np) if (present(root)) then @@ -402,20 +451,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -426,11 +485,11 @@ contains end subroutine psb_i2minv subroutine psb_i2minm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -444,7 +503,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -470,20 +529,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_min,root_,icomm,info) + 
if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_min,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -494,16 +563,260 @@ contains + ! + ! gather + ! + subroutine psb_i2gather_s(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_i2pk_), intent(inout) :: dat, resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(1) = dat +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,1,psb_mpi_i2pk_,& + & resv,1,psb_mpi_i2pk_,icomm,info) + else + call mpi_gather(dat,1,psb_mpi_i2pk_,& + & resv,1,psb_mpi_i2pk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,1,psb_mpi_i2pk_,& + & resv,1,psb_mpi_i2pk_,icomm,request,info) + else + call mpi_igather(dat,1,psb_mpi_i2pk_,& + & resv,1,psb_mpi_i2pk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_i2gather_s + + subroutine psb_i2gather_v(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_i2pk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,size(dat),psb_mpi_i2pk_,& + & resv,size(dat),psb_mpi_i2pk_,icomm,info) + else + call mpi_gather(dat,size(dat),psb_mpi_i2pk_,& + & resv,size(dat),psb_mpi_i2pk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,size(dat),psb_mpi_i2pk_,& + & resv,size(dat),psb_mpi_i2pk_,icomm,request,info) + else + call mpi_igather(dat,size(dat),psb_mpi_i2pk_,& + & resv,size(dat),psb_mpi_i2pk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_i2gather_v + + subroutine psb_i2gatherv_v(ctxt,dat,resv,szs,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_i2pk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_mpk_), intent(in), optional :: szs(:) + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info,i + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + integer(psb_mpk_), allocatable :: displs(:) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_allgatherv(dat,size(dat),psb_mpi_i2pk_,& + & resv,szs,displs,psb_mpi_i2pk_,icomm,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_gatherv(dat,size(dat),psb_mpi_i2pk_,& + & resv,szs,displs,psb_mpi_i2pk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_iallgatherv(dat,size(dat),psb_mpi_i2pk_,& + & resv,szs,displs,psb_mpi_i2pk_,icomm,request,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_igatherv(dat,size(dat),psb_mpi_i2pk_,& + & resv,szs,displs,psb_mpi_i2pk_,root_,icomm,request,info) + endif + + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_i2gatherv_v + + + ! ! SUM ! 
subroutine psb_i2sums(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -517,7 +830,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -542,20 +855,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_i2pk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_i2pk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_i2pk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_i2pk_,mpi_sum,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_i2pk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -565,11 +888,11 @@ contains end subroutine psb_i2sums subroutine psb_i2sumv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -583,7 +906,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) 
if (present(root)) then @@ -608,20 +931,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -632,11 +965,11 @@ contains end subroutine psb_i2sumv subroutine psb_i2summ(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -650,7 +983,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -676,20 +1009,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & 
psb_mpi_i2pk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_sum,root_, icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_sum,root_, icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_sum,root_, icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -703,11 +1046,11 @@ contains ! subroutine psb_i2amxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -721,7 +1064,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -746,20 +1089,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_i2pk_,mpi_i2amx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_i2pk_,mpi_i2amx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & 
psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -770,11 +1123,11 @@ contains end subroutine psb_i2amxs subroutine psb_i2amxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -788,7 +1141,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -813,20 +1166,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& psb_mpi_i2pk_,mpi_i2amx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -837,11 
+1200,11 @@ contains end subroutine psb_i2amxv subroutine psb_i2amxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -855,7 +1218,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -881,20 +1244,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -907,11 +1280,11 @@ contains ! AMN: Minimum Absolute Value ! 
subroutine psb_i2amns(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -925,7 +1298,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -950,20 +1323,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_i2pk_,mpi_i2amn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_i2pk_,mpi_i2amn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -974,11 +1357,11 @@ contains end subroutine psb_i2amns subroutine psb_i2amnv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -992,7 +1375,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if 
!defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1017,20 +1400,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1041,11 +1434,11 @@ contains end subroutine psb_i2amnv subroutine psb_i2amnm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1059,7 +1452,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1085,20 +1478,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & 
psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_i2pk_,mpi_i2amn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_i2pk_,mpi_i2amn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1112,11 +1515,11 @@ contains ! BCAST Broadcast ! subroutine psb_i2bcasts(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1131,7 +1534,7 @@ contains logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1167,11 +1570,11 @@ contains end subroutine psb_i2bcasts subroutine psb_i2bcastv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1185,7 +1588,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1222,11 +1625,11 @@ contains end subroutine psb_i2bcastv subroutine psb_i2bcastm(ctxt,dat,root,mode,request) 
-#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1240,7 +1643,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1284,11 +1687,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_i2scan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1296,13 +1699,13 @@ contains integer(psb_mpk_), intent(inout), optional :: request integer(psb_i2pk_), intent(inout) :: dat integer(psb_i2pk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1319,26 +1722,28 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,1,& + call mpi_scan(dat_,dat,1,& & psb_mpi_i2pk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,1,& + call mpi_iscan(dat_,dat,1,& & psb_mpi_i2pk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_i2scan_sums subroutine psb_i2exscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1346,14 +1751,14 @@ contains integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request integer(psb_i2pk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1370,41 +1775,44 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,1,& + call mpi_exscan(dat_,dat,1,& & psb_mpi_i2pk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,1,& + call mpi_iexscan(dat_,dat,1,& & psb_mpi_i2pk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #else dat = i2zero #endif end subroutine psb_i2exscan_sums subroutine psb_i2scan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_i2pk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync - -#if !defined(SERIAL_MPI) + integer(psb_i2pk_), allocatable :: dat_(:) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1421,40 +1829,43 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,size(dat),& + call mpi_scan(dat_,dat,size(dat),& & psb_mpi_i2pk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_sum,icomm,request,info) + call mpi_iscan(dat_,dat,size(dat),& + & psb_mpi_i2pk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_i2scan_sumv subroutine psb_i2exscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_i2pk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_i2pk_), allocatable :: dat_(:) - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_i2pk_), allocatable :: dat_(:) -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1471,18 +1882,19 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,size(dat),& + call mpi_exscan(dat_,dat,size(dat),& & psb_mpi_i2pk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_i2pk_,mpi_sum,icomm,request,info) + call mpi_iexscan(dat_,dat,size(dat),& + & psb_mpi_i2pk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if - + info = minfo #else dat = i2zero #endif @@ -1497,7 +1909,9 @@ contains integer(psb_mpk_), intent(in) :: bsdindx(:), brvindx(:), sdsz(:), rvsz(:) type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz + integer(psb_ipk_) :: i,j,k, ipx, idx + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1526,11 +1940,11 @@ contains subroutine psb_i2_m_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_i2pk_), intent(in) :: valsnd(:) @@ -1542,9 +1956,11 @@ contains integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1609,11 +2025,11 @@ contains subroutine psb_i2_e_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_i2pk_), intent(in) :: valsnd(:) @@ -1625,9 +2041,11 @@ contains 
integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1689,6 +2107,5 @@ contains Enddo end subroutine psb_i2_e_simple_triad_a2av - end module psi_i2_collective_mod diff --git a/base/modules/penv/psi_i2_p2p_mod.F90 b/base/modules/penv/psi_i2_p2p_mod.F90 index ad80cb44..4d2d3385 100644 --- a/base/modules/penv/psi_i2_p2p_mod.F90 +++ b/base/modules/penv/psi_i2_p2p_mod.F90 @@ -44,11 +44,11 @@ module psi_i2_p2p_mod contains subroutine psb_i2snds(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -56,7 +56,7 @@ contains integer(psb_mpk_), intent(in) :: dst integer(psb_i2pk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else allocate(dat_(1), stat=info) @@ -67,11 +67,11 @@ contains subroutine psb_i2sndv(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -80,7 +80,7 @@ contains integer(psb_i2pk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else allocate(dat_(size(dat)), stat=info) dat_(:) = dat(:) @@ -91,11 +91,11 @@ contains subroutine psb_i2sndm(ctxt,dat,dst,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -106,7 +106,7 @@ contains integer(psb_ipk_) :: i,j,k,m_,n_ integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else if (present(m)) then m_ = m @@ -127,11 +127,11 @@ contains end subroutine psb_i2sndm subroutine psb_i2rcvs(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -139,7 +139,7 @@ contains integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else icomm = psb_get_mpi_comm(ctxt) @@ -150,11 +150,11 @@ contains subroutine psb_i2rcvv(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -163,7 +163,7 @@ contains integer(psb_i2pk_), allocatable :: dat_(:) integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else icomm = psb_get_mpi_comm(ctxt) call mpi_recv(dat,size(dat),psb_mpi_i2pk_,src,psb_int2_tag,icomm,status,info) @@ -174,11 +174,11 @@ contains subroutine psb_i2rcvm(ctxt,dat,src,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -189,7 +189,7 @@ contains integer(psb_mpk_) :: info ,m_,n_, ld, mp_rcv_type integer(psb_mpk_) :: i,j,k integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! What should we do here?? #else if (present(m)) then diff --git a/base/modules/penv/psi_m_collective_mod.F90 b/base/modules/penv/psi_m_collective_mod.F90 index 09995175..0e858c03 100644 --- a/base/modules/penv/psi_m_collective_mod.F90 +++ b/base/modules/penv/psi_m_collective_mod.F90 @@ -32,6 +32,7 @@ module psi_m_collective_mod use psi_penv_mod use psb_desc_const_mod + use iso_c_binding interface psb_max module procedure psb_mmaxs, psb_mmaxv, psb_mmaxm @@ -42,6 +43,14 @@ module psi_m_collective_mod end interface psb_min + interface psb_gather + module procedure psb_mgather_s, psb_mgather_v + end interface psb_gather + + interface psb_gatherv + module procedure psb_mgatherv_v + end interface + interface psb_sum module procedure psb_msums, psb_msumv, psb_msumm end interface @@ -90,11 +99,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
subroutine psb_mmaxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -107,8 +116,9 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_mpk_) :: dat_ -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -131,20 +141,29 @@ contains collective_start = .false. collective_end = .false. end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_mpk_,mpi_max,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_mpk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_mpk_,mpi_max,root_,icomm,info) + if (iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_mpk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_mpk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_mpk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_mpk_,mpi_max,root_,icomm,request,info) + if (iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_mpk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_mpk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -154,11 +173,11 @@ contains end subroutine psb_mmaxs subroutine psb_mmaxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -171,9 +190,10 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: 
status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_mpk_) :: dat_(1) ! This is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -197,21 +217,31 @@ contains collective_end = .false. end if if (collective_sync) then - if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + if (root_ == -1) then + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_max,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -222,11 +252,11 @@ contains end subroutine psb_mmaxv subroutine psb_mmaxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -239,9 +269,10 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_mpk_) :: dat_(1,1) ! 
this is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -265,28 +296,37 @@ contains collective_start = .false. collective_end = .false. end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_max,root_,icomm,info) - endif + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_max,root_,icomm,info) + endif + end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) end if end if - #endif end subroutine psb_mmaxm @@ -294,11 +334,11 @@ contains ! MIN: Minimum Value ! 
subroutine psb_mmins(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -312,7 +352,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -337,18 +377,27 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_mpk_,mpi_min,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_mpk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_mpk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_mpk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_mpk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_mpk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_mpk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_mpk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_mpk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -359,11 +408,11 @@ contains end subroutine psb_mmins subroutine psb_mminv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -377,7 +426,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) 
if (present(root)) then @@ -402,20 +451,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -426,11 +485,11 @@ contains end subroutine psb_mminv subroutine psb_mminm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -444,7 +503,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -470,20 +529,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call 
mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_min,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -494,16 +563,260 @@ contains + ! + ! gather + ! + subroutine psb_mgather_s(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(inout) :: dat, resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(1) = dat +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. 
+ collective_end = .false. + end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,1,psb_mpi_mpk_,& + & resv,1,psb_mpi_mpk_,icomm,info) + else + call mpi_gather(dat,1,psb_mpi_mpk_,& + & resv,1,psb_mpi_mpk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,1,psb_mpi_mpk_,& + & resv,1,psb_mpi_mpk_,icomm,request,info) + else + call mpi_igather(dat,1,psb_mpi_mpk_,& + & resv,1,psb_mpi_mpk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_mgather_s + + subroutine psb_mgather_v(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,size(dat),psb_mpi_mpk_,& + & resv,size(dat),psb_mpi_mpk_,icomm,info) + else + call mpi_gather(dat,size(dat),psb_mpi_mpk_,& + & resv,size(dat),psb_mpi_mpk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,size(dat),psb_mpi_mpk_,& + & resv,size(dat),psb_mpi_mpk_,icomm,request,info) + else + call mpi_igather(dat,size(dat),psb_mpi_mpk_,& + & resv,size(dat),psb_mpi_mpk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_mgather_v + + subroutine psb_mgatherv_v(ctxt,dat,resv,szs,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_mpk_), intent(in), optional :: szs(:) + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info,i + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + integer(psb_mpk_), allocatable :: displs(:) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_allgatherv(dat,size(dat),psb_mpi_mpk_,& + & resv,szs,displs,psb_mpi_mpk_,icomm,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_gatherv(dat,size(dat),psb_mpi_mpk_,& + & resv,szs,displs,psb_mpi_mpk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_iallgatherv(dat,size(dat),psb_mpi_mpk_,& + & resv,szs,displs,psb_mpi_mpk_,icomm,request,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_igatherv(dat,size(dat),psb_mpi_mpk_,& + & resv,szs,displs,psb_mpi_mpk_,root_,icomm,request,info) + endif + + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_mgatherv_v + + + ! ! SUM ! 
subroutine psb_msums(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -517,7 +830,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -542,20 +855,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_mpk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_mpk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_mpk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_mpk_,mpi_sum,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_mpk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_mpk_,mpi_sum,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_mpk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_mpk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -565,11 +888,11 @@ contains end subroutine psb_msums subroutine psb_msumv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -583,7 +906,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if 
(present(root)) then @@ -608,20 +931,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -632,11 +965,11 @@ contains end subroutine psb_msumv subroutine psb_msumm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -650,7 +983,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -676,20 +1009,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & 
psb_mpi_mpk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_sum,root_, icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_sum,root_, icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_sum,root_, icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -703,11 +1046,11 @@ contains ! subroutine psb_mamxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -721,7 +1064,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -746,20 +1089,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_mpk_,mpi_mamx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_mpk_,mpi_mamx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & 
psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -770,11 +1123,11 @@ contains end subroutine psb_mamxs subroutine psb_mamxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -788,7 +1141,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -813,20 +1166,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& psb_mpi_mpk_,mpi_mamx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_mamx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -837,11 +1200,11 @@ contains 
end subroutine psb_mamxv subroutine psb_mamxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -855,7 +1218,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -881,20 +1244,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_mamx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_mamx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& & psb_mpi_mpk_,mpi_mamx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -907,11 +1280,11 @@ contains ! AMN: Minimum Absolute Value ! 
subroutine psb_mamns(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -925,7 +1298,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -950,20 +1323,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_mpk_,mpi_mamn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_mpk_,mpi_mamn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -974,11 +1357,11 @@ contains end subroutine psb_mamns subroutine psb_mamnv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -992,7 +1375,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) 
call psb_info(ctxt,iam,np) if (present(root)) then @@ -1017,20 +1400,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_mamn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_mamn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1041,11 +1434,11 @@ contains end subroutine psb_mamnv subroutine psb_mamnm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1059,7 +1452,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1085,20 +1478,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_mamn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info) + if(iam==root_) 
then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_mpk_,mpi_mamn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_mpk_,mpi_mamn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1112,11 +1515,11 @@ contains ! BCAST Broadcast ! subroutine psb_mbcasts(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1131,7 +1534,7 @@ contains logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1167,11 +1570,11 @@ contains end subroutine psb_mbcasts subroutine psb_mbcastv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1185,7 +1588,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1222,11 +1625,11 @@ contains end subroutine psb_mbcastv subroutine psb_mbcastm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H 
+#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1240,7 +1643,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1284,11 +1687,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_mscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1296,13 +1699,13 @@ contains integer(psb_mpk_), intent(inout), optional :: request integer(psb_mpk_), intent(inout) :: dat integer(psb_mpk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1319,26 +1722,28 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,1,& + call mpi_scan(dat_,dat,1,& & psb_mpi_mpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,1,& + call mpi_iscan(dat_,dat,1,& & psb_mpi_mpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_mscan_sums subroutine psb_mexscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1346,14 +1751,14 @@ contains integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request integer(psb_mpk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1370,41 +1775,44 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,1,& + call mpi_exscan(dat_,dat,1,& & psb_mpi_mpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,1,& + call mpi_iexscan(dat_,dat,1,& & psb_mpi_mpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #else dat = mzero #endif end subroutine psb_mexscan_sums subroutine psb_mscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync - -#if !defined(SERIAL_MPI) + integer(psb_mpk_), allocatable :: dat_(:) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1421,40 +1829,43 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,size(dat),& + call mpi_scan(dat_,dat,size(dat),& & psb_mpi_mpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_sum,icomm,request,info) + call mpi_iscan(dat_,dat,size(dat),& + & psb_mpi_mpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_mscan_sumv subroutine psb_mexscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_mpk_), allocatable :: dat_(:) - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + integer(psb_mpk_), allocatable :: dat_(:) -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1471,18 +1882,19 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,size(dat),& + call mpi_exscan(dat_,dat,size(dat),& & psb_mpi_mpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_mpk_,mpi_sum,icomm,request,info) + call mpi_iexscan(dat_,dat,size(dat),& + & psb_mpi_mpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if - + info = minfo #else dat = mzero #endif @@ -1497,7 +1909,9 @@ contains integer(psb_mpk_), intent(in) :: bsdindx(:), brvindx(:), sdsz(:), rvsz(:) type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz + integer(psb_ipk_) :: i,j,k, ipx, idx + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1526,11 +1940,11 @@ contains subroutine psb_m_m_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_mpk_), intent(in) :: valsnd(:) @@ -1542,9 +1956,11 @@ contains integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1609,11 +2025,11 @@ contains subroutine psb_m_e_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_mpk_), intent(in) :: valsnd(:) @@ -1625,9 +2041,11 @@ contains 
integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1689,6 +2107,5 @@ contains Enddo end subroutine psb_m_e_simple_triad_a2av - end module psi_m_collective_mod diff --git a/base/modules/penv/psi_m_p2p_mod.F90 b/base/modules/penv/psi_m_p2p_mod.F90 index 9f6c7bc6..0132ce02 100644 --- a/base/modules/penv/psi_m_p2p_mod.F90 +++ b/base/modules/penv/psi_m_p2p_mod.F90 @@ -44,11 +44,11 @@ module psi_m_p2p_mod contains subroutine psb_msnds(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -56,7 +56,7 @@ contains integer(psb_mpk_), intent(in) :: dst integer(psb_mpk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else allocate(dat_(1), stat=info) @@ -67,11 +67,11 @@ contains subroutine psb_msndv(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -80,7 +80,7 @@ contains integer(psb_mpk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else allocate(dat_(size(dat)), stat=info) dat_(:) = dat(:) @@ -91,11 +91,11 @@ contains subroutine psb_msndm(ctxt,dat,dst,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -106,7 +106,7 @@ contains integer(psb_ipk_) :: i,j,k,m_,n_ integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else if (present(m)) then m_ = m @@ -127,11 +127,11 @@ contains end subroutine psb_msndm subroutine psb_mrcvs(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -139,7 +139,7 @@ contains integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else icomm = psb_get_mpi_comm(ctxt) @@ -150,11 +150,11 @@ contains subroutine psb_mrcvv(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -163,7 +163,7 @@ contains integer(psb_mpk_), allocatable :: dat_(:) integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else icomm = psb_get_mpi_comm(ctxt) call mpi_recv(dat,size(dat),psb_mpi_mpk_,src,psb_int4_tag,icomm,status,info) @@ -174,11 +174,11 @@ contains subroutine psb_mrcvm(ctxt,dat,src,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -189,7 +189,7 @@ contains integer(psb_mpk_) :: info ,m_,n_, ld, mp_rcv_type integer(psb_mpk_) :: i,j,k integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! What should we do here?? #else if (present(m)) then diff --git a/base/modules/penv/psi_p2p_mod.F90 b/base/modules/penv/psi_p2p_mod.F90 index f7262378..275b5de7 100644 --- a/base/modules/penv/psi_p2p_mod.F90 +++ b/base/modules/penv/psi_p2p_mod.F90 @@ -66,11 +66,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_lsnds(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -78,7 +78,7 @@ contains integer(psb_mpk_), intent(in) :: dst logical, allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else allocate(dat_(1), stat=info) @@ -89,11 +89,11 @@ contains subroutine psb_lsndv(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -102,7 +102,7 @@ contains logical, allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else allocate(dat_(size(dat)), stat=info) dat_(:) = dat(:) @@ -113,11 +113,11 @@ contains subroutine psb_lsndm(ctxt,dat,dst,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -128,7 +128,7 @@ contains integer(psb_mpk_) :: info integer(psb_ipk_) :: i,j,k,m_,n_ -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else if (present(m)) then m_ = m @@ -150,11 +150,11 @@ contains subroutine psb_hsnds(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -162,7 +162,7 @@ contains integer(psb_mpk_), intent(in) :: dst character(len=1), allocatable :: dat_(:) integer(psb_mpk_) :: info, l, i -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! do nothing #else l = len(dat) @@ -182,11 +182,11 @@ contains subroutine psb_lrcvs(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -194,7 +194,7 @@ contains integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else icomm = psb_get_mpi_comm(ctxt) @@ -205,11 +205,11 @@ contains subroutine psb_lrcvv(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -217,7 +217,7 @@ contains integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else icomm = psb_get_mpi_comm(ctxt) call mpi_recv(dat,size(dat),mpi_logical,src,psb_logical_tag,icomm,status,info) @@ -228,11 +228,11 @@ contains subroutine psb_lrcvm(ctxt,dat,src,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -242,7 +242,7 @@ contains integer(psb_mpk_) :: info ,m_,n_, ld, mp_rcv_type integer(psb_ipk_) :: i,j,k integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! What should we do here?? #else icomm = psb_get_mpi_comm(ctxt) @@ -269,11 +269,11 @@ contains subroutine psb_hrcvs(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -282,7 +282,7 @@ contains character(len=1), allocatable :: dat_(:) integer(psb_mpk_) :: info, l, i integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! do nothing #else l = len(dat) diff --git a/base/modules/penv/psi_penv_mod.F90 b/base/modules/penv/psi_penv_mod.F90 index 0c77c6df..7091411d 100644 --- a/base/modules/penv/psi_penv_mod.F90 +++ b/base/modules/penv/psi_penv_mod.F90 @@ -29,10 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! Provide a fake mpi module just to keep the compiler(s) happy. 
module mpi use psb_const_mod + use iso_c_binding integer(psb_mpk_), parameter :: mpi_success = 0 integer(psb_mpk_), parameter :: mpi_request_null = 0 integer(psb_mpk_), parameter :: mpi_status_size = 1 @@ -49,13 +50,135 @@ module mpi integer(psb_mpk_), parameter :: mpi_comm_null = -1 integer(psb_mpk_), parameter :: mpi_comm_world = 1 - real(psb_dpk_), external :: mpi_wtime + !real(psb_dpk_), external :: mpi_wtime + + interface + function mpi_wtime() result(res) bind(c,name='mpi_wtime') + import + real(c_double) :: res + end function mpi_wtime + end interface + + interface + subroutine mpi_wait(request, status,ierr) bind(c,name='mpi_wait') + import + type(*), dimension(..) :: request + integer(psb_mpk_) :: status(*) + integer(psb_mpk_) :: ierr + end subroutine mpi_wait + end interface + + interface + subroutine mpi_send(buf,count,datatype,dest,tag,comm,ierr) & + & bind(c,name='mpi_send') + import + type(*), dimension(..) :: buf + integer(psb_mpk_) :: count, datatype, dest, tag, comm, ierr + end subroutine mpi_send + end interface + + + interface + subroutine mpi_isend(buf,count,datatype,dest,tag,comm,request,ierr) & + & bind(c,name='mpi_isend') + import + type(*), dimension(..) :: buf + integer(psb_mpk_) :: count, datatype, dest, tag, comm, request,ierr + end subroutine mpi_isend + end interface + + interface + subroutine mpi_irecv(buf,count,datatype,src,tag,comm,request,ierr) & + & bind(c,name='mpi_irecv') + import + type(*), dimension(..) :: buf + integer(psb_mpk_) :: count, datatype, src, tag, comm, request, ierr + end subroutine mpi_irecv + end interface + + interface + subroutine mpi_alltoall(sdb,sdc,sdt,rvb,rvc,rvt,comm,ierr) & + & bind(c,name='mpi_alltoall') + import + type(*), dimension(..) :: sdb, rvb + integer(psb_mpk_) :: sdc,sdt,rvc,rvt, comm, ierr + end subroutine mpi_alltoall + end interface + + interface + subroutine mpi_alltoallv(sdb,sdc,sdspl,sdt,rvb,rvc,rdspl,rvt,comm,ierr) & + & bind(c,name='mpi_alltoallv') + import + type(*), dimension(..) 
:: sdb, rvb + integer(psb_mpk_) :: sdspl(*), rdspl(*), sdc(*), rvc(*) + integer(psb_mpk_) :: sdt,rvt, comm, ierr + end subroutine mpi_alltoallv + end interface + + interface + subroutine mpi_gather(sdb,sdc,sdt,rvb,rvc,rvt,root,comm,ierr) & + & bind(c,name='mpi_gather') + import + type(*), dimension(..) :: sdb, rvb + integer(psb_mpk_) :: sdc,sdt,rvc,rvt, root, comm, ierr + end subroutine mpi_gather + end interface + + interface + subroutine mpi_gatherv(sdb,sdc,sdt,rvb,rvc,rdspl,rvt,root,comm,ierr) & + & bind(c,name='mpi_gatherv') + import + type(*), dimension(..) :: sdb, rvb + integer(psb_mpk_) :: rdspl(*), rvc(*) + integer(psb_mpk_) :: sdt,sdc,rvt, root, comm, ierr + end subroutine mpi_gatherv + end interface + + interface + subroutine mpi_scatter(sdb,sdc,sdt,rvb,rvc,rvt,root,comm,ierr) & + & bind(c,name='mpi_scatter') + import + type(*), dimension(..) :: sdb, rvb + integer(psb_mpk_) :: sdc,sdt,rvc,rvt, root, comm, ierr + end subroutine mpi_scatter + end interface + + interface + subroutine mpi_scatterv(sdb,sdc,sdspl,sdt,rvb,rvc,rvt,root,comm,ierr) & + & bind(c,name='mpi_scatterv') + import + type(*), dimension(..) :: sdb, rvb + integer(psb_mpk_) :: sdspl(*), sdc(*) + integer(psb_mpk_) :: sdt,rvc,rvt, root, comm, ierr + end subroutine mpi_scatterv + end interface + + interface + subroutine mpi_allgather(sdb,sdc,sdt,rvb,rvc,rvt,comm,ierr) & + & bind(c,name='mpi_allgather') + import + type(*), dimension(..) :: sdb, rvb + integer(psb_mpk_) :: sdc,sdt,rvc,rvt, comm, ierr + end subroutine mpi_allgather + end interface + + interface + subroutine mpi_allgatherv(sdb,sdc,sdt,rvb,rvc,rdspl,rvt,comm,ierr) & + & bind(c,name='mpi_allgatherv') + import + type(*), dimension(..) 
:: sdb, rvb + integer(psb_mpk_) :: rdspl(*),rvc(*) + integer(psb_mpk_) :: sdc,sdt,rvt, comm, ierr + end subroutine mpi_allgatherv + end interface + end module mpi #endif module psi_penv_mod use psb_const_mod + use iso_c_binding integer(psb_mpk_), parameter:: psb_int_tag = 543987 integer(psb_mpk_), parameter:: psb_real_tag = psb_int_tag + 1 @@ -140,7 +263,7 @@ module psi_penv_mod interface psb_info module procedure psb_info_mpik end interface -#if defined(IPK4) && defined(LPK8) +#if (defined(PSB_IPK4) && defined(PSB_LPK8))||defined(PSB_IPK8) interface psb_info module procedure psb_info_epk end interface @@ -162,11 +285,12 @@ module psi_penv_mod module procedure psb_m_get_mpi_rank!, psb_e_get_mpi_rank end interface psb_get_mpi_rank -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) integer(psb_mpk_), private, save :: nctxt=0 #else + integer(psb_mpk_), save :: mpi_i2amx_op, mpi_i2amn_op integer(psb_mpk_), save :: mpi_iamx_op, mpi_iamn_op integer(psb_mpk_), save :: mpi_mamx_op, mpi_mamn_op integer(psb_mpk_), save :: mpi_eamx_op, mpi_eamn_op @@ -181,6 +305,7 @@ module psi_penv_mod #endif private :: psi_get_sizes, psi_register_mpi_extras + private :: psi_i2amx_op, psi_i2amn_op private :: psi_iamx_op, psi_iamn_op private :: psi_mamx_op, psi_mamn_op private :: psi_eamx_op, psi_eamn_op @@ -216,11 +341,11 @@ contains end subroutine psb_init_queue subroutine psb_wait_buffer(node, info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_buffer_node), intent(inout) :: node @@ -232,11 +357,11 @@ contains end subroutine psb_wait_buffer subroutine psb_test_buffer(node, flag, info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_buffer_node), intent(inout) :: node @@ -244,7 +369,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: status(mpi_status_size), minfo minfo = mpi_success -#if 
defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) flag = .true. #else call mpi_test(node%request,flag,status,minfo) @@ -351,11 +476,11 @@ contains ! ! !!!!!!!!!!!!!!!!! subroutine psi_msnd(ctxt,tag,dest,buffer,mesg_queue) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -390,11 +515,11 @@ contains subroutine psi_esnd(ctxt,tag,dest,buffer,mesg_queue) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -427,11 +552,11 @@ contains end subroutine psi_esnd subroutine psi_i2snd(ctxt,tag,dest,buffer,mesg_queue) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -464,11 +589,11 @@ contains end subroutine psi_i2snd subroutine psi_ssnd(ctxt,tag,dest,buffer,mesg_queue) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -501,11 +626,11 @@ contains end subroutine psi_ssnd subroutine psi_dsnd(ctxt,tag,dest,buffer,mesg_queue) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -538,11 +663,11 @@ contains end subroutine psi_dsnd subroutine psi_csnd(ctxt,tag,dest,buffer,mesg_queue) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -575,11 +700,11 @@ contains end subroutine psi_csnd subroutine psi_zsnd(ctxt,tag,dest,buffer,mesg_queue) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -613,11 +738,11 @@ contains subroutine psi_logsnd(ctxt,tag,dest,buffer,mesg_queue) -#ifdef MPI_MOD +#ifdef 
PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -651,11 +776,11 @@ contains subroutine psi_hsnd(ctxt,tag,dest,buffer,mesg_queue) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -709,9 +834,9 @@ contains subroutine psi_c_diffadd(p1, p2, val) & & bind(c,name="psi_c_diffadd") use iso_c_binding - import :: psb_mpk_ + import :: psb_mpk_, psb_epk_ type(c_ptr), value :: p1, p2 - integer(psb_mpk_) :: val + integer(psb_epk_) :: val end subroutine psi_c_diffadd end interface @@ -726,11 +851,11 @@ contains end subroutine psi_get_sizes subroutine psi_register_mpi_extras(info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_mpk_) :: info @@ -746,18 +871,18 @@ contains if (info == 0) call mpi_type_create_f90_complex(psb_spk_p_,psb_spk_r_, psb_mpi_c_spk_,info) if (info == 0) call mpi_type_create_f90_complex(psb_dpk_p_,psb_dpk_r_, psb_mpi_c_dpk_,info) #else -#if defined(IPK4) && defined(LPK4) +#if defined(PSB_IPK4) && defined(PSB_LPK4) psb_mpi_ipk_ = mpi_integer4 psb_mpi_lpk_ = mpi_integer4 -#elif defined(IPK4) && defined(LPK8) +#elif defined(PSB_IPK4) && defined(PSB_LPK8) psb_mpi_ipk_ = mpi_integer4 psb_mpi_lpk_ = mpi_integer8 -#elif defined(IPK8) && defined(LPK8) +#elif defined(PSB_IPK8) && defined(PSB_LPK8) psb_mpi_ipk_ = mpi_integer8 psb_mpi_lpk_ = mpi_integer8 #else ! This should never happen - write(psb_err_unit,*) 'Warning: an impossible IPK/LPK combination.' + write(psb_err_unit,*) 'Warning: an impossible PSB_IPK/PSB_LPK combination.' write(psb_err_unit,*) 'Something went wrong at configuration time.' 
psb_mpi_ipk_ = -1 psb_mpi_lpk_ = -1 @@ -771,8 +896,10 @@ contains psb_mpi_c_dpk_ = mpi_double_complex #endif -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else + if (info == 0) call mpi_op_create(psi_i2amx_op,.true.,mpi_i2amx_op,info) + if (info == 0) call mpi_op_create(psi_i2amn_op,.true.,mpi_i2amn_op,info) if (info == 0) call mpi_op_create(psi_mamx_op,.true.,mpi_mamx_op,info) if (info == 0) call mpi_op_create(psi_mamn_op,.true.,mpi_mamn_op,info) if (info == 0) call mpi_op_create(psi_eamx_op,.true.,mpi_eamx_op,info) @@ -791,7 +918,7 @@ contains end subroutine psi_register_mpi_extras -#if defined(IPK4) && defined(LPK8) +#if (defined(PSB_IPK4) && defined(PSB_LPK8))||defined(PSB_IPK8) subroutine psb_info_epk(ctxt,iam,np) type(psb_ctxt_type), intent(in) :: ctxt @@ -808,22 +935,22 @@ contains end subroutine psb_info_epk #endif - subroutine psb_init_mpik(ctxt,np,basectxt,ids) + subroutine psb_init_mpik(ctxt,np,basectxt,ids,extcomm) use psb_const_mod use psb_error_mod use psb_mat_mod use psb_vect_mod ! !$ use psb_rsb_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(out) :: ctxt type(psb_ctxt_type), intent(in), optional :: basectxt - integer(psb_mpk_), intent(in), optional :: np, ids(:) + integer(psb_mpk_), intent(in), optional :: np, ids(:), extcomm integer(psb_mpk_) :: i, isnullcomm, icomm integer(psb_mpk_), allocatable :: iids(:) @@ -834,7 +961,7 @@ contains ! call psb_set_debug_unit(psb_err_unit) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ctxt%ctxt = nctxt ! allocate on assignment nctxt = nctxt + 1 @@ -857,6 +984,8 @@ contains else basecomm = mpi_comm_world end if + else if (present(extcomm)) then + basecomm = extcomm else basecomm = mpi_comm_world end if @@ -951,11 +1080,11 @@ contains use psb_mat_mod use psb_vect_mod ! 
!$ use psb_rsb_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(inout) :: ctxt @@ -980,7 +1109,7 @@ contains ! !$ call psb_error(ctxt) ! !$ endif ! !$ endif -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! Under serial mode, CLOSE has no effect, but reclaim ! the used ctxt number. nctxt = max(0, nctxt - 1) @@ -997,6 +1126,8 @@ contains & call mpi_comm_Free(ctxt%ctxt,info) end if if (close_) then + if (info == 0) call mpi_op_free(mpi_i2amx_op,info) + if (info == 0) call mpi_op_free(mpi_i2amn_op,info) if (info == 0) call mpi_op_free(mpi_mamx_op,info) if (info == 0) call mpi_op_free(mpi_mamn_op,info) if (info == 0) call mpi_op_free(mpi_eamx_op,info) @@ -1023,17 +1154,17 @@ contains subroutine psb_barrier_mpik(ctxt) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt integer(psb_mpk_) :: info -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) if (allocated(ctxt%ctxt)) then if (ctxt%ctxt /= mpi_comm_null) call mpi_barrier(ctxt%ctxt, info) end if @@ -1044,11 +1175,11 @@ contains function psb_wtime() use psb_const_mod ! use mpi_constants -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif real(psb_dpk_) :: psb_wtime @@ -1063,7 +1194,7 @@ contains integer(psb_mpk_) :: code, info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) stop #else if (present(errc)) then @@ -1079,11 +1210,11 @@ contains subroutine psb_info_mpik(ctxt,iam,np) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -1111,7 +1242,7 @@ contains ! it's valid or not. ! 
-#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) iam = 0 np = 1 #else @@ -1138,11 +1269,11 @@ contains function psb_m_get_mpi_comm(ctxt) result(comm) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -1160,11 +1291,11 @@ contains end function psb_m_get_mpi_rank subroutine psb_get_mpicomm(ctxt,comm) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type) :: ctxt @@ -1188,6 +1319,26 @@ contains ! Note: len & type are always default integer. ! ! !!!!!!!!!!!!!!!!!!!!!! + subroutine psi_i2amx_op(inv, outv,len,type) + integer(psb_i2pk_) :: inv(len), outv(len) + integer(psb_mpk_) :: len,type + integer(psb_mpk_) :: i + + do i=1, len + if (abs(inv(i)) > abs(outv(i))) outv(i) = inv(i) + end do + end subroutine psi_i2amx_op + + subroutine psi_i2amn_op(inv, outv,len,type) + integer(psb_i2pk_) :: inv(len), outv(len) + integer(psb_mpk_) :: len,type + integer(psb_mpk_) :: i + + do i=1, len + if (abs(inv(i)) < abs(outv(i))) outv(i) = inv(i) + end do + end subroutine psi_i2amn_op + subroutine psi_mamx_op(inv, outv,len,type) integer(psb_mpk_) :: inv(len), outv(len) integer(psb_mpk_) :: len,type diff --git a/base/modules/penv/psi_s_collective_mod.F90 b/base/modules/penv/psi_s_collective_mod.F90 index d8e6ba82..9936395a 100644 --- a/base/modules/penv/psi_s_collective_mod.F90 +++ b/base/modules/penv/psi_s_collective_mod.F90 @@ -32,6 +32,7 @@ module psi_s_collective_mod use psi_penv_mod use psb_desc_const_mod + use iso_c_binding interface psb_max module procedure psb_smaxs, psb_smaxv, psb_smaxm @@ -45,6 +46,14 @@ module psi_s_collective_mod module procedure psb_s_nrm2s, psb_s_nrm2v end interface psb_nrm2 + interface psb_gather + module procedure psb_sgather_s, psb_sgather_v + end interface psb_gather + + interface psb_gatherv + module procedure psb_sgatherv_v + end interface + interface psb_sum 
module procedure psb_ssums, psb_ssumv, psb_ssumm end interface @@ -93,11 +102,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_smaxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -110,8 +119,9 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + real(psb_spk_) :: dat_ -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -134,20 +144,29 @@ contains collective_start = .false. collective_end = .false. end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_r_spk_,mpi_max,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_r_spk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_r_spk_,mpi_max,root_,icomm,info) + if (iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_r_spk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_r_spk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_spk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_max,root_,icomm,request,info) + if (iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -157,11 +176,11 @@ contains end subroutine psb_smaxs subroutine psb_smaxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H 
include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -174,9 +193,10 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + real(psb_spk_) :: dat_(1) ! This is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -200,21 +220,31 @@ contains collective_end = .false. end if if (collective_sync) then - if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + if (root_ == -1) then + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_max,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_max,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -225,11 +255,11 @@ contains end subroutine psb_smaxv subroutine psb_smaxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -242,9 +272,10 @@ contains integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, 
collective_sync + real(psb_spk_) :: dat_(1,1) ! this is a dummy -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -268,28 +299,37 @@ contains collective_start = .false. collective_end = .false. end if - if (collective_sync) then + if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_max,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_max,root_,icomm,info) - endif + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_max,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_max,root_,icomm,info) + endif + end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_max,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_max,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_max,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_max,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) end if end if - #endif end subroutine psb_smaxm @@ -297,11 +337,11 @@ contains ! MIN: Minimum Value ! 
subroutine psb_smins(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -315,7 +355,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -340,18 +380,27 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,psb_mpi_r_spk_,mpi_min,icomm,info) + call mpi_allreduce(mpi_in_place,dat,1,psb_mpi_r_spk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,psb_mpi_r_spk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,psb_mpi_r_spk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,psb_mpi_r_spk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_spk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -362,11 +411,11 @@ contains end subroutine psb_smins subroutine psb_sminv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -380,7 +429,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call 
psb_info(ctxt,iam,np) if (present(root)) then @@ -405,20 +454,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_min,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -429,11 +488,11 @@ contains end subroutine psb_sminv subroutine psb_sminm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -447,7 +506,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -473,20 +532,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_min,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_min,root_,icomm,info) 
+ if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_min,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_min,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -502,11 +571,11 @@ contains ! ! !!!!!!!!!!!! subroutine psb_s_nrm2s(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -520,7 +589,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -545,20 +614,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_r_spk_,mpi_snrm2_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & 
psb_mpi_r_spk_,mpi_snrm2_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -569,11 +648,11 @@ contains end subroutine psb_s_nrm2s subroutine psb_s_nrm2v(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -587,7 +666,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -612,20 +691,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),psb_mpi_r_spk_,& + call mpi_allreduce(mpi_in_place,dat,size(dat),psb_mpi_r_spk_,& & mpi_snrm2_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),psb_mpi_r_spk_,& - & mpi_snrm2_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),psb_mpi_r_spk_,& + & mpi_snrm2_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),psb_mpi_r_spk_,& + & mpi_snrm2_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_snrm2_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info) + else + call 
mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_snrm2_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -636,16 +725,260 @@ contains end subroutine psb_s_nrm2v + ! + ! gather + ! + subroutine psb_sgather_s(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + real(psb_spk_), intent(inout) :: dat, resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(1) = dat +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,1,psb_mpi_r_spk_,& + & resv,1,psb_mpi_r_spk_,icomm,info) + else + call mpi_gather(dat,1,psb_mpi_r_spk_,& + & resv,1,psb_mpi_r_spk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,1,psb_mpi_r_spk_,& + & resv,1,psb_mpi_r_spk_,icomm,request,info) + else + call mpi_igather(dat,1,psb_mpi_r_spk_,& + & resv,1,psb_mpi_r_spk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_sgather_s + + subroutine psb_sgather_v(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + real(psb_spk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,size(dat),psb_mpi_r_spk_,& + & resv,size(dat),psb_mpi_r_spk_,icomm,info) + else + call mpi_gather(dat,size(dat),psb_mpi_r_spk_,& + & resv,size(dat),psb_mpi_r_spk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,size(dat),psb_mpi_r_spk_,& + & resv,size(dat),psb_mpi_r_spk_,icomm,request,info) + else + call mpi_igather(dat,size(dat),psb_mpi_r_spk_,& + & resv,size(dat),psb_mpi_r_spk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_sgather_v + + subroutine psb_sgatherv_v(ctxt,dat,resv,szs,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + real(psb_spk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_mpk_), intent(in), optional :: szs(:) + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info,i + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + integer(psb_mpk_), allocatable :: displs(:) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_allgatherv(dat,size(dat),psb_mpi_r_spk_,& + & resv,szs,displs,psb_mpi_r_spk_,icomm,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_gatherv(dat,size(dat),psb_mpi_r_spk_,& + & resv,szs,displs,psb_mpi_r_spk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_iallgatherv(dat,size(dat),psb_mpi_r_spk_,& + & resv,szs,displs,psb_mpi_r_spk_,icomm,request,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_igatherv(dat,size(dat),psb_mpi_r_spk_,& + & resv,szs,displs,psb_mpi_r_spk_,root_,icomm,request,info) + endif + + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_sgatherv_v + + + ! ! SUM ! 
subroutine psb_ssums(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -659,7 +992,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -684,20 +1017,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_r_spk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_sum,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_spk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -707,11 +1050,11 @@ contains end subroutine psb_ssums subroutine psb_ssumv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -725,7 +1068,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call 
psb_info(ctxt,iam,np) if (present(root)) then @@ -750,20 +1093,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -774,11 +1127,11 @@ contains end subroutine psb_ssumv subroutine psb_ssumm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -792,7 +1145,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -818,20 +1171,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call 
mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_sum,root_, icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_sum,root_, icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_sum,root_, icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -845,11 +1208,11 @@ contains ! subroutine psb_samxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -863,7 +1226,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -888,20 +1251,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_r_spk_,mpi_samx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_spk_,mpi_samx_op,icomm,request,info) 
else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -912,11 +1285,11 @@ contains end subroutine psb_samxs subroutine psb_samxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -930,7 +1303,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -955,20 +1328,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& psb_mpi_r_spk_,mpi_samx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_samx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info) + end if end if else if (collective_end) 
then call mpi_wait(request,status,info) @@ -979,11 +1362,11 @@ contains end subroutine psb_samxv subroutine psb_samxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -997,7 +1380,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1023,20 +1406,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_samx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_samx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& & psb_mpi_r_spk_,mpi_samx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1049,11 +1442,11 @@ contains ! AMN: Minimum Absolute Value ! 
subroutine psb_samns(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1067,7 +1460,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1092,20 +1485,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_r_spk_,mpi_samn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_r_spk_,mpi_samn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1116,11 +1519,11 @@ contains end subroutine psb_samns subroutine psb_samnv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1134,7 +1537,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if 
!defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1159,20 +1562,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_samn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_samn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1183,11 +1596,11 @@ contains end subroutine psb_samnv subroutine psb_samnm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1201,7 +1614,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1227,20 +1640,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_samn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & 
psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_r_spk_,mpi_samn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_r_spk_,mpi_samn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -1254,11 +1677,11 @@ contains ! BCAST Broadcast ! subroutine psb_sbcasts(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1273,7 +1696,7 @@ contains logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1309,11 +1732,11 @@ contains end subroutine psb_sbcasts subroutine psb_sbcastv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1327,7 +1750,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -1364,11 +1787,11 @@ contains end subroutine psb_sbcastv subroutine psb_sbcastm(ctxt,dat,root,mode,request) -#ifdef 
MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1382,7 +1805,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -1426,11 +1849,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_sscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1438,13 +1861,13 @@ contains integer(psb_mpk_), intent(inout), optional :: request real(psb_spk_), intent(inout) :: dat real(psb_spk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1461,26 +1884,28 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,1,& + call mpi_scan(dat_,dat,1,& & psb_mpi_r_spk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,1,& + call mpi_iscan(dat_,dat,1,& & psb_mpi_r_spk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_sscan_sums subroutine psb_sexscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -1488,14 +1913,14 @@ contains integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request real(psb_spk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1512,41 +1937,44 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,1,& + call mpi_exscan(dat_,dat,1,& & psb_mpi_r_spk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,1,& + call mpi_iexscan(dat_,dat,1,& & psb_mpi_r_spk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #else dat = szero #endif end subroutine psb_sexscan_sums subroutine psb_sscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt real(psb_spk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync - -#if !defined(SERIAL_MPI) + real(psb_spk_), allocatable :: dat_(:) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1563,40 +1991,43 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,size(dat),& + call mpi_scan(dat_,dat,size(dat),& & psb_mpi_r_spk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_sum,icomm,request,info) + call mpi_iscan(dat_,dat,size(dat),& + & psb_mpi_r_spk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_sscan_sumv subroutine psb_sexscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt real(psb_spk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - real(psb_spk_), allocatable :: dat_(:) - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + real(psb_spk_), allocatable :: dat_(:) -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1613,18 +2044,19 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,size(dat),& + call mpi_exscan(dat_,dat,size(dat),& & psb_mpi_r_spk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_r_spk_,mpi_sum,icomm,request,info) + call mpi_iexscan(dat_,dat,size(dat),& + & psb_mpi_r_spk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if - + info = minfo #else dat = szero #endif @@ -1639,7 +2071,9 @@ contains integer(psb_mpk_), intent(in) :: bsdindx(:), brvindx(:), sdsz(:), rvsz(:) type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz + integer(psb_ipk_) :: i,j,k, ipx, idx + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1668,11 +2102,11 @@ contains subroutine psb_s_m_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif real(psb_spk_), intent(in) :: valsnd(:) @@ -1684,9 +2118,11 @@ contains integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1751,11 +2187,11 @@ contains subroutine psb_s_e_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif real(psb_spk_), intent(in) :: valsnd(:) @@ -1767,9 +2203,11 @@ contains 
integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1831,6 +2269,5 @@ contains Enddo end subroutine psb_s_e_simple_triad_a2av - end module psi_s_collective_mod diff --git a/base/modules/penv/psi_s_p2p_mod.F90 b/base/modules/penv/psi_s_p2p_mod.F90 index 9c7f9d66..d8352bd5 100644 --- a/base/modules/penv/psi_s_p2p_mod.F90 +++ b/base/modules/penv/psi_s_p2p_mod.F90 @@ -44,11 +44,11 @@ module psi_s_p2p_mod contains subroutine psb_ssnds(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -56,7 +56,7 @@ contains integer(psb_mpk_), intent(in) :: dst real(psb_spk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else allocate(dat_(1), stat=info) @@ -67,11 +67,11 @@ contains subroutine psb_ssndv(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -80,7 +80,7 @@ contains real(psb_spk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else allocate(dat_(size(dat)), stat=info) dat_(:) = dat(:) @@ -91,11 +91,11 @@ contains subroutine psb_ssndm(ctxt,dat,dst,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -106,7 +106,7 @@ contains integer(psb_ipk_) :: i,j,k,m_,n_ integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else if (present(m)) then m_ = m @@ -127,11 +127,11 @@ contains end subroutine psb_ssndm subroutine psb_srcvs(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -139,7 +139,7 @@ contains integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else icomm = psb_get_mpi_comm(ctxt) @@ -150,11 +150,11 @@ contains subroutine psb_srcvv(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -163,7 +163,7 @@ contains real(psb_spk_), allocatable :: dat_(:) integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else icomm = psb_get_mpi_comm(ctxt) call mpi_recv(dat,size(dat),psb_mpi_r_spk_,src,psb_real_tag,icomm,status,info) @@ -174,11 +174,11 @@ contains subroutine psb_srcvm(ctxt,dat,src,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -189,7 +189,7 @@ contains integer(psb_mpk_) :: info ,m_,n_, ld, mp_rcv_type integer(psb_mpk_) :: i,j,k integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! What should we do here?? #else if (present(m)) then diff --git a/base/modules/penv/psi_z_collective_mod.F90 b/base/modules/penv/psi_z_collective_mod.F90 index 6f43742f..de4e5bcc 100644 --- a/base/modules/penv/psi_z_collective_mod.F90 +++ b/base/modules/penv/psi_z_collective_mod.F90 @@ -32,8 +32,17 @@ module psi_z_collective_mod use psi_penv_mod use psb_desc_const_mod + use iso_c_binding + interface psb_gather + module procedure psb_zgather_s, psb_zgather_v + end interface psb_gather + + interface psb_gatherv + module procedure psb_zgatherv_v + end interface + interface psb_sum module procedure psb_zsums, psb_zsumv, psb_zsumm end interface @@ -76,16 +85,260 @@ contains + ! + ! gather + ! 
+ subroutine psb_zgather_s(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + complex(psb_dpk_), intent(inout) :: dat, resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(1) = dat +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,1,psb_mpi_c_dpk_,& + & resv,1,psb_mpi_c_dpk_,icomm,info) + else + call mpi_gather(dat,1,psb_mpi_c_dpk_,& + & resv,1,psb_mpi_c_dpk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,1,psb_mpi_c_dpk_,& + & resv,1,psb_mpi_c_dpk_,icomm,request,info) + else + call mpi_igather(dat,1,psb_mpi_c_dpk_,& + & resv,1,psb_mpi_c_dpk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_zgather_s + + subroutine psb_zgather_v(ctxt,dat,resv,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + complex(psb_dpk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + call mpi_allgather(dat,size(dat),psb_mpi_c_dpk_,& + & resv,size(dat),psb_mpi_c_dpk_,icomm,info) + else + call mpi_gather(dat,size(dat),psb_mpi_c_dpk_,& + & resv,size(dat),psb_mpi_c_dpk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + call mpi_iallgather(dat,size(dat),psb_mpi_c_dpk_,& + & resv,size(dat),psb_mpi_c_dpk_,icomm,request,info) + else + call mpi_igather(dat,size(dat),psb_mpi_c_dpk_,& + & resv,size(dat),psb_mpi_c_dpk_,root_,icomm,request,info) + endif + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_zgather_v + + subroutine psb_zgatherv_v(ctxt,dat,resv,szs,root,mode,request) +#ifdef PSB_MPI_MOD + use mpi +#endif + implicit none +#ifdef PSB_MPI_H + include 'mpif.h' +#endif + type(psb_ctxt_type), intent(in) :: ctxt + complex(psb_dpk_), intent(inout) :: dat(:), resv(:) + integer(psb_mpk_), intent(in), optional :: root + integer(psb_mpk_), intent(in), optional :: szs(:) + integer(psb_ipk_), intent(in), optional :: mode + integer(psb_mpk_), intent(inout), optional :: request + integer(psb_mpk_) :: root_ + integer(psb_mpk_) :: iam, np, info,i + integer(psb_mpk_) :: icomm + integer(psb_mpk_) :: status(mpi_status_size) + integer(psb_mpk_), allocatable :: displs(:) + logical :: collective_start, collective_end, collective_sync + +#if defined(PSB_SERIAL_MPI) + resv(:) = dat(:) +#else + call psb_info(ctxt,iam,np) + + if (present(root)) then + root_ = root + else + root_ = -1 + endif + icomm = psb_get_mpi_comm(ctxt) + if (present(mode)) then + collective_sync = .false. + collective_start = iand(mode,psb_collective_start_) /= 0 + collective_end = iand(mode,psb_collective_end_) /= 0 + if (.not.present(request)) then + collective_sync = .true. + collective_start = .false. + collective_end = .false. + end if + else + collective_sync = .true. + collective_start = .false. + collective_end = .false. 
+ end if + if (collective_sync) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_allgatherv(dat,size(dat),psb_mpi_c_dpk_,& + & resv,szs,displs,psb_mpi_c_dpk_,icomm,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_gatherv(dat,size(dat),psb_mpi_c_dpk_,& + & resv,szs,displs,psb_mpi_c_dpk_,root_,icomm,info) + endif + else + if (collective_start) then + if (root_ == -1) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + call mpi_iallgatherv(dat,size(dat),psb_mpi_c_dpk_,& + & resv,szs,displs,psb_mpi_c_dpk_,icomm,request,info) + else + if (iam == root_) then + if (size(szs) < np) write(0,*) 'Error: bad input sizes' + allocate(displs(np)) + displs(1) = 0 + do i=2, np + displs(i) = displs(i-1) + szs(i-1) + end do + else + allocate(displs(0)) + end if + call mpi_igatherv(dat,size(dat),psb_mpi_c_dpk_,& + & resv,szs,displs,psb_mpi_c_dpk_,root_,icomm,request,info) + endif + + else if (collective_end) then + call mpi_wait(request,status,info) + end if + end if +#endif + end subroutine psb_zgatherv_v + + + ! ! SUM ! 
subroutine psb_zsums(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -99,7 +352,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -124,20 +377,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_c_dpk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_dpk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_c_dpk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_c_dpk_,mpi_sum,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_c_dpk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -147,11 +410,11 @@ contains end subroutine psb_zsums subroutine psb_zsumv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -165,7 +428,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call 
psb_info(ctxt,iam,np) if (present(root)) then @@ -190,20 +453,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_dpk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_sum,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -214,11 +487,11 @@ contains end subroutine psb_zsumv subroutine psb_zsumm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -232,7 +505,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -258,20 +531,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_sum,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_dpk_,mpi_sum,root_,icomm,info) + if(iam==root_) then + call 
mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_sum,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_sum,root_,icomm,info) + end if end if else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_sum,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_dpk_,mpi_sum,root_, icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_sum,root_, icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_sum,root_, icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -285,11 +568,11 @@ contains ! subroutine psb_zamxs(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -303,7 +586,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -328,20 +611,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_c_dpk_,mpi_zamx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_c_dpk_,mpi_zamx_op,icomm,request,info) else - 
call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -352,11 +645,11 @@ contains end subroutine psb_zamxs subroutine psb_zamxv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -370,7 +663,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -395,20 +688,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& psb_mpi_c_dpk_,mpi_zamx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call 
mpi_wait(request,status,info) @@ -419,11 +722,11 @@ contains end subroutine psb_zamxv subroutine psb_zamxm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -437,7 +740,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -463,20 +766,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamx_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamx_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamx_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -489,11 +802,11 @@ contains ! AMN: Minimum Absolute Value ! 
subroutine psb_zamns(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -507,7 +820,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -532,20 +845,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,1,& + call mpi_allreduce(mpi_in_place,dat,1,& & psb_mpi_c_dpk_,mpi_zamn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,1,& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,1,& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,1,& + call mpi_iallreduce(mpi_in_place,dat,1,& & psb_mpi_c_dpk_,mpi_zamn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,1,& - & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,1,& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,1,& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -556,11 +879,11 @@ contains end subroutine psb_zamns subroutine psb_zamnv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -574,7 +897,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if 
!defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -599,20 +922,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -623,11 +956,11 @@ contains end subroutine psb_zamnv subroutine psb_zamnm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -641,7 +974,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -667,20 +1000,30 @@ contains end if if (collective_sync) then if (root_ == -1) then - call mpi_allreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_allreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamn_op,icomm,info) else - call mpi_reduce(MPI_IN_PLACE,dat,size(dat),& - & 
psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info) + if(iam==root_) then + call mpi_reduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info) + else + call mpi_reduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,info) + end if endif else if (collective_start) then if (root_ == -1) then - call mpi_iallreduce(MPI_IN_PLACE,dat,size(dat),& + call mpi_iallreduce(mpi_in_place,dat,size(dat),& & psb_mpi_c_dpk_,mpi_zamn_op,icomm,request,info) else - call mpi_ireduce(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info) + if(iam==root_) then + call mpi_ireduce(mpi_in_place,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info) + else + call mpi_ireduce(dat,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_zamn_op,root_,icomm,request,info) + end if end if else if (collective_end) then call mpi_wait(request,status,info) @@ -694,11 +1037,11 @@ contains ! BCAST Broadcast ! subroutine psb_zbcasts(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -713,7 +1056,7 @@ contains logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -749,11 +1092,11 @@ contains end subroutine psb_zbcasts subroutine psb_zbcastv(ctxt,dat,root,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -767,7 +1110,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) if (present(root)) then @@ -804,11 +1147,11 @@ contains end subroutine psb_zbcastv subroutine psb_zbcastm(ctxt,dat,root,mode,request) -#ifdef MPI_MOD 
+#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -822,7 +1165,7 @@ contains integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) @@ -866,11 +1209,11 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_zscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -878,13 +1221,13 @@ contains integer(psb_mpk_), intent(inout), optional :: request complex(psb_dpk_), intent(inout) :: dat complex(psb_dpk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -901,26 +1244,28 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,1,& + call mpi_scan(dat_,dat,1,& & psb_mpi_c_dpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,1,& + call mpi_iscan(dat_,dat,1,& & psb_mpi_c_dpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_zscan_sums subroutine psb_zexscan_sums(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -928,14 +1273,14 @@ contains integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request complex(psb_dpk_) :: dat_ - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -952,41 +1297,44 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,1,& + call mpi_exscan(dat_,dat,1,& & psb_mpi_c_dpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,1,& + call mpi_iexscan(dat_,dat,1,& & psb_mpi_c_dpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then call mpi_wait(request,status,minfo) end if end if + info = minfo #else dat = zzero #endif end subroutine psb_zexscan_sums subroutine psb_zscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt complex(psb_dpk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync - -#if !defined(SERIAL_MPI) + complex(psb_dpk_), allocatable :: dat_(:) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1003,40 +1351,43 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_scan(MPI_IN_PLACE,dat,size(dat),& + call mpi_scan(dat_,dat,size(dat),& & psb_mpi_c_dpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_dpk_,mpi_sum,icomm,request,info) + call mpi_iscan(dat_,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if + info = minfo #endif end subroutine psb_zscan_sumv subroutine psb_zexscan_sumv(ctxt,dat,mode,request) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt complex(psb_dpk_), intent(inout) :: dat(:) integer(psb_ipk_), intent(in), optional :: mode integer(psb_mpk_), intent(inout), optional :: request - complex(psb_dpk_), allocatable :: dat_(:) - integer(psb_ipk_) :: iam, np, info - integer(psb_mpk_) :: minfo + + integer(psb_ipk_) :: info + integer(psb_mpk_) :: iam, np, minfo integer(psb_mpk_) :: icomm integer(psb_mpk_) :: status(mpi_status_size) logical :: collective_start, collective_end, collective_sync + complex(psb_dpk_), allocatable :: dat_(:) -#if !defined(SERIAL_MPI) +#if !defined(PSB_SERIAL_MPI) call psb_info(ctxt,iam,np) icomm = psb_get_mpi_comm(ctxt) if (present(mode)) then @@ -1053,18 +1404,19 @@ contains collective_start = .false. collective_end = .false. 
end if + dat_ = dat if (collective_sync) then - call mpi_exscan(MPI_IN_PLACE,dat,size(dat),& + call mpi_exscan(dat_,dat,size(dat),& & psb_mpi_c_dpk_,mpi_sum,icomm,minfo) else if (collective_start) then - call mpi_iexscan(MPI_IN_PLACE,dat,size(dat),& - & psb_mpi_c_dpk_,mpi_sum,icomm,request,info) + call mpi_iexscan(dat_,dat,size(dat),& + & psb_mpi_c_dpk_,mpi_sum,icomm,request,minfo) else if (collective_end) then - call mpi_wait(request,status,info) + call mpi_wait(request,status,minfo) end if end if - + info = minfo #else dat = zzero #endif @@ -1079,7 +1431,9 @@ contains integer(psb_mpk_), intent(in) :: bsdindx(:), brvindx(:), sdsz(:), rvsz(:) type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz + integer(psb_ipk_) :: i,j,k, ipx, idx + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1108,11 +1462,11 @@ contains subroutine psb_z_m_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif complex(psb_dpk_), intent(in) :: valsnd(:) @@ -1124,9 +1478,11 @@ contains integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1191,11 +1547,11 @@ contains subroutine psb_z_e_simple_triad_a2av(valsnd,iasnd,jasnd,sdsz,bsdindx,& & valrcv,iarcv,jarcv,rvsz,brvindx,ctxt,info) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif complex(psb_dpk_), intent(in) :: valsnd(:) @@ -1207,9 +1563,11 @@ contains 
integer(psb_ipk_), intent(out) :: info !Local variables - integer(psb_ipk_) :: iam, np, i,j,k, ip, ipx, idx, sz, counter + integer(psb_ipk_) :: i,j,k, ipx, idx, counter integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret, icomm integer(psb_mpk_), allocatable :: prcid(:), rvhd(:,:) + integer(psb_mpk_) :: ip, sz + integer(psb_mpk_) :: iam, np call psb_info(ctxt,iam,np) @@ -1271,6 +1629,5 @@ contains Enddo end subroutine psb_z_e_simple_triad_a2av - end module psi_z_collective_mod diff --git a/base/modules/penv/psi_z_p2p_mod.F90 b/base/modules/penv/psi_z_p2p_mod.F90 index cf12d978..6606d4ed 100644 --- a/base/modules/penv/psi_z_p2p_mod.F90 +++ b/base/modules/penv/psi_z_p2p_mod.F90 @@ -44,11 +44,11 @@ module psi_z_p2p_mod contains subroutine psb_zsnds(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -56,7 +56,7 @@ contains integer(psb_mpk_), intent(in) :: dst complex(psb_dpk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else allocate(dat_(1), stat=info) @@ -67,11 +67,11 @@ contains subroutine psb_zsndv(ctxt,dat,dst) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -80,7 +80,7 @@ contains complex(psb_dpk_), allocatable :: dat_(:) integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else allocate(dat_(size(dat)), stat=info) dat_(:) = dat(:) @@ -91,11 +91,11 @@ contains subroutine psb_zsndm(ctxt,dat,dst,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -106,7 +106,7 @@ contains integer(psb_ipk_) :: i,j,k,m_,n_ integer(psb_mpk_) :: info -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else if (present(m)) then m_ = m @@ -127,11 +127,11 @@ contains end subroutine psb_zsndm subroutine psb_zrcvs(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -139,7 +139,7 @@ contains integer(psb_mpk_), intent(in) :: src integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! 
do nothing #else icomm = psb_get_mpi_comm(ctxt) @@ -150,11 +150,11 @@ contains subroutine psb_zrcvv(ctxt,dat,src) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -163,7 +163,7 @@ contains complex(psb_dpk_), allocatable :: dat_(:) integer(psb_mpk_) :: info, icomm integer(psb_mpk_) :: status(mpi_status_size) -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) #else icomm = psb_get_mpi_comm(ctxt) call mpi_recv(dat,size(dat),psb_mpi_c_dpk_,src,psb_dcomplex_tag,icomm,status,info) @@ -174,11 +174,11 @@ contains subroutine psb_zrcvm(ctxt,dat,src,m) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ctxt_type), intent(in) :: ctxt @@ -189,7 +189,7 @@ contains integer(psb_mpk_) :: info ,m_,n_, ld, mp_rcv_type integer(psb_mpk_) :: i,j,k integer(psb_mpk_) :: status(mpi_status_size), icomm -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) ! What should we do here?? 
#else if (present(m)) then diff --git a/base/modules/psb_cbind_const_mod.F90 b/base/modules/psb_cbind_const_mod.F90 index a20258ac..a976d55d 100644 --- a/base/modules/psb_cbind_const_mod.F90 +++ b/base/modules/psb_cbind_const_mod.F90 @@ -35,13 +35,13 @@ module psb_cbind_const_mod use psb_const_mod integer, parameter :: psb_c_mpk_ = c_int32_t -#if defined(IPK4) && defined(LPK4) +#if defined(PSB_IPK4) && defined(PSB_LPK4) integer, parameter :: psb_c_ipk_ = c_int32_t integer, parameter :: psb_c_lpk_ = c_int32_t -#elif defined(IPK4) && defined(LPK8) +#elif defined(PSB_IPK4) && defined(PSB_LPK8) integer, parameter :: psb_c_ipk_ = c_int32_t integer, parameter :: psb_c_lpk_ = c_int64_t -#elif defined(IPK8) && defined(LPK8) +#elif defined(PSB_IPK8) && defined(PSB_LPK8) integer, parameter :: psb_c_ipk_ = c_int64_t integer, parameter :: psb_c_lpk_ = c_int64_t #else diff --git a/base/modules/psb_config.h.in b/base/modules/psb_config.h.in new file mode 100644 index 00000000..3bf9a645 --- /dev/null +++ b/base/modules/psb_config.h.in @@ -0,0 +1,29 @@ +#ifndef PSB_CONFIG_H +#define PSB_CONFIG_H + +#define PSB_ERR_ERROR -1 +#define PSB_ERR_SUCCESS 0 + +@CSERIALMPI@ + +@PSB_IPKDEF@ +@PSB_LPKDEF@ + +@CHAVE_OPENMP@ + +@CHAVEMETIS@ +@CINTMETIS@ +@CREALMETIS@ + +@CHAVEAMD@ + +@CHAVECUDA@ +@CSHORTVCUDA@ +@CVERSIONCUDA@ + + +@CHAVELIBRSB@ + + + +#endif diff --git a/base/modules/psb_const_mod.F90 b/base/modules/psb_const_mod.F90 index 56134474..b4cacdca 100644 --- a/base/modules/psb_const_mod.F90 +++ b/base/modules/psb_const_mod.F90 @@ -70,29 +70,29 @@ module psb_const_mod #endif ! Now for the choices: - ! IPK = integer kind for "local" indices and sizes. + ! PSB_IPK = integer kind for "local" indices and sizes. ! Can be 4 or 8 bytes. - ! LPK = integer kind for "global" indices and sizes. + ! PSB_LPK = integer kind for "global" indices and sizes. ! Can be 4 or 8 bytes. - ! Size must be >= size of IPK + ! Size must be >= size of PSB_IPK ! ! Additional rules: ! 1. 
MPI related stuff is always MPK ! 2. ctxt,IAM,NP: should we have two versions of everything, ! one with MPK the other with EPK? - ! 3. INFO, ERR_ACT, IERR etc are always IPK + ! 3. INFO, ERR_ACT, IERR etc are always PSB_IPK ! 4. For the array version of things, where it makes sense ! e.g. realloc, snd/receive, define as MPK,EPK and the ! compiler will later pick up the correct version according - ! to what IPK/LPK are mapped onto. + ! to what PSB_IPK/PSB_LPK are mapped onto. ! -#if defined(IPK4) && defined(LPK4) +#if defined(PSB_IPK4) && defined(PSB_LPK4) integer, parameter :: psb_ipk_ = psb_mpk_ integer, parameter :: psb_lpk_ = psb_mpk_ -#elif defined(IPK4) && defined(LPK8) +#elif defined(PSB_IPK4) && defined(PSB_LPK8) integer, parameter :: psb_ipk_ = psb_mpk_ integer, parameter :: psb_lpk_ = psb_epk_ -#elif defined(IPK8) && defined(LPK8) +#elif defined(PSB_IPK8) && defined(PSB_LPK8) integer, parameter :: psb_ipk_ = psb_epk_ integer, parameter :: psb_lpk_ = psb_epk_ #else @@ -101,24 +101,24 @@ module psb_const_mod integer, parameter :: psb_lpk_ = -1 #endif - integer(psb_mpk_), save :: psb_sizeof_sp - integer(psb_mpk_), save :: psb_sizeof_dp - integer(psb_mpk_), save :: psb_sizeof_i2p = 2 - integer(psb_mpk_), save :: psb_sizeof_mp = 4 - integer(psb_mpk_), save :: psb_sizeof_ep = 8 + integer(psb_epk_), save :: psb_sizeof_sp + integer(psb_epk_), save :: psb_sizeof_dp + integer(psb_epk_), save :: psb_sizeof_i2p = 2 + integer(psb_epk_), save :: psb_sizeof_mp = 4 + integer(psb_epk_), save :: psb_sizeof_ep = 8 -#if defined(IPK4) && defined(LPK4) - integer(psb_mpk_), save :: psb_sizeof_ip = 4 - integer(psb_mpk_), save :: psb_sizeof_lp = 4 -#elif defined(IPK4) && defined(LPK8) - integer(psb_mpk_), save :: psb_sizeof_ip = 4 - integer(psb_mpk_), save :: psb_sizeof_lp = 8 -#elif defined(IPK8) && defined(LPK8) - integer(psb_mpk_), save :: psb_sizeof_ip = 8 - integer(psb_mpk_), save :: psb_sizeof_lp = 8 +#if defined(PSB_IPK4) && defined(PSB_LPK4) + integer(psb_epk_), save :: 
psb_sizeof_ip = 4 + integer(psb_epk_), save :: psb_sizeof_lp = 4 +#elif defined(PSB_IPK4) && defined(PSB_LPK8) + integer(psb_epk_), save :: psb_sizeof_ip = 4 + integer(psb_epk_), save :: psb_sizeof_lp = 8 +#elif defined(PSB_IPK8) && defined(PSB_LPK8) + integer(psb_epk_), save :: psb_sizeof_ip = 8 + integer(psb_epk_), save :: psb_sizeof_lp = 8 #else - integer(psb_mpk_), save :: psb_sizeof_ip = -1 - integer(psb_mpk_), save :: psb_sizeof_lp = -1 + integer(psb_epk_), save :: psb_sizeof_ip = -1 + integer(psb_epk_), save :: psb_sizeof_lp = -1 #endif ! @@ -136,9 +136,9 @@ module psb_const_mod ! ! Version ! - character(len=*), parameter :: psb_version_string_ = "3.8.0" + character(len=*), parameter :: psb_version_string_ = "3.9.0" integer(psb_ipk_), parameter :: psb_version_major_ = 3 - integer(psb_ipk_), parameter :: psb_version_minor_ = 8 + integer(psb_ipk_), parameter :: psb_version_minor_ = 9 integer(psb_ipk_), parameter :: psb_patchlevel_ = 0 ! @@ -155,7 +155,7 @@ module psb_const_mod integer(psb_i2pk_), parameter :: i2zero=0, i2one=1 integer(psb_i2pk_), parameter :: i2two=2, i2three=3, i2mone=-1 - integer(psb_ipk_), parameter :: psb_root_=0 + integer(psb_mpk_), parameter :: psb_root_=0 real(psb_spk_), parameter :: szero=0.0_psb_spk_, sone=1.0_psb_spk_ real(psb_dpk_), parameter :: dzero=0.0_psb_dpk_, done=1.0_psb_dpk_ complex(psb_spk_), parameter :: czero=(0.0_psb_spk_,0.0_psb_spk_) diff --git a/base/modules/psb_error_impl.F90 b/base/modules/psb_error_impl.F90 index f4308816..76a3c4fb 100644 --- a/base/modules/psb_error_impl.F90 +++ b/base/modules/psb_error_impl.F90 @@ -9,13 +9,13 @@ subroutine psb_errcomm_i(ctxt, err) end subroutine psb_errcomm_i -#if defined(IPK8) +#if defined(PSB_IPK8) subroutine psb_errcomm_m(ctxt, err) use psb_error_mod, psb_protect_name => psb_errcomm use psb_penv_mod type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_ipk_), intent(inout) :: err + integer(psb_mpk_), intent(inout) :: err if (psb_get_global_checks()) call psb_amx(ctxt, err) @@ 
-105,7 +105,7 @@ subroutine psb_serror() end do end if end if -#if defined(HAVE_FLUSH_STMT) +#if defined(PSB_HAVE_FLUSH_STMT) flush(psb_err_unit) #endif @@ -142,7 +142,7 @@ subroutine psb_perror(ctxt,abrt) call psb_errmsg(psb_err_unit,err_c, r_name, e_e_d, a_e_d,iam) ! write(psb_err_unit,'(50("="))') end do -#if defined(HAVE_FLUSH_STMT) +#if defined(PSB_HAVE_FLUSH_STMT) flush(psb_err_unit) #endif @@ -155,7 +155,7 @@ subroutine psb_perror(ctxt,abrt) do while (psb_get_numerr() > 0) call psb_errpop(err_c, r_name, e_e_d, a_e_d) end do -#if defined(HAVE_FLUSH_STMT) +#if defined(PSB_HAVE_FLUSH_STMT) flush(psb_err_unit) #endif diff --git a/base/modules/psb_error_mod.F90 b/base/modules/psb_error_mod.F90 index 28a73498..f47f884b 100644 --- a/base/modules/psb_error_mod.F90 +++ b/base/modules/psb_error_mod.F90 @@ -100,18 +100,18 @@ module psb_error_mod end interface interface psb_errcomm -#if defined(IPK8) - subroutine psb_errcomm_m(ctxt, err) - import :: psb_ipk_, psb_mpk_, psb_ctxt_type - type(pxb_ctxt_type), intent(in) :: ctxt - integer(psb_ipk_), intent(inout) :: err - end subroutine psb_errcomm_m -#endif subroutine psb_errcomm_i(ctxt, err) import :: psb_ipk_, psb_ctxt_type type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(inout) :: err end subroutine psb_errcomm_i +#if defined(PSB_IPK8) + subroutine psb_errcomm_m(ctxt, err) + import :: psb_ipk_, psb_mpk_, psb_ctxt_type + type(psb_ctxt_type), intent(in) :: ctxt + integer(psb_mpk_), intent(inout) :: err + end subroutine psb_errcomm_m +#endif end interface psb_errcomm interface psb_errpop diff --git a/base/modules/psb_fakempi.c b/base/modules/psb_fakempi.c new file mode 100644 index 00000000..8e37f7fe --- /dev/null +++ b/base/modules/psb_fakempi.c @@ -0,0 +1,324 @@ +#include +#include +#include +#include "psb_fakempi.h" + +double mpi_wtime() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? 
\n"); + temp=0.0; + } else { + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + } + return(temp); +} + + +void mpi_wait(int *request, int* status, int *ierr) + +{ + *ierr = 0; + return; +} +void mpi_send(void* buf, int* count, int* datatype, + int *dest, int *tag, int *comm, int *ierr) +{ + *ierr = 0; + return; +} +void mpi_isend(void* buf, int* count, int* datatype, + int *dest, int *tag, int *comm, int *request, + int *ierr) +{ + *ierr = 0; + return; +} +void mpi_irecv(void* buf, int* count, int* datatype, + int *src, int *tag, int *comm, int *request, + int *ierr) +{ + *ierr = 0; + return; +} + + +void mpi_alltoall(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rvt, int* comm, int* ierr) +{ + int i,j,k; + + if ((*sdt == MPI_INTEGER)||(*sdt == MPI_INTEGER4)||(*sdt == MPI_LOGICAL)) { + memcpy(rvb,sdb, (*sdc)*sizeof(int32_t)); + } + if (*sdt == MPI_CHARACTER) { + memcpy(rvb,sdb, (*sdc)*sizeof(char)); + } + if (*sdt == MPI_INTEGER8) { + memcpy(rvb,sdb, (*sdc)*sizeof(int64_t)); + } + if (*sdt == MPI_INTEGER2) { + memcpy(rvb,sdb, (*sdc)*sizeof(int16_t)); + } + if (*sdt == MPI_REAL) { + memcpy(rvb,sdb, (*sdc)*sizeof(float)); + } + if (*sdt == MPI_DOUBLE) { + memcpy(rvb,sdb, (*sdc)*sizeof(double)); + } + if (*sdt == MPI_COMPLEX) { + memcpy(rvb,sdb, (*sdc)*2*sizeof(float)); + } + if (*sdt == MPI_DOUBLE_COMPLEX) { + memcpy(rvb,sdb, (*sdc)*2*sizeof(double)); + } + *ierr = 0; +} + +void mpi_alltoallv(void* sdb, int* sdc, int* sdspl, int* sdt, + void* rvb, int* rvc, int* rdspl, int* rvt, int* comm, int* ierr) +{ + int i,j,k; + + + if ((*sdt == MPI_INTEGER)||(*sdt == MPI_INTEGER4)||(*sdt == MPI_LOGICAL)) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(int32_t)), + (void *)((char *)sdb+sdspl[0]*sizeof(int32_t)),(*sdc)*sizeof(int32_t)); + } + if (*sdt == MPI_CHARACTER) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(char)), + (void *)((char *)sdb+sdspl[0]*sizeof(char)),(*sdc)*sizeof(char)); + } + if (*sdt == MPI_INTEGER8) { + memcpy((void *)((char 
*)rvb+rdspl[0]*sizeof(int64_t)), + (void *)((char *)sdb+sdspl[0]*sizeof(int64_t)),(*sdc)*sizeof(int64_t)); + } + if (*sdt == MPI_REAL) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(float)), + (void *)((char *)sdb+sdspl[0]*sizeof(float)),(*sdc)*sizeof(float)); + } + if (*sdt == MPI_DOUBLE) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(double)), + (void *)((char *)sdb+sdspl[0]*sizeof(double)),(*sdc)*sizeof(double)); + } + if (*sdt == MPI_COMPLEX) { + memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(float)), + (void *)((char *)sdb+sdspl[0]*2*sizeof(float)),(*sdc)*2*sizeof(float)); + } + if (*sdt == MPI_DOUBLE_COMPLEX) { + memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(double)), + (void *)((char *)sdb+sdspl[0]*2*sizeof(double)),(*sdc)*2*sizeof(double)); + } + *ierr = 0; +} + + +void mpi_gather(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rvt, int *root, int* comm, int* ierr) +{ + int i,j,k; + + if ((*sdt == MPI_INTEGER)||(*sdt == MPI_INTEGER4)||(*sdt == MPI_LOGICAL)) { + memcpy(rvb,sdb, (*sdc)*sizeof(int32_t)); + } + if (*sdt == MPI_INTEGER8) { + memcpy(rvb,sdb, (*sdc)*sizeof(int64_t)); + } + if (*sdt == MPI_CHARACTER) { + memcpy(rvb,sdb, (*sdc)*sizeof(char)); + } + if (*sdt == MPI_REAL) { + memcpy(rvb,sdb, (*sdc)*sizeof(float)); + } + if (*sdt == MPI_DOUBLE) { + memcpy(rvb,sdb, (*sdc)*sizeof(double)); + } + if (*sdt == MPI_COMPLEX) { + memcpy(rvb,sdb, (*sdc)*2*sizeof(float)); + } + if (*sdt == MPI_DOUBLE_COMPLEX) { + memcpy(rvb,sdb, (*sdc)*2*sizeof(double)); + } + *ierr = 0; +} + + +void mpi_gatherv(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rdspl, + int* rvt, int* comm, int *root, int* ierr) +{ + int i,j,k; + + if ((*sdt == MPI_INTEGER)||(*sdt == MPI_INTEGER4)||(*sdt == MPI_LOGICAL)) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(int32_t)), + (void *)((char *)sdb),(*sdc)*sizeof(int32_t)); + } + if (*sdt == MPI_INTEGER8) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(int64_t)), + (void *)((char *)sdb),(*sdc)*sizeof(int64_t)); + } + if 
(*sdt == MPI_CHARACTER) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(char)), + (void *)((char *)sdb),(*sdc)*sizeof(char)); + } + if (*sdt == MPI_REAL) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(float)), + (void *)((char *)sdb),(*sdc)*sizeof(float)); + } + if (*sdt == MPI_DOUBLE) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(double)), + (void *)((char *)sdb),(*sdc)*sizeof(double)); + } + if (*sdt == MPI_COMPLEX) { + memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(float)), + (void *)((char *)sdb),(*sdc)*2*sizeof(float)); + } + if (*sdt == MPI_DOUBLE_COMPLEX) { + memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(double)), + (void *)((char *)sdb),(*sdc)*2*sizeof(double)); + } + + + *ierr = 0; +} + + +void mpi_scatter(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rvt, int *root, int* comm, int* ierr) +{ + int i,j,k; + + if ((*sdt == MPI_INTEGER)||(*sdt == MPI_INTEGER4)||(*sdt == MPI_LOGICAL)) { + memcpy(rvb,sdb, (*sdc)*sizeof(int32_t)); + } + if (*sdt == MPI_CHARACTER) { + memcpy(rvb,sdb, (*sdc)*sizeof(char)); + } + if (*sdt == MPI_INTEGER8) { + memcpy(rvb,sdb, (*sdc)*sizeof(int64_t)); + } + if (*sdt == MPI_REAL) { + memcpy(rvb,sdb, (*sdc)*sizeof(float)); + } + if (*sdt == MPI_DOUBLE) { + memcpy(rvb,sdb, (*sdc)*sizeof(double)); + } + if (*sdt == MPI_COMPLEX) { + memcpy(rvb,sdb, (*sdc)*2*sizeof(float)); + } + if (*sdt == MPI_DOUBLE_COMPLEX) { + memcpy(rvb,sdb, (*sdc)*2*sizeof(double)); + } + *ierr = 0; +} + + +void mpi_scatterv(void* sdb, int* sdc, int* sdspl, int* sdt, + void* rvb, int* rvc, + int* rvt, int* comm, int *root, int* ierr) +{ + int i,j,k; + + if ((*sdt == MPI_INTEGER)||(*sdt == MPI_INTEGER4)||(*sdt == MPI_LOGICAL)) { + memcpy((void *)((char *)rvb+sdspl[0]*sizeof(int32_t)), + (void *)((char *)sdb),(*sdc)*sizeof(int32_t)); + } + if (*sdt == MPI_CHARACTER) { + memcpy((void *)((char *)rvb+sdspl[0]*sizeof(char)), + (void *)((char *)sdb),(*sdc)*sizeof(char)); + } + if (*sdt == MPI_INTEGER8) { + memcpy((void *)((char 
*)rvb+sdspl[0]*sizeof(int64_t)), + (void *)((char *)sdb),(*sdc)*sizeof(int64_t)); + } + if (*sdt == MPI_REAL) { + memcpy((void *)((char *)rvb+sdspl[0]*sizeof(float)), + (void *)((char *)sdb),(*sdc)*sizeof(float)); + } + if (*sdt == MPI_DOUBLE) { + memcpy((void *)((char *)rvb+sdspl[0]*sizeof(double)), + (void *)((char *)sdb),(*sdc)*sizeof(double)); + } + if (*sdt == MPI_COMPLEX) { + memcpy((void *)((char *)rvb+sdspl[0]*2*sizeof(float)), + (void *)((char *)sdb),(*sdc)*2*sizeof(float)); + } + if (*sdt == MPI_DOUBLE_COMPLEX) { + memcpy((void *)((char *)rvb+sdspl[0]*2*sizeof(double)), + (void *)((char *)sdb),(*sdc)*2*sizeof(double)); + } + + + *ierr = 0; +} + + +void mpi_allgather(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rvt, int* comm, int* ierr) +{ + int i,j,k; + + if ((*sdt == MPI_INTEGER)||(*sdt == MPI_INTEGER4)||(*sdt == MPI_LOGICAL)) { + memcpy(rvb,sdb, (*sdc)*sizeof(int32_t)); + } + if (*sdt == MPI_CHARACTER) { + memcpy(rvb,sdb, (*sdc)*sizeof(char)); + } + if (*sdt == MPI_INTEGER8) { + memcpy(rvb,sdb, (*sdc)*sizeof(int64_t)); + } + if (*sdt == MPI_REAL) { + memcpy(rvb,sdb, (*sdc)*sizeof(float)); + } + if (*sdt == MPI_DOUBLE) { + memcpy(rvb,sdb, (*sdc)*sizeof(double)); + } + if (*sdt == MPI_COMPLEX) { + memcpy(rvb,sdb, (*sdc)*2*sizeof(float)); + } + if (*sdt == MPI_DOUBLE_COMPLEX) { + memcpy(rvb,sdb, (*sdc)*2*sizeof(double)); + } + *ierr = 0; +} + +void mpi_allgatherv(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rdspl, + int* rvt, int* comm, int* ierr) +{ + int i,j,k; + + if ((*sdt == MPI_INTEGER)||(*sdt == MPI_INTEGER4)||(*sdt == MPI_LOGICAL)) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(int32_t)), + (void *)((char *)sdb),(*sdc)*sizeof(int32_t)); + } + if (*sdt == MPI_CHARACTER) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(char)), + (void *)((char *)sdb),(*sdc)*sizeof(char)); + } + if (*sdt == MPI_INTEGER8) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(int64_t)), + (void *)((char *)sdb),(*sdc)*sizeof(int64_t)); + } + if 
(*sdt == MPI_REAL) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(float)), + (void *)((char *)sdb),(*sdc)*sizeof(float)); + } + if (*sdt == MPI_DOUBLE) { + memcpy((void *)((char *)rvb+rdspl[0]*sizeof(double)), + (void *)((char *)sdb),(*sdc)*sizeof(double)); + } + if (*sdt == MPI_COMPLEX) { + memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(float)), + (void *)((char *)sdb),(*sdc)*2*sizeof(float)); + } + if (*sdt == MPI_DOUBLE_COMPLEX) { + memcpy((void *)((char *)rvb+rdspl[0]*2*sizeof(double)), + (void *)((char *)sdb),(*sdc)*2*sizeof(double)); + } + + *ierr = 0; +} diff --git a/base/modules/psb_fakempi.h b/base/modules/psb_fakempi.h new file mode 100644 index 00000000..164f4794 --- /dev/null +++ b/base/modules/psb_fakempi.h @@ -0,0 +1,49 @@ +#ifndef PSB_FAKEMPI_H +#define PSB_FAKEMPI_H +#include "psb_config.h" +#include "psb_types.h" + +#define MPI_INTEGER 1 +#define MPI_INTEGER8 2 +#define MPI_REAL 3 +#define MPI_DOUBLE 4 +#define MPI_COMPLEX 5 +#define MPI_DOUBLE_COMPLEX 6 +#define MPI_CHARACTER 7 +#define MPI_LOGICAL 8 +#define MPI_INTEGER2 9 +#define MPI_INTEGER4 10 +#define MPI_COMM_NULL -1 +#define MPI_COMM_WORLD 1 + + +double mpi_wtime(); +void mpi_wait(int *request, int* status, int *ierr); +void mpi_send(void* buf, int* count, int* datatype, + int *dest, int *tag, int *comm, int *ierr); +void mpi_isend(void* buf, int* count, int* datatype, + int *dest, int *tag, int *comm, int *request, + int *ierr); +void mpi_irecv(void* buf, int* count, int* datatype, + int *src, int *tag, int *comm, int *request, + int *ierr); +void mpi_alltoall(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rvt, int* comm, int* ierr); +void mpi_alltoallv(void* sdb, int* sdc, int* sdspl, int* sdt, + void* rvb, int* rvc, int* rdspl, int* rvt, int* comm, int* ierr); +void mpi_gather(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rvt, int *root, int* comm, int* ierr); +void mpi_gatherv(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rdspl, + int* rvt, int* 
comm, int *root, int* ierr); +void mpi_scatter(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rvt, int *root, int* comm, int* ierr); +void mpi_scatterv(void* sdb, int* sdc, int* sdspl, int* sdt, + void* rvb, int* rvc, + int* rvt, int* comm, int *root, int* ierr); +void mpi_allgather(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rvt, int* comm, int* ierr); +void mpi_allgatherv(void* sdb, int* sdc, int* sdt, + void* rvb, int* rvc, int* rdspl, + int* rvt, int* comm, int* ierr); +#endif diff --git a/base/modules/psb_internals.h b/base/modules/psb_internals.h deleted file mode 100644 index 5335ffd2..00000000 --- a/base/modules/psb_internals.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef PSB_INTERNALS_H -#define PSB_INTERNALS_H - -/* providing a default mangling scheme */ -#ifndef LowerUnderscore -#ifndef LowerDoubleUnderscore -#ifndef LowerCase -#ifndef UpperUnderscore -#ifndef UpperDoubleUnderscore -#ifndef UpperCase -#define LowerUnderscore 1 /* 20110404 the default */ -/* #error "should specify a default mangling scheme" */ -#endif -#endif -#endif -#endif -#endif -#endif - -#endif diff --git a/base/modules/psb_types.h b/base/modules/psb_types.h new file mode 100644 index 00000000..d8495ff9 --- /dev/null +++ b/base/modules/psb_types.h @@ -0,0 +1,38 @@ +#ifndef PSB_TYPES_H +#define PSB_TYPES_H + + +#include + +#ifdef __cplusplus +#include +#else +#include +#include +#endif + typedef int32_t psb_m_t; + +#if defined(PSB_IPK4) && defined(PSB_LPK4) + typedef int32_t psb_i_t; + typedef int32_t psb_l_t; +#elif defined(PSB_IPK4) && defined(PSB_LPK8) + typedef int32_t psb_i_t; + typedef int64_t psb_l_t; +#elif defined(PSB_IPK8) && defined(PSB_LPK8) + typedef int64_t psb_i_t; + typedef int64_t psb_l_t; +#else +#endif + typedef int64_t psb_e_t; + + typedef float psb_s_t; + typedef double psb_d_t; + +#ifdef __cplusplus + typedef std::complex psb_c_t; + typedef std::complex psb_z_t; +#else + typedef float complex psb_c_t; + typedef double complex psb_z_t; 
+#endif +#endif diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index 98deebd8..130159bc 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface + interface psb_upd_xyz + subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_c_vect_type, psb_cspmat_type + type(psb_c_vect_type), intent (inout) :: x + type(psb_c_vect_type), intent (inout) :: y + type(psb_c_vect_type), intent (inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_upd_xyz_vect + end interface psb_upd_xyz + interface psb_geamax function psb_camax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_spk_, psb_ipk_, & @@ -174,7 +188,7 @@ module psb_c_psblas_mod end function psb_camax_vect end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_genrmi procedure psb_camax, psb_camaxv, psb_camax_vect end interface @@ -258,7 +272,7 @@ module psb_c_psblas_mod end subroutine psb_cmasum end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_genrm1 procedure psb_casum, psb_casumv, psb_casum_vect end interface @@ -321,7 +335,7 @@ module psb_c_psblas_mod end function psb_cnrm2_weightmask_vect end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_norm2 procedure psb_cnrm2, psb_cnrm2v, psb_cnrm2_vect, psb_cnrm2_weight_vect, psb_cnrm2_weightmask_vect end interface @@ -352,7 +366,7 @@ module psb_c_psblas_mod end function psb_cnrmi end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! 
defined(PSB_HAVE_BUGGY_GENERICS) interface psb_normi procedure psb_cnrmi end interface @@ -370,7 +384,7 @@ module psb_c_psblas_mod end function psb_cspnrm1 end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_norm1 procedure psb_cspnrm1 end interface diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index e4988387..afc3acaf 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface + interface psb_upd_xyz + subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_vect_type, psb_dspmat_type + type(psb_d_vect_type), intent (inout) :: x + type(psb_d_vect_type), intent (inout) :: y + type(psb_d_vect_type), intent (inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_upd_xyz_vect + end interface psb_upd_xyz + interface psb_geamax function psb_damax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & @@ -174,7 +188,7 @@ module psb_d_psblas_mod end function psb_damax_vect end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_genrmi procedure psb_damax, psb_damaxv, psb_damax_vect end interface @@ -269,7 +283,7 @@ module psb_d_psblas_mod end subroutine psb_dmasum end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_genrm1 procedure psb_dasum, psb_dasumv, psb_dasum_vect end interface @@ -332,7 +346,7 @@ module psb_d_psblas_mod end function psb_dnrm2_weightmask_vect end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! 
defined(PSB_HAVE_BUGGY_GENERICS) interface psb_norm2 procedure psb_dnrm2, psb_dnrm2v, psb_dnrm2_vect, psb_dnrm2_weight_vect, psb_dnrm2_weightmask_vect end interface @@ -363,7 +377,7 @@ module psb_d_psblas_mod end function psb_dnrmi end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_normi procedure psb_dnrmi end interface @@ -381,7 +395,7 @@ module psb_d_psblas_mod end function psb_dspnrm1 end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_norm1 procedure psb_dspnrm1 end interface diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index 93fe74b9..6048d023 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface + interface psb_upd_xyz + subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_s_vect_type, psb_sspmat_type + type(psb_s_vect_type), intent (inout) :: x + type(psb_s_vect_type), intent (inout) :: y + type(psb_s_vect_type), intent (inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_upd_xyz_vect + end interface psb_upd_xyz + interface psb_geamax function psb_samax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_spk_, psb_ipk_, & @@ -174,7 +188,7 @@ module psb_s_psblas_mod end function psb_samax_vect end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_genrmi procedure psb_samax, psb_samaxv, psb_samax_vect end interface @@ -269,7 +283,7 @@ module psb_s_psblas_mod end subroutine psb_smasum end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! 
defined(PSB_HAVE_BUGGY_GENERICS) interface psb_genrm1 procedure psb_sasum, psb_sasumv, psb_sasum_vect end interface @@ -332,7 +346,7 @@ module psb_s_psblas_mod end function psb_snrm2_weightmask_vect end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_norm2 procedure psb_snrm2, psb_snrm2v, psb_snrm2_vect, psb_snrm2_weight_vect, psb_snrm2_weightmask_vect end interface @@ -363,7 +377,7 @@ module psb_s_psblas_mod end function psb_snrmi end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_normi procedure psb_snrmi end interface @@ -381,7 +395,7 @@ module psb_s_psblas_mod end function psb_sspnrm1 end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_norm1 procedure psb_sspnrm1 end interface diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index 06be1b82..fd0cc300 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface + interface psb_upd_xyz + subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_z_vect_type, psb_zspmat_type + type(psb_z_vect_type), intent (inout) :: x + type(psb_z_vect_type), intent (inout) :: y + type(psb_z_vect_type), intent (inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_upd_xyz_vect + end interface psb_upd_xyz + interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & @@ -174,7 +188,7 @@ module psb_z_psblas_mod end function psb_zamax_vect end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! 
defined(PSB_HAVE_BUGGY_GENERICS) interface psb_genrmi procedure psb_zamax, psb_zamaxv, psb_zamax_vect end interface @@ -258,7 +272,7 @@ module psb_z_psblas_mod end subroutine psb_zmasum end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_genrm1 procedure psb_zasum, psb_zasumv, psb_zasum_vect end interface @@ -321,7 +335,7 @@ module psb_z_psblas_mod end function psb_znrm2_weightmask_vect end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_norm2 procedure psb_znrm2, psb_znrm2v, psb_znrm2_vect, psb_znrm2_weight_vect, psb_znrm2_weightmask_vect end interface @@ -352,7 +366,7 @@ module psb_z_psblas_mod end function psb_znrmi end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_normi procedure psb_znrmi end interface @@ -370,7 +384,7 @@ module psb_z_psblas_mod end function psb_zspnrm1 end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface psb_norm1 procedure psb_zspnrm1 end interface diff --git a/base/modules/psi_i_mod.F90 b/base/modules/psi_i_mod.F90 index e852dd99..881ae078 100644 --- a/base/modules/psi_i_mod.F90 +++ b/base/modules/psi_i_mod.F90 @@ -87,8 +87,8 @@ module psi_i_mod subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,ctxt,info) import implicit none - integer(psb_ipk_), intent(in) :: dl_ptr(0:) - integer(psb_ipk_), intent(inout) :: c_dep_list(:), l_dep_list(0:) + integer(psb_mpk_), intent(in) :: dl_ptr(0:), l_dep_list(0:) + integer(psb_ipk_), intent(inout) :: c_dep_list(:) type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(out) :: info end subroutine psi_i_csr_sort_dl @@ -98,8 +98,10 @@ module psi_i_mod subroutine psi_i_bld_glb_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) import type(psb_ctxt_type), intent(in) :: ctxt - integer(psb_ipk_), intent(in) :: loc_dl(:), length_dl(0:) - integer(psb_ipk_), allocatable, intent(out) :: 
c_dep_list(:), dl_ptr(:) + integer(psb_ipk_), intent(in) :: loc_dl(:) + integer(psb_mpk_), intent(in) :: length_dl(0:) + integer(psb_mpk_), allocatable, intent(out) :: dl_ptr(:) + integer(psb_ipk_), allocatable, intent(out) :: c_dep_list(:) integer(psb_ipk_), intent(out) :: info end subroutine psi_i_bld_glb_dep_list end interface @@ -110,7 +112,8 @@ module psi_i_mod logical, intent(in) :: is_bld, is_upd type(psb_ctxt_type), intent(in) :: ctxt integer(psb_ipk_), intent(in) :: desc_str(:) - integer(psb_ipk_), allocatable, intent(out) :: loc_dl(:), length_dl(:) + integer(psb_ipk_), allocatable, intent(out) :: loc_dl(:) + integer(psb_mpk_), allocatable, intent(out) :: length_dl(:) integer(psb_ipk_), intent(out) :: info end subroutine psi_i_xtr_loc_dl end interface diff --git a/base/modules/serial/psb_base_mat_mod.F90 b/base/modules/serial/psb_base_mat_mod.F90 index 2380fcb2..42d480d8 100644 --- a/base/modules/serial/psb_base_mat_mod.F90 +++ b/base/modules/serial/psb_base_mat_mod.F90 @@ -61,7 +61,7 @@ ! ! We are also introducing the type psb_lbase_sparse_mat. ! The basic difference is in the type -! of the indices, which are PSB_LPK_ so that the entries +! of the indices, which are PSB_PSB_LPK_ so that the entries ! are guaranteed to be able to contain global indices. ! This type only supports data handling and preprocessing, it is ! not supposed to be used for computations. @@ -80,7 +80,7 @@ module psb_base_mat_mod integer(psb_ipk_), parameter :: spspmm_serial_rb_tree = 3 integer(psb_ipk_), parameter :: spspmm_omp_rb_tree = 4 integer(psb_ipk_), parameter :: spspmm_omp_two_pass = 5 -#if defined(OPENMP) +#if defined(PSB_OPENMP) integer(psb_ipk_), save :: spspmm_impl = spspmm_omp_gustavson #else integer(psb_ipk_), save :: spspmm_impl = spspmm_serial @@ -564,7 +564,7 @@ module psb_base_mat_mod ! 
== = ================================= procedure, pass(a) :: set_lnrows => psb_lbase_set_lnrows procedure, pass(a) :: set_lncols => psb_lbase_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: set_inrows => psb_lbase_set_inrows procedure, pass(a) :: set_incols => psb_lbase_set_incols generic, public :: set_nrows => set_lnrows, set_inrows @@ -601,7 +601,7 @@ module psb_base_mat_mod procedure, pass(a) :: reinit => psb_lbase_reinit procedure, pass(a) :: allocate_mnnz => psb_lbase_allocate_mnnz procedure, pass(a) :: reallocate_nz => psb_lbase_reallocate_nz -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: allocate_imnnz => psb_lbase_allocate_imnnz procedure, pass(a) :: reallocate_inz => psb_lbase_reallocate_inz generic, public :: allocate => allocate_mnnz, allocate_imnnz @@ -1433,7 +1433,7 @@ contains a%n = n end subroutine psb_lbase_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lbase_allocate_imnnz(m,n,a,nz) implicit none integer(psb_ipk_), intent(in) :: m,n diff --git a/base/modules/serial/psb_c_base_mat_mod.F90 b/base/modules/serial/psb_c_base_mat_mod.F90 index 33982e3a..a5537034 100644 --- a/base/modules/serial/psb_c_base_mat_mod.F90 +++ b/base/modules/serial/psb_c_base_mat_mod.F90 @@ -416,7 +416,7 @@ module psb_c_base_mat_mod ! ! This is COO specific ! 
-#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: iset_nzeros => lc_coo_iset_nzeros procedure, pass(a) :: lset_nzeros => lc_coo_lset_nzeros generic, public :: set_nzeros => iset_nzeros, lset_nzeros @@ -439,7 +439,7 @@ module psb_c_base_mat_mod private :: lc_coo_get_nzeros, lc_coo_iset_nzeros, & & lc_coo_get_fmt, lc_coo_free, lc_coo_sizeof, & & lc_coo_transp_1mat, lc_coo_transc_1mat -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) private :: lc_coo_lset_nzeros #endif @@ -3499,7 +3499,7 @@ module psb_c_base_mat_mod end subroutine psb_lc_coo_clean_negidx end interface -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! !> Funtion: coo_clean_negidx_inner !! \brief Take out any entries with negative row or column index @@ -4323,7 +4323,7 @@ contains end subroutine lc_coo_iset_nzeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine lc_coo_lset_nzeros(nz,a) implicit none integer(psb_lpk_), intent(in) :: nz diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index df15e0c9..65286969 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -155,6 +155,9 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: upd_xyz => c_base_upd_xyz + procedure, pass(w) :: xyzw => c_base_xyzw + ! ! Vector by vector multiplication. Need all variants ! 
to handle multiple requirements from preconditioners @@ -273,7 +276,7 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if -#if defined (OPENMP) +#if defined (PSB_OPENMP) !$omp parallel do private(i) do i = 1, size(this) x%v(i) = this(i) @@ -567,8 +570,8 @@ contains info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) - if (info == 0) call x%free_buffer(info) - if (info == 0) call x%free_comid(info) + if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info) + if ((info == 0).and.allocated(x%comid)) call x%free_comid(info) if (info /= 0) call & & psb_errpush(psb_err_alloc_dealloc_,'vect_free') @@ -838,7 +841,7 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val @@ -876,7 +879,7 @@ contains if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val(i-first_+1) @@ -925,7 +928,7 @@ contains if (allocated(x%v)) then if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i=1, size(x%v) x%v(i) = abs(x%v(i)) @@ -1018,7 +1021,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine c_base_axpby_v(m,alpha, x, beta, y, info) @@ -1047,7 +1050,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1078,7 +1081,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! 
\param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine c_base_axpby_a(m,alpha, x, beta, y, info) @@ -1126,6 +1129,64 @@ contains end subroutine c_base_axpby_a2 + ! + ! UPD_XYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_upd_xyz + !! \memberof psb_c_base_vect_type + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! + subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(alpha/=czero)) call x%sync() + if (y%is_dev().and.(beta/=czero)) call y%sync() + if (z%is_dev().and.(delta/=czero)) call z%sync() + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + + end subroutine c_base_upd_xyz + + subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + class(psb_c_base_vect_type), intent(inout) :: w + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=czero)) call x%sync() + if (y%is_dev().and.(b/=czero)) call 
y%sync() + if (z%is_dev().and.(d/=czero)) call z%sync() + if (w%is_dev().and.(f/=czero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine c_base_xyzw + ! ! Multiple variants of two operations: @@ -1674,7 +1735,7 @@ contains integer(psb_ipk_) :: i if (allocated(x%v)) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i=1,size(x%v) x%v(i) = alpha*x%v(i) @@ -1718,7 +1779,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = szero !$omp parallel do private(i) reduction(max: res) do i=1, n @@ -1743,7 +1804,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res=szero !$omp parallel do private(i) reduction(+: res) do i= 1, size(x%v) @@ -1770,7 +1831,8 @@ contains subroutine c_base_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: alpha, beta, y(:) class(psb_c_base_vect_type) :: x @@ -1790,7 +1852,8 @@ contains subroutine c_base_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: y(:) class(psb_c_base_vect_type) :: x @@ -1806,7 +1869,8 @@ contains subroutine c_base_gthzbuf(i,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx class(psb_c_base_vect_type) :: x @@ -1869,7 +1933,8 @@ contains subroutine c_base_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: y(:) class(psb_c_base_vect_type) :: x @@ -1894,7 +1959,8 @@ contains subroutine c_base_sctb(n,idx,x,beta,y) 
use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:) class(psb_c_base_vect_type) :: y @@ -1907,7 +1973,8 @@ contains subroutine c_base_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta, x(:) class(psb_c_base_vect_type) :: y @@ -1921,7 +1988,8 @@ contains subroutine c_base_sctb_buf(i,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta class(psb_c_base_vect_type) :: y @@ -1958,7 +2026,7 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) n = size(x) !$omp parallel do private(i) do i = 1, n @@ -3186,10 +3254,11 @@ contains subroutine c_base_mlv_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: alpha, beta, y(:) class(psb_c_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3211,7 +3280,8 @@ contains subroutine c_base_mlv_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: y(:) class(psb_c_base_multivect_type) :: x @@ -3233,10 +3303,11 @@ contains subroutine c_base_mlv_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: y(:) class(psb_c_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3259,10 
+3330,11 @@ contains subroutine c_base_mlv_gthzm(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: y(:,:) class(psb_c_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3280,7 +3352,8 @@ contains subroutine c_base_mlv_gthzbuf(i,ixb,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, ixb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, ixb class(psb_i_base_vect_type) :: idx class(psb_c_base_multivect_type) :: x integer(psb_ipk_) :: nc @@ -3312,10 +3385,11 @@ contains subroutine c_base_mlv_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:) class(psb_c_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = psb_size(y%v,2_psb_ipk_) @@ -3327,10 +3401,11 @@ contains subroutine c_base_mlv_sctbr2(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:,:) class(psb_c_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = y%get_ncols() @@ -3342,7 +3417,8 @@ contains subroutine c_base_mlv_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex( psb_spk_) :: beta, x(:) class(psb_c_base_multivect_type) :: y @@ -3354,7 +3430,8 @@ contains subroutine c_base_mlv_sctb_buf(i,iyb,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, iyb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, iyb class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta class(psb_c_base_multivect_type) :: 
y diff --git a/base/modules/serial/psb_c_csc_mat_mod.f90 b/base/modules/serial/psb_c_csc_mat_mod.f90 index bb06977b..5ccdd19a 100644 --- a/base/modules/serial/psb_c_csc_mat_mod.f90 +++ b/base/modules/serial/psb_c_csc_mat_mod.f90 @@ -87,7 +87,7 @@ module psb_c_csc_mat_mod procedure, pass(a) :: mv_from_coo => psb_c_mv_csc_from_coo procedure, pass(a) :: mv_to_fmt => psb_c_mv_csc_to_fmt procedure, pass(a) :: mv_from_fmt => psb_c_mv_csc_from_fmt - procedure, pass(a) :: clean_zeros => psb_c_csc_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_c_csc_clean_zeros procedure, pass(a) :: csput_a => psb_c_csc_csput_a procedure, pass(a) :: get_diag => psb_c_csc_get_diag procedure, pass(a) :: csgetptn => psb_c_csc_csgetptn @@ -143,7 +143,7 @@ module psb_c_csc_mat_mod procedure, pass(a) :: mv_from_coo => psb_lc_mv_csc_from_coo procedure, pass(a) :: mv_to_fmt => psb_lc_mv_csc_to_fmt procedure, pass(a) :: mv_from_fmt => psb_lc_mv_csc_from_fmt - procedure, pass(a) :: clean_zeros => psb_lc_csc_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_lc_csc_clean_zeros procedure, pass(a) :: csput_a => psb_lc_csc_csput_a procedure, pass(a) :: get_diag => psb_lc_csc_get_diag procedure, pass(a) :: csgetptn => psb_lc_csc_csgetptn @@ -313,18 +313,18 @@ module psb_c_csc_mat_mod end subroutine psb_c_mv_csc_from_fmt end interface - ! - !> - !! \memberof psb_c_csc_sparse_mat - !! \see psb_c_base_mat_mod::psb_c_base_clean_zeros - ! - interface - subroutine psb_c_csc_clean_zeros(a, info) - import - class(psb_c_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csc_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_c_csc_sparse_mat +!!$ !! \see psb_c_base_mat_mod::psb_c_base_clean_zeros +!!$ ! 
+!!$ interface +!!$ subroutine psb_c_csc_clean_zeros(a, info) +!!$ import +!!$ class(psb_c_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_csc_clean_zeros +!!$ end interface !> \memberof psb_c_csc_sparse_mat @@ -717,18 +717,18 @@ module psb_c_csc_mat_mod end subroutine psb_lc_mv_csc_from_fmt end interface - ! - !> - !! \memberof psb_lc_csc_sparse_mat - !! \see psb_lc_base_mat_mod::psb_lc_base_clean_zeros - ! - interface - subroutine psb_lc_csc_clean_zeros(a, info) - import - class(psb_lc_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_lc_csc_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_lc_csc_sparse_mat +!!$ !! \see psb_lc_base_mat_mod::psb_lc_base_clean_zeros +!!$ ! +!!$ interface +!!$ subroutine psb_lc_csc_clean_zeros(a, info) +!!$ import +!!$ class(psb_lc_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_lc_csc_clean_zeros +!!$ end interface !> \memberof psb_lc_csc_sparse_mat !! \see psb_lc_base_mat_mod::psb_lc_base_cp_from diff --git a/base/modules/serial/psb_c_csr_mat_mod.f90 b/base/modules/serial/psb_c_csr_mat_mod.f90 index 8b076cc2..a39c204b 100644 --- a/base/modules/serial/psb_c_csr_mat_mod.f90 +++ b/base/modules/serial/psb_c_csr_mat_mod.f90 @@ -91,7 +91,7 @@ module psb_c_csr_mat_mod procedure, pass(a) :: mv_from_coo => psb_c_mv_csr_from_coo procedure, pass(a) :: mv_to_fmt => psb_c_mv_csr_to_fmt procedure, pass(a) :: mv_from_fmt => psb_c_mv_csr_from_fmt - procedure, pass(a) :: clean_zeros => psb_c_csr_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_c_csr_clean_zeros procedure, pass(a) :: csput_a => psb_c_csr_csput_a procedure, pass(a) :: get_diag => psb_c_csr_get_diag procedure, pass(a) :: csgetptn => psb_c_csr_csgetptn @@ -261,18 +261,18 @@ module psb_c_csr_mat_mod end subroutine psb_c_csr_triu end interface - ! - !> - !! \memberof psb_c_csr_sparse_mat - !! 
\see psb_c_base_mat_mod::psb_c_base_clean_zeros - ! - interface - subroutine psb_c_csr_clean_zeros(a, info) - import - class(psb_c_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csr_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_c_csr_sparse_mat +!!$ !! \see psb_c_base_mat_mod::psb_c_base_clean_zeros +!!$ ! +!!$ interface +!!$ subroutine psb_c_csr_clean_zeros(a, info) +!!$ import +!!$ class(psb_c_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_csr_clean_zeros +!!$ end interface !> \memberof psb_c_csr_sparse_mat !! \see psb_c_base_mat_mod::psb_c_base_cp_to_coo @@ -579,7 +579,111 @@ module psb_c_csr_mat_mod end subroutine psb_c_csr_scals end interface - !> \namespace psb_base_mod \class psb_lc_csr_sparse_mat + + type, extends(psb_c_csr_sparse_mat) :: psb_c_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => c_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_c_ecsr_csmm + procedure, pass(a) :: csmv => psb_c_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_c_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_c_ecsr_cmp_nerwp + procedure, pass(a) :: free => c_ecsr_free + procedure, pass(a) :: mold => psb_c_ecsr_mold + + end type psb_c_ecsr_sparse_mat + !> \memberof psb_c_ecsr_sparse_mat + !! 
\see psb_c_base_mat_mod::psb_c_base_csmv + interface + subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_c_ecsr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ecsr_csmv + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo + interface + subroutine psb_c_ecsr_cmp_nerwp(a,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ecsr_cmp_nerwp + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo + interface + subroutine psb_c_cp_ecsr_from_coo(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ecsr_from_coo + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_fmt + interface + subroutine psb_c_cp_ecsr_from_fmt(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ecsr_from_fmt + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_mv_from_coo + interface + subroutine psb_c_mv_ecsr_from_coo(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ecsr_from_coo + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! 
\see psb_c_base_mat_mod::psb_c_base_mv_from_fmt + interface + subroutine psb_c_mv_ecsr_from_fmt(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ecsr_from_fmt + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_c_ecsr_mold(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_lc_csr_sparse_mat !! \extends psb_lc_base_mat_mod::psb_lc_base_sparse_mat !! !! psb_lc_csr_sparse_mat type and the related methods. @@ -612,7 +716,7 @@ module psb_c_csr_mat_mod procedure, pass(a) :: mv_from_coo => psb_lc_mv_csr_from_coo procedure, pass(a) :: mv_to_fmt => psb_lc_mv_csr_to_fmt procedure, pass(a) :: mv_from_fmt => psb_lc_mv_csr_from_fmt - procedure, pass(a) :: clean_zeros => psb_lc_csr_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_lc_csr_clean_zeros procedure, pass(a) :: csput_a => psb_lc_csr_csput_a procedure, pass(a) :: get_diag => psb_lc_csr_get_diag procedure, pass(a) :: csgetptn => psb_lc_csr_csgetptn @@ -791,17 +895,17 @@ module psb_c_csr_mat_mod end interface ! - !> - !! \memberof psb_lc_csr_sparse_mat - !! \see psb_lc_base_mat_mod::psb_lc_base_clean_zeros - ! - interface - subroutine psb_lc_csr_clean_zeros(a, info) - import - class(psb_lc_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_lc_csr_clean_zeros - end interface +!!$ !> +!!$ !! \memberof psb_lc_csr_sparse_mat +!!$ !! \see psb_lc_base_mat_mod::psb_lc_base_clean_zeros +!!$ ! 
+!!$ interface +!!$ subroutine psb_lc_csr_clean_zeros(a, info) +!!$ import +!!$ class(psb_lc_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_lc_csr_clean_zeros +!!$ end interface @@ -1178,6 +1282,26 @@ contains + function c_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function c_ecsr_get_fmt + + subroutine c_ecsr_free(a) + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_c_csr_sparse_mat%free() + + return + end subroutine c_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index fd423de3..165a5451 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -71,7 +71,7 @@ ! ! We are also introducing the type psb_lcspmat_type. ! The basic difference with psb_cspmat_type is in the type -! of the indices, which are PSB_LPK_ so that the entries +! of the indices, which are PSB_PSB_LPK_ so that the entries ! are guaranteed to be able to contain global indices. ! This type only supports data handling and preprocessing, it is ! not supposed to be used for computations. 
@@ -79,12 +79,14 @@ module psb_c_mat_mod use psb_c_base_mat_mod - use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat + use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat,& + & psb_c_ecsr_sparse_mat use psb_c_csc_mat_mod, only : psb_c_csc_sparse_mat, psb_lc_csc_sparse_mat type :: psb_cspmat_type class(psb_c_base_sparse_mat), allocatable :: a + class(psb_c_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lc_coo_sparse_mat), allocatable :: rmta @@ -143,7 +145,7 @@ module psb_c_mat_mod procedure, pass(a) :: csgetrow => psb_c_csgetrow procedure, pass(a) :: csgetblk => psb_c_csgetblk generic, public :: csget => csgetptn, csgetrow, csgetblk -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: lcsgetptn => psb_c_lcsgetptn procedure, pass(a) :: lcsgetrow => psb_c_lcsgetrow generic, public :: csget => lcsgetptn, lcsgetrow @@ -202,6 +204,8 @@ module psb_c_mat_mod procedure, pass(a) :: cscnv_ip => psb_c_cscnv_ip procedure, pass(a) :: cscnv_base => psb_c_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_c_split_nd + procedure, pass(a) :: merge_nd => psb_c_merge_nd procedure, pass(a) :: clone => psb_cspmat_clone procedure, pass(a) :: move_alloc => psb_cspmat_type_move ! @@ -307,7 +311,7 @@ module psb_c_mat_mod ! 
Setters procedure, pass(a) :: set_lnrows => psb_lc_set_lnrows procedure, pass(a) :: set_lncols => psb_lc_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: set_inrows => psb_lc_set_inrows procedure, pass(a) :: set_incols => psb_lc_set_incols generic, public :: set_nrows => set_inrows, set_lnrows @@ -342,7 +346,7 @@ module psb_c_mat_mod procedure, pass(a) :: csgetrow => psb_lc_csgetrow procedure, pass(a) :: csgetblk => psb_lc_csgetblk generic, public :: csget => csgetptn, csgetrow, csgetblk -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) !!$ procedure, pass(a) :: icsgetptn => psb_lc_icsgetptn !!$ procedure, pass(a) :: icsgetrow => psb_lc_icsgetrow !!$ generic, public :: csget => icsgetptn, icsgetrow @@ -840,6 +844,24 @@ module psb_c_mat_mod ! ! + interface + subroutine psb_c_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_split_nd + end interface + + interface + subroutine psb_c_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_merge_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 
3 versions: copying to target @@ -859,7 +881,6 @@ module psb_c_mat_mod end subroutine psb_c_cscnv end interface - interface subroutine psb_c_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat @@ -871,7 +892,6 @@ module psb_c_mat_mod end subroutine psb_c_cscnv_ip end interface - interface subroutine psb_c_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat @@ -1250,7 +1270,7 @@ module psb_c_mat_mod class(psb_lcspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: m end subroutine psb_lc_set_lnrows -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lc_set_inrows(m,a) import :: psb_ipk_, psb_lpk_, psb_lcspmat_type class(psb_lcspmat_type), intent(inout) :: a @@ -1265,7 +1285,7 @@ module psb_c_mat_mod class(psb_lcspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: n end subroutine psb_lc_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lc_set_incols(n,a) import :: psb_ipk_, psb_lpk_, psb_lcspmat_type class(psb_lcspmat_type), intent(inout) :: a @@ -2390,7 +2410,7 @@ contains end subroutine psb_c_clean_zeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_c_lcsgetptn(imin,imax,a,nz,ia,ja,info,& & jmin,jmax,iren,append,nzin,rscale,cscale) implicit none @@ -2909,7 +2929,7 @@ contains end subroutine psb_lc_clean_zeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) !!$ subroutine psb_lc_icsgetptn(imin,imax,a,nz,ia,ja,info,& !!$ & jmin,jmax,iren,append,nzin,rscale,cscale) !!$ implicit none diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 1a336d11..9effe9ef 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_c_vect_mod procedure, pass(z) :: 
axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: upd_xyz => c_vect_upd_xyz + procedure, pass(z) :: xyzw => c_vect_xyzw procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_a => c_vect_mlt_a procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2 @@ -489,7 +491,8 @@ contains subroutine c_vect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: alpha, beta, y(:) class(psb_c_vect_type) :: x @@ -500,7 +503,8 @@ contains subroutine c_vect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: y(:) class(psb_c_vect_type) :: x @@ -511,7 +515,8 @@ contains subroutine c_vect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:) class(psb_c_vect_type) :: y @@ -703,7 +708,7 @@ contains res = czero if (allocated(x%v)) & - & res = x%v%dot(n,y) + & res = x%v%dot_a(n,y) end function c_vect_dot_a @@ -771,6 +776,38 @@ contains end subroutine c_vect_axpby_a2 + subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_vect_type), intent(inout) :: x + class(psb_c_vect_type), intent(inout) :: y + class(psb_c_vect_type), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + + end subroutine c_vect_upd_xyz + + subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_vect_type), intent(inout) :: x + class(psb_c_vect_type), intent(inout) :: 
y + class(psb_c_vect_type), intent(inout) :: z + class(psb_c_vect_type), intent(inout) :: w + complex(psb_spk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine c_vect_xyzw + + subroutine c_vect_mlt_v(x, y, info) use psi_serial_mod implicit none @@ -1134,7 +1171,7 @@ contains end if end function c_vect_nrm2_weight - + function c_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none @@ -1266,56 +1303,56 @@ module psb_c_multivect_mod integer(psb_ipk_) :: dupl = psb_dupl_add_ complex(psb_spk_), allocatable :: rmtv(:,:) contains - procedure, pass(x) :: get_nrows => c_vect_get_nrows - procedure, pass(x) :: get_ncols => c_vect_get_ncols - procedure, pass(x) :: sizeof => c_vect_sizeof - procedure, pass(x) :: get_fmt => c_vect_get_fmt + procedure, pass(x) :: get_nrows => c_mvect_get_nrows + procedure, pass(x) :: get_ncols => c_mvect_get_ncols + procedure, pass(x) :: sizeof => c_mvect_sizeof + procedure, pass(x) :: get_fmt => c_mvect_get_fmt procedure, pass(x) :: is_remote_build => c_mvect_is_remote_build procedure, pass(x) :: set_remote_build => c_mvect_set_remote_build procedure, pass(x) :: get_dupl => c_mvect_get_dupl procedure, pass(x) :: set_dupl => c_mvect_set_dupl - procedure, pass(x) :: all => c_vect_all - procedure, pass(x) :: reall => c_vect_reall - procedure, pass(x) :: zero => c_vect_zero - procedure, pass(x) :: asb => c_vect_asb - procedure, pass(x) :: sync => c_vect_sync - procedure, pass(x) :: free => c_vect_free - procedure, pass(x) :: ins => c_vect_ins - procedure, pass(x) :: bld_x => c_vect_bld_x - procedure, pass(x) :: bld_n => c_vect_bld_n + procedure, pass(x) :: all => c_mvect_all + procedure, pass(x) :: reall => c_mvect_reall + procedure, pass(x) :: zero => c_mvect_zero + procedure, pass(x) :: asb => c_mvect_asb + procedure, pass(x) :: sync => c_mvect_sync + procedure, pass(x) :: free => c_mvect_free + 
procedure, pass(x) :: ins => c_mvect_ins + procedure, pass(x) :: bld_x => c_mvect_bld_x + procedure, pass(x) :: bld_n => c_mvect_bld_n generic, public :: bld => bld_x, bld_n - procedure, pass(x) :: get_vect => c_vect_get_vect - procedure, pass(x) :: cnv => c_vect_cnv - procedure, pass(x) :: set_scal => c_vect_set_scal - procedure, pass(x) :: set_vect => c_vect_set_vect + procedure, pass(x) :: get_vect => c_mvect_get_vect + procedure, pass(x) :: cnv => c_mvect_cnv + procedure, pass(x) :: set_scal => c_mvect_set_scal + procedure, pass(x) :: set_vect => c_mvect_set_vect generic, public :: set => set_vect, set_scal - procedure, pass(x) :: clone => c_vect_clone - procedure, pass(x) :: gthab => c_vect_gthab - procedure, pass(x) :: gthzv => c_vect_gthzv - procedure, pass(x) :: gthzv_x => c_vect_gthzv_x + procedure, pass(x) :: clone => c_mvect_clone + procedure, pass(x) :: gthab => c_mvect_gthab + procedure, pass(x) :: gthzv => c_mvect_gthzv + procedure, pass(x) :: gthzv_x => c_mvect_gthzv_x generic, public :: gth => gthab, gthzv - procedure, pass(y) :: sctb => c_vect_sctb - procedure, pass(y) :: sctb_x => c_vect_sctb_x + procedure, pass(y) :: sctb => c_mvect_sctb + procedure, pass(y) :: sctb_x => c_mvect_sctb_x generic, public :: sct => sctb, sctb_x -!!$ procedure, pass(x) :: dot_v => c_vect_dot_v -!!$ procedure, pass(x) :: dot_a => c_vect_dot_a +!!$ procedure, pass(x) :: dot_v => c_mvect_dot_v +!!$ procedure, pass(x) :: dot_a => c_mvect_dot_a !!$ generic, public :: dot => dot_v, dot_a -!!$ procedure, pass(y) :: axpby_v => c_vect_axpby_v -!!$ procedure, pass(y) :: axpby_a => c_vect_axpby_a +!!$ procedure, pass(y) :: axpby_v => c_mvect_axpby_v +!!$ procedure, pass(y) :: axpby_a => c_mvect_axpby_a !!$ generic, public :: axpby => axpby_v, axpby_a -!!$ procedure, pass(y) :: mlt_v => c_vect_mlt_v -!!$ procedure, pass(y) :: mlt_a => c_vect_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => c_vect_mlt_v_2 -!!$ procedure, pass(z) :: 
mlt_va => c_vect_mlt_va -!!$ procedure, pass(z) :: mlt_av => c_vect_mlt_av +!!$ procedure, pass(y) :: mlt_v => c_mvect_mlt_v +!!$ procedure, pass(y) :: mlt_a => c_mvect_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => c_mvect_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => c_mvect_mlt_v_2 +!!$ procedure, pass(z) :: mlt_va => c_mvect_mlt_va +!!$ procedure, pass(z) :: mlt_av => c_mvect_mlt_av !!$ generic, public :: mlt => mlt_v, mlt_a, mlt_a_2,& !!$ & mlt_v_2, mlt_av, mlt_va -!!$ procedure, pass(x) :: scal => c_vect_scal -!!$ procedure, pass(x) :: nrm2 => c_vect_nrm2 -!!$ procedure, pass(x) :: amax => c_vect_amax -!!$ procedure, pass(x) :: asum => c_vect_asum +!!$ procedure, pass(x) :: scal => c_mvect_scal +!!$ procedure, pass(x) :: nrm2 => c_mvect_nrm2 +!!$ procedure, pass(x) :: amax => c_mvect_amax +!!$ procedure, pass(x) :: asum => c_mvect_asum end type psb_c_multivect_type public :: psb_c_multivect, psb_c_multivect_type,& @@ -1416,7 +1453,7 @@ contains end function psb_c_get_base_multivect_default - subroutine c_vect_clone(x,y,info) + subroutine c_mvect_clone(x,y,info) implicit none class(psb_c_multivect_type), intent(inout) :: x class(psb_c_multivect_type), intent(inout) :: y @@ -1425,11 +1462,11 @@ contains info = psb_success_ call y%free(info) if ((info==0).and.allocated(x%v)) then - call y%bld(x%get_vect(),mold=x%v) + call y%bld_x(x%get_vect(),mold=x%v) end if - end subroutine c_vect_clone + end subroutine c_mvect_clone - subroutine c_vect_bld_x(x,invect,mold) + subroutine c_mvect_bld_x(x,invect,mold) complex(psb_spk_), intent(in) :: invect(:,:) class(psb_c_multivect_type), intent(out) :: x class(psb_c_base_multivect_type), intent(in), optional :: mold @@ -1445,10 +1482,10 @@ contains if (info == psb_success_) call x%v%bld(invect) - end subroutine c_vect_bld_x + end subroutine c_mvect_bld_x - subroutine c_vect_bld_n(x,m,n,mold) + subroutine c_mvect_bld_n(x,m,n,mold) integer(psb_ipk_), intent(in) :: m,n class(psb_c_multivect_type), intent(out) :: x 
class(psb_c_base_multivect_type), intent(in), optional :: mold @@ -1462,9 +1499,9 @@ contains endif if (info == psb_success_) call x%v%bld(m,n) - end subroutine c_vect_bld_n + end subroutine c_mvect_bld_n - function c_vect_get_vect(x) result(res) + function c_mvect_get_vect(x) result(res) class(psb_c_multivect_type), intent(inout) :: x complex(psb_spk_), allocatable :: res(:,:) integer(psb_ipk_) :: info @@ -1472,25 +1509,25 @@ contains if (allocated(x%v)) then res = x%v%get_vect() end if - end function c_vect_get_vect + end function c_mvect_get_vect - subroutine c_vect_set_scal(x,val) + subroutine c_mvect_set_scal(x,val) class(psb_c_multivect_type), intent(inout) :: x complex(psb_spk_), intent(in) :: val integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine c_vect_set_scal + end subroutine c_mvect_set_scal - subroutine c_vect_set_vect(x,val) + subroutine c_mvect_set_vect(x,val) class(psb_c_multivect_type), intent(inout) :: x complex(psb_spk_), intent(in) :: val(:,:) integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine c_vect_set_vect + end subroutine c_mvect_set_vect function constructor(x) result(this) @@ -1498,7 +1535,7 @@ contains type(psb_c_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(x) + call this%bld_x(x) call this%asb(size(x,dim=1,kind=psb_ipk_),size(x,dim=2,kind=psb_ipk_),info) end function constructor @@ -1509,44 +1546,44 @@ contains type(psb_c_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(m,n) + call this%bld_n(m,n) call this%asb(m,n,info) end function size_const - function c_vect_get_nrows(x) result(res) + function c_mvect_get_nrows(x) result(res) implicit none class(psb_c_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_nrows() - end function c_vect_get_nrows + end function c_mvect_get_nrows - function c_vect_get_ncols(x) result(res) + function c_mvect_get_ncols(x) result(res) implicit none 
class(psb_c_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_ncols() - end function c_vect_get_ncols + end function c_mvect_get_ncols - function c_vect_sizeof(x) result(res) + function c_mvect_sizeof(x) result(res) implicit none class(psb_c_multivect_type), intent(in) :: x integer(psb_epk_) :: res res = 0 if (allocated(x%v)) res = x%v%sizeof() - end function c_vect_sizeof + end function c_mvect_sizeof - function c_vect_get_fmt(x) result(res) + function c_mvect_get_fmt(x) result(res) implicit none class(psb_c_multivect_type), intent(in) :: x character(len=5) :: res res = 'NULL' if (allocated(x%v)) res = x%v%get_fmt() - end function c_vect_get_fmt + end function c_mvect_get_fmt - subroutine c_vect_all(m,n, x, info, mold) + subroutine c_mvect_all(m,n, x, info, mold) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -1565,9 +1602,9 @@ contains info = psb_err_alloc_dealloc_ end if - end subroutine c_vect_all + end subroutine c_mvect_all - subroutine c_vect_reall(m,n, x, info) + subroutine c_mvect_reall(m,n, x, info) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -1580,18 +1617,18 @@ contains if (info == 0) & & call x%asb(m,n,info) - end subroutine c_vect_reall + end subroutine c_mvect_reall - subroutine c_vect_zero(x) + subroutine c_mvect_zero(x) use psi_serial_mod implicit none class(psb_c_multivect_type), intent(inout) :: x if (allocated(x%v)) call x%v%zero() - end subroutine c_vect_zero + end subroutine c_mvect_zero - subroutine c_vect_asb(m,n, x, info) + subroutine c_mvect_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1602,42 +1639,45 @@ contains if (allocated(x%v)) & & call x%v%asb(m,n,info) - end subroutine c_vect_asb + end subroutine c_mvect_asb - subroutine c_vect_sync(x) + subroutine c_mvect_sync(x) implicit none class(psb_c_multivect_type), intent(inout) :: x if (allocated(x%v)) & & call x%v%sync() - end subroutine c_vect_sync + end subroutine c_mvect_sync - subroutine 
c_vect_gthab(n,idx,alpha,x,beta,y) + subroutine c_mvect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: alpha, beta, y(:) class(psb_c_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,alpha,beta,y) - end subroutine c_vect_gthab + end subroutine c_mvect_gthab - subroutine c_vect_gthzv(n,idx,x,y) + subroutine c_mvect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: y(:) class(psb_c_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,y) - end subroutine c_vect_gthzv + end subroutine c_mvect_gthzv - subroutine c_vect_gthzv_x(i,n,idx,x,y) + subroutine c_mvect_gthzv_x(i,n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: y(:) class(psb_c_multivect_type) :: x @@ -1645,22 +1685,24 @@ contains if (allocated(x%v)) & & call x%v%gth(i,n,idx,y) - end subroutine c_vect_gthzv_x + end subroutine c_mvect_gthzv_x - subroutine c_vect_sctb(n,idx,x,beta,y) + subroutine c_mvect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:) class(psb_c_multivect_type) :: y if (allocated(y%v)) & & call y%v%sct(n,idx,x,beta) - end subroutine c_vect_sctb + end subroutine c_mvect_sctb - subroutine c_vect_sctb_x(i,n,idx,x,beta,y) + subroutine c_mvect_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta, x(:) class(psb_c_multivect_type) :: y @@ -1668,9 +1710,9 @@ contains if (allocated(y%v)) & & call y%v%sct(i,n,idx,x,beta) - end subroutine c_vect_sctb_x + end subroutine c_mvect_sctb_x - subroutine c_vect_free(x, info) + subroutine 
c_mvect_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1683,9 +1725,9 @@ contains if (info == 0) deallocate(x%v,stat=info) end if - end subroutine c_vect_free + end subroutine c_mvect_free - subroutine c_vect_ins(n,irl,val,x,info) + subroutine c_mvect_ins(n,irl,val,x,info) use psi_serial_mod implicit none class(psb_c_multivect_type), intent(inout) :: x @@ -1704,10 +1746,10 @@ contains dupl = x%get_dupl() call x%v%ins(n,irl,val,dupl,info) - end subroutine c_vect_ins + end subroutine c_mvect_ins - subroutine c_vect_cnv(x,mold) + subroutine c_mvect_cnv(x,mold) class(psb_c_multivect_type), intent(inout) :: x class(psb_c_base_multivect_type), intent(in), optional :: mold class(psb_c_base_multivect_type), allocatable :: tmp @@ -1724,10 +1766,10 @@ contains call x%v%free(info) end if call move_alloc(tmp,x%v) - end subroutine c_vect_cnv + end subroutine c_mvect_cnv -!!$ function c_vect_dot_v(n,x,y) result(res) +!!$ function c_mvect_dot_v(n,x,y) result(res) !!$ implicit none !!$ class(psb_c_multivect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(in) :: n @@ -1737,9 +1779,9 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) & !!$ & res = x%v%dot(n,y%v) !!$ -!!$ end function c_vect_dot_v +!!$ end function c_mvect_dot_v !!$ -!!$ function c_vect_dot_a(n,x,y) result(res) +!!$ function c_mvect_dot_a(n,x,y) result(res) !!$ implicit none !!$ class(psb_c_multivect_type), intent(inout) :: x !!$ complex(psb_spk_), intent(in) :: y(:) @@ -1750,9 +1792,9 @@ contains !!$ if (allocated(x%v)) & !!$ & res = x%v%dot(n,y) !!$ -!!$ end function c_vect_dot_a +!!$ end function c_mvect_dot_a !!$ -!!$ subroutine c_vect_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine c_mvect_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m @@ -1764,12 +1806,12 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) then !!$ call y%v%axpby(m,alpha,x%v,beta,info) !!$ else -!!$ info = psb_err_invalid_vect_state_ 
+!!$ info = psb_err_invalid_mvect_state_ !!$ end if !!$ -!!$ end subroutine c_vect_axpby_v +!!$ end subroutine c_mvect_axpby_v !!$ -!!$ subroutine c_vect_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine c_mvect_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m @@ -1781,10 +1823,10 @@ contains !!$ if (allocated(y%v)) & !!$ & call y%v%axpby(m,alpha,x,beta,info) !!$ -!!$ end subroutine c_vect_axpby_a +!!$ end subroutine c_mvect_axpby_a !!$ !!$ -!!$ subroutine c_vect_mlt_v(x, y, info) +!!$ subroutine c_mvect_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_c_multivect_type), intent(inout) :: x @@ -1796,9 +1838,9 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) & !!$ & call y%v%mlt(x%v,info) !!$ -!!$ end subroutine c_vect_mlt_v +!!$ end subroutine c_mvect_mlt_v !!$ -!!$ subroutine c_vect_mlt_a(x, y, info) +!!$ subroutine c_mvect_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: x(:) @@ -1811,10 +1853,10 @@ contains !!$ if (allocated(y%v)) & !!$ & call y%v%mlt(x,info) !!$ -!!$ end subroutine c_vect_mlt_a +!!$ end subroutine c_mvect_mlt_a !!$ !!$ -!!$ subroutine c_vect_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine c_mvect_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta @@ -1828,9 +1870,9 @@ contains !!$ if (allocated(z%v)) & !!$ & call z%v%mlt(alpha,x,y,beta,info) !!$ -!!$ end subroutine c_vect_mlt_a_2 +!!$ end subroutine c_mvect_mlt_a_2 !!$ -!!$ subroutine c_vect_mlt_v_2(alpha,x,y,beta,z,info,conjgx,conjgy) +!!$ subroutine c_mvect_mlt_v_2(alpha,x,y,beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta @@ -1847,9 +1889,9 @@ contains !!$ & allocated(z%v)) & !!$ & call z%v%mlt(alpha,x%v,y%v,beta,info,conjgx,conjgy) !!$ -!!$ end subroutine c_vect_mlt_v_2 +!!$ end subroutine c_mvect_mlt_v_2 !!$ -!!$ 
subroutine c_vect_mlt_av(alpha,x,y,beta,z,info) +!!$ subroutine c_mvect_mlt_av(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta @@ -1863,9 +1905,9 @@ contains !!$ if (allocated(z%v).and.allocated(y%v)) & !!$ & call z%v%mlt(alpha,x,y%v,beta,info) !!$ -!!$ end subroutine c_vect_mlt_av +!!$ end subroutine c_mvect_mlt_av !!$ -!!$ subroutine c_vect_mlt_va(alpha,x,y,beta,z,info) +!!$ subroutine c_mvect_mlt_va(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta @@ -1880,9 +1922,9 @@ contains !!$ if (allocated(z%v).and.allocated(x%v)) & !!$ & call z%v%mlt(alpha,x%v,y,beta,info) !!$ -!!$ end subroutine c_vect_mlt_va +!!$ end subroutine c_mvect_mlt_va !!$ -!!$ subroutine c_vect_scal(alpha, x) +!!$ subroutine c_mvect_scal(alpha, x) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_c_multivect_type), intent(inout) :: x @@ -1890,10 +1932,10 @@ contains !!$ !!$ if (allocated(x%v)) call x%v%scal(alpha) !!$ -!!$ end subroutine c_vect_scal +!!$ end subroutine c_mvect_scal !!$ !!$ -!!$ function c_vect_nrm2(n,x) result(res) +!!$ function c_mvect_nrm2(n,x) result(res) !!$ implicit none !!$ class(psb_c_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1905,9 +1947,9 @@ contains !!$ res = szero !!$ end if !!$ -!!$ end function c_vect_nrm2 +!!$ end function c_mvect_nrm2 !!$ -!!$ function c_vect_amax(n,x) result(res) +!!$ function c_mvect_amax(n,x) result(res) !!$ implicit none !!$ class(psb_c_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1919,9 +1961,9 @@ contains !!$ res = szero !!$ end if !!$ -!!$ end function c_vect_amax +!!$ end function c_mvect_amax !!$ -!!$ function c_vect_asum(n,x) result(res) +!!$ function c_mvect_asum(n,x) result(res) !!$ implicit none !!$ class(psb_c_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1933,6 +1975,6 @@ contains !!$ res = szero !!$ 
end if !!$ -!!$ end function c_vect_asum +!!$ end function c_mvect_asum end module psb_c_multivect_mod diff --git a/base/modules/serial/psb_d_base_mat_mod.F90 b/base/modules/serial/psb_d_base_mat_mod.F90 index 5f4c76df..b2f49f2d 100644 --- a/base/modules/serial/psb_d_base_mat_mod.F90 +++ b/base/modules/serial/psb_d_base_mat_mod.F90 @@ -416,7 +416,7 @@ module psb_d_base_mat_mod ! ! This is COO specific ! -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: iset_nzeros => ld_coo_iset_nzeros procedure, pass(a) :: lset_nzeros => ld_coo_lset_nzeros generic, public :: set_nzeros => iset_nzeros, lset_nzeros @@ -439,7 +439,7 @@ module psb_d_base_mat_mod private :: ld_coo_get_nzeros, ld_coo_iset_nzeros, & & ld_coo_get_fmt, ld_coo_free, ld_coo_sizeof, & & ld_coo_transp_1mat, ld_coo_transc_1mat -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) private :: ld_coo_lset_nzeros #endif @@ -3499,7 +3499,7 @@ module psb_d_base_mat_mod end subroutine psb_ld_coo_clean_negidx end interface -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! !> Funtion: coo_clean_negidx_inner !! 
\brief Take out any entries with negative row or column index @@ -4323,7 +4323,7 @@ contains end subroutine ld_coo_iset_nzeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine ld_coo_lset_nzeros(nz,a) implicit none integer(psb_lpk_), intent(in) :: nz diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 87f5b0e4..8bc8a171 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -155,6 +155,9 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: upd_xyz => d_base_upd_xyz + procedure, pass(w) :: xyzw => d_base_xyzw + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -280,7 +283,7 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if -#if defined (OPENMP) +#if defined (PSB_OPENMP) !$omp parallel do private(i) do i = 1, size(this) x%v(i) = this(i) @@ -574,8 +577,8 @@ contains info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) - if (info == 0) call x%free_buffer(info) - if (info == 0) call x%free_comid(info) + if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info) + if ((info == 0).and.allocated(x%comid)) call x%free_comid(info) if (info /= 0) call & & psb_errpush(psb_err_alloc_dealloc_,'vect_free') @@ -845,7 +848,7 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val @@ -883,7 +886,7 @@ contains if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val(i-first_+1) @@ -932,7 +935,7 @@ contains if (allocated(x%v)) then if (x%is_dev()) 
call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i=1, size(x%v) x%v(i) = abs(x%v(i)) @@ -1025,7 +1028,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine d_base_axpby_v(m,alpha, x, beta, y, info) @@ -1054,7 +1057,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1085,7 +1088,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine d_base_axpby_a(m,alpha, x, beta, y, info) @@ -1133,6 +1136,64 @@ contains end subroutine d_base_axpby_a2 + ! + ! UPD_XYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_upd_xyz + !! \memberof psb_d_base_vect_type + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*z + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !!
+ subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_base_vect_type), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(alpha/=dzero)) call x%sync() + if (y%is_dev().and.(beta/=dzero)) call y%sync() + if (z%is_dev().and.(delta/=dzero)) call z%sync() + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + + end subroutine d_base_upd_xyz + + subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_base_vect_type), intent(inout) :: z + class(psb_d_base_vect_type), intent(inout) :: w + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=dzero)) call x%sync() + if (y%is_dev().and.(b/=dzero)) call y%sync() + if (z%is_dev().and.(d/=dzero)) call z%sync() + if (w%is_dev().and.(f/=dzero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine d_base_xyzw + ! ! 
Multiple variants of two operations: @@ -1681,7 +1742,7 @@ contains integer(psb_ipk_) :: i if (allocated(x%v)) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i=1,size(x%v) x%v(i) = alpha*x%v(i) @@ -1725,7 +1786,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = dzero !$omp parallel do private(i) reduction(max: res) do i=1, n @@ -1749,7 +1810,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = HUGE(done) !$omp parallel do private(i) reduction(min: res) do i=1, n @@ -1830,7 +1891,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res=dzero !$omp parallel do private(i) reduction(+: res) do i= 1, size(x%v) @@ -1857,7 +1918,8 @@ contains subroutine d_base_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: alpha, beta, y(:) class(psb_d_base_vect_type) :: x @@ -1877,7 +1939,8 @@ contains subroutine d_base_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: y(:) class(psb_d_base_vect_type) :: x @@ -1893,7 +1956,8 @@ contains subroutine d_base_gthzbuf(i,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx class(psb_d_base_vect_type) :: x @@ -1956,7 +2020,8 @@ contains subroutine d_base_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: y(:) class(psb_d_base_vect_type) :: x @@ -1981,7 +2046,8 @@ contains subroutine d_base_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, 
idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:) class(psb_d_base_vect_type) :: y @@ -1994,7 +2060,8 @@ contains subroutine d_base_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta, x(:) class(psb_d_base_vect_type) :: y @@ -2008,7 +2075,8 @@ contains subroutine d_base_sctb_buf(i,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta class(psb_d_base_vect_type) :: y @@ -2137,7 +2205,7 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) n = size(x) !$omp parallel do private(i) do i = 1, n @@ -3365,10 +3433,11 @@ contains subroutine d_base_mlv_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: alpha, beta, y(:) class(psb_d_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3390,7 +3459,8 @@ contains subroutine d_base_mlv_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: y(:) class(psb_d_base_multivect_type) :: x @@ -3412,10 +3482,11 @@ contains subroutine d_base_mlv_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: y(:) class(psb_d_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3438,10 +3509,11 @@ contains subroutine d_base_mlv_gthzm(n,idx,x,y) use psi_serial_mod 
implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: y(:,:) class(psb_d_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3459,7 +3531,8 @@ contains subroutine d_base_mlv_gthzbuf(i,ixb,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, ixb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, ixb class(psb_i_base_vect_type) :: idx class(psb_d_base_multivect_type) :: x integer(psb_ipk_) :: nc @@ -3491,10 +3564,11 @@ contains subroutine d_base_mlv_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:) class(psb_d_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = psb_size(y%v,2_psb_ipk_) @@ -3506,10 +3580,11 @@ contains subroutine d_base_mlv_sctbr2(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:,:) class(psb_d_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = y%get_ncols() @@ -3521,7 +3596,8 @@ contains subroutine d_base_mlv_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real( psb_dpk_) :: beta, x(:) class(psb_d_base_multivect_type) :: y @@ -3533,7 +3609,8 @@ contains subroutine d_base_mlv_sctb_buf(i,iyb,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, iyb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, iyb class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta class(psb_d_base_multivect_type) :: y diff --git a/base/modules/serial/psb_d_csc_mat_mod.f90 
b/base/modules/serial/psb_d_csc_mat_mod.f90 index 60d91bf2..08d31256 100644 --- a/base/modules/serial/psb_d_csc_mat_mod.f90 +++ b/base/modules/serial/psb_d_csc_mat_mod.f90 @@ -87,7 +87,7 @@ module psb_d_csc_mat_mod procedure, pass(a) :: mv_from_coo => psb_d_mv_csc_from_coo procedure, pass(a) :: mv_to_fmt => psb_d_mv_csc_to_fmt procedure, pass(a) :: mv_from_fmt => psb_d_mv_csc_from_fmt - procedure, pass(a) :: clean_zeros => psb_d_csc_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_d_csc_clean_zeros procedure, pass(a) :: csput_a => psb_d_csc_csput_a procedure, pass(a) :: get_diag => psb_d_csc_get_diag procedure, pass(a) :: csgetptn => psb_d_csc_csgetptn @@ -143,7 +143,7 @@ module psb_d_csc_mat_mod procedure, pass(a) :: mv_from_coo => psb_ld_mv_csc_from_coo procedure, pass(a) :: mv_to_fmt => psb_ld_mv_csc_to_fmt procedure, pass(a) :: mv_from_fmt => psb_ld_mv_csc_from_fmt - procedure, pass(a) :: clean_zeros => psb_ld_csc_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_ld_csc_clean_zeros procedure, pass(a) :: csput_a => psb_ld_csc_csput_a procedure, pass(a) :: get_diag => psb_ld_csc_get_diag procedure, pass(a) :: csgetptn => psb_ld_csc_csgetptn @@ -313,18 +313,18 @@ module psb_d_csc_mat_mod end subroutine psb_d_mv_csc_from_fmt end interface - ! - !> - !! \memberof psb_d_csc_sparse_mat - !! \see psb_d_base_mat_mod::psb_d_base_clean_zeros - ! - interface - subroutine psb_d_csc_clean_zeros(a, info) - import - class(psb_d_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csc_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_d_csc_sparse_mat +!!$ !! \see psb_d_base_mat_mod::psb_d_base_clean_zeros +!!$ ! 
+!!$ interface +!!$ subroutine psb_d_csc_clean_zeros(a, info) +!!$ import +!!$ class(psb_d_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_csc_clean_zeros +!!$ end interface !> \memberof psb_d_csc_sparse_mat @@ -717,18 +717,18 @@ module psb_d_csc_mat_mod end subroutine psb_ld_mv_csc_from_fmt end interface - ! - !> - !! \memberof psb_ld_csc_sparse_mat - !! \see psb_ld_base_mat_mod::psb_ld_base_clean_zeros - ! - interface - subroutine psb_ld_csc_clean_zeros(a, info) - import - class(psb_ld_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_ld_csc_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_ld_csc_sparse_mat +!!$ !! \see psb_ld_base_mat_mod::psb_ld_base_clean_zeros +!!$ ! +!!$ interface +!!$ subroutine psb_ld_csc_clean_zeros(a, info) +!!$ import +!!$ class(psb_ld_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_ld_csc_clean_zeros +!!$ end interface !> \memberof psb_ld_csc_sparse_mat !! \see psb_ld_base_mat_mod::psb_ld_base_cp_from diff --git a/base/modules/serial/psb_d_csr_mat_mod.f90 b/base/modules/serial/psb_d_csr_mat_mod.f90 index d0aa622b..0669725f 100644 --- a/base/modules/serial/psb_d_csr_mat_mod.f90 +++ b/base/modules/serial/psb_d_csr_mat_mod.f90 @@ -91,7 +91,7 @@ module psb_d_csr_mat_mod procedure, pass(a) :: mv_from_coo => psb_d_mv_csr_from_coo procedure, pass(a) :: mv_to_fmt => psb_d_mv_csr_to_fmt procedure, pass(a) :: mv_from_fmt => psb_d_mv_csr_from_fmt - procedure, pass(a) :: clean_zeros => psb_d_csr_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_d_csr_clean_zeros procedure, pass(a) :: csput_a => psb_d_csr_csput_a procedure, pass(a) :: get_diag => psb_d_csr_get_diag procedure, pass(a) :: csgetptn => psb_d_csr_csgetptn @@ -261,18 +261,18 @@ module psb_d_csr_mat_mod end subroutine psb_d_csr_triu end interface - ! - !> - !! \memberof psb_d_csr_sparse_mat - !! 
\see psb_d_base_mat_mod::psb_d_base_clean_zeros - ! - interface - subroutine psb_d_csr_clean_zeros(a, info) - import - class(psb_d_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csr_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_d_csr_sparse_mat +!!$ !! \see psb_d_base_mat_mod::psb_d_base_clean_zeros +!!$ ! +!!$ interface +!!$ subroutine psb_d_csr_clean_zeros(a, info) +!!$ import +!!$ class(psb_d_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_csr_clean_zeros +!!$ end interface !> \memberof psb_d_csr_sparse_mat !! \see psb_d_base_mat_mod::psb_d_base_cp_to_coo @@ -579,7 +579,111 @@ module psb_d_csr_mat_mod end subroutine psb_d_csr_scals end interface - !> \namespace psb_base_mod \class psb_ld_csr_sparse_mat + + type, extends(psb_d_csr_sparse_mat) :: psb_d_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => d_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_d_ecsr_csmm + procedure, pass(a) :: csmv => psb_d_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_d_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_d_ecsr_cmp_nerwp + procedure, pass(a) :: free => d_ecsr_free + procedure, pass(a) :: mold => psb_d_ecsr_mold + + end type psb_d_ecsr_sparse_mat + !> \memberof psb_d_ecsr_sparse_mat + !! 
\see psb_d_base_mat_mod::psb_d_base_csmv + interface + subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_d_ecsr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ecsr_csmv + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo + interface + subroutine psb_d_ecsr_cmp_nerwp(a,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ecsr_cmp_nerwp + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo + interface + subroutine psb_d_cp_ecsr_from_coo(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ecsr_from_coo + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_fmt + interface + subroutine psb_d_cp_ecsr_from_fmt(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ecsr_from_fmt + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_mv_from_coo + interface + subroutine psb_d_mv_ecsr_from_coo(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ecsr_from_coo + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! 
\see psb_d_base_mat_mod::psb_d_base_mv_from_fmt + interface + subroutine psb_d_mv_ecsr_from_fmt(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ecsr_from_fmt + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_d_ecsr_mold(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_ld_csr_sparse_mat !! \extends psb_ld_base_mat_mod::psb_ld_base_sparse_mat !! !! psb_ld_csr_sparse_mat type and the related methods. @@ -612,7 +716,7 @@ module psb_d_csr_mat_mod procedure, pass(a) :: mv_from_coo => psb_ld_mv_csr_from_coo procedure, pass(a) :: mv_to_fmt => psb_ld_mv_csr_to_fmt procedure, pass(a) :: mv_from_fmt => psb_ld_mv_csr_from_fmt - procedure, pass(a) :: clean_zeros => psb_ld_csr_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_ld_csr_clean_zeros procedure, pass(a) :: csput_a => psb_ld_csr_csput_a procedure, pass(a) :: get_diag => psb_ld_csr_get_diag procedure, pass(a) :: csgetptn => psb_ld_csr_csgetptn @@ -791,17 +895,17 @@ module psb_d_csr_mat_mod end interface ! - !> - !! \memberof psb_ld_csr_sparse_mat - !! \see psb_ld_base_mat_mod::psb_ld_base_clean_zeros - ! - interface - subroutine psb_ld_csr_clean_zeros(a, info) - import - class(psb_ld_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_ld_csr_clean_zeros - end interface +!!$ !> +!!$ !! \memberof psb_ld_csr_sparse_mat +!!$ !! \see psb_ld_base_mat_mod::psb_ld_base_clean_zeros +!!$ ! 
+!!$ interface +!!$ subroutine psb_ld_csr_clean_zeros(a, info) +!!$ import +!!$ class(psb_ld_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_ld_csr_clean_zeros +!!$ end interface @@ -1178,6 +1282,26 @@ contains + function d_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function d_ecsr_get_fmt + + subroutine d_ecsr_free(a) + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_d_csr_sparse_mat%free() + + return + end subroutine d_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 8f967ce1..a4318ba4 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -71,7 +71,7 @@ ! ! We are also introducing the type psb_ldspmat_type. ! The basic difference with psb_dspmat_type is in the type -! of the indices, which are PSB_LPK_ so that the entries +! of the indices, which are PSB_LPK_ so that the entries ! are guaranteed to be able to contain global indices. ! This type only supports data handling and preprocessing, it is ! not supposed to be used for computations.
@@ -79,12 +79,14 @@ module psb_d_mat_mod use psb_d_base_mat_mod - use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat + use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat,& + & psb_d_ecsr_sparse_mat use psb_d_csc_mat_mod, only : psb_d_csc_sparse_mat, psb_ld_csc_sparse_mat type :: psb_dspmat_type class(psb_d_base_sparse_mat), allocatable :: a + class(psb_d_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ld_coo_sparse_mat), allocatable :: rmta @@ -143,7 +145,7 @@ module psb_d_mat_mod procedure, pass(a) :: csgetrow => psb_d_csgetrow procedure, pass(a) :: csgetblk => psb_d_csgetblk generic, public :: csget => csgetptn, csgetrow, csgetblk -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: lcsgetptn => psb_d_lcsgetptn procedure, pass(a) :: lcsgetrow => psb_d_lcsgetrow generic, public :: csget => lcsgetptn, lcsgetrow @@ -202,6 +204,8 @@ module psb_d_mat_mod procedure, pass(a) :: cscnv_ip => psb_d_cscnv_ip procedure, pass(a) :: cscnv_base => psb_d_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_d_split_nd + procedure, pass(a) :: merge_nd => psb_d_merge_nd procedure, pass(a) :: clone => psb_dspmat_clone procedure, pass(a) :: move_alloc => psb_dspmat_type_move ! @@ -307,7 +311,7 @@ module psb_d_mat_mod ! 
Setters procedure, pass(a) :: set_lnrows => psb_ld_set_lnrows procedure, pass(a) :: set_lncols => psb_ld_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: set_inrows => psb_ld_set_inrows procedure, pass(a) :: set_incols => psb_ld_set_incols generic, public :: set_nrows => set_inrows, set_lnrows @@ -342,7 +346,7 @@ module psb_d_mat_mod procedure, pass(a) :: csgetrow => psb_ld_csgetrow procedure, pass(a) :: csgetblk => psb_ld_csgetblk generic, public :: csget => csgetptn, csgetrow, csgetblk -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) !!$ procedure, pass(a) :: icsgetptn => psb_ld_icsgetptn !!$ procedure, pass(a) :: icsgetrow => psb_ld_icsgetrow !!$ generic, public :: csget => icsgetptn, icsgetrow @@ -840,6 +844,24 @@ module psb_d_mat_mod ! ! + interface + subroutine psb_d_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_split_nd + end interface + + interface + subroutine psb_d_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_merge_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 
3 versions: copying to target @@ -859,7 +881,6 @@ module psb_d_mat_mod end subroutine psb_d_cscnv end interface - interface subroutine psb_d_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat @@ -871,7 +892,6 @@ module psb_d_mat_mod end subroutine psb_d_cscnv_ip end interface - interface subroutine psb_d_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat @@ -1250,7 +1270,7 @@ module psb_d_mat_mod class(psb_ldspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: m end subroutine psb_ld_set_lnrows -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ld_set_inrows(m,a) import :: psb_ipk_, psb_lpk_, psb_ldspmat_type class(psb_ldspmat_type), intent(inout) :: a @@ -1265,7 +1285,7 @@ module psb_d_mat_mod class(psb_ldspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: n end subroutine psb_ld_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ld_set_incols(n,a) import :: psb_ipk_, psb_lpk_, psb_ldspmat_type class(psb_ldspmat_type), intent(inout) :: a @@ -2390,7 +2410,7 @@ contains end subroutine psb_d_clean_zeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_d_lcsgetptn(imin,imax,a,nz,ia,ja,info,& & jmin,jmax,iren,append,nzin,rscale,cscale) implicit none @@ -2909,7 +2929,7 @@ contains end subroutine psb_ld_clean_zeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) !!$ subroutine psb_ld_icsgetptn(imin,imax,a,nz,ia,ja,info,& !!$ & jmin,jmax,iren,append,nzin,rscale,cscale) !!$ implicit none diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index 88fa3262..302e6fc1 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_d_vect_mod procedure, pass(z) :: 
axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: upd_xyz => d_vect_upd_xyz + procedure, pass(z) :: xyzw => d_vect_xyzw procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_a => d_vect_mlt_a procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 @@ -496,7 +498,8 @@ contains subroutine d_vect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: alpha, beta, y(:) class(psb_d_vect_type) :: x @@ -507,7 +510,8 @@ contains subroutine d_vect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: y(:) class(psb_d_vect_type) :: x @@ -518,7 +522,8 @@ contains subroutine d_vect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:) class(psb_d_vect_type) :: y @@ -710,7 +715,7 @@ contains res = dzero if (allocated(x%v)) & - & res = x%v%dot(n,y) + & res = x%v%dot_a(n,y) end function d_vect_dot_a @@ -778,6 +783,38 @@ contains end subroutine d_vect_axpby_a2 + subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_vect_type), intent(inout) :: x + class(psb_d_vect_type), intent(inout) :: y + class(psb_d_vect_type), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + + end subroutine d_vect_upd_xyz + + subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_vect_type), intent(inout) :: x + class(psb_d_vect_type), intent(inout) :: y + 
class(psb_d_vect_type), intent(inout) :: z + class(psb_d_vect_type), intent(inout) :: w + real(psb_dpk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine d_vect_xyzw + + subroutine d_vect_mlt_v(x, y, info) use psi_serial_mod implicit none @@ -1141,7 +1178,7 @@ contains end if end function d_vect_nrm2_weight - + function d_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none @@ -1345,56 +1382,56 @@ module psb_d_multivect_mod integer(psb_ipk_) :: dupl = psb_dupl_add_ real(psb_dpk_), allocatable :: rmtv(:,:) contains - procedure, pass(x) :: get_nrows => d_vect_get_nrows - procedure, pass(x) :: get_ncols => d_vect_get_ncols - procedure, pass(x) :: sizeof => d_vect_sizeof - procedure, pass(x) :: get_fmt => d_vect_get_fmt + procedure, pass(x) :: get_nrows => d_mvect_get_nrows + procedure, pass(x) :: get_ncols => d_mvect_get_ncols + procedure, pass(x) :: sizeof => d_mvect_sizeof + procedure, pass(x) :: get_fmt => d_mvect_get_fmt procedure, pass(x) :: is_remote_build => d_mvect_is_remote_build procedure, pass(x) :: set_remote_build => d_mvect_set_remote_build procedure, pass(x) :: get_dupl => d_mvect_get_dupl procedure, pass(x) :: set_dupl => d_mvect_set_dupl - procedure, pass(x) :: all => d_vect_all - procedure, pass(x) :: reall => d_vect_reall - procedure, pass(x) :: zero => d_vect_zero - procedure, pass(x) :: asb => d_vect_asb - procedure, pass(x) :: sync => d_vect_sync - procedure, pass(x) :: free => d_vect_free - procedure, pass(x) :: ins => d_vect_ins - procedure, pass(x) :: bld_x => d_vect_bld_x - procedure, pass(x) :: bld_n => d_vect_bld_n + procedure, pass(x) :: all => d_mvect_all + procedure, pass(x) :: reall => d_mvect_reall + procedure, pass(x) :: zero => d_mvect_zero + procedure, pass(x) :: asb => d_mvect_asb + procedure, pass(x) :: sync => d_mvect_sync + procedure, pass(x) :: free => d_mvect_free + procedure, 
pass(x) :: ins => d_mvect_ins + procedure, pass(x) :: bld_x => d_mvect_bld_x + procedure, pass(x) :: bld_n => d_mvect_bld_n generic, public :: bld => bld_x, bld_n - procedure, pass(x) :: get_vect => d_vect_get_vect - procedure, pass(x) :: cnv => d_vect_cnv - procedure, pass(x) :: set_scal => d_vect_set_scal - procedure, pass(x) :: set_vect => d_vect_set_vect + procedure, pass(x) :: get_vect => d_mvect_get_vect + procedure, pass(x) :: cnv => d_mvect_cnv + procedure, pass(x) :: set_scal => d_mvect_set_scal + procedure, pass(x) :: set_vect => d_mvect_set_vect generic, public :: set => set_vect, set_scal - procedure, pass(x) :: clone => d_vect_clone - procedure, pass(x) :: gthab => d_vect_gthab - procedure, pass(x) :: gthzv => d_vect_gthzv - procedure, pass(x) :: gthzv_x => d_vect_gthzv_x + procedure, pass(x) :: clone => d_mvect_clone + procedure, pass(x) :: gthab => d_mvect_gthab + procedure, pass(x) :: gthzv => d_mvect_gthzv + procedure, pass(x) :: gthzv_x => d_mvect_gthzv_x generic, public :: gth => gthab, gthzv - procedure, pass(y) :: sctb => d_vect_sctb - procedure, pass(y) :: sctb_x => d_vect_sctb_x + procedure, pass(y) :: sctb => d_mvect_sctb + procedure, pass(y) :: sctb_x => d_mvect_sctb_x generic, public :: sct => sctb, sctb_x -!!$ procedure, pass(x) :: dot_v => d_vect_dot_v -!!$ procedure, pass(x) :: dot_a => d_vect_dot_a +!!$ procedure, pass(x) :: dot_v => d_mvect_dot_v +!!$ procedure, pass(x) :: dot_a => d_mvect_dot_a !!$ generic, public :: dot => dot_v, dot_a -!!$ procedure, pass(y) :: axpby_v => d_vect_axpby_v -!!$ procedure, pass(y) :: axpby_a => d_vect_axpby_a +!!$ procedure, pass(y) :: axpby_v => d_mvect_axpby_v +!!$ procedure, pass(y) :: axpby_a => d_mvect_axpby_a !!$ generic, public :: axpby => axpby_v, axpby_a -!!$ procedure, pass(y) :: mlt_v => d_vect_mlt_v -!!$ procedure, pass(y) :: mlt_a => d_vect_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => d_vect_mlt_v_2 -!!$ procedure, pass(z) :: mlt_va => 
d_vect_mlt_va -!!$ procedure, pass(z) :: mlt_av => d_vect_mlt_av +!!$ procedure, pass(y) :: mlt_v => d_mvect_mlt_v +!!$ procedure, pass(y) :: mlt_a => d_mvect_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => d_mvect_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => d_mvect_mlt_v_2 +!!$ procedure, pass(z) :: mlt_va => d_mvect_mlt_va +!!$ procedure, pass(z) :: mlt_av => d_mvect_mlt_av !!$ generic, public :: mlt => mlt_v, mlt_a, mlt_a_2,& !!$ & mlt_v_2, mlt_av, mlt_va -!!$ procedure, pass(x) :: scal => d_vect_scal -!!$ procedure, pass(x) :: nrm2 => d_vect_nrm2 -!!$ procedure, pass(x) :: amax => d_vect_amax -!!$ procedure, pass(x) :: asum => d_vect_asum +!!$ procedure, pass(x) :: scal => d_mvect_scal +!!$ procedure, pass(x) :: nrm2 => d_mvect_nrm2 +!!$ procedure, pass(x) :: amax => d_mvect_amax +!!$ procedure, pass(x) :: asum => d_mvect_asum end type psb_d_multivect_type public :: psb_d_multivect, psb_d_multivect_type,& @@ -1495,7 +1532,7 @@ contains end function psb_d_get_base_multivect_default - subroutine d_vect_clone(x,y,info) + subroutine d_mvect_clone(x,y,info) implicit none class(psb_d_multivect_type), intent(inout) :: x class(psb_d_multivect_type), intent(inout) :: y @@ -1504,11 +1541,11 @@ contains info = psb_success_ call y%free(info) if ((info==0).and.allocated(x%v)) then - call y%bld(x%get_vect(),mold=x%v) + call y%bld_x(x%get_vect(),mold=x%v) end if - end subroutine d_vect_clone + end subroutine d_mvect_clone - subroutine d_vect_bld_x(x,invect,mold) + subroutine d_mvect_bld_x(x,invect,mold) real(psb_dpk_), intent(in) :: invect(:,:) class(psb_d_multivect_type), intent(out) :: x class(psb_d_base_multivect_type), intent(in), optional :: mold @@ -1524,10 +1561,10 @@ contains if (info == psb_success_) call x%v%bld(invect) - end subroutine d_vect_bld_x + end subroutine d_mvect_bld_x - subroutine d_vect_bld_n(x,m,n,mold) + subroutine d_mvect_bld_n(x,m,n,mold) integer(psb_ipk_), intent(in) :: m,n class(psb_d_multivect_type), intent(out) :: x 
class(psb_d_base_multivect_type), intent(in), optional :: mold @@ -1541,9 +1578,9 @@ contains endif if (info == psb_success_) call x%v%bld(m,n) - end subroutine d_vect_bld_n + end subroutine d_mvect_bld_n - function d_vect_get_vect(x) result(res) + function d_mvect_get_vect(x) result(res) class(psb_d_multivect_type), intent(inout) :: x real(psb_dpk_), allocatable :: res(:,:) integer(psb_ipk_) :: info @@ -1551,25 +1588,25 @@ contains if (allocated(x%v)) then res = x%v%get_vect() end if - end function d_vect_get_vect + end function d_mvect_get_vect - subroutine d_vect_set_scal(x,val) + subroutine d_mvect_set_scal(x,val) class(psb_d_multivect_type), intent(inout) :: x real(psb_dpk_), intent(in) :: val integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine d_vect_set_scal + end subroutine d_mvect_set_scal - subroutine d_vect_set_vect(x,val) + subroutine d_mvect_set_vect(x,val) class(psb_d_multivect_type), intent(inout) :: x real(psb_dpk_), intent(in) :: val(:,:) integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine d_vect_set_vect + end subroutine d_mvect_set_vect function constructor(x) result(this) @@ -1577,7 +1614,7 @@ contains type(psb_d_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(x) + call this%bld_x(x) call this%asb(size(x,dim=1,kind=psb_ipk_),size(x,dim=2,kind=psb_ipk_),info) end function constructor @@ -1588,44 +1625,44 @@ contains type(psb_d_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(m,n) + call this%bld_n(m,n) call this%asb(m,n,info) end function size_const - function d_vect_get_nrows(x) result(res) + function d_mvect_get_nrows(x) result(res) implicit none class(psb_d_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_nrows() - end function d_vect_get_nrows + end function d_mvect_get_nrows - function d_vect_get_ncols(x) result(res) + function d_mvect_get_ncols(x) result(res) implicit none 
class(psb_d_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_ncols() - end function d_vect_get_ncols + end function d_mvect_get_ncols - function d_vect_sizeof(x) result(res) + function d_mvect_sizeof(x) result(res) implicit none class(psb_d_multivect_type), intent(in) :: x integer(psb_epk_) :: res res = 0 if (allocated(x%v)) res = x%v%sizeof() - end function d_vect_sizeof + end function d_mvect_sizeof - function d_vect_get_fmt(x) result(res) + function d_mvect_get_fmt(x) result(res) implicit none class(psb_d_multivect_type), intent(in) :: x character(len=5) :: res res = 'NULL' if (allocated(x%v)) res = x%v%get_fmt() - end function d_vect_get_fmt + end function d_mvect_get_fmt - subroutine d_vect_all(m,n, x, info, mold) + subroutine d_mvect_all(m,n, x, info, mold) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -1644,9 +1681,9 @@ contains info = psb_err_alloc_dealloc_ end if - end subroutine d_vect_all + end subroutine d_mvect_all - subroutine d_vect_reall(m,n, x, info) + subroutine d_mvect_reall(m,n, x, info) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -1659,18 +1696,18 @@ contains if (info == 0) & & call x%asb(m,n,info) - end subroutine d_vect_reall + end subroutine d_mvect_reall - subroutine d_vect_zero(x) + subroutine d_mvect_zero(x) use psi_serial_mod implicit none class(psb_d_multivect_type), intent(inout) :: x if (allocated(x%v)) call x%v%zero() - end subroutine d_vect_zero + end subroutine d_mvect_zero - subroutine d_vect_asb(m,n, x, info) + subroutine d_mvect_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1681,42 +1718,45 @@ contains if (allocated(x%v)) & & call x%v%asb(m,n,info) - end subroutine d_vect_asb + end subroutine d_mvect_asb - subroutine d_vect_sync(x) + subroutine d_mvect_sync(x) implicit none class(psb_d_multivect_type), intent(inout) :: x if (allocated(x%v)) & & call x%v%sync() - end subroutine d_vect_sync + end subroutine d_mvect_sync - subroutine 
d_vect_gthab(n,idx,alpha,x,beta,y) + subroutine d_mvect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: alpha, beta, y(:) class(psb_d_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,alpha,beta,y) - end subroutine d_vect_gthab + end subroutine d_mvect_gthab - subroutine d_vect_gthzv(n,idx,x,y) + subroutine d_mvect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: y(:) class(psb_d_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,y) - end subroutine d_vect_gthzv + end subroutine d_mvect_gthzv - subroutine d_vect_gthzv_x(i,n,idx,x,y) + subroutine d_mvect_gthzv_x(i,n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: y(:) class(psb_d_multivect_type) :: x @@ -1724,22 +1764,24 @@ contains if (allocated(x%v)) & & call x%v%gth(i,n,idx,y) - end subroutine d_vect_gthzv_x + end subroutine d_mvect_gthzv_x - subroutine d_vect_sctb(n,idx,x,beta,y) + subroutine d_mvect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:) class(psb_d_multivect_type) :: y if (allocated(y%v)) & & call y%v%sct(n,idx,x,beta) - end subroutine d_vect_sctb + end subroutine d_mvect_sctb - subroutine d_vect_sctb_x(i,n,idx,x,beta,y) + subroutine d_mvect_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta, x(:) class(psb_d_multivect_type) :: y @@ -1747,9 +1789,9 @@ contains if (allocated(y%v)) & & call y%v%sct(i,n,idx,x,beta) - end subroutine d_vect_sctb_x + end subroutine d_mvect_sctb_x - subroutine d_vect_free(x, info) + subroutine d_mvect_free(x, 
info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1762,9 +1804,9 @@ contains if (info == 0) deallocate(x%v,stat=info) end if - end subroutine d_vect_free + end subroutine d_mvect_free - subroutine d_vect_ins(n,irl,val,x,info) + subroutine d_mvect_ins(n,irl,val,x,info) use psi_serial_mod implicit none class(psb_d_multivect_type), intent(inout) :: x @@ -1783,10 +1825,10 @@ contains dupl = x%get_dupl() call x%v%ins(n,irl,val,dupl,info) - end subroutine d_vect_ins + end subroutine d_mvect_ins - subroutine d_vect_cnv(x,mold) + subroutine d_mvect_cnv(x,mold) class(psb_d_multivect_type), intent(inout) :: x class(psb_d_base_multivect_type), intent(in), optional :: mold class(psb_d_base_multivect_type), allocatable :: tmp @@ -1803,10 +1845,10 @@ contains call x%v%free(info) end if call move_alloc(tmp,x%v) - end subroutine d_vect_cnv + end subroutine d_mvect_cnv -!!$ function d_vect_dot_v(n,x,y) result(res) +!!$ function d_mvect_dot_v(n,x,y) result(res) !!$ implicit none !!$ class(psb_d_multivect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(in) :: n @@ -1816,9 +1858,9 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) & !!$ & res = x%v%dot(n,y%v) !!$ -!!$ end function d_vect_dot_v +!!$ end function d_mvect_dot_v !!$ -!!$ function d_vect_dot_a(n,x,y) result(res) +!!$ function d_mvect_dot_a(n,x,y) result(res) !!$ implicit none !!$ class(psb_d_multivect_type), intent(inout) :: x !!$ real(psb_dpk_), intent(in) :: y(:) @@ -1829,9 +1871,9 @@ contains !!$ if (allocated(x%v)) & !!$ & res = x%v%dot(n,y) !!$ -!!$ end function d_vect_dot_a +!!$ end function d_mvect_dot_a !!$ -!!$ subroutine d_vect_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine d_mvect_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m @@ -1843,12 +1885,12 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) then !!$ call y%v%axpby(m,alpha,x%v,beta,info) !!$ else -!!$ info = psb_err_invalid_vect_state_ +!!$ info = 
psb_err_invalid_mvect_state_ !!$ end if !!$ -!!$ end subroutine d_vect_axpby_v +!!$ end subroutine d_mvect_axpby_v !!$ -!!$ subroutine d_vect_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine d_mvect_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m @@ -1860,10 +1902,10 @@ contains !!$ if (allocated(y%v)) & !!$ & call y%v%axpby(m,alpha,x,beta,info) !!$ -!!$ end subroutine d_vect_axpby_a +!!$ end subroutine d_mvect_axpby_a !!$ !!$ -!!$ subroutine d_vect_mlt_v(x, y, info) +!!$ subroutine d_mvect_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_d_multivect_type), intent(inout) :: x @@ -1875,9 +1917,9 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) & !!$ & call y%v%mlt(x%v,info) !!$ -!!$ end subroutine d_vect_mlt_v +!!$ end subroutine d_mvect_mlt_v !!$ -!!$ subroutine d_vect_mlt_a(x, y, info) +!!$ subroutine d_mvect_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: x(:) @@ -1890,10 +1932,10 @@ contains !!$ if (allocated(y%v)) & !!$ & call y%v%mlt(x,info) !!$ -!!$ end subroutine d_vect_mlt_a +!!$ end subroutine d_mvect_mlt_a !!$ !!$ -!!$ subroutine d_vect_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine d_mvect_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta @@ -1907,9 +1949,9 @@ contains !!$ if (allocated(z%v)) & !!$ & call z%v%mlt(alpha,x,y,beta,info) !!$ -!!$ end subroutine d_vect_mlt_a_2 +!!$ end subroutine d_mvect_mlt_a_2 !!$ -!!$ subroutine d_vect_mlt_v_2(alpha,x,y,beta,z,info,conjgx,conjgy) +!!$ subroutine d_mvect_mlt_v_2(alpha,x,y,beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta @@ -1926,9 +1968,9 @@ contains !!$ & allocated(z%v)) & !!$ & call z%v%mlt(alpha,x%v,y%v,beta,info,conjgx,conjgy) !!$ -!!$ end subroutine d_vect_mlt_v_2 +!!$ end subroutine d_mvect_mlt_v_2 !!$ -!!$ subroutine 
d_vect_mlt_av(alpha,x,y,beta,z,info) +!!$ subroutine d_mvect_mlt_av(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta @@ -1942,9 +1984,9 @@ contains !!$ if (allocated(z%v).and.allocated(y%v)) & !!$ & call z%v%mlt(alpha,x,y%v,beta,info) !!$ -!!$ end subroutine d_vect_mlt_av +!!$ end subroutine d_mvect_mlt_av !!$ -!!$ subroutine d_vect_mlt_va(alpha,x,y,beta,z,info) +!!$ subroutine d_mvect_mlt_va(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta @@ -1959,9 +2001,9 @@ contains !!$ if (allocated(z%v).and.allocated(x%v)) & !!$ & call z%v%mlt(alpha,x%v,y,beta,info) !!$ -!!$ end subroutine d_vect_mlt_va +!!$ end subroutine d_mvect_mlt_va !!$ -!!$ subroutine d_vect_scal(alpha, x) +!!$ subroutine d_mvect_scal(alpha, x) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_d_multivect_type), intent(inout) :: x @@ -1969,10 +2011,10 @@ contains !!$ !!$ if (allocated(x%v)) call x%v%scal(alpha) !!$ -!!$ end subroutine d_vect_scal +!!$ end subroutine d_mvect_scal !!$ !!$ -!!$ function d_vect_nrm2(n,x) result(res) +!!$ function d_mvect_nrm2(n,x) result(res) !!$ implicit none !!$ class(psb_d_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1984,9 +2026,9 @@ contains !!$ res = dzero !!$ end if !!$ -!!$ end function d_vect_nrm2 +!!$ end function d_mvect_nrm2 !!$ -!!$ function d_vect_amax(n,x) result(res) +!!$ function d_mvect_amax(n,x) result(res) !!$ implicit none !!$ class(psb_d_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1998,9 +2040,9 @@ contains !!$ res = dzero !!$ end if !!$ -!!$ end function d_vect_amax +!!$ end function d_mvect_amax !!$ -!!$ function d_vect_asum(n,x) result(res) +!!$ function d_mvect_asum(n,x) result(res) !!$ implicit none !!$ class(psb_d_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -2012,6 +2054,6 @@ contains !!$ res = dzero !!$ end if !!$ -!!$ 
end function d_vect_asum +!!$ end function d_mvect_asum end module psb_d_multivect_mod diff --git a/base/modules/serial/psb_i_base_vect_mod.F90 b/base/modules/serial/psb_i_base_vect_mod.F90 index a5cddeb5..e2c64af3 100644 --- a/base/modules/serial/psb_i_base_vect_mod.F90 +++ b/base/modules/serial/psb_i_base_vect_mod.F90 @@ -209,7 +209,7 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if -#if defined (OPENMP) +#if defined (PSB_OPENMP) !$omp parallel do private(i) do i = 1, size(this) x%v(i) = this(i) @@ -503,8 +503,8 @@ contains info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) - if (info == 0) call x%free_buffer(info) - if (info == 0) call x%free_comid(info) + if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info) + if ((info == 0).and.allocated(x%comid)) call x%free_comid(info) if (info /= 0) call & & psb_errpush(psb_err_alloc_dealloc_,'vect_free') @@ -774,7 +774,7 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val @@ -812,7 +812,7 @@ contains if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val(i-first_+1) @@ -841,7 +841,8 @@ contains subroutine i_base_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: alpha, beta, y(:) class(psb_i_base_vect_type) :: x @@ -861,7 +862,8 @@ contains subroutine i_base_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: y(:) class(psb_i_base_vect_type) :: x @@ -877,7 +879,8 @@ contains subroutine i_base_gthzbuf(i,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + 
integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx class(psb_i_base_vect_type) :: x @@ -940,7 +943,8 @@ contains subroutine i_base_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: y(:) class(psb_i_base_vect_type) :: x @@ -965,7 +969,8 @@ contains subroutine i_base_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: beta, x(:) class(psb_i_base_vect_type) :: y @@ -978,7 +983,8 @@ contains subroutine i_base_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta, x(:) class(psb_i_base_vect_type) :: y @@ -992,7 +998,8 @@ contains subroutine i_base_sctb_buf(i,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta class(psb_i_base_vect_type) :: y @@ -1690,10 +1697,11 @@ contains subroutine i_base_mlv_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: alpha, beta, y(:) class(psb_i_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -1715,7 +1723,8 @@ contains subroutine i_base_mlv_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: y(:) class(psb_i_base_multivect_type) :: x @@ -1737,10 +1746,11 @@ contains subroutine i_base_mlv_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + 
integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: y(:) class(psb_i_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -1763,10 +1773,11 @@ contains subroutine i_base_mlv_gthzm(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: y(:,:) class(psb_i_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -1784,7 +1795,8 @@ contains subroutine i_base_mlv_gthzbuf(i,ixb,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, ixb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, ixb class(psb_i_base_vect_type) :: idx class(psb_i_base_multivect_type) :: x integer(psb_ipk_) :: nc @@ -1816,10 +1828,11 @@ contains subroutine i_base_mlv_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: beta, x(:) class(psb_i_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = psb_size(y%v,2_psb_ipk_) @@ -1831,10 +1844,11 @@ contains subroutine i_base_mlv_sctbr2(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: beta, x(:,:) class(psb_i_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = y%get_ncols() @@ -1846,7 +1860,8 @@ contains subroutine i_base_mlv_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer( psb_ipk_) :: beta, x(:) class(psb_i_base_multivect_type) :: y @@ -1858,7 +1873,8 @@ contains subroutine i_base_mlv_sctb_buf(i,iyb,n,idx,beta,y) 
use psi_serial_mod implicit none - integer(psb_ipk_) :: i, iyb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, iyb class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta class(psb_i_base_multivect_type) :: y diff --git a/base/modules/serial/psb_i_vect_mod.F90 b/base/modules/serial/psb_i_vect_mod.F90 index ab371bd5..55ed7e9d 100644 --- a/base/modules/serial/psb_i_vect_mod.F90 +++ b/base/modules/serial/psb_i_vect_mod.F90 @@ -436,7 +436,8 @@ contains subroutine i_vect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: alpha, beta, y(:) class(psb_i_vect_type) :: x @@ -447,7 +448,8 @@ contains subroutine i_vect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: y(:) class(psb_i_vect_type) :: x @@ -458,7 +460,8 @@ contains subroutine i_vect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: beta, x(:) class(psb_i_vect_type) :: y @@ -641,37 +644,37 @@ module psb_i_multivect_mod integer(psb_ipk_) :: dupl = psb_dupl_add_ integer(psb_ipk_), allocatable :: rmtv(:,:) contains - procedure, pass(x) :: get_nrows => i_vect_get_nrows - procedure, pass(x) :: get_ncols => i_vect_get_ncols - procedure, pass(x) :: sizeof => i_vect_sizeof - procedure, pass(x) :: get_fmt => i_vect_get_fmt + procedure, pass(x) :: get_nrows => i_mvect_get_nrows + procedure, pass(x) :: get_ncols => i_mvect_get_ncols + procedure, pass(x) :: sizeof => i_mvect_sizeof + procedure, pass(x) :: get_fmt => i_mvect_get_fmt procedure, pass(x) :: is_remote_build => i_mvect_is_remote_build procedure, pass(x) :: set_remote_build => i_mvect_set_remote_build procedure, pass(x) :: get_dupl => i_mvect_get_dupl procedure, pass(x) :: set_dupl => i_mvect_set_dupl - procedure, pass(x) :: all => i_vect_all - procedure, 
pass(x) :: reall => i_vect_reall - procedure, pass(x) :: zero => i_vect_zero - procedure, pass(x) :: asb => i_vect_asb - procedure, pass(x) :: sync => i_vect_sync - procedure, pass(x) :: free => i_vect_free - procedure, pass(x) :: ins => i_vect_ins - procedure, pass(x) :: bld_x => i_vect_bld_x - procedure, pass(x) :: bld_n => i_vect_bld_n + procedure, pass(x) :: all => i_mvect_all + procedure, pass(x) :: reall => i_mvect_reall + procedure, pass(x) :: zero => i_mvect_zero + procedure, pass(x) :: asb => i_mvect_asb + procedure, pass(x) :: sync => i_mvect_sync + procedure, pass(x) :: free => i_mvect_free + procedure, pass(x) :: ins => i_mvect_ins + procedure, pass(x) :: bld_x => i_mvect_bld_x + procedure, pass(x) :: bld_n => i_mvect_bld_n generic, public :: bld => bld_x, bld_n - procedure, pass(x) :: get_vect => i_vect_get_vect - procedure, pass(x) :: cnv => i_vect_cnv - procedure, pass(x) :: set_scal => i_vect_set_scal - procedure, pass(x) :: set_vect => i_vect_set_vect + procedure, pass(x) :: get_vect => i_mvect_get_vect + procedure, pass(x) :: cnv => i_mvect_cnv + procedure, pass(x) :: set_scal => i_mvect_set_scal + procedure, pass(x) :: set_vect => i_mvect_set_vect generic, public :: set => set_vect, set_scal - procedure, pass(x) :: clone => i_vect_clone - procedure, pass(x) :: gthab => i_vect_gthab - procedure, pass(x) :: gthzv => i_vect_gthzv - procedure, pass(x) :: gthzv_x => i_vect_gthzv_x + procedure, pass(x) :: clone => i_mvect_clone + procedure, pass(x) :: gthab => i_mvect_gthab + procedure, pass(x) :: gthzv => i_mvect_gthzv + procedure, pass(x) :: gthzv_x => i_mvect_gthzv_x generic, public :: gth => gthab, gthzv - procedure, pass(y) :: sctb => i_vect_sctb - procedure, pass(y) :: sctb_x => i_vect_sctb_x + procedure, pass(y) :: sctb => i_mvect_sctb + procedure, pass(y) :: sctb_x => i_mvect_sctb_x generic, public :: sct => sctb, sctb_x end type psb_i_multivect_type @@ -773,7 +776,7 @@ contains end function psb_i_get_base_multivect_default - subroutine 
i_vect_clone(x,y,info) + subroutine i_mvect_clone(x,y,info) implicit none class(psb_i_multivect_type), intent(inout) :: x class(psb_i_multivect_type), intent(inout) :: y @@ -782,11 +785,11 @@ contains info = psb_success_ call y%free(info) if ((info==0).and.allocated(x%v)) then - call y%bld(x%get_vect(),mold=x%v) + call y%bld_x(x%get_vect(),mold=x%v) end if - end subroutine i_vect_clone + end subroutine i_mvect_clone - subroutine i_vect_bld_x(x,invect,mold) + subroutine i_mvect_bld_x(x,invect,mold) integer(psb_ipk_), intent(in) :: invect(:,:) class(psb_i_multivect_type), intent(out) :: x class(psb_i_base_multivect_type), intent(in), optional :: mold @@ -802,10 +805,10 @@ contains if (info == psb_success_) call x%v%bld(invect) - end subroutine i_vect_bld_x + end subroutine i_mvect_bld_x - subroutine i_vect_bld_n(x,m,n,mold) + subroutine i_mvect_bld_n(x,m,n,mold) integer(psb_ipk_), intent(in) :: m,n class(psb_i_multivect_type), intent(out) :: x class(psb_i_base_multivect_type), intent(in), optional :: mold @@ -819,9 +822,9 @@ contains endif if (info == psb_success_) call x%v%bld(m,n) - end subroutine i_vect_bld_n + end subroutine i_mvect_bld_n - function i_vect_get_vect(x) result(res) + function i_mvect_get_vect(x) result(res) class(psb_i_multivect_type), intent(inout) :: x integer(psb_ipk_), allocatable :: res(:,:) integer(psb_ipk_) :: info @@ -829,25 +832,25 @@ contains if (allocated(x%v)) then res = x%v%get_vect() end if - end function i_vect_get_vect + end function i_mvect_get_vect - subroutine i_vect_set_scal(x,val) + subroutine i_mvect_set_scal(x,val) class(psb_i_multivect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: val integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine i_vect_set_scal + end subroutine i_mvect_set_scal - subroutine i_vect_set_vect(x,val) + subroutine i_mvect_set_vect(x,val) class(psb_i_multivect_type), intent(inout) :: x integer(psb_ipk_), intent(in) :: val(:,:) integer(psb_ipk_) :: info if 
(allocated(x%v)) call x%v%set(val) - end subroutine i_vect_set_vect + end subroutine i_mvect_set_vect function constructor(x) result(this) @@ -855,7 +858,7 @@ contains type(psb_i_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(x) + call this%bld_x(x) call this%asb(size(x,dim=1,kind=psb_ipk_),size(x,dim=2,kind=psb_ipk_),info) end function constructor @@ -866,44 +869,44 @@ contains type(psb_i_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(m,n) + call this%bld_n(m,n) call this%asb(m,n,info) end function size_const - function i_vect_get_nrows(x) result(res) + function i_mvect_get_nrows(x) result(res) implicit none class(psb_i_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_nrows() - end function i_vect_get_nrows + end function i_mvect_get_nrows - function i_vect_get_ncols(x) result(res) + function i_mvect_get_ncols(x) result(res) implicit none class(psb_i_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_ncols() - end function i_vect_get_ncols + end function i_mvect_get_ncols - function i_vect_sizeof(x) result(res) + function i_mvect_sizeof(x) result(res) implicit none class(psb_i_multivect_type), intent(in) :: x integer(psb_epk_) :: res res = 0 if (allocated(x%v)) res = x%v%sizeof() - end function i_vect_sizeof + end function i_mvect_sizeof - function i_vect_get_fmt(x) result(res) + function i_mvect_get_fmt(x) result(res) implicit none class(psb_i_multivect_type), intent(in) :: x character(len=5) :: res res = 'NULL' if (allocated(x%v)) res = x%v%get_fmt() - end function i_vect_get_fmt + end function i_mvect_get_fmt - subroutine i_vect_all(m,n, x, info, mold) + subroutine i_mvect_all(m,n, x, info, mold) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -922,9 +925,9 @@ contains info = psb_err_alloc_dealloc_ end if - end subroutine i_vect_all + end subroutine i_mvect_all - subroutine i_vect_reall(m,n, x, info) + subroutine 
i_mvect_reall(m,n, x, info) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -937,18 +940,18 @@ contains if (info == 0) & & call x%asb(m,n,info) - end subroutine i_vect_reall + end subroutine i_mvect_reall - subroutine i_vect_zero(x) + subroutine i_mvect_zero(x) use psi_serial_mod implicit none class(psb_i_multivect_type), intent(inout) :: x if (allocated(x%v)) call x%v%zero() - end subroutine i_vect_zero + end subroutine i_mvect_zero - subroutine i_vect_asb(m,n, x, info) + subroutine i_mvect_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -959,42 +962,45 @@ contains if (allocated(x%v)) & & call x%v%asb(m,n,info) - end subroutine i_vect_asb + end subroutine i_mvect_asb - subroutine i_vect_sync(x) + subroutine i_mvect_sync(x) implicit none class(psb_i_multivect_type), intent(inout) :: x if (allocated(x%v)) & & call x%v%sync() - end subroutine i_vect_sync + end subroutine i_mvect_sync - subroutine i_vect_gthab(n,idx,alpha,x,beta,y) + subroutine i_mvect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: alpha, beta, y(:) class(psb_i_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,alpha,beta,y) - end subroutine i_vect_gthab + end subroutine i_mvect_gthab - subroutine i_vect_gthzv(n,idx,x,y) + subroutine i_mvect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: y(:) class(psb_i_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,y) - end subroutine i_vect_gthzv + end subroutine i_mvect_gthzv - subroutine i_vect_gthzv_x(i,n,idx,x,y) + subroutine i_mvect_gthzv_x(i,n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: y(:) class(psb_i_multivect_type) :: x @@ -1002,22 +1008,24 @@ contains if (allocated(x%v)) & & 
call x%v%gth(i,n,idx,y) - end subroutine i_vect_gthzv_x + end subroutine i_mvect_gthzv_x - subroutine i_vect_sctb(n,idx,x,beta,y) + subroutine i_mvect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_ipk_) :: beta, x(:) class(psb_i_multivect_type) :: y if (allocated(y%v)) & & call y%v%sct(n,idx,x,beta) - end subroutine i_vect_sctb + end subroutine i_mvect_sctb - subroutine i_vect_sctb_x(i,n,idx,x,beta,y) + subroutine i_mvect_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta, x(:) class(psb_i_multivect_type) :: y @@ -1025,9 +1033,9 @@ contains if (allocated(y%v)) & & call y%v%sct(i,n,idx,x,beta) - end subroutine i_vect_sctb_x + end subroutine i_mvect_sctb_x - subroutine i_vect_free(x, info) + subroutine i_mvect_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1040,9 +1048,9 @@ contains if (info == 0) deallocate(x%v,stat=info) end if - end subroutine i_vect_free + end subroutine i_mvect_free - subroutine i_vect_ins(n,irl,val,x,info) + subroutine i_mvect_ins(n,irl,val,x,info) use psi_serial_mod implicit none class(psb_i_multivect_type), intent(inout) :: x @@ -1061,10 +1069,10 @@ contains dupl = x%get_dupl() call x%v%ins(n,irl,val,dupl,info) - end subroutine i_vect_ins + end subroutine i_mvect_ins - subroutine i_vect_cnv(x,mold) + subroutine i_mvect_cnv(x,mold) class(psb_i_multivect_type), intent(inout) :: x class(psb_i_base_multivect_type), intent(in), optional :: mold class(psb_i_base_multivect_type), allocatable :: tmp @@ -1081,7 +1089,7 @@ contains call x%v%free(info) end if call move_alloc(tmp,x%v) - end subroutine i_vect_cnv + end subroutine i_mvect_cnv end module psb_i_multivect_mod diff --git a/base/modules/serial/psb_l_base_vect_mod.F90 b/base/modules/serial/psb_l_base_vect_mod.F90 index 93b29e17..cf30db74 100644 --- 
a/base/modules/serial/psb_l_base_vect_mod.F90 +++ b/base/modules/serial/psb_l_base_vect_mod.F90 @@ -210,7 +210,7 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if -#if defined (OPENMP) +#if defined (PSB_OPENMP) !$omp parallel do private(i) do i = 1, size(this) x%v(i) = this(i) @@ -504,8 +504,8 @@ contains info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) - if (info == 0) call x%free_buffer(info) - if (info == 0) call x%free_comid(info) + if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info) + if ((info == 0).and.allocated(x%comid)) call x%free_comid(info) if (info /= 0) call & & psb_errpush(psb_err_alloc_dealloc_,'vect_free') @@ -775,7 +775,7 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val @@ -813,7 +813,7 @@ contains if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val(i-first_+1) @@ -842,7 +842,8 @@ contains subroutine l_base_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: alpha, beta, y(:) class(psb_l_base_vect_type) :: x @@ -862,7 +863,8 @@ contains subroutine l_base_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx integer(psb_lpk_) :: y(:) class(psb_l_base_vect_type) :: x @@ -878,7 +880,8 @@ contains subroutine l_base_gthzbuf(i,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx class(psb_l_base_vect_type) :: x @@ -941,7 +944,8 @@ contains subroutine l_base_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + 
integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: y(:) class(psb_l_base_vect_type) :: x @@ -966,7 +970,8 @@ contains subroutine l_base_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: beta, x(:) class(psb_l_base_vect_type) :: y @@ -979,7 +984,8 @@ contains subroutine l_base_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_lpk_) :: beta, x(:) class(psb_l_base_vect_type) :: y @@ -993,7 +999,8 @@ contains subroutine l_base_sctb_buf(i,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_lpk_) :: beta class(psb_l_base_vect_type) :: y @@ -1691,10 +1698,11 @@ contains subroutine l_base_mlv_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: alpha, beta, y(:) class(psb_l_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -1716,7 +1724,8 @@ contains subroutine l_base_mlv_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_lpk_) :: y(:) class(psb_l_base_multivect_type) :: x @@ -1738,10 +1747,11 @@ contains subroutine l_base_mlv_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: y(:) class(psb_l_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -1764,10 +1774,11 @@ 
contains subroutine l_base_mlv_gthzm(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: y(:,:) class(psb_l_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -1785,7 +1796,8 @@ contains subroutine l_base_mlv_gthzbuf(i,ixb,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, ixb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, ixb class(psb_i_base_vect_type) :: idx class(psb_l_base_multivect_type) :: x integer(psb_ipk_) :: nc @@ -1817,10 +1829,11 @@ contains subroutine l_base_mlv_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: beta, x(:) class(psb_l_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = psb_size(y%v,2_psb_ipk_) @@ -1832,10 +1845,11 @@ contains subroutine l_base_mlv_sctbr2(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: beta, x(:,:) class(psb_l_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = y%get_ncols() @@ -1847,7 +1861,8 @@ contains subroutine l_base_mlv_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer( psb_lpk_) :: beta, x(:) class(psb_l_base_multivect_type) :: y @@ -1859,7 +1874,8 @@ contains subroutine l_base_mlv_sctb_buf(i,iyb,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, iyb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, iyb class(psb_i_base_vect_type) :: idx integer(psb_lpk_) :: beta class(psb_l_base_multivect_type) :: y diff --git 
a/base/modules/serial/psb_l_vect_mod.F90 b/base/modules/serial/psb_l_vect_mod.F90 index 779d4723..6936e75f 100644 --- a/base/modules/serial/psb_l_vect_mod.F90 +++ b/base/modules/serial/psb_l_vect_mod.F90 @@ -437,7 +437,8 @@ contains subroutine l_vect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: alpha, beta, y(:) class(psb_l_vect_type) :: x @@ -448,7 +449,8 @@ contains subroutine l_vect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: y(:) class(psb_l_vect_type) :: x @@ -459,7 +461,8 @@ contains subroutine l_vect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: beta, x(:) class(psb_l_vect_type) :: y @@ -642,37 +645,37 @@ module psb_l_multivect_mod integer(psb_ipk_) :: dupl = psb_dupl_add_ integer(psb_lpk_), allocatable :: rmtv(:,:) contains - procedure, pass(x) :: get_nrows => l_vect_get_nrows - procedure, pass(x) :: get_ncols => l_vect_get_ncols - procedure, pass(x) :: sizeof => l_vect_sizeof - procedure, pass(x) :: get_fmt => l_vect_get_fmt + procedure, pass(x) :: get_nrows => l_mvect_get_nrows + procedure, pass(x) :: get_ncols => l_mvect_get_ncols + procedure, pass(x) :: sizeof => l_mvect_sizeof + procedure, pass(x) :: get_fmt => l_mvect_get_fmt procedure, pass(x) :: is_remote_build => l_mvect_is_remote_build procedure, pass(x) :: set_remote_build => l_mvect_set_remote_build procedure, pass(x) :: get_dupl => l_mvect_get_dupl procedure, pass(x) :: set_dupl => l_mvect_set_dupl - procedure, pass(x) :: all => l_vect_all - procedure, pass(x) :: reall => l_vect_reall - procedure, pass(x) :: zero => l_vect_zero - procedure, pass(x) :: asb => l_vect_asb - procedure, pass(x) :: sync => l_vect_sync - procedure, pass(x) :: free => l_vect_free - procedure, pass(x) :: ins => 
l_vect_ins - procedure, pass(x) :: bld_x => l_vect_bld_x - procedure, pass(x) :: bld_n => l_vect_bld_n + procedure, pass(x) :: all => l_mvect_all + procedure, pass(x) :: reall => l_mvect_reall + procedure, pass(x) :: zero => l_mvect_zero + procedure, pass(x) :: asb => l_mvect_asb + procedure, pass(x) :: sync => l_mvect_sync + procedure, pass(x) :: free => l_mvect_free + procedure, pass(x) :: ins => l_mvect_ins + procedure, pass(x) :: bld_x => l_mvect_bld_x + procedure, pass(x) :: bld_n => l_mvect_bld_n generic, public :: bld => bld_x, bld_n - procedure, pass(x) :: get_vect => l_vect_get_vect - procedure, pass(x) :: cnv => l_vect_cnv - procedure, pass(x) :: set_scal => l_vect_set_scal - procedure, pass(x) :: set_vect => l_vect_set_vect + procedure, pass(x) :: get_vect => l_mvect_get_vect + procedure, pass(x) :: cnv => l_mvect_cnv + procedure, pass(x) :: set_scal => l_mvect_set_scal + procedure, pass(x) :: set_vect => l_mvect_set_vect generic, public :: set => set_vect, set_scal - procedure, pass(x) :: clone => l_vect_clone - procedure, pass(x) :: gthab => l_vect_gthab - procedure, pass(x) :: gthzv => l_vect_gthzv - procedure, pass(x) :: gthzv_x => l_vect_gthzv_x + procedure, pass(x) :: clone => l_mvect_clone + procedure, pass(x) :: gthab => l_mvect_gthab + procedure, pass(x) :: gthzv => l_mvect_gthzv + procedure, pass(x) :: gthzv_x => l_mvect_gthzv_x generic, public :: gth => gthab, gthzv - procedure, pass(y) :: sctb => l_vect_sctb - procedure, pass(y) :: sctb_x => l_vect_sctb_x + procedure, pass(y) :: sctb => l_mvect_sctb + procedure, pass(y) :: sctb_x => l_mvect_sctb_x generic, public :: sct => sctb, sctb_x end type psb_l_multivect_type @@ -774,7 +777,7 @@ contains end function psb_l_get_base_multivect_default - subroutine l_vect_clone(x,y,info) + subroutine l_mvect_clone(x,y,info) implicit none class(psb_l_multivect_type), intent(inout) :: x class(psb_l_multivect_type), intent(inout) :: y @@ -783,11 +786,11 @@ contains info = psb_success_ call y%free(info) if 
((info==0).and.allocated(x%v)) then - call y%bld(x%get_vect(),mold=x%v) + call y%bld_x(x%get_vect(),mold=x%v) end if - end subroutine l_vect_clone + end subroutine l_mvect_clone - subroutine l_vect_bld_x(x,invect,mold) + subroutine l_mvect_bld_x(x,invect,mold) integer(psb_lpk_), intent(in) :: invect(:,:) class(psb_l_multivect_type), intent(out) :: x class(psb_l_base_multivect_type), intent(in), optional :: mold @@ -803,10 +806,10 @@ contains if (info == psb_success_) call x%v%bld(invect) - end subroutine l_vect_bld_x + end subroutine l_mvect_bld_x - subroutine l_vect_bld_n(x,m,n,mold) + subroutine l_mvect_bld_n(x,m,n,mold) integer(psb_ipk_), intent(in) :: m,n class(psb_l_multivect_type), intent(out) :: x class(psb_l_base_multivect_type), intent(in), optional :: mold @@ -820,9 +823,9 @@ contains endif if (info == psb_success_) call x%v%bld(m,n) - end subroutine l_vect_bld_n + end subroutine l_mvect_bld_n - function l_vect_get_vect(x) result(res) + function l_mvect_get_vect(x) result(res) class(psb_l_multivect_type), intent(inout) :: x integer(psb_lpk_), allocatable :: res(:,:) integer(psb_ipk_) :: info @@ -830,25 +833,25 @@ contains if (allocated(x%v)) then res = x%v%get_vect() end if - end function l_vect_get_vect + end function l_mvect_get_vect - subroutine l_vect_set_scal(x,val) + subroutine l_mvect_set_scal(x,val) class(psb_l_multivect_type), intent(inout) :: x integer(psb_lpk_), intent(in) :: val integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine l_vect_set_scal + end subroutine l_mvect_set_scal - subroutine l_vect_set_vect(x,val) + subroutine l_mvect_set_vect(x,val) class(psb_l_multivect_type), intent(inout) :: x integer(psb_lpk_), intent(in) :: val(:,:) integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine l_vect_set_vect + end subroutine l_mvect_set_vect function constructor(x) result(this) @@ -856,7 +859,7 @@ contains type(psb_l_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(x) + 
call this%bld_x(x) call this%asb(size(x,dim=1,kind=psb_ipk_),size(x,dim=2,kind=psb_ipk_),info) end function constructor @@ -867,44 +870,44 @@ contains type(psb_l_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(m,n) + call this%bld_n(m,n) call this%asb(m,n,info) end function size_const - function l_vect_get_nrows(x) result(res) + function l_mvect_get_nrows(x) result(res) implicit none class(psb_l_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_nrows() - end function l_vect_get_nrows + end function l_mvect_get_nrows - function l_vect_get_ncols(x) result(res) + function l_mvect_get_ncols(x) result(res) implicit none class(psb_l_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_ncols() - end function l_vect_get_ncols + end function l_mvect_get_ncols - function l_vect_sizeof(x) result(res) + function l_mvect_sizeof(x) result(res) implicit none class(psb_l_multivect_type), intent(in) :: x integer(psb_epk_) :: res res = 0 if (allocated(x%v)) res = x%v%sizeof() - end function l_vect_sizeof + end function l_mvect_sizeof - function l_vect_get_fmt(x) result(res) + function l_mvect_get_fmt(x) result(res) implicit none class(psb_l_multivect_type), intent(in) :: x character(len=5) :: res res = 'NULL' if (allocated(x%v)) res = x%v%get_fmt() - end function l_vect_get_fmt + end function l_mvect_get_fmt - subroutine l_vect_all(m,n, x, info, mold) + subroutine l_mvect_all(m,n, x, info, mold) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -923,9 +926,9 @@ contains info = psb_err_alloc_dealloc_ end if - end subroutine l_vect_all + end subroutine l_mvect_all - subroutine l_vect_reall(m,n, x, info) + subroutine l_mvect_reall(m,n, x, info) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -938,18 +941,18 @@ contains if (info == 0) & & call x%asb(m,n,info) - end subroutine l_vect_reall + end subroutine l_mvect_reall - subroutine l_vect_zero(x) + subroutine 
l_mvect_zero(x) use psi_serial_mod implicit none class(psb_l_multivect_type), intent(inout) :: x if (allocated(x%v)) call x%v%zero() - end subroutine l_vect_zero + end subroutine l_mvect_zero - subroutine l_vect_asb(m,n, x, info) + subroutine l_mvect_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -960,42 +963,45 @@ contains if (allocated(x%v)) & & call x%v%asb(m,n,info) - end subroutine l_vect_asb + end subroutine l_mvect_asb - subroutine l_vect_sync(x) + subroutine l_mvect_sync(x) implicit none class(psb_l_multivect_type), intent(inout) :: x if (allocated(x%v)) & & call x%v%sync() - end subroutine l_vect_sync + end subroutine l_mvect_sync - subroutine l_vect_gthab(n,idx,alpha,x,beta,y) + subroutine l_mvect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: alpha, beta, y(:) class(psb_l_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,alpha,beta,y) - end subroutine l_vect_gthab + end subroutine l_mvect_gthab - subroutine l_vect_gthzv(n,idx,x,y) + subroutine l_mvect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: y(:) class(psb_l_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,y) - end subroutine l_vect_gthzv + end subroutine l_mvect_gthzv - subroutine l_vect_gthzv_x(i,n,idx,x,y) + subroutine l_mvect_gthzv_x(i,n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_lpk_) :: y(:) class(psb_l_multivect_type) :: x @@ -1003,22 +1009,24 @@ contains if (allocated(x%v)) & & call x%v%gth(i,n,idx,y) - end subroutine l_vect_gthzv_x + end subroutine l_mvect_gthzv_x - subroutine l_vect_sctb(n,idx,x,beta,y) + subroutine l_mvect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + 
integer(psb_ipk_) :: idx(:) integer(psb_lpk_) :: beta, x(:) class(psb_l_multivect_type) :: y if (allocated(y%v)) & & call y%v%sct(n,idx,x,beta) - end subroutine l_vect_sctb + end subroutine l_mvect_sctb - subroutine l_vect_sctb_x(i,n,idx,x,beta,y) + subroutine l_mvect_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx integer(psb_lpk_) :: beta, x(:) class(psb_l_multivect_type) :: y @@ -1026,9 +1034,9 @@ contains if (allocated(y%v)) & & call y%v%sct(i,n,idx,x,beta) - end subroutine l_vect_sctb_x + end subroutine l_mvect_sctb_x - subroutine l_vect_free(x, info) + subroutine l_mvect_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1041,9 +1049,9 @@ contains if (info == 0) deallocate(x%v,stat=info) end if - end subroutine l_vect_free + end subroutine l_mvect_free - subroutine l_vect_ins(n,irl,val,x,info) + subroutine l_mvect_ins(n,irl,val,x,info) use psi_serial_mod implicit none class(psb_l_multivect_type), intent(inout) :: x @@ -1062,10 +1070,10 @@ contains dupl = x%get_dupl() call x%v%ins(n,irl,val,dupl,info) - end subroutine l_vect_ins + end subroutine l_mvect_ins - subroutine l_vect_cnv(x,mold) + subroutine l_mvect_cnv(x,mold) class(psb_l_multivect_type), intent(inout) :: x class(psb_l_base_multivect_type), intent(in), optional :: mold class(psb_l_base_multivect_type), allocatable :: tmp @@ -1082,7 +1090,7 @@ contains call x%v%free(info) end if call move_alloc(tmp,x%v) - end subroutine l_vect_cnv + end subroutine l_mvect_cnv end module psb_l_multivect_mod diff --git a/base/modules/serial/psb_s_base_mat_mod.F90 b/base/modules/serial/psb_s_base_mat_mod.F90 index 92bda7d8..27803e0f 100644 --- a/base/modules/serial/psb_s_base_mat_mod.F90 +++ b/base/modules/serial/psb_s_base_mat_mod.F90 @@ -416,7 +416,7 @@ module psb_s_base_mat_mod ! ! This is COO specific ! 
-#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: iset_nzeros => ls_coo_iset_nzeros procedure, pass(a) :: lset_nzeros => ls_coo_lset_nzeros generic, public :: set_nzeros => iset_nzeros, lset_nzeros @@ -439,7 +439,7 @@ module psb_s_base_mat_mod private :: ls_coo_get_nzeros, ls_coo_iset_nzeros, & & ls_coo_get_fmt, ls_coo_free, ls_coo_sizeof, & & ls_coo_transp_1mat, ls_coo_transc_1mat -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) private :: ls_coo_lset_nzeros #endif @@ -3499,7 +3499,7 @@ module psb_s_base_mat_mod end subroutine psb_ls_coo_clean_negidx end interface -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! !> Funtion: coo_clean_negidx_inner !! \brief Take out any entries with negative row or column index @@ -4323,7 +4323,7 @@ contains end subroutine ls_coo_iset_nzeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine ls_coo_lset_nzeros(nz,a) implicit none integer(psb_lpk_), intent(in) :: nz diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index fccd846b..702a1af3 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -155,6 +155,9 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: upd_xyz => s_base_upd_xyz + procedure, pass(w) :: xyzw => s_base_xyzw + ! ! Vector by vector multiplication. Need all variants ! 
to handle multiple requirements from preconditioners @@ -280,7 +283,7 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if -#if defined (OPENMP) +#if defined (PSB_OPENMP) !$omp parallel do private(i) do i = 1, size(this) x%v(i) = this(i) @@ -574,8 +577,8 @@ contains info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) - if (info == 0) call x%free_buffer(info) - if (info == 0) call x%free_comid(info) + if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info) + if ((info == 0).and.allocated(x%comid)) call x%free_comid(info) if (info /= 0) call & & psb_errpush(psb_err_alloc_dealloc_,'vect_free') @@ -845,7 +848,7 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val @@ -883,7 +886,7 @@ contains if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val(i-first_+1) @@ -932,7 +935,7 @@ contains if (allocated(x%v)) then if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i=1, size(x%v) x%v(i) = abs(x%v(i)) @@ -1025,7 +1028,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine s_base_axpby_v(m,alpha, x, beta, y, info) @@ -1054,7 +1057,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1085,7 +1088,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! 
\param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine s_base_axpby_a(m,alpha, x, beta, y, info) @@ -1133,6 +1136,64 @@ contains end subroutine s_base_axpby_a2 + ! + ! UPD_XYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_upd_xyz + !! \memberof psb_s_base_vect_type + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*z + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! + subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_base_vect_type), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(alpha/=szero)) call x%sync() + if (y%is_dev().and.(beta/=szero)) call y%sync() + if (z%is_dev().and.(delta/=szero)) call z%sync() + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + + end subroutine s_base_upd_xyz + + subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_base_vect_type), intent(inout) :: z + class(psb_s_base_vect_type), intent(inout) :: w + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=szero)) call x%sync() + if (y%is_dev().and.(b/=szero)) call y%sync() 
+ if (z%is_dev().and.(d/=szero)) call z%sync() + if (w%is_dev().and.(f/=szero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine s_base_xyzw + ! ! Multiple variants of two operations: @@ -1681,7 +1742,7 @@ contains integer(psb_ipk_) :: i if (allocated(x%v)) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i=1,size(x%v) x%v(i) = alpha*x%v(i) @@ -1725,7 +1786,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = szero !$omp parallel do private(i) reduction(max: res) do i=1, n @@ -1749,7 +1810,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = HUGE(sone) !$omp parallel do private(i) reduction(min: res) do i=1, n @@ -1830,7 +1891,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res=szero !$omp parallel do private(i) reduction(+: res) do i= 1, size(x%v) @@ -1857,7 +1918,8 @@ contains subroutine s_base_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: alpha, beta, y(:) class(psb_s_base_vect_type) :: x @@ -1877,7 +1939,8 @@ contains subroutine s_base_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx real(psb_spk_) :: y(:) class(psb_s_base_vect_type) :: x @@ -1893,7 +1956,8 @@ contains subroutine s_base_gthzbuf(i,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx class(psb_s_base_vect_type) :: x @@ -1956,7 +2020,8 @@ contains subroutine s_base_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, 
idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: y(:) class(psb_s_base_vect_type) :: x @@ -1981,7 +2046,8 @@ contains subroutine s_base_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:) class(psb_s_base_vect_type) :: y @@ -1994,7 +2060,8 @@ contains subroutine s_base_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta, x(:) class(psb_s_base_vect_type) :: y @@ -2008,7 +2075,8 @@ contains subroutine s_base_sctb_buf(i,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta class(psb_s_base_vect_type) :: y @@ -2137,7 +2205,7 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) n = size(x) !$omp parallel do private(i) do i = 1, n @@ -3365,10 +3433,11 @@ contains subroutine s_base_mlv_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: alpha, beta, y(:) class(psb_s_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3390,7 +3459,8 @@ contains subroutine s_base_mlv_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_spk_) :: y(:) class(psb_s_base_multivect_type) :: x @@ -3412,10 +3482,11 @@ contains subroutine s_base_mlv_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: 
y(:) class(psb_s_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3438,10 +3509,11 @@ contains subroutine s_base_mlv_gthzm(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: y(:,:) class(psb_s_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3459,7 +3531,8 @@ contains subroutine s_base_mlv_gthzbuf(i,ixb,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, ixb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, ixb class(psb_i_base_vect_type) :: idx class(psb_s_base_multivect_type) :: x integer(psb_ipk_) :: nc @@ -3491,10 +3564,11 @@ contains subroutine s_base_mlv_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:) class(psb_s_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = psb_size(y%v,2_psb_ipk_) @@ -3506,10 +3580,11 @@ contains subroutine s_base_mlv_sctbr2(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:,:) class(psb_s_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = y%get_ncols() @@ -3521,7 +3596,8 @@ contains subroutine s_base_mlv_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real( psb_spk_) :: beta, x(:) class(psb_s_base_multivect_type) :: y @@ -3533,7 +3609,8 @@ contains subroutine s_base_mlv_sctb_buf(i,iyb,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, iyb, 
n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, iyb class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta class(psb_s_base_multivect_type) :: y diff --git a/base/modules/serial/psb_s_csc_mat_mod.f90 b/base/modules/serial/psb_s_csc_mat_mod.f90 index ccd4f445..db874600 100644 --- a/base/modules/serial/psb_s_csc_mat_mod.f90 +++ b/base/modules/serial/psb_s_csc_mat_mod.f90 @@ -87,7 +87,7 @@ module psb_s_csc_mat_mod procedure, pass(a) :: mv_from_coo => psb_s_mv_csc_from_coo procedure, pass(a) :: mv_to_fmt => psb_s_mv_csc_to_fmt procedure, pass(a) :: mv_from_fmt => psb_s_mv_csc_from_fmt - procedure, pass(a) :: clean_zeros => psb_s_csc_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_s_csc_clean_zeros procedure, pass(a) :: csput_a => psb_s_csc_csput_a procedure, pass(a) :: get_diag => psb_s_csc_get_diag procedure, pass(a) :: csgetptn => psb_s_csc_csgetptn @@ -143,7 +143,7 @@ module psb_s_csc_mat_mod procedure, pass(a) :: mv_from_coo => psb_ls_mv_csc_from_coo procedure, pass(a) :: mv_to_fmt => psb_ls_mv_csc_to_fmt procedure, pass(a) :: mv_from_fmt => psb_ls_mv_csc_from_fmt - procedure, pass(a) :: clean_zeros => psb_ls_csc_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_ls_csc_clean_zeros procedure, pass(a) :: csput_a => psb_ls_csc_csput_a procedure, pass(a) :: get_diag => psb_ls_csc_get_diag procedure, pass(a) :: csgetptn => psb_ls_csc_csgetptn @@ -313,18 +313,18 @@ module psb_s_csc_mat_mod end subroutine psb_s_mv_csc_from_fmt end interface - ! - !> - !! \memberof psb_s_csc_sparse_mat - !! \see psb_s_base_mat_mod::psb_s_base_clean_zeros - ! - interface - subroutine psb_s_csc_clean_zeros(a, info) - import - class(psb_s_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csc_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_s_csc_sparse_mat +!!$ !! \see psb_s_base_mat_mod::psb_s_base_clean_zeros +!!$ ! 
+!!$ interface +!!$ subroutine psb_s_csc_clean_zeros(a, info) +!!$ import +!!$ class(psb_s_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_csc_clean_zeros +!!$ end interface !> \memberof psb_s_csc_sparse_mat @@ -717,18 +717,18 @@ module psb_s_csc_mat_mod end subroutine psb_ls_mv_csc_from_fmt end interface - ! - !> - !! \memberof psb_ls_csc_sparse_mat - !! \see psb_ls_base_mat_mod::psb_ls_base_clean_zeros - ! - interface - subroutine psb_ls_csc_clean_zeros(a, info) - import - class(psb_ls_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_ls_csc_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_ls_csc_sparse_mat +!!$ !! \see psb_ls_base_mat_mod::psb_ls_base_clean_zeros +!!$ ! +!!$ interface +!!$ subroutine psb_ls_csc_clean_zeros(a, info) +!!$ import +!!$ class(psb_ls_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_ls_csc_clean_zeros +!!$ end interface !> \memberof psb_ls_csc_sparse_mat !! \see psb_ls_base_mat_mod::psb_ls_base_cp_from diff --git a/base/modules/serial/psb_s_csr_mat_mod.f90 b/base/modules/serial/psb_s_csr_mat_mod.f90 index 6b4c51c7..356e5b32 100644 --- a/base/modules/serial/psb_s_csr_mat_mod.f90 +++ b/base/modules/serial/psb_s_csr_mat_mod.f90 @@ -91,7 +91,7 @@ module psb_s_csr_mat_mod procedure, pass(a) :: mv_from_coo => psb_s_mv_csr_from_coo procedure, pass(a) :: mv_to_fmt => psb_s_mv_csr_to_fmt procedure, pass(a) :: mv_from_fmt => psb_s_mv_csr_from_fmt - procedure, pass(a) :: clean_zeros => psb_s_csr_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_s_csr_clean_zeros procedure, pass(a) :: csput_a => psb_s_csr_csput_a procedure, pass(a) :: get_diag => psb_s_csr_get_diag procedure, pass(a) :: csgetptn => psb_s_csr_csgetptn @@ -261,18 +261,18 @@ module psb_s_csr_mat_mod end subroutine psb_s_csr_triu end interface - ! - !> - !! \memberof psb_s_csr_sparse_mat - !! 
\see psb_s_base_mat_mod::psb_s_base_clean_zeros - ! - interface - subroutine psb_s_csr_clean_zeros(a, info) - import - class(psb_s_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csr_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_s_csr_sparse_mat +!!$ !! \see psb_s_base_mat_mod::psb_s_base_clean_zeros +!!$ ! +!!$ interface +!!$ subroutine psb_s_csr_clean_zeros(a, info) +!!$ import +!!$ class(psb_s_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_csr_clean_zeros +!!$ end interface !> \memberof psb_s_csr_sparse_mat !! \see psb_s_base_mat_mod::psb_s_base_cp_to_coo @@ -579,7 +579,111 @@ module psb_s_csr_mat_mod end subroutine psb_s_csr_scals end interface - !> \namespace psb_base_mod \class psb_ls_csr_sparse_mat + + type, extends(psb_s_csr_sparse_mat) :: psb_s_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => s_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_s_ecsr_csmm + procedure, pass(a) :: csmv => psb_s_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_s_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_s_ecsr_cmp_nerwp + procedure, pass(a) :: free => s_ecsr_free + procedure, pass(a) :: mold => psb_s_ecsr_mold + + end type psb_s_ecsr_sparse_mat + !> \memberof psb_s_ecsr_sparse_mat + !! 
\see psb_s_base_mat_mod::psb_s_base_csmv + interface + subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_s_ecsr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ecsr_csmv + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo + interface + subroutine psb_s_ecsr_cmp_nerwp(a,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ecsr_cmp_nerwp + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo + interface + subroutine psb_s_cp_ecsr_from_coo(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ecsr_from_coo + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_fmt + interface + subroutine psb_s_cp_ecsr_from_fmt(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ecsr_from_fmt + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_mv_from_coo + interface + subroutine psb_s_mv_ecsr_from_coo(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ecsr_from_coo + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! 
\see psb_s_base_mat_mod::psb_s_base_mv_from_fmt + interface + subroutine psb_s_mv_ecsr_from_fmt(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ecsr_from_fmt + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_s_ecsr_mold(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_ls_csr_sparse_mat !! \extends psb_ls_base_mat_mod::psb_ls_base_sparse_mat !! !! psb_ls_csr_sparse_mat type and the related methods. @@ -612,7 +716,7 @@ module psb_s_csr_mat_mod procedure, pass(a) :: mv_from_coo => psb_ls_mv_csr_from_coo procedure, pass(a) :: mv_to_fmt => psb_ls_mv_csr_to_fmt procedure, pass(a) :: mv_from_fmt => psb_ls_mv_csr_from_fmt - procedure, pass(a) :: clean_zeros => psb_ls_csr_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_ls_csr_clean_zeros procedure, pass(a) :: csput_a => psb_ls_csr_csput_a procedure, pass(a) :: get_diag => psb_ls_csr_get_diag procedure, pass(a) :: csgetptn => psb_ls_csr_csgetptn @@ -791,17 +895,17 @@ module psb_s_csr_mat_mod end interface ! - !> - !! \memberof psb_ls_csr_sparse_mat - !! \see psb_ls_base_mat_mod::psb_ls_base_clean_zeros - ! - interface - subroutine psb_ls_csr_clean_zeros(a, info) - import - class(psb_ls_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_ls_csr_clean_zeros - end interface +!!$ !> +!!$ !! \memberof psb_ls_csr_sparse_mat +!!$ !! \see psb_ls_base_mat_mod::psb_ls_base_clean_zeros +!!$ ! 
+!!$ interface +!!$ subroutine psb_ls_csr_clean_zeros(a, info) +!!$ import +!!$ class(psb_ls_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_ls_csr_clean_zeros +!!$ end interface @@ -1178,6 +1282,26 @@ contains + function s_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function s_ecsr_get_fmt + + subroutine s_ecsr_free(a) + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_s_csr_sparse_mat%free() + + return + end subroutine s_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index 43f1c619..e342ed8c 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -71,7 +71,7 @@ ! ! We are also introducing the type psb_lsspmat_type. ! The basic difference with psb_sspmat_type is in the type -! of the indices, which are PSB_LPK_ so that the entries +! of the indices, which are PSB_LPK_ so that the entries ! are guaranteed to be able to contain global indices. ! This type only supports data handling and preprocessing, it is ! not supposed to be used for computations. 
@@ -79,12 +79,14 @@ module psb_s_mat_mod use psb_s_base_mat_mod - use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat + use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat,& + & psb_s_ecsr_sparse_mat use psb_s_csc_mat_mod, only : psb_s_csc_sparse_mat, psb_ls_csc_sparse_mat type :: psb_sspmat_type class(psb_s_base_sparse_mat), allocatable :: a + class(psb_s_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ls_coo_sparse_mat), allocatable :: rmta @@ -143,7 +145,7 @@ module psb_s_mat_mod procedure, pass(a) :: csgetrow => psb_s_csgetrow procedure, pass(a) :: csgetblk => psb_s_csgetblk generic, public :: csget => csgetptn, csgetrow, csgetblk -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: lcsgetptn => psb_s_lcsgetptn procedure, pass(a) :: lcsgetrow => psb_s_lcsgetrow generic, public :: csget => lcsgetptn, lcsgetrow @@ -202,6 +204,8 @@ module psb_s_mat_mod procedure, pass(a) :: cscnv_ip => psb_s_cscnv_ip procedure, pass(a) :: cscnv_base => psb_s_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_s_split_nd + procedure, pass(a) :: merge_nd => psb_s_merge_nd procedure, pass(a) :: clone => psb_sspmat_clone procedure, pass(a) :: move_alloc => psb_sspmat_type_move ! @@ -307,7 +311,7 @@ module psb_s_mat_mod ! 
Setters procedure, pass(a) :: set_lnrows => psb_ls_set_lnrows procedure, pass(a) :: set_lncols => psb_ls_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: set_inrows => psb_ls_set_inrows procedure, pass(a) :: set_incols => psb_ls_set_incols generic, public :: set_nrows => set_inrows, set_lnrows @@ -342,7 +346,7 @@ module psb_s_mat_mod procedure, pass(a) :: csgetrow => psb_ls_csgetrow procedure, pass(a) :: csgetblk => psb_ls_csgetblk generic, public :: csget => csgetptn, csgetrow, csgetblk -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) !!$ procedure, pass(a) :: icsgetptn => psb_ls_icsgetptn !!$ procedure, pass(a) :: icsgetrow => psb_ls_icsgetrow !!$ generic, public :: csget => icsgetptn, icsgetrow @@ -840,6 +844,24 @@ module psb_s_mat_mod ! ! + interface + subroutine psb_s_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_split_nd + end interface + + interface + subroutine psb_s_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_merge_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 
3 versions: copying to target @@ -859,7 +881,6 @@ module psb_s_mat_mod end subroutine psb_s_cscnv end interface - interface subroutine psb_s_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat @@ -871,7 +892,6 @@ module psb_s_mat_mod end subroutine psb_s_cscnv_ip end interface - interface subroutine psb_s_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat @@ -1250,7 +1270,7 @@ module psb_s_mat_mod class(psb_lsspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: m end subroutine psb_ls_set_lnrows -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ls_set_inrows(m,a) import :: psb_ipk_, psb_lpk_, psb_lsspmat_type class(psb_lsspmat_type), intent(inout) :: a @@ -1265,7 +1285,7 @@ module psb_s_mat_mod class(psb_lsspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: n end subroutine psb_ls_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ls_set_incols(n,a) import :: psb_ipk_, psb_lpk_, psb_lsspmat_type class(psb_lsspmat_type), intent(inout) :: a @@ -2390,7 +2410,7 @@ contains end subroutine psb_s_clean_zeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_s_lcsgetptn(imin,imax,a,nz,ia,ja,info,& & jmin,jmax,iren,append,nzin,rscale,cscale) implicit none @@ -2909,7 +2929,7 @@ contains end subroutine psb_ls_clean_zeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) !!$ subroutine psb_ls_icsgetptn(imin,imax,a,nz,ia,ja,info,& !!$ & jmin,jmax,iren,append,nzin,rscale,cscale) !!$ implicit none diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index 7a54ecf0..3e27495a 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_s_vect_mod procedure, pass(z) :: 
axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: upd_xyz => s_vect_upd_xyz + procedure, pass(z) :: xyzw => s_vect_xyzw procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_a => s_vect_mlt_a procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2 @@ -496,7 +498,8 @@ contains subroutine s_vect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: alpha, beta, y(:) class(psb_s_vect_type) :: x @@ -507,7 +510,8 @@ contains subroutine s_vect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: y(:) class(psb_s_vect_type) :: x @@ -518,7 +522,8 @@ contains subroutine s_vect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:) class(psb_s_vect_type) :: y @@ -710,7 +715,7 @@ contains res = szero if (allocated(x%v)) & - & res = x%v%dot(n,y) + & res = x%v%dot_a(n,y) end function s_vect_dot_a @@ -778,6 +783,38 @@ contains end subroutine s_vect_axpby_a2 + subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_vect_type), intent(inout) :: x + class(psb_s_vect_type), intent(inout) :: y + class(psb_s_vect_type), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + + end subroutine s_vect_upd_xyz + + subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_vect_type), intent(inout) :: x + class(psb_s_vect_type), intent(inout) :: y + 
class(psb_s_vect_type), intent(inout) :: z + class(psb_s_vect_type), intent(inout) :: w + real(psb_spk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine s_vect_xyzw + + subroutine s_vect_mlt_v(x, y, info) use psi_serial_mod implicit none @@ -1141,7 +1178,7 @@ contains end if end function s_vect_nrm2_weight - + function s_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none @@ -1345,56 +1382,56 @@ module psb_s_multivect_mod integer(psb_ipk_) :: dupl = psb_dupl_add_ real(psb_spk_), allocatable :: rmtv(:,:) contains - procedure, pass(x) :: get_nrows => s_vect_get_nrows - procedure, pass(x) :: get_ncols => s_vect_get_ncols - procedure, pass(x) :: sizeof => s_vect_sizeof - procedure, pass(x) :: get_fmt => s_vect_get_fmt + procedure, pass(x) :: get_nrows => s_mvect_get_nrows + procedure, pass(x) :: get_ncols => s_mvect_get_ncols + procedure, pass(x) :: sizeof => s_mvect_sizeof + procedure, pass(x) :: get_fmt => s_mvect_get_fmt procedure, pass(x) :: is_remote_build => s_mvect_is_remote_build procedure, pass(x) :: set_remote_build => s_mvect_set_remote_build procedure, pass(x) :: get_dupl => s_mvect_get_dupl procedure, pass(x) :: set_dupl => s_mvect_set_dupl - procedure, pass(x) :: all => s_vect_all - procedure, pass(x) :: reall => s_vect_reall - procedure, pass(x) :: zero => s_vect_zero - procedure, pass(x) :: asb => s_vect_asb - procedure, pass(x) :: sync => s_vect_sync - procedure, pass(x) :: free => s_vect_free - procedure, pass(x) :: ins => s_vect_ins - procedure, pass(x) :: bld_x => s_vect_bld_x - procedure, pass(x) :: bld_n => s_vect_bld_n + procedure, pass(x) :: all => s_mvect_all + procedure, pass(x) :: reall => s_mvect_reall + procedure, pass(x) :: zero => s_mvect_zero + procedure, pass(x) :: asb => s_mvect_asb + procedure, pass(x) :: sync => s_mvect_sync + procedure, pass(x) :: free => s_mvect_free + procedure, 
pass(x) :: ins => s_mvect_ins + procedure, pass(x) :: bld_x => s_mvect_bld_x + procedure, pass(x) :: bld_n => s_mvect_bld_n generic, public :: bld => bld_x, bld_n - procedure, pass(x) :: get_vect => s_vect_get_vect - procedure, pass(x) :: cnv => s_vect_cnv - procedure, pass(x) :: set_scal => s_vect_set_scal - procedure, pass(x) :: set_vect => s_vect_set_vect + procedure, pass(x) :: get_vect => s_mvect_get_vect + procedure, pass(x) :: cnv => s_mvect_cnv + procedure, pass(x) :: set_scal => s_mvect_set_scal + procedure, pass(x) :: set_vect => s_mvect_set_vect generic, public :: set => set_vect, set_scal - procedure, pass(x) :: clone => s_vect_clone - procedure, pass(x) :: gthab => s_vect_gthab - procedure, pass(x) :: gthzv => s_vect_gthzv - procedure, pass(x) :: gthzv_x => s_vect_gthzv_x + procedure, pass(x) :: clone => s_mvect_clone + procedure, pass(x) :: gthab => s_mvect_gthab + procedure, pass(x) :: gthzv => s_mvect_gthzv + procedure, pass(x) :: gthzv_x => s_mvect_gthzv_x generic, public :: gth => gthab, gthzv - procedure, pass(y) :: sctb => s_vect_sctb - procedure, pass(y) :: sctb_x => s_vect_sctb_x + procedure, pass(y) :: sctb => s_mvect_sctb + procedure, pass(y) :: sctb_x => s_mvect_sctb_x generic, public :: sct => sctb, sctb_x -!!$ procedure, pass(x) :: dot_v => s_vect_dot_v -!!$ procedure, pass(x) :: dot_a => s_vect_dot_a +!!$ procedure, pass(x) :: dot_v => s_mvect_dot_v +!!$ procedure, pass(x) :: dot_a => s_mvect_dot_a !!$ generic, public :: dot => dot_v, dot_a -!!$ procedure, pass(y) :: axpby_v => s_vect_axpby_v -!!$ procedure, pass(y) :: axpby_a => s_vect_axpby_a +!!$ procedure, pass(y) :: axpby_v => s_mvect_axpby_v +!!$ procedure, pass(y) :: axpby_a => s_mvect_axpby_a !!$ generic, public :: axpby => axpby_v, axpby_a -!!$ procedure, pass(y) :: mlt_v => s_vect_mlt_v -!!$ procedure, pass(y) :: mlt_a => s_vect_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => s_vect_mlt_v_2 -!!$ procedure, pass(z) :: mlt_va => 
s_vect_mlt_va -!!$ procedure, pass(z) :: mlt_av => s_vect_mlt_av +!!$ procedure, pass(y) :: mlt_v => s_mvect_mlt_v +!!$ procedure, pass(y) :: mlt_a => s_mvect_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => s_mvect_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => s_mvect_mlt_v_2 +!!$ procedure, pass(z) :: mlt_va => s_mvect_mlt_va +!!$ procedure, pass(z) :: mlt_av => s_mvect_mlt_av !!$ generic, public :: mlt => mlt_v, mlt_a, mlt_a_2,& !!$ & mlt_v_2, mlt_av, mlt_va -!!$ procedure, pass(x) :: scal => s_vect_scal -!!$ procedure, pass(x) :: nrm2 => s_vect_nrm2 -!!$ procedure, pass(x) :: amax => s_vect_amax -!!$ procedure, pass(x) :: asum => s_vect_asum +!!$ procedure, pass(x) :: scal => s_mvect_scal +!!$ procedure, pass(x) :: nrm2 => s_mvect_nrm2 +!!$ procedure, pass(x) :: amax => s_mvect_amax +!!$ procedure, pass(x) :: asum => s_mvect_asum end type psb_s_multivect_type public :: psb_s_multivect, psb_s_multivect_type,& @@ -1495,7 +1532,7 @@ contains end function psb_s_get_base_multivect_default - subroutine s_vect_clone(x,y,info) + subroutine s_mvect_clone(x,y,info) implicit none class(psb_s_multivect_type), intent(inout) :: x class(psb_s_multivect_type), intent(inout) :: y @@ -1504,11 +1541,11 @@ contains info = psb_success_ call y%free(info) if ((info==0).and.allocated(x%v)) then - call y%bld(x%get_vect(),mold=x%v) + call y%bld_x(x%get_vect(),mold=x%v) end if - end subroutine s_vect_clone + end subroutine s_mvect_clone - subroutine s_vect_bld_x(x,invect,mold) + subroutine s_mvect_bld_x(x,invect,mold) real(psb_spk_), intent(in) :: invect(:,:) class(psb_s_multivect_type), intent(out) :: x class(psb_s_base_multivect_type), intent(in), optional :: mold @@ -1524,10 +1561,10 @@ contains if (info == psb_success_) call x%v%bld(invect) - end subroutine s_vect_bld_x + end subroutine s_mvect_bld_x - subroutine s_vect_bld_n(x,m,n,mold) + subroutine s_mvect_bld_n(x,m,n,mold) integer(psb_ipk_), intent(in) :: m,n class(psb_s_multivect_type), intent(out) :: x 
class(psb_s_base_multivect_type), intent(in), optional :: mold @@ -1541,9 +1578,9 @@ contains endif if (info == psb_success_) call x%v%bld(m,n) - end subroutine s_vect_bld_n + end subroutine s_mvect_bld_n - function s_vect_get_vect(x) result(res) + function s_mvect_get_vect(x) result(res) class(psb_s_multivect_type), intent(inout) :: x real(psb_spk_), allocatable :: res(:,:) integer(psb_ipk_) :: info @@ -1551,25 +1588,25 @@ contains if (allocated(x%v)) then res = x%v%get_vect() end if - end function s_vect_get_vect + end function s_mvect_get_vect - subroutine s_vect_set_scal(x,val) + subroutine s_mvect_set_scal(x,val) class(psb_s_multivect_type), intent(inout) :: x real(psb_spk_), intent(in) :: val integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine s_vect_set_scal + end subroutine s_mvect_set_scal - subroutine s_vect_set_vect(x,val) + subroutine s_mvect_set_vect(x,val) class(psb_s_multivect_type), intent(inout) :: x real(psb_spk_), intent(in) :: val(:,:) integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine s_vect_set_vect + end subroutine s_mvect_set_vect function constructor(x) result(this) @@ -1577,7 +1614,7 @@ contains type(psb_s_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(x) + call this%bld_x(x) call this%asb(size(x,dim=1,kind=psb_ipk_),size(x,dim=2,kind=psb_ipk_),info) end function constructor @@ -1588,44 +1625,44 @@ contains type(psb_s_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(m,n) + call this%bld_n(m,n) call this%asb(m,n,info) end function size_const - function s_vect_get_nrows(x) result(res) + function s_mvect_get_nrows(x) result(res) implicit none class(psb_s_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_nrows() - end function s_vect_get_nrows + end function s_mvect_get_nrows - function s_vect_get_ncols(x) result(res) + function s_mvect_get_ncols(x) result(res) implicit none 
class(psb_s_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_ncols() - end function s_vect_get_ncols + end function s_mvect_get_ncols - function s_vect_sizeof(x) result(res) + function s_mvect_sizeof(x) result(res) implicit none class(psb_s_multivect_type), intent(in) :: x integer(psb_epk_) :: res res = 0 if (allocated(x%v)) res = x%v%sizeof() - end function s_vect_sizeof + end function s_mvect_sizeof - function s_vect_get_fmt(x) result(res) + function s_mvect_get_fmt(x) result(res) implicit none class(psb_s_multivect_type), intent(in) :: x character(len=5) :: res res = 'NULL' if (allocated(x%v)) res = x%v%get_fmt() - end function s_vect_get_fmt + end function s_mvect_get_fmt - subroutine s_vect_all(m,n, x, info, mold) + subroutine s_mvect_all(m,n, x, info, mold) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -1644,9 +1681,9 @@ contains info = psb_err_alloc_dealloc_ end if - end subroutine s_vect_all + end subroutine s_mvect_all - subroutine s_vect_reall(m,n, x, info) + subroutine s_mvect_reall(m,n, x, info) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -1659,18 +1696,18 @@ contains if (info == 0) & & call x%asb(m,n,info) - end subroutine s_vect_reall + end subroutine s_mvect_reall - subroutine s_vect_zero(x) + subroutine s_mvect_zero(x) use psi_serial_mod implicit none class(psb_s_multivect_type), intent(inout) :: x if (allocated(x%v)) call x%v%zero() - end subroutine s_vect_zero + end subroutine s_mvect_zero - subroutine s_vect_asb(m,n, x, info) + subroutine s_mvect_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1681,42 +1718,45 @@ contains if (allocated(x%v)) & & call x%v%asb(m,n,info) - end subroutine s_vect_asb + end subroutine s_mvect_asb - subroutine s_vect_sync(x) + subroutine s_mvect_sync(x) implicit none class(psb_s_multivect_type), intent(inout) :: x if (allocated(x%v)) & & call x%v%sync() - end subroutine s_vect_sync + end subroutine s_mvect_sync - subroutine 
s_vect_gthab(n,idx,alpha,x,beta,y) + subroutine s_mvect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: alpha, beta, y(:) class(psb_s_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,alpha,beta,y) - end subroutine s_vect_gthab + end subroutine s_mvect_gthab - subroutine s_vect_gthzv(n,idx,x,y) + subroutine s_mvect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: y(:) class(psb_s_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,y) - end subroutine s_vect_gthzv + end subroutine s_mvect_gthzv - subroutine s_vect_gthzv_x(i,n,idx,x,y) + subroutine s_mvect_gthzv_x(i,n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_spk_) :: y(:) class(psb_s_multivect_type) :: x @@ -1724,22 +1764,24 @@ contains if (allocated(x%v)) & & call x%v%gth(i,n,idx,y) - end subroutine s_vect_gthzv_x + end subroutine s_mvect_gthzv_x - subroutine s_vect_sctb(n,idx,x,beta,y) + subroutine s_mvect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:) class(psb_s_multivect_type) :: y if (allocated(y%v)) & & call y%v%sct(n,idx,x,beta) - end subroutine s_vect_sctb + end subroutine s_mvect_sctb - subroutine s_vect_sctb_x(i,n,idx,x,beta,y) + subroutine s_mvect_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta, x(:) class(psb_s_multivect_type) :: y @@ -1747,9 +1789,9 @@ contains if (allocated(y%v)) & & call y%v%sct(i,n,idx,x,beta) - end subroutine s_vect_sctb_x + end subroutine s_mvect_sctb_x - subroutine s_vect_free(x, info) + subroutine s_mvect_free(x, 
info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1762,9 +1804,9 @@ contains if (info == 0) deallocate(x%v,stat=info) end if - end subroutine s_vect_free + end subroutine s_mvect_free - subroutine s_vect_ins(n,irl,val,x,info) + subroutine s_mvect_ins(n,irl,val,x,info) use psi_serial_mod implicit none class(psb_s_multivect_type), intent(inout) :: x @@ -1783,10 +1825,10 @@ contains dupl = x%get_dupl() call x%v%ins(n,irl,val,dupl,info) - end subroutine s_vect_ins + end subroutine s_mvect_ins - subroutine s_vect_cnv(x,mold) + subroutine s_mvect_cnv(x,mold) class(psb_s_multivect_type), intent(inout) :: x class(psb_s_base_multivect_type), intent(in), optional :: mold class(psb_s_base_multivect_type), allocatable :: tmp @@ -1803,10 +1845,10 @@ contains call x%v%free(info) end if call move_alloc(tmp,x%v) - end subroutine s_vect_cnv + end subroutine s_mvect_cnv -!!$ function s_vect_dot_v(n,x,y) result(res) +!!$ function s_mvect_dot_v(n,x,y) result(res) !!$ implicit none !!$ class(psb_s_multivect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(in) :: n @@ -1816,9 +1858,9 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) & !!$ & res = x%v%dot(n,y%v) !!$ -!!$ end function s_vect_dot_v +!!$ end function s_mvect_dot_v !!$ -!!$ function s_vect_dot_a(n,x,y) result(res) +!!$ function s_mvect_dot_a(n,x,y) result(res) !!$ implicit none !!$ class(psb_s_multivect_type), intent(inout) :: x !!$ real(psb_spk_), intent(in) :: y(:) @@ -1829,9 +1871,9 @@ contains !!$ if (allocated(x%v)) & !!$ & res = x%v%dot(n,y) !!$ -!!$ end function s_vect_dot_a +!!$ end function s_mvect_dot_a !!$ -!!$ subroutine s_vect_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine s_mvect_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m @@ -1843,12 +1885,12 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) then !!$ call y%v%axpby(m,alpha,x%v,beta,info) !!$ else -!!$ info = psb_err_invalid_vect_state_ +!!$ info = 
psb_err_invalid_mvect_state_ !!$ end if !!$ -!!$ end subroutine s_vect_axpby_v +!!$ end subroutine s_mvect_axpby_v !!$ -!!$ subroutine s_vect_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine s_mvect_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m @@ -1860,10 +1902,10 @@ contains !!$ if (allocated(y%v)) & !!$ & call y%v%axpby(m,alpha,x,beta,info) !!$ -!!$ end subroutine s_vect_axpby_a +!!$ end subroutine s_mvect_axpby_a !!$ !!$ -!!$ subroutine s_vect_mlt_v(x, y, info) +!!$ subroutine s_mvect_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_s_multivect_type), intent(inout) :: x @@ -1875,9 +1917,9 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) & !!$ & call y%v%mlt(x%v,info) !!$ -!!$ end subroutine s_vect_mlt_v +!!$ end subroutine s_mvect_mlt_v !!$ -!!$ subroutine s_vect_mlt_a(x, y, info) +!!$ subroutine s_mvect_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: x(:) @@ -1890,10 +1932,10 @@ contains !!$ if (allocated(y%v)) & !!$ & call y%v%mlt(x,info) !!$ -!!$ end subroutine s_vect_mlt_a +!!$ end subroutine s_mvect_mlt_a !!$ !!$ -!!$ subroutine s_vect_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine s_mvect_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta @@ -1907,9 +1949,9 @@ contains !!$ if (allocated(z%v)) & !!$ & call z%v%mlt(alpha,x,y,beta,info) !!$ -!!$ end subroutine s_vect_mlt_a_2 +!!$ end subroutine s_mvect_mlt_a_2 !!$ -!!$ subroutine s_vect_mlt_v_2(alpha,x,y,beta,z,info,conjgx,conjgy) +!!$ subroutine s_mvect_mlt_v_2(alpha,x,y,beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta @@ -1926,9 +1968,9 @@ contains !!$ & allocated(z%v)) & !!$ & call z%v%mlt(alpha,x%v,y%v,beta,info,conjgx,conjgy) !!$ -!!$ end subroutine s_vect_mlt_v_2 +!!$ end subroutine s_mvect_mlt_v_2 !!$ -!!$ subroutine 
s_vect_mlt_av(alpha,x,y,beta,z,info) +!!$ subroutine s_mvect_mlt_av(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta @@ -1942,9 +1984,9 @@ contains !!$ if (allocated(z%v).and.allocated(y%v)) & !!$ & call z%v%mlt(alpha,x,y%v,beta,info) !!$ -!!$ end subroutine s_vect_mlt_av +!!$ end subroutine s_mvect_mlt_av !!$ -!!$ subroutine s_vect_mlt_va(alpha,x,y,beta,z,info) +!!$ subroutine s_mvect_mlt_va(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta @@ -1959,9 +2001,9 @@ contains !!$ if (allocated(z%v).and.allocated(x%v)) & !!$ & call z%v%mlt(alpha,x%v,y,beta,info) !!$ -!!$ end subroutine s_vect_mlt_va +!!$ end subroutine s_mvect_mlt_va !!$ -!!$ subroutine s_vect_scal(alpha, x) +!!$ subroutine s_mvect_scal(alpha, x) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_s_multivect_type), intent(inout) :: x @@ -1969,10 +2011,10 @@ contains !!$ !!$ if (allocated(x%v)) call x%v%scal(alpha) !!$ -!!$ end subroutine s_vect_scal +!!$ end subroutine s_mvect_scal !!$ !!$ -!!$ function s_vect_nrm2(n,x) result(res) +!!$ function s_mvect_nrm2(n,x) result(res) !!$ implicit none !!$ class(psb_s_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1984,9 +2026,9 @@ contains !!$ res = szero !!$ end if !!$ -!!$ end function s_vect_nrm2 +!!$ end function s_mvect_nrm2 !!$ -!!$ function s_vect_amax(n,x) result(res) +!!$ function s_mvect_amax(n,x) result(res) !!$ implicit none !!$ class(psb_s_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1998,9 +2040,9 @@ contains !!$ res = szero !!$ end if !!$ -!!$ end function s_vect_amax +!!$ end function s_mvect_amax !!$ -!!$ function s_vect_asum(n,x) result(res) +!!$ function s_mvect_asum(n,x) result(res) !!$ implicit none !!$ class(psb_s_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -2012,6 +2054,6 @@ contains !!$ res = szero !!$ end if !!$ -!!$ 
end function s_vect_asum +!!$ end function s_mvect_asum end module psb_s_multivect_mod diff --git a/base/modules/serial/psb_serial_mod.f90 b/base/modules/serial/psb_serial_mod.f90 index 627b318e..a25c1c37 100644 --- a/base/modules/serial/psb_serial_mod.f90 +++ b/base/modules/serial/psb_serial_mod.f90 @@ -36,9 +36,7 @@ module psb_serial_mod use psb_string_mod use psb_sort_mod - use psi_serial_mod, & - & psb_gth => psi_gth,& - & psb_sct => psi_sct + use psi_serial_mod use psb_s_serial_mod use psb_d_serial_mod diff --git a/base/modules/serial/psb_z_base_mat_mod.F90 b/base/modules/serial/psb_z_base_mat_mod.F90 index 3e8196f4..0d2c9e76 100644 --- a/base/modules/serial/psb_z_base_mat_mod.F90 +++ b/base/modules/serial/psb_z_base_mat_mod.F90 @@ -416,7 +416,7 @@ module psb_z_base_mat_mod ! ! This is COO specific ! -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: iset_nzeros => lz_coo_iset_nzeros procedure, pass(a) :: lset_nzeros => lz_coo_lset_nzeros generic, public :: set_nzeros => iset_nzeros, lset_nzeros @@ -439,7 +439,7 @@ module psb_z_base_mat_mod private :: lz_coo_get_nzeros, lz_coo_iset_nzeros, & & lz_coo_get_fmt, lz_coo_free, lz_coo_sizeof, & & lz_coo_transp_1mat, lz_coo_transc_1mat -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) private :: lz_coo_lset_nzeros #endif @@ -3499,7 +3499,7 @@ module psb_z_base_mat_mod end subroutine psb_lz_coo_clean_negidx end interface -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! !> Funtion: coo_clean_negidx_inner !! 
\brief Take out any entries with negative row or column index @@ -4323,7 +4323,7 @@ contains end subroutine lz_coo_iset_nzeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine lz_coo_lset_nzeros(nz,a) implicit none integer(psb_lpk_), intent(in) :: nz diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 2a14de21..bf1a276c 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -155,6 +155,9 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: upd_xyz => z_base_upd_xyz + procedure, pass(w) :: xyzw => z_base_xyzw + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -273,7 +276,7 @@ contains call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld') return end if -#if defined (OPENMP) +#if defined (PSB_OPENMP) !$omp parallel do private(i) do i = 1, size(this) x%v(i) = this(i) @@ -567,8 +570,8 @@ contains info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) - if (info == 0) call x%free_buffer(info) - if (info == 0) call x%free_comid(info) + if ((info == 0).and.allocated(x%combuf)) call x%free_buffer(info) + if ((info == 0).and.allocated(x%comid)) call x%free_comid(info) if (info /= 0) call & & psb_errpush(psb_err_alloc_dealloc_,'vect_free') @@ -838,7 +841,7 @@ contains if (present(last)) last_ = min(last,last_) if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val @@ -876,7 +879,7 @@ contains if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i = first_, last_ x%v(i) = val(i-first_+1) @@ -925,7 +928,7 @@ contains if (allocated(x%v)) then if (x%is_dev()) 
call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i=1, size(x%v) x%v(i) = abs(x%v(i)) @@ -1018,7 +1021,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine z_base_axpby_v(m,alpha, x, beta, y, info) @@ -1047,7 +1050,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1078,7 +1081,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine z_base_axpby_a(m,alpha, x, beta, y, info) @@ -1126,6 +1129,64 @@ contains end subroutine z_base_axpby_a2 + ! + ! UPD_XYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_upd_xyz + !! \memberof psb_z_base_vect_type + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! 
+ subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_base_vect_type), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(alpha/=zzero)) call x%sync() + if (y%is_dev().and.(beta/=zzero)) call y%sync() + if (z%is_dev().and.(delta/=zzero)) call z%sync() + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + + end subroutine z_base_upd_xyz + + subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_base_vect_type), intent(inout) :: z + class(psb_z_base_vect_type), intent(inout) :: w + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=zzero)) call x%sync() + if (y%is_dev().and.(b/=zzero)) call y%sync() + if (z%is_dev().and.(d/=zzero)) call z%sync() + if (w%is_dev().and.(f/=zzero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine z_base_xyzw + ! ! 
Multiple variants of two operations: @@ -1674,7 +1735,7 @@ contains integer(psb_ipk_) :: i if (allocated(x%v)) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do private(i) do i=1,size(x%v) x%v(i) = alpha*x%v(i) @@ -1718,7 +1779,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = dzero !$omp parallel do private(i) reduction(max: res) do i=1, n @@ -1743,7 +1804,7 @@ contains integer(psb_ipk_) :: i if (x%is_dev()) call x%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) res=dzero !$omp parallel do private(i) reduction(+: res) do i= 1, size(x%v) @@ -1770,7 +1831,8 @@ contains subroutine z_base_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: alpha, beta, y(:) class(psb_z_base_vect_type) :: x @@ -1790,7 +1852,8 @@ contains subroutine z_base_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: y(:) class(psb_z_base_vect_type) :: x @@ -1806,7 +1869,8 @@ contains subroutine z_base_gthzbuf(i,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n class(psb_i_base_vect_type) :: idx class(psb_z_base_vect_type) :: x @@ -1869,7 +1933,8 @@ contains subroutine z_base_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: y(:) class(psb_z_base_vect_type) :: x @@ -1894,7 +1959,8 @@ contains subroutine z_base_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:) class(psb_z_base_vect_type) :: y @@ -1907,7 +1973,8 @@ contains subroutine 
z_base_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta, x(:) class(psb_z_base_vect_type) :: y @@ -1921,7 +1988,8 @@ contains subroutine z_base_sctb_buf(i,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta class(psb_z_base_vect_type) :: y @@ -1958,7 +2026,7 @@ contains integer(psb_ipk_) :: i, n if (z%is_dev()) call z%sync() -#if defined(OPENMP) +#if defined(PSB_OPENMP) n = size(x) !$omp parallel do private(i) do i = 1, n @@ -3186,10 +3254,11 @@ contains subroutine z_base_mlv_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: alpha, beta, y(:) class(psb_z_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3211,7 +3280,8 @@ contains subroutine z_base_mlv_gthzv_x(i,n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: y(:) class(psb_z_base_multivect_type) :: x @@ -3233,10 +3303,11 @@ contains subroutine z_base_mlv_gthzv(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: y(:) class(psb_z_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3259,10 +3330,11 @@ contains subroutine z_base_mlv_gthzm(n,idx,x,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: y(:,:) 
class(psb_z_base_multivect_type) :: x - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (x%is_dev()) call x%sync() if (.not.allocated(x%v)) then @@ -3280,7 +3352,8 @@ contains subroutine z_base_mlv_gthzbuf(i,ixb,n,idx,x) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, ixb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, ixb class(psb_i_base_vect_type) :: idx class(psb_z_base_multivect_type) :: x integer(psb_ipk_) :: nc @@ -3312,10 +3385,11 @@ contains subroutine z_base_mlv_sctb(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:) class(psb_z_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = psb_size(y%v,2_psb_ipk_) @@ -3327,10 +3401,11 @@ contains subroutine z_base_mlv_sctbr2(n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:,:) class(psb_z_base_multivect_type) :: y - integer(psb_ipk_) :: nc + integer(psb_mpk_) :: nc if (y%is_dev()) call y%sync() nc = y%get_ncols() @@ -3342,7 +3417,8 @@ contains subroutine z_base_mlv_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex( psb_dpk_) :: beta, x(:) class(psb_z_base_multivect_type) :: y @@ -3354,7 +3430,8 @@ contains subroutine z_base_mlv_sctb_buf(i,iyb,n,idx,beta,y) use psi_serial_mod implicit none - integer(psb_ipk_) :: i, iyb, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i, iyb class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta class(psb_z_base_multivect_type) :: y diff --git a/base/modules/serial/psb_z_csc_mat_mod.f90 b/base/modules/serial/psb_z_csc_mat_mod.f90 index 222742eb..b9828f59 100644 --- a/base/modules/serial/psb_z_csc_mat_mod.f90 +++ 
b/base/modules/serial/psb_z_csc_mat_mod.f90 @@ -87,7 +87,7 @@ module psb_z_csc_mat_mod procedure, pass(a) :: mv_from_coo => psb_z_mv_csc_from_coo procedure, pass(a) :: mv_to_fmt => psb_z_mv_csc_to_fmt procedure, pass(a) :: mv_from_fmt => psb_z_mv_csc_from_fmt - procedure, pass(a) :: clean_zeros => psb_z_csc_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_z_csc_clean_zeros procedure, pass(a) :: csput_a => psb_z_csc_csput_a procedure, pass(a) :: get_diag => psb_z_csc_get_diag procedure, pass(a) :: csgetptn => psb_z_csc_csgetptn @@ -143,7 +143,7 @@ module psb_z_csc_mat_mod procedure, pass(a) :: mv_from_coo => psb_lz_mv_csc_from_coo procedure, pass(a) :: mv_to_fmt => psb_lz_mv_csc_to_fmt procedure, pass(a) :: mv_from_fmt => psb_lz_mv_csc_from_fmt - procedure, pass(a) :: clean_zeros => psb_lz_csc_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_lz_csc_clean_zeros procedure, pass(a) :: csput_a => psb_lz_csc_csput_a procedure, pass(a) :: get_diag => psb_lz_csc_get_diag procedure, pass(a) :: csgetptn => psb_lz_csc_csgetptn @@ -313,18 +313,18 @@ module psb_z_csc_mat_mod end subroutine psb_z_mv_csc_from_fmt end interface - ! - !> - !! \memberof psb_z_csc_sparse_mat - !! \see psb_z_base_mat_mod::psb_z_base_clean_zeros - ! - interface - subroutine psb_z_csc_clean_zeros(a, info) - import - class(psb_z_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csc_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_z_csc_sparse_mat +!!$ !! \see psb_z_base_mat_mod::psb_z_base_clean_zeros +!!$ ! +!!$ interface +!!$ subroutine psb_z_csc_clean_zeros(a, info) +!!$ import +!!$ class(psb_z_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_csc_clean_zeros +!!$ end interface !> \memberof psb_z_csc_sparse_mat @@ -717,18 +717,18 @@ module psb_z_csc_mat_mod end subroutine psb_lz_mv_csc_from_fmt end interface - ! - !> - !! \memberof psb_lz_csc_sparse_mat - !! 
\see psb_lz_base_mat_mod::psb_lz_base_clean_zeros - ! - interface - subroutine psb_lz_csc_clean_zeros(a, info) - import - class(psb_lz_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_lz_csc_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_lz_csc_sparse_mat +!!$ !! \see psb_lz_base_mat_mod::psb_lz_base_clean_zeros +!!$ ! +!!$ interface +!!$ subroutine psb_lz_csc_clean_zeros(a, info) +!!$ import +!!$ class(psb_lz_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_lz_csc_clean_zeros +!!$ end interface !> \memberof psb_lz_csc_sparse_mat !! \see psb_lz_base_mat_mod::psb_lz_base_cp_from diff --git a/base/modules/serial/psb_z_csr_mat_mod.f90 b/base/modules/serial/psb_z_csr_mat_mod.f90 index 4ec8dd00..0bc66bcc 100644 --- a/base/modules/serial/psb_z_csr_mat_mod.f90 +++ b/base/modules/serial/psb_z_csr_mat_mod.f90 @@ -91,7 +91,7 @@ module psb_z_csr_mat_mod procedure, pass(a) :: mv_from_coo => psb_z_mv_csr_from_coo procedure, pass(a) :: mv_to_fmt => psb_z_mv_csr_to_fmt procedure, pass(a) :: mv_from_fmt => psb_z_mv_csr_from_fmt - procedure, pass(a) :: clean_zeros => psb_z_csr_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_z_csr_clean_zeros procedure, pass(a) :: csput_a => psb_z_csr_csput_a procedure, pass(a) :: get_diag => psb_z_csr_get_diag procedure, pass(a) :: csgetptn => psb_z_csr_csgetptn @@ -261,18 +261,18 @@ module psb_z_csr_mat_mod end subroutine psb_z_csr_triu end interface - ! - !> - !! \memberof psb_z_csr_sparse_mat - !! \see psb_z_base_mat_mod::psb_z_base_clean_zeros - ! - interface - subroutine psb_z_csr_clean_zeros(a, info) - import - class(psb_z_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csr_clean_zeros - end interface +!!$ ! +!!$ !> +!!$ !! \memberof psb_z_csr_sparse_mat +!!$ !! \see psb_z_base_mat_mod::psb_z_base_clean_zeros +!!$ ! 
+!!$ interface +!!$ subroutine psb_z_csr_clean_zeros(a, info) +!!$ import +!!$ class(psb_z_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_csr_clean_zeros +!!$ end interface !> \memberof psb_z_csr_sparse_mat !! \see psb_z_base_mat_mod::psb_z_base_cp_to_coo @@ -579,7 +579,111 @@ module psb_z_csr_mat_mod end subroutine psb_z_csr_scals end interface - !> \namespace psb_base_mod \class psb_lz_csr_sparse_mat + + type, extends(psb_z_csr_sparse_mat) :: psb_z_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => z_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_z_ecsr_csmm + procedure, pass(a) :: csmv => psb_z_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_z_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_z_ecsr_cmp_nerwp + procedure, pass(a) :: free => z_ecsr_free + procedure, pass(a) :: mold => psb_z_ecsr_mold + + end type psb_z_ecsr_sparse_mat + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_csmv + interface + subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_z_ecsr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ecsr_csmv + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! 
ECSR-specific: presumably (re)computes nnerws/nerwp, the non-empty-row count and index list of a -- confirm against the implementation + interface + subroutine psb_z_ecsr_cmp_nerwp(a,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ecsr_cmp_nerwp + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo + interface + subroutine psb_z_cp_ecsr_from_coo(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ecsr_from_coo + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_fmt + interface + subroutine psb_z_cp_ecsr_from_fmt(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ecsr_from_fmt + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_mv_from_coo + interface + subroutine psb_z_mv_ecsr_from_coo(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ecsr_from_coo + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! 
\see psb_z_base_mat_mod::psb_z_base_mv_from_fmt + interface + subroutine psb_z_mv_ecsr_from_fmt(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ecsr_from_fmt + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_z_ecsr_mold(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_lz_csr_sparse_mat !! \extends psb_lz_base_mat_mod::psb_lz_base_sparse_mat !! !! psb_lz_csr_sparse_mat type and the related methods. @@ -612,7 +716,7 @@ module psb_z_csr_mat_mod procedure, pass(a) :: mv_from_coo => psb_lz_mv_csr_from_coo procedure, pass(a) :: mv_to_fmt => psb_lz_mv_csr_to_fmt procedure, pass(a) :: mv_from_fmt => psb_lz_mv_csr_from_fmt - procedure, pass(a) :: clean_zeros => psb_lz_csr_clean_zeros +! procedure, pass(a) :: clean_zeros => psb_lz_csr_clean_zeros procedure, pass(a) :: csput_a => psb_lz_csr_csput_a procedure, pass(a) :: get_diag => psb_lz_csr_get_diag procedure, pass(a) :: csgetptn => psb_lz_csr_csgetptn @@ -791,17 +895,17 @@ module psb_z_csr_mat_mod end interface ! - !> - !! \memberof psb_lz_csr_sparse_mat - !! \see psb_lz_base_mat_mod::psb_lz_base_clean_zeros - ! - interface - subroutine psb_lz_csr_clean_zeros(a, info) - import - class(psb_lz_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_lz_csr_clean_zeros - end interface +!!$ !> +!!$ !! \memberof psb_lz_csr_sparse_mat +!!$ !! \see psb_lz_base_mat_mod::psb_lz_base_clean_zeros +!!$ ! 
+!!$ interface +!!$ subroutine psb_lz_csr_clean_zeros(a, info) +!!$ import +!!$ class(psb_lz_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_lz_csr_clean_zeros +!!$ end interface @@ -1178,6 +1282,26 @@ contains + function z_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function z_ecsr_get_fmt + + subroutine z_ecsr_free(a) + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_z_csr_sparse_mat%free() + + return + end subroutine z_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index c534cad5..47342aee 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -71,7 +71,7 @@ ! ! We are also introducing the type psb_lzspmat_type. ! The basic difference with psb_zspmat_type is in the type -! of the indices, which are PSB_LPK_ so that the entries +! of the indices, which are PSB_LPK_ so that the entries ! are guaranteed to be able to contain global indices. ! This type only supports data handling and preprocessing, it is ! not supposed to be used for computations. 
@@ -79,12 +79,14 @@ module psb_z_mat_mod use psb_z_base_mat_mod - use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat + use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat,& + & psb_z_ecsr_sparse_mat use psb_z_csc_mat_mod, only : psb_z_csc_sparse_mat, psb_lz_csc_sparse_mat type :: psb_zspmat_type class(psb_z_base_sparse_mat), allocatable :: a + class(psb_z_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lz_coo_sparse_mat), allocatable :: rmta @@ -143,7 +145,7 @@ module psb_z_mat_mod procedure, pass(a) :: csgetrow => psb_z_csgetrow procedure, pass(a) :: csgetblk => psb_z_csgetblk generic, public :: csget => csgetptn, csgetrow, csgetblk -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: lcsgetptn => psb_z_lcsgetptn procedure, pass(a) :: lcsgetrow => psb_z_lcsgetrow generic, public :: csget => lcsgetptn, lcsgetrow @@ -202,6 +204,8 @@ module psb_z_mat_mod procedure, pass(a) :: cscnv_ip => psb_z_cscnv_ip procedure, pass(a) :: cscnv_base => psb_z_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_z_split_nd + procedure, pass(a) :: merge_nd => psb_z_merge_nd procedure, pass(a) :: clone => psb_zspmat_clone procedure, pass(a) :: move_alloc => psb_zspmat_type_move ! @@ -307,7 +311,7 @@ module psb_z_mat_mod ! 
Setters procedure, pass(a) :: set_lnrows => psb_lz_set_lnrows procedure, pass(a) :: set_lncols => psb_lz_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) procedure, pass(a) :: set_inrows => psb_lz_set_inrows procedure, pass(a) :: set_incols => psb_lz_set_incols generic, public :: set_nrows => set_inrows, set_lnrows @@ -342,7 +346,7 @@ module psb_z_mat_mod procedure, pass(a) :: csgetrow => psb_lz_csgetrow procedure, pass(a) :: csgetblk => psb_lz_csgetblk generic, public :: csget => csgetptn, csgetrow, csgetblk -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) !!$ procedure, pass(a) :: icsgetptn => psb_lz_icsgetptn !!$ procedure, pass(a) :: icsgetrow => psb_lz_icsgetrow !!$ generic, public :: csget => icsgetptn, icsgetrow @@ -840,6 +844,24 @@ module psb_z_mat_mod ! ! + interface + subroutine psb_z_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_split_nd + end interface + + interface + subroutine psb_z_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_merge_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 
3 versions: copying to target @@ -859,7 +881,6 @@ module psb_z_mat_mod end subroutine psb_z_cscnv end interface - interface subroutine psb_z_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat @@ -871,7 +892,6 @@ module psb_z_mat_mod end subroutine psb_z_cscnv_ip end interface - interface subroutine psb_z_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat @@ -1250,7 +1270,7 @@ module psb_z_mat_mod class(psb_lzspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: m end subroutine psb_lz_set_lnrows -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lz_set_inrows(m,a) import :: psb_ipk_, psb_lpk_, psb_lzspmat_type class(psb_lzspmat_type), intent(inout) :: a @@ -1265,7 +1285,7 @@ module psb_z_mat_mod class(psb_lzspmat_type), intent(inout) :: a integer(psb_lpk_), intent(in) :: n end subroutine psb_lz_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lz_set_incols(n,a) import :: psb_ipk_, psb_lpk_, psb_lzspmat_type class(psb_lzspmat_type), intent(inout) :: a @@ -2390,7 +2410,7 @@ contains end subroutine psb_z_clean_zeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_z_lcsgetptn(imin,imax,a,nz,ia,ja,info,& & jmin,jmax,iren,append,nzin,rscale,cscale) implicit none @@ -2909,7 +2929,7 @@ contains end subroutine psb_lz_clean_zeros -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) !!$ subroutine psb_lz_icsgetptn(imin,imax,a,nz,ia,ja,info,& !!$ & jmin,jmax,iren,append,nzin,rscale,cscale) !!$ implicit none diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index e8a34859..79606f3b 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_z_vect_mod procedure, pass(z) :: 
axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: upd_xyz => z_vect_upd_xyz + procedure, pass(z) :: xyzw => z_vect_xyzw procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_a => z_vect_mlt_a procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2 @@ -489,7 +491,8 @@ contains subroutine z_vect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: alpha, beta, y(:) class(psb_z_vect_type) :: x @@ -500,7 +503,8 @@ contains subroutine z_vect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: y(:) class(psb_z_vect_type) :: x @@ -511,7 +515,8 @@ contains subroutine z_vect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:) class(psb_z_vect_type) :: y @@ -703,7 +708,7 @@ contains res = zzero if (allocated(x%v)) & - & res = x%v%dot(n,y) + & res = x%v%dot_a(n,y) end function z_vect_dot_a @@ -771,6 +776,38 @@ contains end subroutine z_vect_axpby_a2 + subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_vect_type), intent(inout) :: x + class(psb_z_vect_type), intent(inout) :: y + class(psb_z_vect_type), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + info = psb_err_invalid_vect_state_ ! replaced by the base call's info when x, y, z are all allocated + if (allocated(x%v).and.allocated(y%v).and.allocated(z%v)) & + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + + end subroutine z_vect_upd_xyz + + subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_vect_type), intent(inout) :: x + class(psb_z_vect_type), intent(inout) :: 
y + class(psb_z_vect_type), intent(inout) :: z + class(psb_z_vect_type), intent(inout) :: w + complex(psb_dpk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + info = psb_err_invalid_vect_state_ ! replaced by the base call's info when x, y, z, w are all allocated + if (allocated(x%v).and.allocated(y%v).and.allocated(z%v).and.allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine z_vect_xyzw + + subroutine z_vect_mlt_v(x, y, info) use psi_serial_mod implicit none @@ -1134,7 +1171,7 @@ contains end if end function z_vect_nrm2_weight - + function z_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none @@ -1266,56 +1303,56 @@ module psb_z_multivect_mod integer(psb_ipk_) :: dupl = psb_dupl_add_ complex(psb_dpk_), allocatable :: rmtv(:,:) contains - procedure, pass(x) :: get_nrows => z_vect_get_nrows - procedure, pass(x) :: get_ncols => z_vect_get_ncols - procedure, pass(x) :: sizeof => z_vect_sizeof - procedure, pass(x) :: get_fmt => z_vect_get_fmt + procedure, pass(x) :: get_nrows => z_mvect_get_nrows + procedure, pass(x) :: get_ncols => z_mvect_get_ncols + procedure, pass(x) :: sizeof => z_mvect_sizeof + procedure, pass(x) :: get_fmt => z_mvect_get_fmt procedure, pass(x) :: is_remote_build => z_mvect_is_remote_build procedure, pass(x) :: set_remote_build => z_mvect_set_remote_build procedure, pass(x) :: get_dupl => z_mvect_get_dupl procedure, pass(x) :: set_dupl => z_mvect_set_dupl - procedure, pass(x) :: all => z_vect_all - procedure, pass(x) :: reall => z_vect_reall - procedure, pass(x) :: zero => z_vect_zero - procedure, pass(x) :: asb => z_vect_asb - procedure, pass(x) :: sync => z_vect_sync - procedure, pass(x) :: free => z_vect_free - procedure, pass(x) :: ins => z_vect_ins - procedure, pass(x) :: bld_x => z_vect_bld_x - procedure, pass(x) :: bld_n => z_vect_bld_n + procedure, pass(x) :: all => z_mvect_all + procedure, pass(x) :: reall => z_mvect_reall + procedure, pass(x) :: zero => z_mvect_zero + procedure, pass(x) :: asb => z_mvect_asb + procedure, pass(x) :: sync => z_mvect_sync + procedure, pass(x) :: free => z_mvect_free + 
procedure, pass(x) :: ins => z_mvect_ins + procedure, pass(x) :: bld_x => z_mvect_bld_x + procedure, pass(x) :: bld_n => z_mvect_bld_n generic, public :: bld => bld_x, bld_n - procedure, pass(x) :: get_vect => z_vect_get_vect - procedure, pass(x) :: cnv => z_vect_cnv - procedure, pass(x) :: set_scal => z_vect_set_scal - procedure, pass(x) :: set_vect => z_vect_set_vect + procedure, pass(x) :: get_vect => z_mvect_get_vect + procedure, pass(x) :: cnv => z_mvect_cnv + procedure, pass(x) :: set_scal => z_mvect_set_scal + procedure, pass(x) :: set_vect => z_mvect_set_vect generic, public :: set => set_vect, set_scal - procedure, pass(x) :: clone => z_vect_clone - procedure, pass(x) :: gthab => z_vect_gthab - procedure, pass(x) :: gthzv => z_vect_gthzv - procedure, pass(x) :: gthzv_x => z_vect_gthzv_x + procedure, pass(x) :: clone => z_mvect_clone + procedure, pass(x) :: gthab => z_mvect_gthab + procedure, pass(x) :: gthzv => z_mvect_gthzv + procedure, pass(x) :: gthzv_x => z_mvect_gthzv_x generic, public :: gth => gthab, gthzv - procedure, pass(y) :: sctb => z_vect_sctb - procedure, pass(y) :: sctb_x => z_vect_sctb_x + procedure, pass(y) :: sctb => z_mvect_sctb + procedure, pass(y) :: sctb_x => z_mvect_sctb_x generic, public :: sct => sctb, sctb_x -!!$ procedure, pass(x) :: dot_v => z_vect_dot_v -!!$ procedure, pass(x) :: dot_a => z_vect_dot_a +!!$ procedure, pass(x) :: dot_v => z_mvect_dot_v +!!$ procedure, pass(x) :: dot_a => z_mvect_dot_a !!$ generic, public :: dot => dot_v, dot_a -!!$ procedure, pass(y) :: axpby_v => z_vect_axpby_v -!!$ procedure, pass(y) :: axpby_a => z_vect_axpby_a +!!$ procedure, pass(y) :: axpby_v => z_mvect_axpby_v +!!$ procedure, pass(y) :: axpby_a => z_mvect_axpby_a !!$ generic, public :: axpby => axpby_v, axpby_a -!!$ procedure, pass(y) :: mlt_v => z_vect_mlt_v -!!$ procedure, pass(y) :: mlt_a => z_vect_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => z_vect_mlt_v_2 -!!$ procedure, pass(z) :: 
mlt_va => z_vect_mlt_va -!!$ procedure, pass(z) :: mlt_av => z_vect_mlt_av +!!$ procedure, pass(y) :: mlt_v => z_mvect_mlt_v +!!$ procedure, pass(y) :: mlt_a => z_mvect_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => z_mvect_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => z_mvect_mlt_v_2 +!!$ procedure, pass(z) :: mlt_va => z_mvect_mlt_va +!!$ procedure, pass(z) :: mlt_av => z_mvect_mlt_av !!$ generic, public :: mlt => mlt_v, mlt_a, mlt_a_2,& !!$ & mlt_v_2, mlt_av, mlt_va -!!$ procedure, pass(x) :: scal => z_vect_scal -!!$ procedure, pass(x) :: nrm2 => z_vect_nrm2 -!!$ procedure, pass(x) :: amax => z_vect_amax -!!$ procedure, pass(x) :: asum => z_vect_asum +!!$ procedure, pass(x) :: scal => z_mvect_scal +!!$ procedure, pass(x) :: nrm2 => z_mvect_nrm2 +!!$ procedure, pass(x) :: amax => z_mvect_amax +!!$ procedure, pass(x) :: asum => z_mvect_asum end type psb_z_multivect_type public :: psb_z_multivect, psb_z_multivect_type,& @@ -1416,7 +1453,7 @@ contains end function psb_z_get_base_multivect_default - subroutine z_vect_clone(x,y,info) + subroutine z_mvect_clone(x,y,info) implicit none class(psb_z_multivect_type), intent(inout) :: x class(psb_z_multivect_type), intent(inout) :: y @@ -1425,11 +1462,11 @@ contains info = psb_success_ call y%free(info) if ((info==0).and.allocated(x%v)) then - call y%bld(x%get_vect(),mold=x%v) + call y%bld_x(x%get_vect(),mold=x%v) end if - end subroutine z_vect_clone + end subroutine z_mvect_clone - subroutine z_vect_bld_x(x,invect,mold) + subroutine z_mvect_bld_x(x,invect,mold) complex(psb_dpk_), intent(in) :: invect(:,:) class(psb_z_multivect_type), intent(out) :: x class(psb_z_base_multivect_type), intent(in), optional :: mold @@ -1445,10 +1482,10 @@ contains if (info == psb_success_) call x%v%bld(invect) - end subroutine z_vect_bld_x + end subroutine z_mvect_bld_x - subroutine z_vect_bld_n(x,m,n,mold) + subroutine z_mvect_bld_n(x,m,n,mold) integer(psb_ipk_), intent(in) :: m,n class(psb_z_multivect_type), intent(out) :: x 
class(psb_z_base_multivect_type), intent(in), optional :: mold @@ -1462,9 +1499,9 @@ contains endif if (info == psb_success_) call x%v%bld(m,n) - end subroutine z_vect_bld_n + end subroutine z_mvect_bld_n - function z_vect_get_vect(x) result(res) + function z_mvect_get_vect(x) result(res) class(psb_z_multivect_type), intent(inout) :: x complex(psb_dpk_), allocatable :: res(:,:) integer(psb_ipk_) :: info @@ -1472,25 +1509,25 @@ contains if (allocated(x%v)) then res = x%v%get_vect() end if - end function z_vect_get_vect + end function z_mvect_get_vect - subroutine z_vect_set_scal(x,val) + subroutine z_mvect_set_scal(x,val) class(psb_z_multivect_type), intent(inout) :: x complex(psb_dpk_), intent(in) :: val integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine z_vect_set_scal + end subroutine z_mvect_set_scal - subroutine z_vect_set_vect(x,val) + subroutine z_mvect_set_vect(x,val) class(psb_z_multivect_type), intent(inout) :: x complex(psb_dpk_), intent(in) :: val(:,:) integer(psb_ipk_) :: info if (allocated(x%v)) call x%v%set(val) - end subroutine z_vect_set_vect + end subroutine z_mvect_set_vect function constructor(x) result(this) @@ -1498,7 +1535,7 @@ contains type(psb_z_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(x) + call this%bld_x(x) call this%asb(size(x,dim=1,kind=psb_ipk_),size(x,dim=2,kind=psb_ipk_),info) end function constructor @@ -1509,44 +1546,44 @@ contains type(psb_z_multivect_type) :: this integer(psb_ipk_) :: info - call this%bld(m,n) + call this%bld_n(m,n) call this%asb(m,n,info) end function size_const - function z_vect_get_nrows(x) result(res) + function z_mvect_get_nrows(x) result(res) implicit none class(psb_z_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_nrows() - end function z_vect_get_nrows + end function z_mvect_get_nrows - function z_vect_get_ncols(x) result(res) + function z_mvect_get_ncols(x) result(res) implicit none 
class(psb_z_multivect_type), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = x%v%get_ncols() - end function z_vect_get_ncols + end function z_mvect_get_ncols - function z_vect_sizeof(x) result(res) + function z_mvect_sizeof(x) result(res) implicit none class(psb_z_multivect_type), intent(in) :: x integer(psb_epk_) :: res res = 0 if (allocated(x%v)) res = x%v%sizeof() - end function z_vect_sizeof + end function z_mvect_sizeof - function z_vect_get_fmt(x) result(res) + function z_mvect_get_fmt(x) result(res) implicit none class(psb_z_multivect_type), intent(in) :: x character(len=5) :: res res = 'NULL' if (allocated(x%v)) res = x%v%get_fmt() - end function z_vect_get_fmt + end function z_mvect_get_fmt - subroutine z_vect_all(m,n, x, info, mold) + subroutine z_mvect_all(m,n, x, info, mold) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -1565,9 +1602,9 @@ contains info = psb_err_alloc_dealloc_ end if - end subroutine z_vect_all + end subroutine z_mvect_all - subroutine z_vect_reall(m,n, x, info) + subroutine z_mvect_reall(m,n, x, info) implicit none integer(psb_ipk_), intent(in) :: m,n @@ -1580,18 +1617,18 @@ contains if (info == 0) & & call x%asb(m,n,info) - end subroutine z_vect_reall + end subroutine z_mvect_reall - subroutine z_vect_zero(x) + subroutine z_mvect_zero(x) use psi_serial_mod implicit none class(psb_z_multivect_type), intent(inout) :: x if (allocated(x%v)) call x%v%zero() - end subroutine z_vect_zero + end subroutine z_mvect_zero - subroutine z_vect_asb(m,n, x, info) + subroutine z_mvect_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1602,42 +1639,45 @@ contains if (allocated(x%v)) & & call x%v%asb(m,n,info) - end subroutine z_vect_asb + end subroutine z_mvect_asb - subroutine z_vect_sync(x) + subroutine z_mvect_sync(x) implicit none class(psb_z_multivect_type), intent(inout) :: x if (allocated(x%v)) & & call x%v%sync() - end subroutine z_vect_sync + end subroutine z_mvect_sync - subroutine 
z_vect_gthab(n,idx,alpha,x,beta,y) + subroutine z_mvect_gthab(n,idx,alpha,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: alpha, beta, y(:) class(psb_z_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,alpha,beta,y) - end subroutine z_vect_gthab + end subroutine z_mvect_gthab - subroutine z_vect_gthzv(n,idx,x,y) + subroutine z_mvect_gthzv(n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: y(:) class(psb_z_multivect_type) :: x if (allocated(x%v)) & & call x%v%gth(n,idx,y) - end subroutine z_vect_gthzv + end subroutine z_mvect_gthzv - subroutine z_vect_gthzv_x(i,n,idx,x,y) + subroutine z_mvect_gthzv_x(i,n,idx,x,y) use psi_serial_mod - integer(psb_ipk_) :: i,n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: y(:) class(psb_z_multivect_type) :: x @@ -1645,22 +1685,24 @@ contains if (allocated(x%v)) & & call x%v%gth(i,n,idx,y) - end subroutine z_vect_gthzv_x + end subroutine z_mvect_gthzv_x - subroutine z_vect_sctb(n,idx,x,beta,y) + subroutine z_mvect_sctb(n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:) class(psb_z_multivect_type) :: y if (allocated(y%v)) & & call y%v%sct(n,idx,x,beta) - end subroutine z_vect_sctb + end subroutine z_mvect_sctb - subroutine z_vect_sctb_x(i,n,idx,x,beta,y) + subroutine z_mvect_sctb_x(i,n,idx,x,beta,y) use psi_serial_mod - integer(psb_ipk_) :: i, n + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta, x(:) class(psb_z_multivect_type) :: y @@ -1668,9 +1710,9 @@ contains if (allocated(y%v)) & & call y%v%sct(i,n,idx,x,beta) - end subroutine z_vect_sctb_x + end subroutine z_mvect_sctb_x - subroutine z_vect_free(x, info) + subroutine 
z_mvect_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none @@ -1683,9 +1725,9 @@ contains if (info == 0) deallocate(x%v,stat=info) end if - end subroutine z_vect_free + end subroutine z_mvect_free - subroutine z_vect_ins(n,irl,val,x,info) + subroutine z_mvect_ins(n,irl,val,x,info) use psi_serial_mod implicit none class(psb_z_multivect_type), intent(inout) :: x @@ -1704,10 +1746,10 @@ contains dupl = x%get_dupl() call x%v%ins(n,irl,val,dupl,info) - end subroutine z_vect_ins + end subroutine z_mvect_ins - subroutine z_vect_cnv(x,mold) + subroutine z_mvect_cnv(x,mold) class(psb_z_multivect_type), intent(inout) :: x class(psb_z_base_multivect_type), intent(in), optional :: mold class(psb_z_base_multivect_type), allocatable :: tmp @@ -1724,10 +1766,10 @@ contains call x%v%free(info) end if call move_alloc(tmp,x%v) - end subroutine z_vect_cnv + end subroutine z_mvect_cnv -!!$ function z_vect_dot_v(n,x,y) result(res) +!!$ function z_mvect_dot_v(n,x,y) result(res) !!$ implicit none !!$ class(psb_z_multivect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(in) :: n @@ -1737,9 +1779,9 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) & !!$ & res = x%v%dot(n,y%v) !!$ -!!$ end function z_vect_dot_v +!!$ end function z_mvect_dot_v !!$ -!!$ function z_vect_dot_a(n,x,y) result(res) +!!$ function z_mvect_dot_a(n,x,y) result(res) !!$ implicit none !!$ class(psb_z_multivect_type), intent(inout) :: x !!$ complex(psb_dpk_), intent(in) :: y(:) @@ -1750,9 +1792,9 @@ contains !!$ if (allocated(x%v)) & !!$ & res = x%v%dot(n,y) !!$ -!!$ end function z_vect_dot_a +!!$ end function z_mvect_dot_a !!$ -!!$ subroutine z_vect_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine z_mvect_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m @@ -1764,12 +1806,12 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) then !!$ call y%v%axpby(m,alpha,x%v,beta,info) !!$ else -!!$ info = psb_err_invalid_vect_state_ 
+!!$ info = psb_err_invalid_mvect_state_ !!$ end if !!$ -!!$ end subroutine z_vect_axpby_v +!!$ end subroutine z_mvect_axpby_v !!$ -!!$ subroutine z_vect_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine z_mvect_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m @@ -1781,10 +1823,10 @@ contains !!$ if (allocated(y%v)) & !!$ & call y%v%axpby(m,alpha,x,beta,info) !!$ -!!$ end subroutine z_vect_axpby_a +!!$ end subroutine z_mvect_axpby_a !!$ !!$ -!!$ subroutine z_vect_mlt_v(x, y, info) +!!$ subroutine z_mvect_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_z_multivect_type), intent(inout) :: x @@ -1796,9 +1838,9 @@ contains !!$ if (allocated(x%v).and.allocated(y%v)) & !!$ & call y%v%mlt(x%v,info) !!$ -!!$ end subroutine z_vect_mlt_v +!!$ end subroutine z_mvect_mlt_v !!$ -!!$ subroutine z_vect_mlt_a(x, y, info) +!!$ subroutine z_mvect_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: x(:) @@ -1811,10 +1853,10 @@ contains !!$ if (allocated(y%v)) & !!$ & call y%v%mlt(x,info) !!$ -!!$ end subroutine z_vect_mlt_a +!!$ end subroutine z_mvect_mlt_a !!$ !!$ -!!$ subroutine z_vect_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine z_mvect_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta @@ -1828,9 +1870,9 @@ contains !!$ if (allocated(z%v)) & !!$ & call z%v%mlt(alpha,x,y,beta,info) !!$ -!!$ end subroutine z_vect_mlt_a_2 +!!$ end subroutine z_mvect_mlt_a_2 !!$ -!!$ subroutine z_vect_mlt_v_2(alpha,x,y,beta,z,info,conjgx,conjgy) +!!$ subroutine z_mvect_mlt_v_2(alpha,x,y,beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta @@ -1847,9 +1889,9 @@ contains !!$ & allocated(z%v)) & !!$ & call z%v%mlt(alpha,x%v,y%v,beta,info,conjgx,conjgy) !!$ -!!$ end subroutine z_vect_mlt_v_2 +!!$ end subroutine z_mvect_mlt_v_2 !!$ -!!$ 
subroutine z_vect_mlt_av(alpha,x,y,beta,z,info) +!!$ subroutine z_mvect_mlt_av(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta @@ -1863,9 +1905,9 @@ contains !!$ if (allocated(z%v).and.allocated(y%v)) & !!$ & call z%v%mlt(alpha,x,y%v,beta,info) !!$ -!!$ end subroutine z_vect_mlt_av +!!$ end subroutine z_mvect_mlt_av !!$ -!!$ subroutine z_vect_mlt_va(alpha,x,y,beta,z,info) +!!$ subroutine z_mvect_mlt_va(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta @@ -1880,9 +1922,9 @@ contains !!$ if (allocated(z%v).and.allocated(x%v)) & !!$ & call z%v%mlt(alpha,x%v,y,beta,info) !!$ -!!$ end subroutine z_vect_mlt_va +!!$ end subroutine z_mvect_mlt_va !!$ -!!$ subroutine z_vect_scal(alpha, x) +!!$ subroutine z_mvect_scal(alpha, x) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_z_multivect_type), intent(inout) :: x @@ -1890,10 +1932,10 @@ contains !!$ !!$ if (allocated(x%v)) call x%v%scal(alpha) !!$ -!!$ end subroutine z_vect_scal +!!$ end subroutine z_mvect_scal !!$ !!$ -!!$ function z_vect_nrm2(n,x) result(res) +!!$ function z_mvect_nrm2(n,x) result(res) !!$ implicit none !!$ class(psb_z_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1905,9 +1947,9 @@ contains !!$ res = dzero !!$ end if !!$ -!!$ end function z_vect_nrm2 +!!$ end function z_mvect_nrm2 !!$ -!!$ function z_vect_amax(n,x) result(res) +!!$ function z_mvect_amax(n,x) result(res) !!$ implicit none !!$ class(psb_z_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1919,9 +1961,9 @@ contains !!$ res = dzero !!$ end if !!$ -!!$ end function z_vect_amax +!!$ end function z_mvect_amax !!$ -!!$ function z_vect_asum(n,x) result(res) +!!$ function z_mvect_asum(n,x) result(res) !!$ implicit none !!$ class(psb_z_multivect_type), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n @@ -1933,6 +1975,6 @@ contains !!$ res = dzero !!$ 
end if !!$ -!!$ end function z_vect_asum +!!$ end function z_mvect_asum end module psb_z_multivect_mod diff --git a/base/modules/tools/psb_c_tools_mod.F90 b/base/modules/tools/psb_c_tools_mod.F90 index 2de8f906..97ee169f 100644 --- a/base/modules/tools/psb_c_tools_mod.F90 +++ b/base/modules/tools/psb_c_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_c_tools_mod end interface interface psb_spasb - subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_cspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_c_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_c_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_cspasb end interface @@ -308,7 +309,7 @@ Module psb_c_tools_mod integer(psb_ipk_), intent(out) :: info logical, intent(in), optional :: rebuild, local end subroutine psb_cspins_csr_lirp -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_cspins_csr_iirp(nr,irw,irp,ja,val,a,desc_a,info,rebuild,local) import implicit none diff --git a/base/modules/tools/psb_cd_tools_mod.F90 b/base/modules/tools/psb_cd_tools_mod.F90 index 8912e96a..579b6d5f 100644 --- a/base/modules/tools/psb_cd_tools_mod.F90 +++ b/base/modules/tools/psb_cd_tools_mod.F90 @@ -104,7 +104,7 @@ module psb_cd_tools_mod end interface interface psb_cdins -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_cdinsrc(nz,ia,ja,desc_a,info,ila,jla) import :: psb_ipk_, psb_lpk_, psb_desc_type type(psb_desc_type), intent(inout) :: desc_a diff --git a/base/modules/tools/psb_d_tools_mod.F90 b/base/modules/tools/psb_d_tools_mod.F90 index 30e45d53..b2ac3a66 100644 --- a/base/modules/tools/psb_d_tools_mod.F90 +++ b/base/modules/tools/psb_d_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_d_tools_mod end interface 
interface psb_spasb - subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_dspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_d_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_d_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_dspasb end interface @@ -308,7 +309,7 @@ Module psb_d_tools_mod integer(psb_ipk_), intent(out) :: info logical, intent(in), optional :: rebuild, local end subroutine psb_dspins_csr_lirp -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_dspins_csr_iirp(nr,irw,irp,ja,val,a,desc_a,info,rebuild,local) import implicit none diff --git a/base/modules/tools/psb_s_tools_mod.F90 b/base/modules/tools/psb_s_tools_mod.F90 index 5d2f8d00..18addeda 100644 --- a/base/modules/tools/psb_s_tools_mod.F90 +++ b/base/modules/tools/psb_s_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_s_tools_mod end interface interface psb_spasb - subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_sspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_s_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_s_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_sspasb end interface @@ -308,7 +309,7 @@ Module psb_s_tools_mod integer(psb_ipk_), intent(out) :: info logical, intent(in), optional :: rebuild, local end subroutine psb_sspins_csr_lirp -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_sspins_csr_iirp(nr,irw,irp,ja,val,a,desc_a,info,rebuild,local) import implicit none diff --git a/base/modules/tools/psb_z_tools_mod.F90 
b/base/modules/tools/psb_z_tools_mod.F90 index 9d6bd77b..fd9d5e22 100644 --- a/base/modules/tools/psb_z_tools_mod.F90 +++ b/base/modules/tools/psb_z_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_z_tools_mod end interface interface psb_spasb - subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_zspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_z_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_z_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_zspasb end interface @@ -308,7 +309,7 @@ Module psb_z_tools_mod integer(psb_ipk_), intent(out) :: info logical, intent(in), optional :: rebuild, local end subroutine psb_zspins_csr_lirp -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_zspins_csr_iirp(nr,irw,irp,ja,val,a,desc_a,info,rebuild,local) import implicit none diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index da3dd93b..da945849 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -299,7 +299,7 @@ end subroutine psb_caxpby_vect_out ! 
subroutine psb_caxpby(alpha, x, beta,y,desc_a,info, n, jx, jy) use psb_base_mod, psb_protect_name => psb_caxpby - + use psi_c_serial_mod implicit none integer(psb_ipk_), intent(in), optional :: n, jx, jy @@ -384,9 +384,9 @@ subroutine psb_caxpby(alpha, x, beta,y,desc_a,info, n, jx, jy) if ((in /= 0)) then if(desc_a%get_local_rows() > 0) then - call caxpby(desc_a%get_local_cols(),in,& - & alpha,x(iix:,jjx),lldx,beta,& - & y(iiy:,jjy),lldy,info) + call psi_caxpby(desc_a%get_local_cols(),in,& + & alpha,x(iix:,jjx:),beta,& + & y(iiy:,jjy:),info) end if end if @@ -510,9 +510,8 @@ subroutine psb_caxpbyv(alpha, x, beta,y,desc_a,info) end if if(desc_a%get_local_rows() > 0) then - call caxpby(desc_a%get_local_cols(),ione,& - & alpha,x,lldx,beta,& - & y,lldy,info) + call psb_geaxpby(desc_a%get_local_cols(),& + & alpha,x,beta,y,info) end if call psb_erractionrestore(err_act) @@ -642,9 +641,8 @@ subroutine psb_caxpbyvout(alpha, x, beta,y, z, desc_a,info) end if if(desc_a%get_local_rows() > 0) then - call caxpbyv2(desc_a%get_local_cols(),ione,& - & alpha,x,lldx,beta,& - & y,lldy,z,lldz,info) + call psb_geaxpby(desc_a%get_local_cols(),& + & alpha,x,beta,y,z,info) end if call psb_erractionrestore(err_act) @@ -741,3 +739,86 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info) return end subroutine psb_caddconst_vect + + +subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect + implicit none + type(psb_c_vect_type), intent (inout) :: x + type(psb_c_vect_type), intent (inout) :: y + type(psb_c_vect_type), intent (inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! 
locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_c_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_c_upd_xyz_vect + diff --git a/base/psblas/psb_cgetmatinfo.F90 b/base/psblas/psb_cgetmatinfo.F90 index fdfb0cba..66db7693 100644 --- a/base/psblas/psb_cgetmatinfo.F90 +++ b/base/psblas/psb_cgetmatinfo.F90 @@ -37,11 +37,11 @@ function psb_cget_nnz(a,desc_a,info) result(res) use psb_base_mod, psb_protect_name => psb_cget_nnz use psi_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use 
mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -54,7 +54,6 @@ function psb_cget_nnz(a,desc_a,info) result(res) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iia, jja - integer(psb_lpk_) :: localnnz character(len=20) :: name, ch_err ! name='psb_cget_nnz' @@ -72,9 +71,9 @@ function psb_cget_nnz(a,desc_a,info) result(res) goto 9999 endif - localnnz = a%get_nzeros() + res = a%get_nzeros() - call psb_sum(ctxt,localnnz) + call psb_sum(ctxt,res) call psb_erractionrestore(err_act) return diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index fd8a9c39..777ade06 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_cspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,14 +194,46 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! 
Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + + if (allocated(a%ad)) then + block + logical, parameter :: do_timings=.false. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,cone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + + else + block + logical, parameter :: do_timings=.false. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -311,9 +359,9 @@ subroutine psb_cspmm(alpha,a,x,beta,y,desc_a,info,& ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, i, ib, ib1, ip, idx, ik + integer(psb_mpk_) :: np, me, ib1, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, i, ib, ip, idx integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik integer(psb_ipk_), parameter :: nb=4 complex(psb_spk_), pointer :: xp(:,:), yp(:,:), iwork(:) @@ -551,7 +599,7 @@ subroutine psb_cspmm(alpha,a,x,beta,y,desc_a,info,& if (doswap_)then ik = lik ! This should not be an issue, we are expecting the values - ! to be small, within IPK + ! to be small, within PSB_IPK call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& & ik,cone,y(:,1:ik),desc_a,iwork,info) if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& @@ -659,9 +707,9 @@ subroutine psb_cspmv(alpha,a,x,beta,y,desc_a,info,& ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, ib, ip, idx, ik + integer(psb_mpk_) :: np, me, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, ib, ip, idx integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik, jx, jy integer(psb_ipk_), parameter :: nb=4 complex(psb_spk_), pointer :: iwork(:), xp(:), yp(:) diff --git a/base/psblas/psb_cspsm.f90 b/base/psblas/psb_cspsm.f90 index da99b8e9..9787b2d8 100644 --- a/base/psblas/psb_cspsm.f90 +++ b/base/psblas/psb_cspsm.f90 @@ -291,9 +291,9 @@ subroutine psb_cspsm(alpha,a,x,beta,y,desc_a,info,& ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, lldx,lldy, choice_,& - & ik, i, lld, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm + integer(psb_mpk_) :: np, me, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, lldx,lldy, choice_,& + & i, lld, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik character :: lscale @@ -447,7 +447,7 @@ subroutine psb_cspsm(alpha,a,x,beta,y,desc_a,info,& end if ik = lik ! This should not be a problem. - ! We expect ik to be small, well within IPK + ! We expect ik to be small, well within PSB_IPK ! Perform local triangular system solve xp => x(iix:lldx,jjx:jjx+ik-1) yp => y(iiy:lldy,jjy:jjy+ik-1) diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index c386f8f2..f86b7fe9 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -299,7 +299,7 @@ end subroutine psb_daxpby_vect_out ! subroutine psb_daxpby(alpha, x, beta,y,desc_a,info, n, jx, jy) use psb_base_mod, psb_protect_name => psb_daxpby - + use psi_d_serial_mod implicit none integer(psb_ipk_), intent(in), optional :: n, jx, jy @@ -384,9 +384,9 @@ subroutine psb_daxpby(alpha, x, beta,y,desc_a,info, n, jx, jy) if ((in /= 0)) then if(desc_a%get_local_rows() > 0) then - call daxpby(desc_a%get_local_cols(),in,& - & alpha,x(iix:,jjx),lldx,beta,& - & y(iiy:,jjy),lldy,info) + call psi_daxpby(desc_a%get_local_cols(),in,& + & alpha,x(iix:,jjx:),beta,& + & y(iiy:,jjy:),info) end if end if @@ -510,9 +510,8 @@ subroutine psb_daxpbyv(alpha, x, beta,y,desc_a,info) end if if(desc_a%get_local_rows() > 0) then - call daxpby(desc_a%get_local_cols(),ione,& - & alpha,x,lldx,beta,& - & y,lldy,info) + call psb_geaxpby(desc_a%get_local_cols(),& + & alpha,x,beta,y,info) end if call psb_erractionrestore(err_act) @@ -642,9 +641,8 @@ subroutine psb_daxpbyvout(alpha, x, beta,y, z, desc_a,info) end if if(desc_a%get_local_rows() > 0) then - call 
daxpbyv2(desc_a%get_local_cols(),ione,& - & alpha,x,lldx,beta,& - & y,lldy,z,lldz,info) + call psb_geaxpby(desc_a%get_local_cols(),& + & alpha,x,beta,y,z,info) end if call psb_erractionrestore(err_act) @@ -741,3 +739,86 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info) return end subroutine psb_daddconst_vect + + +subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect + implicit none + type(psb_d_vect_type), intent (inout) :: x + type(psb_d_vect_type), intent (inout) :: y + type(psb_d_vect_type), intent (inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_d_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() + + ! 
check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_d_upd_xyz_vect + diff --git a/base/psblas/psb_dgetmatinfo.F90 b/base/psblas/psb_dgetmatinfo.F90 index 16a1d3ca..9b327011 100644 --- a/base/psblas/psb_dgetmatinfo.F90 +++ b/base/psblas/psb_dgetmatinfo.F90 @@ -37,11 +37,11 @@ function psb_dget_nnz(a,desc_a,info) result(res) use psb_base_mod, psb_protect_name => psb_dget_nnz use psi_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -54,7 +54,6 @@ function psb_dget_nnz(a,desc_a,info) result(res) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iia, jja - integer(psb_lpk_) :: localnnz character(len=20) :: name, ch_err ! 
name='psb_dget_nnz' @@ -72,9 +71,9 @@ function psb_dget_nnz(a,desc_a,info) result(res) goto 9999 endif - localnnz = a%get_nzeros() + res = a%get_nzeros() - call psb_sum(ctxt,localnnz) + call psb_sum(ctxt,res) call psb_erractionrestore(err_act) return diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index a006c7e9..ca7e7c56 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_dspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,14 +194,46 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + + if (allocated(a%ad)) then + block + logical, parameter :: do_timings=.false. 
+ real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,done,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + + else + block + logical, parameter :: do_timings=.false. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -311,9 +359,9 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,& ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, i, ib, ib1, ip, idx, ik + integer(psb_mpk_) :: np, me, ib1, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, i, ib, ip, idx integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik integer(psb_ipk_), parameter :: nb=4 real(psb_dpk_), pointer :: xp(:,:), yp(:,:), iwork(:) @@ -551,7 +599,7 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,& if (doswap_)then ik = lik ! This should not be an issue, we are expecting the values - ! to be small, within IPK + ! to be small, within PSB_IPK call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& & ik,done,y(:,1:ik),desc_a,iwork,info) if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& @@ -659,9 +707,9 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,& ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, ib, ip, idx, ik + integer(psb_mpk_) :: np, me, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, ib, ip, idx integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik, jx, jy integer(psb_ipk_), parameter :: nb=4 real(psb_dpk_), pointer :: iwork(:), xp(:), yp(:) diff --git a/base/psblas/psb_dspsm.f90 b/base/psblas/psb_dspsm.f90 index 9e5eeafc..e4010b01 100644 --- a/base/psblas/psb_dspsm.f90 +++ b/base/psblas/psb_dspsm.f90 @@ -291,9 +291,9 @@ subroutine psb_dspsm(alpha,a,x,beta,y,desc_a,info,& ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, lldx,lldy, choice_,& - & ik, i, lld, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm + integer(psb_mpk_) :: np, me, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, lldx,lldy, choice_,& + & i, lld, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik character :: lscale @@ -447,7 +447,7 @@ subroutine psb_dspsm(alpha,a,x,beta,y,desc_a,info,& end if ik = lik ! This should not be a problem. - ! We expect ik to be small, well within IPK + ! We expect ik to be small, well within PSB_IPK ! Perform local triangular system solve xp => x(iix:lldx,jjx:jjx+ik-1) yp => y(iiy:lldy,jjy:jjy+ik-1) diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 78f4d01a..61f71b5a 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -299,7 +299,7 @@ end subroutine psb_saxpby_vect_out ! subroutine psb_saxpby(alpha, x, beta,y,desc_a,info, n, jx, jy) use psb_base_mod, psb_protect_name => psb_saxpby - + use psi_s_serial_mod implicit none integer(psb_ipk_), intent(in), optional :: n, jx, jy @@ -384,9 +384,9 @@ subroutine psb_saxpby(alpha, x, beta,y,desc_a,info, n, jx, jy) if ((in /= 0)) then if(desc_a%get_local_rows() > 0) then - call saxpby(desc_a%get_local_cols(),in,& - & alpha,x(iix:,jjx),lldx,beta,& - & y(iiy:,jjy),lldy,info) + call psi_saxpby(desc_a%get_local_cols(),in,& + & alpha,x(iix:,jjx:),beta,& + & y(iiy:,jjy:),info) end if end if @@ -510,9 +510,8 @@ subroutine psb_saxpbyv(alpha, x, beta,y,desc_a,info) end if if(desc_a%get_local_rows() > 0) then - call saxpby(desc_a%get_local_cols(),ione,& - & alpha,x,lldx,beta,& - & y,lldy,info) + call psb_geaxpby(desc_a%get_local_cols(),& + & alpha,x,beta,y,info) end if call psb_erractionrestore(err_act) @@ -642,9 +641,8 @@ subroutine psb_saxpbyvout(alpha, x, beta,y, z, desc_a,info) end if if(desc_a%get_local_rows() > 0) then - call 
saxpbyv2(desc_a%get_local_cols(),ione,& - & alpha,x,lldx,beta,& - & y,lldy,z,lldz,info) + call psb_geaxpby(desc_a%get_local_cols(),& + & alpha,x,beta,y,z,info) end if call psb_erractionrestore(err_act) @@ -741,3 +739,86 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info) return end subroutine psb_saddconst_vect + + +subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect + implicit none + type(psb_s_vect_type), intent (inout) :: x + type(psb_s_vect_type), intent (inout) :: y + type(psb_s_vect_type), intent (inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_s_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() + + ! 
check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_s_upd_xyz_vect + diff --git a/base/psblas/psb_sgetmatinfo.F90 b/base/psblas/psb_sgetmatinfo.F90 index abf1210c..5d5ae9c4 100644 --- a/base/psblas/psb_sgetmatinfo.F90 +++ b/base/psblas/psb_sgetmatinfo.F90 @@ -37,11 +37,11 @@ function psb_sget_nnz(a,desc_a,info) result(res) use psb_base_mod, psb_protect_name => psb_sget_nnz use psi_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -54,7 +54,6 @@ function psb_sget_nnz(a,desc_a,info) result(res) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iia, jja - integer(psb_lpk_) :: localnnz character(len=20) :: name, ch_err ! 
name='psb_sget_nnz' @@ -72,9 +71,9 @@ function psb_sget_nnz(a,desc_a,info) result(res) goto 9999 endif - localnnz = a%get_nzeros() + res = a%get_nzeros() - call psb_sum(ctxt,localnnz) + call psb_sum(ctxt,res) call psb_erractionrestore(err_act) return diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 43ee0d48..7f680934 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_sspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,14 +194,46 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + + if (allocated(a%ad)) then + block + logical, parameter :: do_timings=.false. 
+ real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,sone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + + else + block + logical, parameter :: do_timings=.false. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -311,9 +359,9 @@ subroutine psb_sspmm(alpha,a,x,beta,y,desc_a,info,& ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, i, ib, ib1, ip, idx, ik + integer(psb_mpk_) :: np, me, ib1, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, i, ib, ip, idx integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik integer(psb_ipk_), parameter :: nb=4 real(psb_spk_), pointer :: xp(:,:), yp(:,:), iwork(:) @@ -551,7 +599,7 @@ subroutine psb_sspmm(alpha,a,x,beta,y,desc_a,info,& if (doswap_)then ik = lik ! This should not be an issue, we are expecting the values - ! to be small, within IPK + ! to be small, within PSB_IPK call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& & ik,sone,y(:,1:ik),desc_a,iwork,info) if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& @@ -659,9 +707,9 @@ subroutine psb_sspmv(alpha,a,x,beta,y,desc_a,info,& ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, ib, ip, idx, ik + integer(psb_mpk_) :: np, me, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, ib, ip, idx integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik, jx, jy integer(psb_ipk_), parameter :: nb=4 real(psb_spk_), pointer :: iwork(:), xp(:), yp(:) diff --git a/base/psblas/psb_sspsm.f90 b/base/psblas/psb_sspsm.f90 index 522d4bd9..c354569b 100644 --- a/base/psblas/psb_sspsm.f90 +++ b/base/psblas/psb_sspsm.f90 @@ -291,9 +291,9 @@ subroutine psb_sspsm(alpha,a,x,beta,y,desc_a,info,& ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, lldx,lldy, choice_,& - & ik, i, lld, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm + integer(psb_mpk_) :: np, me, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, lldx,lldy, choice_,& + & i, lld, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik character :: lscale @@ -447,7 +447,7 @@ subroutine psb_sspsm(alpha,a,x,beta,y,desc_a,info,& end if ik = lik ! This should not be a problem. - ! We expect ik to be small, well within IPK + ! We expect ik to be small, well within PSB_IPK ! Perform local triangular system solve xp => x(iix:lldx,jjx:jjx+ik-1) yp => y(iiy:lldy,jjy:jjy+ik-1) diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 2258f38f..e47a871d 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -299,7 +299,7 @@ end subroutine psb_zaxpby_vect_out ! subroutine psb_zaxpby(alpha, x, beta,y,desc_a,info, n, jx, jy) use psb_base_mod, psb_protect_name => psb_zaxpby - + use psi_z_serial_mod implicit none integer(psb_ipk_), intent(in), optional :: n, jx, jy @@ -384,9 +384,9 @@ subroutine psb_zaxpby(alpha, x, beta,y,desc_a,info, n, jx, jy) if ((in /= 0)) then if(desc_a%get_local_rows() > 0) then - call zaxpby(desc_a%get_local_cols(),in,& - & alpha,x(iix:,jjx),lldx,beta,& - & y(iiy:,jjy),lldy,info) + call psi_zaxpby(desc_a%get_local_cols(),in,& + & alpha,x(iix:,jjx:),beta,& + & y(iiy:,jjy:),info) end if end if @@ -510,9 +510,8 @@ subroutine psb_zaxpbyv(alpha, x, beta,y,desc_a,info) end if if(desc_a%get_local_rows() > 0) then - call zaxpby(desc_a%get_local_cols(),ione,& - & alpha,x,lldx,beta,& - & y,lldy,info) + call psb_geaxpby(desc_a%get_local_cols(),& + & alpha,x,beta,y,info) end if call psb_erractionrestore(err_act) @@ -642,9 +641,8 @@ subroutine psb_zaxpbyvout(alpha, x, beta,y, z, desc_a,info) end if if(desc_a%get_local_rows() > 0) then - call 
zaxpbyv2(desc_a%get_local_cols(),ione,& - & alpha,x,lldx,beta,& - & y,lldy,z,lldz,info) + call psb_geaxpby(desc_a%get_local_cols(),& + & alpha,x,beta,y,z,info) end if call psb_erractionrestore(err_act) @@ -741,3 +739,86 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info) return end subroutine psb_zaddconst_vect + + +subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect + implicit none + type(psb_z_vect_type), intent (inout) :: x + type(psb_z_vect_type), intent (inout) :: y + type(psb_z_vect_type), intent (inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_z_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() + + ! 
check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_z_upd_xyz_vect + diff --git a/base/psblas/psb_zgetmatinfo.F90 b/base/psblas/psb_zgetmatinfo.F90 index fab395f2..7cc3bfae 100644 --- a/base/psblas/psb_zgetmatinfo.F90 +++ b/base/psblas/psb_zgetmatinfo.F90 @@ -37,11 +37,11 @@ function psb_zget_nnz(a,desc_a,info) result(res) use psb_base_mod, psb_protect_name => psb_zget_nnz use psi_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -54,7 +54,6 @@ function psb_zget_nnz(a,desc_a,info) result(res) type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iia, jja - integer(psb_lpk_) :: localnnz character(len=20) :: name, ch_err ! 
name='psb_zget_nnz' @@ -72,9 +71,9 @@ function psb_zget_nnz(a,desc_a,info) result(res) goto 9999 endif - localnnz = a%get_nzeros() + res = a%get_nzeros() - call psb_sum(ctxt,localnnz) + call psb_sum(ctxt,res) call psb_erractionrestore(err_act) return diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index b58ca303..b7fc5cc6 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_zspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,14 +194,46 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + + if (allocated(a%ad)) then + block + logical, parameter :: do_timings=.false. 
+ real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,zone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + + else + block + logical, parameter :: do_timings=.false. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) @@ -311,9 +359,9 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,& ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, i, ib, ib1, ip, idx, ik + integer(psb_mpk_) :: np, me, ib1, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, i, ib, ip, idx integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik integer(psb_ipk_), parameter :: nb=4 complex(psb_dpk_), pointer :: xp(:,:), yp(:,:), iwork(:) @@ -551,7 +599,7 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,& if (doswap_)then ik = lik ! This should not be an issue, we are expecting the values - ! to be small, within IPK + ! to be small, within PSB_IPK call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),& & ik,zone,y(:,1:ik),desc_a,iwork,info) if (info == psb_success_) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& @@ -659,9 +707,9 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,& ! locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & - & liwork, iiy, jjy, ib, ip, idx, ik + integer(psb_mpk_) :: np, me, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, nrow, ncol, lldx, lldy, & + & liwork, iiy, jjy, ib, ip, idx integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik, jx, jy integer(psb_ipk_), parameter :: nb=4 complex(psb_dpk_), pointer :: iwork(:), xp(:), yp(:) diff --git a/base/psblas/psb_zspsm.f90 b/base/psblas/psb_zspsm.f90 index 80fbfb56..3cb06b02 100644 --- a/base/psblas/psb_zspsm.f90 +++ b/base/psblas/psb_zspsm.f90 @@ -291,9 +291,9 @@ subroutine psb_zspsm(alpha,a,x,beta,y,desc_a,info,& ! 
locals type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iia, jja, lldx,lldy, choice_,& - & ik, i, lld, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm + integer(psb_mpk_) :: np, me, ik + integer(psb_ipk_) :: err_act, iix, jjx, iia, jja, lldx,lldy, choice_,& + & i, lld, nrow, ncol, liwork, llwork, iiy, jjy, idx, ndm integer(psb_lpk_) :: ix, ijx, iy, ijy, m, n, ia, ja, lik character :: lscale @@ -447,7 +447,7 @@ subroutine psb_zspsm(alpha,a,x,beta,y,desc_a,info,& end if ik = lik ! This should not be a problem. - ! We expect ik to be small, well within IPK + ! We expect ik to be small, well within PSB_IPK ! Perform local triangular system solve xp => x(iix:lldx,jjx:jjx+ik-1) yp => y(iiy:lldy,jjy:jjy+ik-1) diff --git a/base/serial/impl/psb_base_mat_impl.f90 b/base/serial/impl/psb_base_mat_impl.f90 index 4ab03086..39e21300 100644 --- a/base/serial/impl/psb_base_mat_impl.f90 +++ b/base/serial/impl/psb_base_mat_impl.f90 @@ -1,3 +1,34 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! function psb_base_get_nz_row(idx,a) result(res) use psb_error_mod use psb_base_mat_mod, psb_protect_name => psb_base_get_nz_row @@ -136,7 +167,6 @@ subroutine psb_base_csgetptn(imin,imax,a,nz,ia,ja,info,& ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_base_csgetptn @@ -223,7 +253,7 @@ subroutine psb_base_allocate_mnnz(m,n,a,nz) integer(psb_ipk_), intent(in) :: m,n class(psb_base_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: err_act, info character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. @@ -231,8 +261,8 @@ subroutine psb_base_allocate_mnnz(m,n,a,nz) ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. 
- call psb_errpush(psb_err_missing_override_method_,name,a_err=a%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=a%get_fmt()) call psb_error_handler(err_act) end subroutine psb_base_allocate_mnnz @@ -242,7 +272,7 @@ subroutine psb_base_reallocate_nz(nz,a) implicit none integer(psb_ipk_), intent(in) :: nz class(psb_base_sparse_mat), intent(inout) :: a - integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: err_act, info character(len=20) :: name='reallocate_nz' logical, parameter :: debug=.false. @@ -250,8 +280,8 @@ subroutine psb_base_reallocate_nz(nz,a) ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,name,a_err=a%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=a%get_fmt()) call psb_error_handler(err_act) end subroutine psb_base_reallocate_nz @@ -260,7 +290,7 @@ subroutine psb_base_free(a) use psb_error_mod implicit none class(psb_base_sparse_mat), intent(inout) :: a - integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: err_act, info character(len=20) :: name='free' logical, parameter :: debug=.false. @@ -268,8 +298,8 @@ subroutine psb_base_free(a) ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,name,a_err=a%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=a%get_fmt()) call psb_error_handler(err_act) end subroutine psb_base_free @@ -278,7 +308,7 @@ subroutine psb_base_trim(a) use psb_error_mod implicit none class(psb_base_sparse_mat), intent(inout) :: a - integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: err_act, info character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -353,7 +383,6 @@ function psb_lbase_get_size(a) result(res) ! it means the derived class is incomplete, ! so we throw an error. 
call psb_errpush(psb_err_missing_override_method_,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end function psb_lbase_get_size @@ -370,12 +399,11 @@ subroutine psb_lbase_reinit(a,clear) logical, parameter :: debug=.false. call psb_get_erraction(err_act) - info = psb_err_missing_override_method_ ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,name,a_err=a%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=a%get_fmt()) call psb_error_handler(err_act) end subroutine psb_lbase_reinit @@ -395,12 +423,11 @@ subroutine psb_lbase_sparse_print(iout,a,iv,head,ivr,ivc) logical, parameter :: debug=.false. call psb_get_erraction(err_act) - info = psb_err_missing_override_method_ ! This is the base version. If we get here ! it means the derived class is incomplete, ! so we throw an error. - call psb_errpush(psb_err_missing_override_method_,name,a_err=a%get_fmt()) - + info = psb_err_missing_override_method_ + call psb_errpush(info,name,a_err=a%get_fmt()) call psb_error_handler(err_act) end subroutine psb_lbase_sparse_print @@ -431,7 +458,6 @@ subroutine psb_lbase_csgetptn(imin,imax,a,nz,ia,ja,info,& ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lbase_csgetptn @@ -528,7 +554,6 @@ subroutine psb_lbase_allocate_mnnz(m,n,a,nz) ! it means the derived class is incomplete, ! so we throw an error. call psb_errpush(psb_err_missing_override_method_,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lbase_allocate_mnnz @@ -547,7 +572,6 @@ subroutine psb_lbase_reallocate_nz(nz,a) ! it means the derived class is incomplete, ! so we throw an error. 
call psb_errpush(psb_err_missing_override_method_,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lbase_reallocate_nz @@ -565,7 +589,6 @@ subroutine psb_lbase_free(a) ! it means the derived class is incomplete, ! so we throw an error. call psb_errpush(psb_err_missing_override_method_,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lbase_free diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index 17f2cdc8..25cd290a 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -60,7 +60,6 @@ subroutine psb_c_base_cp_to_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_cp_to_coo @@ -84,7 +83,6 @@ subroutine psb_c_base_cp_from_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_cp_from_coo @@ -344,7 +342,6 @@ subroutine psb_c_base_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_csput_a @@ -420,7 +417,6 @@ subroutine psb_c_base_csgetrow(imin,imax,a,nz,ia,ja,val,info,& ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_csgetrow @@ -993,7 +989,6 @@ subroutine psb_c_base_mold(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_mold @@ -1168,7 +1163,6 @@ subroutine psb_c_base_csmm(alpha,a,x,beta,y,info,trans) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_csmm @@ -1194,7 +1188,6 @@ subroutine psb_c_base_csmv(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) @@ -1221,7 +1214,6 @@ subroutine psb_c_base_inner_cssm(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_inner_cssm @@ -1247,7 +1239,6 @@ subroutine psb_c_base_inner_cssv(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_inner_cssv @@ -1549,7 +1540,6 @@ subroutine psb_c_base_scals(d,a,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_scals @@ -1618,7 +1608,6 @@ subroutine psb_c_base_scal(d,a,info,side) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_scal @@ -1643,7 +1632,6 @@ function psb_c_base_maxval(a) result(res) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end function psb_c_base_maxval @@ -1742,7 +1730,6 @@ subroutine psb_c_base_rowsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_rowsum @@ -1764,7 +1751,6 @@ subroutine psb_c_base_arwsum(d,a) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_arwsum @@ -1786,7 +1772,6 @@ subroutine psb_c_base_colsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_colsum @@ -1808,7 +1793,6 @@ subroutine psb_c_base_aclsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_aclsum @@ -1833,7 +1817,6 @@ subroutine psb_c_base_get_diag(a,d,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_c_base_get_diag @@ -2006,8 +1989,8 @@ subroutine psb_c_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. 
- call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_c_base_vect_mv @@ -2060,8 +2043,8 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2082,6 +2065,7 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) @@ -2161,8 +2145,11 @@ subroutine psb_c_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ @@ -2543,7 +2530,6 @@ subroutine psb_lc_base_cp_to_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_cp_to_coo @@ -2567,7 +2553,6 @@ subroutine psb_lc_base_cp_from_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_cp_from_coo @@ -2827,7 +2812,6 @@ subroutine psb_lc_base_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_csput_a @@ -2904,7 +2888,6 @@ subroutine psb_lc_base_csgetrow(imin,imax,a,nz,ia,ja,val,info,& ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_csgetrow @@ -3486,7 +3469,6 @@ subroutine psb_lc_base_mold(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_mold @@ -3644,7 +3626,6 @@ subroutine psb_lc_base_scals(d,a,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_scals @@ -3713,7 +3694,6 @@ subroutine psb_lc_base_scal(d,a,info,side) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_scal @@ -3738,7 +3718,6 @@ function psb_lc_base_maxval(a) result(res) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end function psb_lc_base_maxval @@ -3834,7 +3813,6 @@ subroutine psb_lc_base_rowsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_rowsum @@ -3856,7 +3834,6 @@ subroutine psb_lc_base_arwsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_arwsum @@ -3878,7 +3855,6 @@ subroutine psb_lc_base_colsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_colsum @@ -3900,7 +3876,6 @@ subroutine psb_lc_base_aclsum(d,a) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_aclsum @@ -4064,7 +4039,6 @@ subroutine psb_lc_base_get_diag(a,d,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lc_base_get_diag diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index d6117546..025b6aa3 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -257,7 +257,7 @@ subroutine psb_c_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined (OPENMP) +#if defined (PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -368,7 +368,7 @@ function psb_c_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined (OPENMP) +#if defined (PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -595,12 +595,13 @@ subroutine psb_c_coo_clean_zeros(a, info) integer(psb_ipk_), intent(out) :: info ! integer(psb_ipk_) :: i,j,k, nzin - + info = 0 nzin = a%get_nzeros() j = 0 do i=1, nzin - if (a%val(i) /= czero) then + ! 
Always keep the diagonal, even if numerically zero + if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then j = j + 1 a%val(j) = a%val(i) a%ia(j) = a%ia(i) @@ -608,6 +609,7 @@ subroutine psb_c_coo_clean_zeros(a, info) end if end do call a%set_nzeros(j) + call a%fix(info) call a%trim() end subroutine psb_c_coo_clean_zeros @@ -1928,7 +1930,7 @@ function psb_c_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = szero !$omp parallel do private(i) reduction(max: res) do i=1, nnz @@ -2818,7 +2820,7 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_realloc_mod use psb_sort_mod use psb_c_base_mat_mod, psb_protect_name => psb_c_coo_csput_a -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -2867,29 +2869,42 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (a%is_bld()) then ! Structure here is peculiar, because this function can be called ! either within a parallel region, or outside. - ! Hence the call to set_nzeros done here. - !$omp critical + ! Hence the call to set_nzeros done here. +#if defined(PSB_OPENMP) + !$omp critical(c_coo_csput_a) +#endif nza = a%get_nzeros() nzaold = nza isza = a%get_size() + if (info /= 0) write(0,*) name,' point 0:',info,isza,nza,nz ! Build phase. Must handle reallocations in a sensible way. 
if (isza < (nza+nz)) then + !write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info call a%reallocate(max(nza+nz,int(1.5*isza))) + !write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info endif isza = a%get_size() if (isza < (nza+nz)) then - info = psb_err_alloc_dealloc_; call psb_errpush(info,name) + info = psb_err_alloc_dealloc_; + write(0,*) name,' point 1:',info,isza,nza,nz,nza+nz + call psb_errpush(info,name) else -#if defined(OPENMP) +#if defined(PSB_OPENMP) nza = nza + nz #endif call a%set_nzeros(nza) end if - !$omp end critical - if (info /= 0) goto 9999 +#if defined(PSB_OPENMP) + if (info /= 0) write(0,*) name,' point 1.5:',info + !$omp end critical(c_coo_csput_a) +#endif + if (info /= 0) then + write(0,*) name,' point 2:',info + goto 9999 + end if call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) -#if !defined(OPENMP) +#if !defined(PSB_OPENMP) nza = nzaold call a%set_nzeros(nza) #endif @@ -2944,14 +2959,16 @@ contains integer(psb_ipk_) :: i,ir,ic info = psb_success_ -#if defined(OPENMP) +#if defined(PSB_OPENMP) + ! Disabling OpenMP parallel do for the time being. + ! Will need to redesign the entire code stack ! The logic here is different from the one used for ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. ! These values will be filtered in a future fixing process. - !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & - !$OMP private(ir,ic,i) + ! $ O M P PARALLEL DO schedule(STATIC) & + ! $ O M P shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & + ! $ O M P private(ir,ic,i) do i=1,nz ir = ia(i) ic = ja(i) @@ -2965,7 +2982,7 @@ contains aspk(nza+i) = -1 end if end do - !$OMP END PARALLEL DO + ! 
$ O M P END PARALLEL DO nza = nza + nz #else do i=1, nz @@ -3129,7 +3146,7 @@ subroutine psb_c_cp_coo_to_coo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -3182,7 +3199,7 @@ subroutine psb_c_cp_coo_from_coo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -3568,7 +3585,7 @@ subroutine psb_c_coo_tril(a,l,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -3864,7 +3881,7 @@ subroutine psb_c_coo_triu(a,u,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -4154,7 +4171,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) use psb_string_mod use psb_ip_reord_mod use psb_sort_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -4172,7 +4189,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' logical :: srt_inp, use_buffers -#if defined(OPENMP) +#if defined(PSB_OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads #endif @@ -4200,7 +4217,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl -#if defined(OPENMP) +#if defined(PSB_OPENMP) maxthreads = omp_get_max_threads() ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. 
Since each thread handles one @@ -4214,7 +4231,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4256,7 +4273,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use psb_string_mod use psb_ip_reord_mod use psb_sort_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -4274,7 +4291,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf character(len=20) :: name = 'psb_fixcoo' logical :: srt_inp, use_buffers real(psb_dpk_) :: t0, t1 -#if defined(OPENMP) +#if defined(PSB_OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) @@ -4289,7 +4306,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Row major order if (nr <= nzin) then ! Avoid strange situations with large indices -#if defined(OPENMP) +#if defined(PSB_OPENMP) ! We are not going to need 'ix2' because of the presence ! of 'idxaux' as auxiliary buffer. allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) @@ -4302,7 +4319,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if !if (use_buffers) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp workshare iaux(:) = 0 !$omp end workshare @@ -4356,7 +4373,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! all the indices are valid ! Check again use_buffers. 
if (use_buffers) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) maxthreads = omp_get_max_threads() allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then @@ -4730,7 +4747,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf call psi_msort_up(nzin,ia(1:),iaux(1:),iret) if (iret == 0) & & call psb_ip_reord(nzin,val,ia,ja,iaux) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(none) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & @@ -4920,7 +4937,7 @@ subroutine psb_c_cp_coo_to_lcoo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -4972,7 +4989,7 @@ subroutine psb_c_cp_coo_from_lcoo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5185,7 +5202,7 @@ function psb_lc_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5252,7 +5269,7 @@ function psb_lc_coo_csnmi(a) result(res) i = a%ia(j) vt(i) = vt(i) + abs(a%val(j)) end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5302,7 +5319,7 @@ function psb_lc_coo_csnm1(a) result(res) i = a%ja(j) vt(i) = vt(i) + abs(a%val(j)) end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5585,7 +5602,7 @@ subroutine psb_lc_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! 
Compute the sum -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5696,7 +5713,7 @@ function psb_lc_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5926,12 +5943,13 @@ subroutine psb_lc_coo_clean_zeros(a, info) integer(psb_ipk_), intent(out) :: info ! integer(psb_lpk_) :: i,j,k, nzin - + info = 0 nzin = a%get_nzeros() j = 0 do i=1, nzin - if (a%val(i) /= czero) then + ! Always keep the diagonal, even if numerically zero + if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then j = j + 1 a%val(j) = a%val(i) a%ia(j) = a%ia(i) @@ -5956,7 +5974,7 @@ subroutine psb_lc_coo_clean_negidx(a,info) end subroutine psb_lc_coo_clean_negidx -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lc_coo_clean_negidx_inner(nzin,ia,ja,val,nzout,info) use psb_error_mod use psb_c_base_mat_mod, psb_protect_name => psb_lc_coo_clean_negidx_inner diff --git a/base/serial/impl/psb_c_csc_impl.F90 b/base/serial/impl/psb_c_csc_impl.F90 index 54332d06..94744dcf 100644 --- a/base/serial/impl/psb_c_csc_impl.F90 +++ b/base/serial/impl/psb_c_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_c_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2189,7 +2189,7 @@ subroutine psb_c_mv_csc_from_coo(a,b,info) use psb_error_mod use psb_c_base_mat_mod use psb_c_csc_mat_mod, psb_protect_name => psb_c_mv_csc_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -2226,7 +2226,7 @@ subroutine psb_c_mv_csc_from_coo(a,b,info) call psb_realloc(nc+1,a%icp,info) call b%free() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL 
default(shared) @@ -2328,7 +2328,7 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2403,35 +2403,36 @@ subroutine psb_c_mv_csc_from_fmt(a,b,info) end subroutine psb_c_mv_csc_from_fmt -subroutine psb_c_csc_clean_zeros(a, info) - use psb_error_mod - use psb_c_csc_mat_mod, psb_protect_name => psb_c_csc_clean_zeros - implicit none - class(psb_c_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! - integer(psb_ipk_) :: i, j, k, nc - integer(psb_ipk_), allocatable :: ilcp(:) - - info = 0 - call a%sync() - nc = a%get_ncols() - ilcp = a%icp - a%icp(1) = 1 - j = a%icp(1) - do i=1, nc - do k = ilcp(i), ilcp(i+1) -1 - if (a%val(k) /= czero) then - a%val(j) = a%val(k) - a%ia(j) = a%ia(k) - j = j + 1 - end if - end do - a%icp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_c_csc_clean_zeros +!!$subroutine psb_c_csc_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_c_csc_mat_mod, psb_protect_name => psb_c_csc_clean_zeros +!!$ implicit none +!!$ class(psb_c_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_ipk_) :: i, j, k, nc +!!$ integer(psb_ipk_), allocatable :: ilcp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nc = a%get_ncols() +!!$ ilcp = a%icp +!!$ a%icp(1) = 1 +!!$ j = a%icp(1) +!!$ do i=1, nc +!!$ do k = ilcp(i), ilcp(i+1) -1 +!!$ ! 
Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= czero).or.(i == a%ia(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ia(j) = a%ia(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%icp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_c_csc_clean_zeros subroutine psb_c_cp_csc_from_fmt(a,b,info) use psb_const_mod @@ -2461,7 +2462,7 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4059,7 @@ subroutine psb_lc_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -4304,35 +4305,36 @@ subroutine psb_lc_cp_csc_from_fmt(a,b,info) end subroutine psb_lc_cp_csc_from_fmt -subroutine psb_lc_csc_clean_zeros(a, info) - use psb_error_mod - use psb_c_csc_mat_mod, psb_protect_name => psb_lc_csc_clean_zeros - implicit none - class(psb_lc_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! 
- integer(psb_lpk_) :: i, j, k, nc - integer(psb_lpk_), allocatable :: ilcp(:) - - info = 0 - call a%sync() - nc = a%get_ncols() - ilcp = a%icp - a%icp(1) = 1 - j = a%icp(1) - do i=1, nc - do k = ilcp(i), ilcp(i+1) -1 - if (a%val(k) /= czero) then - a%val(j) = a%val(k) - a%ia(j) = a%ia(k) - j = j + 1 - end if - end do - a%icp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_lc_csc_clean_zeros +!!$subroutine psb_lc_csc_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_c_csc_mat_mod, psb_protect_name => psb_lc_csc_clean_zeros +!!$ implicit none +!!$ class(psb_lc_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_lpk_) :: i, j, k, nc +!!$ integer(psb_lpk_), allocatable :: ilcp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nc = a%get_ncols() +!!$ ilcp = a%icp +!!$ a%icp(1) = 1 +!!$ j = a%icp(1) +!!$ do i=1, nc +!!$ do k = ilcp(i), ilcp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= czero).or.(i == a%ia(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ia(j) = a%ia(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%icp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_lc_csc_clean_zeros subroutine psb_lc_csc_mold(a,b,info) diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index 2028300d..904bc6e7 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -152,7 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -164,7 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -176,7 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd 
reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -192,7 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -204,7 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -216,7 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -231,7 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -243,7 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -255,7 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -270,7 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -282,7 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -294,7 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -2289,7 +2289,7 @@ subroutine psb_c_csr_tril(a,l,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -2591,7 
+2591,7 @@ subroutine psb_c_csr_triu(a,u,info,& endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -3156,7 +3156,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) use psb_realloc_mod use psb_c_base_mat_mod use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -3217,7 +3217,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(shared) reduction(max:info) @@ -3318,7 +3318,7 @@ subroutine psb_c_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3346,7 +3346,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) use psb_error_mod use psb_c_base_mat_mod use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -3385,7 +3385,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(shared) reduction(max:info) @@ -3489,7 +3489,7 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) 
then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -3624,37 +3624,38 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info) end select end subroutine psb_c_cp_csr_from_fmt -subroutine psb_c_csr_clean_zeros(a, info) - use psb_error_mod - use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_clean_zeros - implicit none - class(psb_c_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! - integer(psb_ipk_) :: i, j, k, nr - integer(psb_ipk_), allocatable :: ilrp(:) - - info = 0 - call a%sync() - nr = a%get_nrows() - ilrp = a%irp - a%irp(1) = 1 - j = a%irp(1) - do i=1, nr - do k = ilrp(i), ilrp(i+1) -1 - if (a%val(k) /= czero) then - a%val(j) = a%val(k) - a%ja(j) = a%ja(k) - j = j + 1 - end if - end do - a%irp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_c_csr_clean_zeros - -#if defined(OPENMP) +!!$subroutine psb_c_csr_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_clean_zeros +!!$ implicit none +!!$ class(psb_c_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_ipk_) :: i, j, k, nr +!!$ integer(psb_ipk_), allocatable :: ilrp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nr = a%get_nrows() +!!$ ilrp = a%irp +!!$ a%irp(1) = 1 +!!$ j = a%irp(1) +!!$ do i=1, nr +!!$ do k = ilrp(i), ilrp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= czero).or.(i == a%ja(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ja(j) = a%ja(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%irp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_c_csr_clean_zeros + +#if defined(PSB_OPENMP) subroutine psb_ccsrspspmm(a,b,c,info) use psb_c_mat_mod use psb_serial_mod, psb_protect_name => psb_ccsrspspmm @@ -3692,7 +3693,7 @@ subroutine psb_ccsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. 
nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.5*(nza+nzb)),ma,mb,na,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -3772,8 +3773,8 @@ contains if (nrc > 0 ) then if ((nzc+nrc)>nze) then nze = max(ma*((nzc+j-1)/j),nzc+2*nrc) - call psb_realloc(nze,c%val,info) - if (info == 0) call psb_realloc(nze,c%ja,info) + call psb_ensure_size(nze,c%val,info) + if (info == 0) call psb_ensure_size(nze,c%ja,info) if (info /= 0) return end if @@ -3805,6 +3806,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3817,23 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth,offsets,info) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + allocate(offsets(omp_get_max_threads()),stat=info) + !$omp end single + !$omp end parallel + if (info /= 0) then + write(0,*)'Offsets allocation failed ',info + return + end if - allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3889,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw 
= start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3904,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3943,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4010,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4025,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d @@ -4223,7 +4238,7 @@ subroutine 
psb_ccsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.25*(nza+nzb)),ma,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -4261,9 +4276,9 @@ contains nze = min(size(c%val),size(c%ja)) isz = max(ma,na,mb,nb) - call psb_realloc(isz,row,info) - if (info == 0) call psb_realloc(isz,idxs,info) - if (info == 0) call psb_realloc(isz,irow,info) + call psb_realloc(nb,row,info) + if (info == 0) call psb_realloc(na,idxs,info) + if (info == 0) call psb_realloc(nb,irow,info) if (info /= 0) return row = dzero irow = 0 @@ -4288,8 +4303,8 @@ contains if (nrc > 0 ) then if ((nzc+nrc)>nze) then nze = max(ma*((nzc+j-1)/j),nzc+2*nrc) - call psb_realloc(nze,c%val,info) - if (info == 0) call psb_realloc(nze,c%ja,info) + call psb_ensure_size(nze,c%val,info) + if (info == 0) call psb_ensure_size(nze,c%ja,info) if (info /= 0) return end if @@ -4312,6 +4327,266 @@ contains end subroutine psb_ccsrspspmm #endif +subroutine psb_c_ecsr_mold(a,b,info) + use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold + use psb_error_mod + implicit none + class(psb_c_ecsr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ecsr_mold + +subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_csmv + implicit none + class(psb_c_ecsr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='c_csr_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ecsr_cmp_nerwp + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws +end subroutine psb_c_ecsr_cmp_nerwp + +subroutine psb_c_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_coo + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_cp_ecsr_from_coo + +subroutine psb_c_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_coo + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine 
psb_c_mv_ecsr_from_coo + +subroutine psb_c_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_fmt + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_mv_ecsr_from_fmt + +subroutine psb_c_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_c_base_mat_mod + use psb_realloc_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_fmt + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_cp_ecsr_from_fmt + ! ! ! lc version @@ -6021,7 +6296,7 @@ subroutine psb_lc_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -6273,35 +6548,36 @@ subroutine psb_lc_cp_csr_from_fmt(a,b,info) end subroutine psb_lc_cp_csr_from_fmt -subroutine psb_lc_csr_clean_zeros(a, info) - use psb_error_mod - use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_clean_zeros - implicit none - class(psb_lc_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! 
- integer(psb_lpk_) :: i, j, k, nr - integer(psb_lpk_), allocatable :: ilrp(:) - - info = 0 - call a%sync() - nr = a%get_nrows() - ilrp = a%irp - a%irp(1) = 1 - j = a%irp(1) - do i=1, nr - do k = ilrp(i), ilrp(i+1) -1 - if (a%val(k) /= czero) then - a%val(j) = a%val(k) - a%ja(j) = a%ja(k) - j = j + 1 - end if - end do - a%irp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_lc_csr_clean_zeros +!!$subroutine psb_lc_csr_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_clean_zeros +!!$ implicit none +!!$ class(psb_lc_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_lpk_) :: i, j, k, nr +!!$ integer(psb_lpk_), allocatable :: ilrp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nr = a%get_nrows() +!!$ ilrp = a%irp +!!$ a%irp(1) = 1 +!!$ j = a%irp(1) +!!$ do i=1, nr +!!$ do k = ilrp(i), ilrp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= czero).or.(i == a%ja(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ja(j) = a%ja(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%irp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_lc_csr_clean_zeros subroutine psb_lccsrspspmm(a,b,c,info) use psb_c_mat_mod @@ -6337,7 +6613,7 @@ subroutine psb_lccsrspspmm(a,b,c,info) nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.25*(nza+nzb)),ma,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -6375,9 +6651,9 @@ contains nze = min(size(c%val),size(c%ja)) isz = max(ma,na,mb,nb) - call psb_realloc(isz,row,info) - if (info == 0) call psb_realloc(isz,idxs,info) - if (info == 0) call psb_realloc(isz,irow,info) + call psb_realloc(nb,row,info) + if (info == 0) call psb_realloc(na,idxs,info) + if (info == 0) call psb_realloc(nb,irow,info) if (info /= 0) return row = dzero irow = 0 diff --git a/base/serial/impl/psb_c_mat_impl.F90 
b/base/serial/impl/psb_c_mat_impl.F90 index df5c4cd9..81479084 100644 --- a/base/serial/impl/psb_c_mat_impl.F90 +++ b/base/serial/impl/psb_c_mat_impl.F90 @@ -1213,6 +1213,106 @@ subroutine psb_c_b_csclip(a,b,info,& end subroutine psb_c_b_csclip +subroutine psb_c_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_c_mat_mod, psb_protect_name => psb_c_split_nd + implicit none + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + type(psb_c_coo_sparse_mat) :: acoo + type(psb_c_csr_sparse_mat), allocatable :: aclip + type(psb_c_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) 
+ if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_split_nd + +subroutine psb_c_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_c_mat_mod, psb_protect_name => psb_c_merge_nd + implicit none + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + type(psb_c_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. 
+ character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_merge' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_merge_nd + subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1346,65 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) 
then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_c_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_c_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_c_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. 
- call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1414,79 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_c_base_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_c_base_sparse_mat), intent(in), optional :: mold + + class(psb_c_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) 
+ goto 9999 + end select + else + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_c_cscnv subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1495,12 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) use psb_c_mat_mod, psb_protect_name => psb_c_cscnv_ip implicit none - class(psb_cspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_c_base_sparse_mat), intent(in), optional :: mold - class(psb_c_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1527,55 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) 
then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_c_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_c_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_c_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + call inner_mv_fmt(a%a,info,type,mold,dupl) + if ((info == psb_success_).and.allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if ((info == psb_success_).and.allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call 
psb_erractionrestore(err_act) @@ -1394,6 +1585,77 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_c_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_c_base_sparse_mat), intent(in), optional :: mold + class(psb_c_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. 
+ call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_c_cscnv_ip @@ -2849,7 +3111,7 @@ subroutine psb_lc_set_lnrows(m,a) end subroutine psb_lc_set_lnrows -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lc_set_inrows(m,a) use psb_c_mat_mod, psb_protect_name => psb_lc_set_inrows use psb_error_mod @@ -2906,7 +3168,7 @@ subroutine psb_lc_set_lncols(n,a) end subroutine psb_lc_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lc_set_incols(n,a) use psb_c_mat_mod, psb_protect_name => psb_lc_set_incols use psb_error_mod diff --git a/base/serial/impl/psb_c_rb_idx_tree_impl.F90 b/base/serial/impl/psb_c_rb_idx_tree_impl.F90 index 42704c6e..04730aaa 100644 --- a/base/serial/impl/psb_c_rb_idx_tree_impl.F90 +++ b/base/serial/impl/psb_c_rb_idx_tree_impl.F90 @@ -267,7 +267,7 @@ subroutine psb_c_rb_idx_tree_scalar_sparse_row_mul(tree, scalar, mat, row_num) end subroutine psb_c_rb_idx_tree_scalar_sparse_row_mul subroutine psb_c_rb_idx_tree_merge(trees, mat) -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif use psb_realloc_mod @@ -294,7 +294,7 @@ subroutine psb_c_rb_idx_tree_merge(trees, mat) call psb_realloc(nnz, mat%val, info) call psb_realloc(nnz, mat%ja, info) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do schedule(static), private(current, previous, j) #endif do i = 1, size(trees) @@ -323,7 +323,7 @@ subroutine psb_c_rb_idx_tree_merge(trees, mat) deallocate(previous) end do end do -#if defined(OPENMP) +#if 
defined(PSB_OPENMP) !$omp end parallel do #endif end subroutine psb_c_rb_idx_tree_merge diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 69112529..5f849bea 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -60,7 +60,6 @@ subroutine psb_d_base_cp_to_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_cp_to_coo @@ -84,7 +83,6 @@ subroutine psb_d_base_cp_from_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_cp_from_coo @@ -344,7 +342,6 @@ subroutine psb_d_base_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_csput_a @@ -420,7 +417,6 @@ subroutine psb_d_base_csgetrow(imin,imax,a,nz,ia,ja,val,info,& ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_csgetrow @@ -993,7 +989,6 @@ subroutine psb_d_base_mold(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_mold @@ -1168,7 +1163,6 @@ subroutine psb_d_base_csmm(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_csmm @@ -1194,7 +1188,6 @@ subroutine psb_d_base_csmv(alpha,a,x,beta,y,info,trans) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) @@ -1221,7 +1214,6 @@ subroutine psb_d_base_inner_cssm(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_inner_cssm @@ -1247,7 +1239,6 @@ subroutine psb_d_base_inner_cssv(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_inner_cssv @@ -1549,7 +1540,6 @@ subroutine psb_d_base_scals(d,a,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_scals @@ -1618,7 +1608,6 @@ subroutine psb_d_base_scal(d,a,info,side) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_scal @@ -1643,7 +1632,6 @@ function psb_d_base_maxval(a) result(res) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end function psb_d_base_maxval @@ -1742,7 +1730,6 @@ subroutine psb_d_base_rowsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_rowsum @@ -1764,7 +1751,6 @@ subroutine psb_d_base_arwsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_arwsum @@ -1786,7 +1772,6 @@ subroutine psb_d_base_colsum(d,a) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_colsum @@ -1808,7 +1793,6 @@ subroutine psb_d_base_aclsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_aclsum @@ -1833,7 +1817,6 @@ subroutine psb_d_base_get_diag(a,d,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_d_base_get_diag @@ -2006,8 +1989,8 @@ subroutine psb_d_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_d_base_vect_mv @@ -2060,8 +2043,8 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2082,6 +2065,7 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) @@ -2161,8 +2145,11 @@ subroutine psb_d_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ @@ -2543,7 +2530,6 @@ subroutine psb_ld_base_cp_to_coo(a,b,info) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_cp_to_coo @@ -2567,7 +2553,6 @@ subroutine psb_ld_base_cp_from_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_cp_from_coo @@ -2827,7 +2812,6 @@ subroutine psb_ld_base_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_csput_a @@ -2904,7 +2888,6 @@ subroutine psb_ld_base_csgetrow(imin,imax,a,nz,ia,ja,val,info,& ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_csgetrow @@ -3486,7 +3469,6 @@ subroutine psb_ld_base_mold(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_mold @@ -3644,7 +3626,6 @@ subroutine psb_ld_base_scals(d,a,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_scals @@ -3713,7 +3694,6 @@ subroutine psb_ld_base_scal(d,a,info,side) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_scal @@ -3738,7 +3718,6 @@ function psb_ld_base_maxval(a) result(res) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end function psb_ld_base_maxval @@ -3834,7 +3813,6 @@ subroutine psb_ld_base_rowsum(d,a) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_rowsum @@ -3856,7 +3834,6 @@ subroutine psb_ld_base_arwsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_arwsum @@ -3878,7 +3855,6 @@ subroutine psb_ld_base_colsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_colsum @@ -3900,7 +3876,6 @@ subroutine psb_ld_base_aclsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_aclsum @@ -4064,7 +4039,6 @@ subroutine psb_ld_base_get_diag(a,d,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ld_base_get_diag diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 4cb4c3ec..bff16ff8 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -257,7 +257,7 @@ subroutine psb_d_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined (OPENMP) +#if defined (PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -368,7 +368,7 @@ function psb_d_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined (OPENMP) +#if defined (PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -595,12 +595,13 @@ subroutine psb_d_coo_clean_zeros(a, info) integer(psb_ipk_), intent(out) :: info ! 
integer(psb_ipk_) :: i,j,k, nzin - + info = 0 nzin = a%get_nzeros() j = 0 do i=1, nzin - if (a%val(i) /= dzero) then + ! Always keep the diagonal, even if numerically zero + if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then j = j + 1 a%val(j) = a%val(i) a%ia(j) = a%ia(i) @@ -608,6 +609,7 @@ subroutine psb_d_coo_clean_zeros(a, info) end if end do call a%set_nzeros(j) + call a%fix(info) call a%trim() end subroutine psb_d_coo_clean_zeros @@ -1928,7 +1930,7 @@ function psb_d_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = dzero !$omp parallel do private(i) reduction(max: res) do i=1, nnz @@ -2818,7 +2820,7 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_realloc_mod use psb_sort_mod use psb_d_base_mat_mod, psb_protect_name => psb_d_coo_csput_a -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -2867,29 +2869,42 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (a%is_bld()) then ! Structure here is peculiar, because this function can be called ! either within a parallel region, or outside. - ! Hence the call to set_nzeros done here. - !$omp critical + ! Hence the call to set_nzeros done here. +#if defined(PSB_OPENMP) + !$omp critical(d_coo_csput_a) +#endif nza = a%get_nzeros() nzaold = nza isza = a%get_size() + if (info /= 0) write(0,*) name,' point 0:',info,isza,nza,nz ! Build phase. Must handle reallocations in a sensible way. 
if (isza < (nza+nz)) then + !write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info call a%reallocate(max(nza+nz,int(1.5*isza))) + !write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info endif isza = a%get_size() if (isza < (nza+nz)) then - info = psb_err_alloc_dealloc_; call psb_errpush(info,name) + info = psb_err_alloc_dealloc_; + write(0,*) name,' point 1:',info,isza,nza,nz,nza+nz + call psb_errpush(info,name) else -#if defined(OPENMP) +#if defined(PSB_OPENMP) nza = nza + nz #endif call a%set_nzeros(nza) end if - !$omp end critical - if (info /= 0) goto 9999 +#if defined(PSB_OPENMP) + if (info /= 0) write(0,*) name,' point 1.5:',info + !$omp end critical(d_coo_csput_a) +#endif + if (info /= 0) then + write(0,*) name,' point 2:',info + goto 9999 + end if call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) -#if !defined(OPENMP) +#if !defined(PSB_OPENMP) nza = nzaold call a%set_nzeros(nza) #endif @@ -2944,14 +2959,16 @@ contains integer(psb_ipk_) :: i,ir,ic info = psb_success_ -#if defined(OPENMP) +#if defined(PSB_OPENMP) + ! Disabling OpenMP parallel do for the time being. + ! Will need to redesign the entire code stack ! The logic here is different from the one used for ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. ! These values will be filtered in a future fixing process. - !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & - !$OMP private(ir,ic,i) + ! $ O M P PARALLEL DO schedule(STATIC) & + ! $ O M P shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & + ! $ O M P private(ir,ic,i) do i=1,nz ir = ia(i) ic = ja(i) @@ -2965,7 +2982,7 @@ contains aspk(nza+i) = -1 end if end do - !$OMP END PARALLEL DO + ! 
$ O M P END PARALLEL DO nza = nza + nz #else do i=1, nz @@ -3129,7 +3146,7 @@ subroutine psb_d_cp_coo_to_coo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -3182,7 +3199,7 @@ subroutine psb_d_cp_coo_from_coo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -3568,7 +3585,7 @@ subroutine psb_d_coo_tril(a,l,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -3864,7 +3881,7 @@ subroutine psb_d_coo_triu(a,u,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -4154,7 +4171,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) use psb_string_mod use psb_ip_reord_mod use psb_sort_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -4172,7 +4189,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' logical :: srt_inp, use_buffers -#if defined(OPENMP) +#if defined(PSB_OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads #endif @@ -4200,7 +4217,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl -#if defined(OPENMP) +#if defined(PSB_OPENMP) maxthreads = omp_get_max_threads() ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. 
Since each thread handles one @@ -4214,7 +4231,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4256,7 +4273,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use psb_string_mod use psb_ip_reord_mod use psb_sort_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -4274,7 +4291,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf character(len=20) :: name = 'psb_fixcoo' logical :: srt_inp, use_buffers real(psb_dpk_) :: t0, t1 -#if defined(OPENMP) +#if defined(PSB_OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) @@ -4289,7 +4306,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Row major order if (nr <= nzin) then ! Avoid strange situations with large indices -#if defined(OPENMP) +#if defined(PSB_OPENMP) ! We are not going to need 'ix2' because of the presence ! of 'idxaux' as auxiliary buffer. allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) @@ -4302,7 +4319,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if !if (use_buffers) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp workshare iaux(:) = 0 !$omp end workshare @@ -4356,7 +4373,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! all the indices are valid ! Check again use_buffers. 
if (use_buffers) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) maxthreads = omp_get_max_threads() allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then @@ -4730,7 +4747,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf call psi_msort_up(nzin,ia(1:),iaux(1:),iret) if (iret == 0) & & call psb_ip_reord(nzin,val,ia,ja,iaux) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(none) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & @@ -4920,7 +4937,7 @@ subroutine psb_d_cp_coo_to_lcoo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -4972,7 +4989,7 @@ subroutine psb_d_cp_coo_from_lcoo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5185,7 +5202,7 @@ function psb_ld_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5252,7 +5269,7 @@ function psb_ld_coo_csnmi(a) result(res) i = a%ia(j) vt(i) = vt(i) + abs(a%val(j)) end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5302,7 +5319,7 @@ function psb_ld_coo_csnm1(a) result(res) i = a%ja(j) vt(i) = vt(i) + abs(a%val(j)) end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5585,7 +5602,7 @@ subroutine psb_ld_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! 
Compute the sum -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5696,7 +5713,7 @@ function psb_ld_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5926,12 +5943,13 @@ subroutine psb_ld_coo_clean_zeros(a, info) integer(psb_ipk_), intent(out) :: info ! integer(psb_lpk_) :: i,j,k, nzin - + info = 0 nzin = a%get_nzeros() j = 0 do i=1, nzin - if (a%val(i) /= dzero) then + ! Always keep the diagonal, even if numerically zero + if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then j = j + 1 a%val(j) = a%val(i) a%ia(j) = a%ia(i) @@ -5956,7 +5974,7 @@ subroutine psb_ld_coo_clean_negidx(a,info) end subroutine psb_ld_coo_clean_negidx -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ld_coo_clean_negidx_inner(nzin,ia,ja,val,nzout,info) use psb_error_mod use psb_d_base_mat_mod, psb_protect_name => psb_ld_coo_clean_negidx_inner diff --git a/base/serial/impl/psb_d_csc_impl.F90 b/base/serial/impl/psb_d_csc_impl.F90 index 1761b051..ba38e763 100644 --- a/base/serial/impl/psb_d_csc_impl.F90 +++ b/base/serial/impl/psb_d_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_d_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2189,7 +2189,7 @@ subroutine psb_d_mv_csc_from_coo(a,b,info) use psb_error_mod use psb_d_base_mat_mod use psb_d_csc_mat_mod, psb_protect_name => psb_d_mv_csc_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -2226,7 +2226,7 @@ subroutine psb_d_mv_csc_from_coo(a,b,info) call psb_realloc(nc+1,a%icp,info) call b%free() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL 
default(shared) @@ -2328,7 +2328,7 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2403,35 +2403,36 @@ subroutine psb_d_mv_csc_from_fmt(a,b,info) end subroutine psb_d_mv_csc_from_fmt -subroutine psb_d_csc_clean_zeros(a, info) - use psb_error_mod - use psb_d_csc_mat_mod, psb_protect_name => psb_d_csc_clean_zeros - implicit none - class(psb_d_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! - integer(psb_ipk_) :: i, j, k, nc - integer(psb_ipk_), allocatable :: ilcp(:) - - info = 0 - call a%sync() - nc = a%get_ncols() - ilcp = a%icp - a%icp(1) = 1 - j = a%icp(1) - do i=1, nc - do k = ilcp(i), ilcp(i+1) -1 - if (a%val(k) /= dzero) then - a%val(j) = a%val(k) - a%ia(j) = a%ia(k) - j = j + 1 - end if - end do - a%icp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_d_csc_clean_zeros +!!$subroutine psb_d_csc_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_d_csc_mat_mod, psb_protect_name => psb_d_csc_clean_zeros +!!$ implicit none +!!$ class(psb_d_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_ipk_) :: i, j, k, nc +!!$ integer(psb_ipk_), allocatable :: ilcp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nc = a%get_ncols() +!!$ ilcp = a%icp +!!$ a%icp(1) = 1 +!!$ j = a%icp(1) +!!$ do i=1, nc +!!$ do k = ilcp(i), ilcp(i+1) -1 +!!$ ! 
Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= dzero).or.(i == a%ia(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ia(j) = a%ia(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%icp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_d_csc_clean_zeros subroutine psb_d_cp_csc_from_fmt(a,b,info) use psb_const_mod @@ -2461,7 +2462,7 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4059,7 @@ subroutine psb_ld_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -4304,35 +4305,36 @@ subroutine psb_ld_cp_csc_from_fmt(a,b,info) end subroutine psb_ld_cp_csc_from_fmt -subroutine psb_ld_csc_clean_zeros(a, info) - use psb_error_mod - use psb_d_csc_mat_mod, psb_protect_name => psb_ld_csc_clean_zeros - implicit none - class(psb_ld_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! 
- integer(psb_lpk_) :: i, j, k, nc - integer(psb_lpk_), allocatable :: ilcp(:) - - info = 0 - call a%sync() - nc = a%get_ncols() - ilcp = a%icp - a%icp(1) = 1 - j = a%icp(1) - do i=1, nc - do k = ilcp(i), ilcp(i+1) -1 - if (a%val(k) /= dzero) then - a%val(j) = a%val(k) - a%ia(j) = a%ia(k) - j = j + 1 - end if - end do - a%icp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_ld_csc_clean_zeros +!!$subroutine psb_ld_csc_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_d_csc_mat_mod, psb_protect_name => psb_ld_csc_clean_zeros +!!$ implicit none +!!$ class(psb_ld_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_lpk_) :: i, j, k, nc +!!$ integer(psb_lpk_), allocatable :: ilcp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nc = a%get_ncols() +!!$ ilcp = a%icp +!!$ a%icp(1) = 1 +!!$ j = a%icp(1) +!!$ do i=1, nc +!!$ do k = ilcp(i), ilcp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= dzero).or.(i == a%ia(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ia(j) = a%ia(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%icp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_ld_csc_clean_zeros subroutine psb_ld_csc_mold(a,b,info) diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index 10f99bc9..952bb1e9 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -152,7 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -164,7 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -176,7 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd 
reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -192,7 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -204,7 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -216,7 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -231,7 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -243,7 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -255,7 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -270,7 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -282,7 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -294,7 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -2289,7 +2289,7 @@ subroutine psb_d_csr_tril(a,l,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -2591,7 
+2591,7 @@ subroutine psb_d_csr_triu(a,u,info,& endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -3156,7 +3156,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) use psb_realloc_mod use psb_d_base_mat_mod use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_csr_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -3217,7 +3217,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(shared) reduction(max:info) @@ -3318,7 +3318,7 @@ subroutine psb_d_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3346,7 +3346,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) use psb_error_mod use psb_d_base_mat_mod use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_csr_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -3385,7 +3385,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(shared) reduction(max:info) @@ -3489,7 +3489,7 @@ subroutine psb_d_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) 
then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -3624,37 +3624,38 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info) end select end subroutine psb_d_cp_csr_from_fmt -subroutine psb_d_csr_clean_zeros(a, info) - use psb_error_mod - use psb_d_csr_mat_mod, psb_protect_name => psb_d_csr_clean_zeros - implicit none - class(psb_d_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! - integer(psb_ipk_) :: i, j, k, nr - integer(psb_ipk_), allocatable :: ilrp(:) - - info = 0 - call a%sync() - nr = a%get_nrows() - ilrp = a%irp - a%irp(1) = 1 - j = a%irp(1) - do i=1, nr - do k = ilrp(i), ilrp(i+1) -1 - if (a%val(k) /= dzero) then - a%val(j) = a%val(k) - a%ja(j) = a%ja(k) - j = j + 1 - end if - end do - a%irp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_d_csr_clean_zeros - -#if defined(OPENMP) +!!$subroutine psb_d_csr_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_d_csr_mat_mod, psb_protect_name => psb_d_csr_clean_zeros +!!$ implicit none +!!$ class(psb_d_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_ipk_) :: i, j, k, nr +!!$ integer(psb_ipk_), allocatable :: ilrp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nr = a%get_nrows() +!!$ ilrp = a%irp +!!$ a%irp(1) = 1 +!!$ j = a%irp(1) +!!$ do i=1, nr +!!$ do k = ilrp(i), ilrp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= dzero).or.(i == a%ja(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ja(j) = a%ja(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%irp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_d_csr_clean_zeros + +#if defined(PSB_OPENMP) subroutine psb_dcsrspspmm(a,b,c,info) use psb_d_mat_mod use psb_serial_mod, psb_protect_name => psb_dcsrspspmm @@ -3692,7 +3693,7 @@ subroutine psb_dcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. 
nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.5*(nza+nzb)),ma,mb,na,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -3772,8 +3773,8 @@ contains if (nrc > 0 ) then if ((nzc+nrc)>nze) then nze = max(ma*((nzc+j-1)/j),nzc+2*nrc) - call psb_realloc(nze,c%val,info) - if (info == 0) call psb_realloc(nze,c%ja,info) + call psb_ensure_size(nze,c%val,info) + if (info == 0) call psb_ensure_size(nze,c%ja,info) if (info /= 0) return end if @@ -3805,6 +3806,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3817,23 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth,offsets,info) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + allocate(offsets(omp_get_max_threads()),stat=info) + !$omp end single + !$omp end parallel + if (info /= 0) then + write(0,*)'Offsets allocation failed ',info + return + end if - allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3889,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw 
= start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3904,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3943,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4010,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4025,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d @@ -4223,7 +4238,7 @@ subroutine 
psb_dcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.25*(nza+nzb)),ma,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -4261,9 +4276,9 @@ contains nze = min(size(c%val),size(c%ja)) isz = max(ma,na,mb,nb) - call psb_realloc(isz,row,info) - if (info == 0) call psb_realloc(isz,idxs,info) - if (info == 0) call psb_realloc(isz,irow,info) + call psb_realloc(nb,row,info) + if (info == 0) call psb_realloc(na,idxs,info) + if (info == 0) call psb_realloc(nb,irow,info) if (info /= 0) return row = dzero irow = 0 @@ -4288,8 +4303,8 @@ contains if (nrc > 0 ) then if ((nzc+nrc)>nze) then nze = max(ma*((nzc+j-1)/j),nzc+2*nrc) - call psb_realloc(nze,c%val,info) - if (info == 0) call psb_realloc(nze,c%ja,info) + call psb_ensure_size(nze,c%val,info) + if (info == 0) call psb_ensure_size(nze,c%ja,info) if (info /= 0) return end if @@ -4312,6 +4327,266 @@ contains end subroutine psb_dcsrspspmm #endif +subroutine psb_d_ecsr_mold(a,b,info) + use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_mold + use psb_error_mod + implicit none + class(psb_d_ecsr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ecsr_mold + +subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_csmv + implicit none + class(psb_d_ecsr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='d_csr_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ecsr_cmp_nerwp + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws +end subroutine psb_d_ecsr_cmp_nerwp + +subroutine psb_d_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_coo + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_cp_ecsr_from_coo + +subroutine psb_d_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_coo + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine 
psb_d_mv_ecsr_from_coo + +subroutine psb_d_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_fmt + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_mv_ecsr_from_fmt + +subroutine psb_d_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_d_base_mat_mod + use psb_realloc_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_fmt + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_cp_ecsr_from_fmt + ! ! ! ld version @@ -6021,7 +6296,7 @@ subroutine psb_ld_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -6273,35 +6548,36 @@ subroutine psb_ld_cp_csr_from_fmt(a,b,info) end subroutine psb_ld_cp_csr_from_fmt -subroutine psb_ld_csr_clean_zeros(a, info) - use psb_error_mod - use psb_d_csr_mat_mod, psb_protect_name => psb_ld_csr_clean_zeros - implicit none - class(psb_ld_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! 
- integer(psb_lpk_) :: i, j, k, nr - integer(psb_lpk_), allocatable :: ilrp(:) - - info = 0 - call a%sync() - nr = a%get_nrows() - ilrp = a%irp - a%irp(1) = 1 - j = a%irp(1) - do i=1, nr - do k = ilrp(i), ilrp(i+1) -1 - if (a%val(k) /= dzero) then - a%val(j) = a%val(k) - a%ja(j) = a%ja(k) - j = j + 1 - end if - end do - a%irp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_ld_csr_clean_zeros +!!$subroutine psb_ld_csr_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_d_csr_mat_mod, psb_protect_name => psb_ld_csr_clean_zeros +!!$ implicit none +!!$ class(psb_ld_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_lpk_) :: i, j, k, nr +!!$ integer(psb_lpk_), allocatable :: ilrp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nr = a%get_nrows() +!!$ ilrp = a%irp +!!$ a%irp(1) = 1 +!!$ j = a%irp(1) +!!$ do i=1, nr +!!$ do k = ilrp(i), ilrp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= dzero).or.(i == a%ja(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ja(j) = a%ja(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%irp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_ld_csr_clean_zeros subroutine psb_ldcsrspspmm(a,b,c,info) use psb_d_mat_mod @@ -6337,7 +6613,7 @@ subroutine psb_ldcsrspspmm(a,b,c,info) nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.25*(nza+nzb)),ma,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -6375,9 +6651,9 @@ contains nze = min(size(c%val),size(c%ja)) isz = max(ma,na,mb,nb) - call psb_realloc(isz,row,info) - if (info == 0) call psb_realloc(isz,idxs,info) - if (info == 0) call psb_realloc(isz,irow,info) + call psb_realloc(nb,row,info) + if (info == 0) call psb_realloc(na,idxs,info) + if (info == 0) call psb_realloc(nb,irow,info) if (info /= 0) return row = dzero irow = 0 diff --git a/base/serial/impl/psb_d_mat_impl.F90 
b/base/serial/impl/psb_d_mat_impl.F90 index 2a6fb9a5..c744f7f5 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -1213,6 +1213,106 @@ subroutine psb_d_b_csclip(a,b,info,& end subroutine psb_d_b_csclip +subroutine psb_d_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_d_mat_mod, psb_protect_name => psb_d_split_nd + implicit none + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat), allocatable :: aclip + type(psb_d_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) 
+ if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_split_nd + +subroutine psb_d_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_d_mat_mod, psb_protect_name => psb_d_merge_nd + implicit none + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + type(psb_d_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. 
+ character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_merge_nd + subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1346,65 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) 
then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_d_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_d_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_d_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. 
- call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1414,79 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_d_base_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_d_base_sparse_mat), intent(in), optional :: mold + + class(psb_d_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) 
+ goto 9999 + end select + else + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_d_cscnv subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1495,12 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) use psb_d_mat_mod, psb_protect_name => psb_d_cscnv_ip implicit none - class(psb_dspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_d_base_sparse_mat), intent(in), optional :: mold - class(psb_d_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1527,55 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) 
then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_d_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_d_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_d_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call 
psb_erractionrestore(err_act) @@ -1394,6 +1585,77 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_d_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_d_base_sparse_mat), intent(in), optional :: mold + class(psb_d_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. 
+ call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_d_cscnv_ip @@ -2849,7 +3111,7 @@ subroutine psb_ld_set_lnrows(m,a) end subroutine psb_ld_set_lnrows -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ld_set_inrows(m,a) use psb_d_mat_mod, psb_protect_name => psb_ld_set_inrows use psb_error_mod @@ -2906,7 +3168,7 @@ subroutine psb_ld_set_lncols(n,a) end subroutine psb_ld_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ld_set_incols(n,a) use psb_d_mat_mod, psb_protect_name => psb_ld_set_incols use psb_error_mod diff --git a/base/serial/impl/psb_d_rb_idx_tree_impl.F90 b/base/serial/impl/psb_d_rb_idx_tree_impl.F90 index 9b63d51c..241338fb 100644 --- a/base/serial/impl/psb_d_rb_idx_tree_impl.F90 +++ b/base/serial/impl/psb_d_rb_idx_tree_impl.F90 @@ -267,7 +267,7 @@ subroutine psb_d_rb_idx_tree_scalar_sparse_row_mul(tree, scalar, mat, row_num) end subroutine psb_d_rb_idx_tree_scalar_sparse_row_mul subroutine psb_d_rb_idx_tree_merge(trees, mat) -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif use psb_realloc_mod @@ -294,7 +294,7 @@ subroutine psb_d_rb_idx_tree_merge(trees, mat) call psb_realloc(nnz, mat%val, info) call psb_realloc(nnz, mat%ja, info) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do schedule(static), private(current, previous, j) #endif do i = 1, size(trees) @@ -323,7 +323,7 @@ subroutine psb_d_rb_idx_tree_merge(trees, mat) deallocate(previous) end do end do -#if defined(OPENMP) +#if 
defined(PSB_OPENMP) !$omp end parallel do #endif end subroutine psb_d_rb_idx_tree_merge diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90 index 4a99a684..c070e716 100644 --- a/base/serial/impl/psb_s_base_mat_impl.F90 +++ b/base/serial/impl/psb_s_base_mat_impl.F90 @@ -60,7 +60,6 @@ subroutine psb_s_base_cp_to_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_cp_to_coo @@ -84,7 +83,6 @@ subroutine psb_s_base_cp_from_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_cp_from_coo @@ -344,7 +342,6 @@ subroutine psb_s_base_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_csput_a @@ -420,7 +417,6 @@ subroutine psb_s_base_csgetrow(imin,imax,a,nz,ia,ja,val,info,& ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_csgetrow @@ -993,7 +989,6 @@ subroutine psb_s_base_mold(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_mold @@ -1168,7 +1163,6 @@ subroutine psb_s_base_csmm(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_csmm @@ -1194,7 +1188,6 @@ subroutine psb_s_base_csmv(alpha,a,x,beta,y,info,trans) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) @@ -1221,7 +1214,6 @@ subroutine psb_s_base_inner_cssm(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_inner_cssm @@ -1247,7 +1239,6 @@ subroutine psb_s_base_inner_cssv(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_inner_cssv @@ -1549,7 +1540,6 @@ subroutine psb_s_base_scals(d,a,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_scals @@ -1618,7 +1608,6 @@ subroutine psb_s_base_scal(d,a,info,side) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_scal @@ -1643,7 +1632,6 @@ function psb_s_base_maxval(a) result(res) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end function psb_s_base_maxval @@ -1742,7 +1730,6 @@ subroutine psb_s_base_rowsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_rowsum @@ -1764,7 +1751,6 @@ subroutine psb_s_base_arwsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_arwsum @@ -1786,7 +1772,6 @@ subroutine psb_s_base_colsum(d,a) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_colsum @@ -1808,7 +1793,6 @@ subroutine psb_s_base_aclsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_aclsum @@ -1833,7 +1817,6 @@ subroutine psb_s_base_get_diag(a,d,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_s_base_get_diag @@ -2006,8 +1989,8 @@ subroutine psb_s_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_s_base_vect_mv @@ -2060,8 +2043,8 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2082,6 +2065,7 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) @@ -2161,8 +2145,11 @@ subroutine psb_s_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ @@ -2543,7 +2530,6 @@ subroutine psb_ls_base_cp_to_coo(a,b,info) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_cp_to_coo @@ -2567,7 +2553,6 @@ subroutine psb_ls_base_cp_from_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_cp_from_coo @@ -2827,7 +2812,6 @@ subroutine psb_ls_base_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_csput_a @@ -2904,7 +2888,6 @@ subroutine psb_ls_base_csgetrow(imin,imax,a,nz,ia,ja,val,info,& ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_csgetrow @@ -3486,7 +3469,6 @@ subroutine psb_ls_base_mold(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_mold @@ -3644,7 +3626,6 @@ subroutine psb_ls_base_scals(d,a,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_scals @@ -3713,7 +3694,6 @@ subroutine psb_ls_base_scal(d,a,info,side) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_scal @@ -3738,7 +3718,6 @@ function psb_ls_base_maxval(a) result(res) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end function psb_ls_base_maxval @@ -3834,7 +3813,6 @@ subroutine psb_ls_base_rowsum(d,a) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_rowsum @@ -3856,7 +3834,6 @@ subroutine psb_ls_base_arwsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_arwsum @@ -3878,7 +3855,6 @@ subroutine psb_ls_base_colsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_colsum @@ -3900,7 +3876,6 @@ subroutine psb_ls_base_aclsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_aclsum @@ -4064,7 +4039,6 @@ subroutine psb_ls_base_get_diag(a,d,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_ls_base_get_diag diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index f706db33..f3c831ab 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -257,7 +257,7 @@ subroutine psb_s_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined (OPENMP) +#if defined (PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -368,7 +368,7 @@ function psb_s_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined (OPENMP) +#if defined (PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -595,12 +595,13 @@ subroutine psb_s_coo_clean_zeros(a, info) integer(psb_ipk_), intent(out) :: info ! 
integer(psb_ipk_) :: i,j,k, nzin - + info = 0 nzin = a%get_nzeros() j = 0 do i=1, nzin - if (a%val(i) /= szero) then + ! Always keep the diagonal, even if numerically zero + if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then j = j + 1 a%val(j) = a%val(i) a%ia(j) = a%ia(i) @@ -608,6 +609,7 @@ subroutine psb_s_coo_clean_zeros(a, info) end if end do call a%set_nzeros(j) + call a%fix(info) call a%trim() end subroutine psb_s_coo_clean_zeros @@ -1928,7 +1930,7 @@ function psb_s_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = szero !$omp parallel do private(i) reduction(max: res) do i=1, nnz @@ -2818,7 +2820,7 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_realloc_mod use psb_sort_mod use psb_s_base_mat_mod, psb_protect_name => psb_s_coo_csput_a -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -2867,29 +2869,42 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (a%is_bld()) then ! Structure here is peculiar, because this function can be called ! either within a parallel region, or outside. - ! Hence the call to set_nzeros done here. - !$omp critical + ! Hence the call to set_nzeros done here. +#if defined(PSB_OPENMP) + !$omp critical(s_coo_csput_a) +#endif nza = a%get_nzeros() nzaold = nza isza = a%get_size() + if (info /= 0) write(0,*) name,' point 0:',info,isza,nza,nz ! Build phase. Must handle reallocations in a sensible way. 
if (isza < (nza+nz)) then + !write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info call a%reallocate(max(nza+nz,int(1.5*isza))) + !write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info endif isza = a%get_size() if (isza < (nza+nz)) then - info = psb_err_alloc_dealloc_; call psb_errpush(info,name) + info = psb_err_alloc_dealloc_; + write(0,*) name,' point 1:',info,isza,nza,nz,nza+nz + call psb_errpush(info,name) else -#if defined(OPENMP) +#if defined(PSB_OPENMP) nza = nza + nz #endif call a%set_nzeros(nza) end if - !$omp end critical - if (info /= 0) goto 9999 +#if defined(PSB_OPENMP) + if (info /= 0) write(0,*) name,' point 1.5:',info + !$omp end critical(s_coo_csput_a) +#endif + if (info /= 0) then + write(0,*) name,' point 2:',info + goto 9999 + end if call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) -#if !defined(OPENMP) +#if !defined(PSB_OPENMP) nza = nzaold call a%set_nzeros(nza) #endif @@ -2944,14 +2959,16 @@ contains integer(psb_ipk_) :: i,ir,ic info = psb_success_ -#if defined(OPENMP) +#if defined(PSB_OPENMP) + ! Disabling OpenMP parallel do for the time being. + ! Will need to redesign the entire code stack ! The logic here is different from the one used for ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. ! These values will be filtered in a future fixing process. - !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & - !$OMP private(ir,ic,i) + ! $ O M P PARALLEL DO schedule(STATIC) & + ! $ O M P shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & + ! $ O M P private(ir,ic,i) do i=1,nz ir = ia(i) ic = ja(i) @@ -2965,7 +2982,7 @@ contains aspk(nza+i) = -1 end if end do - !$OMP END PARALLEL DO + ! 
$ O M P END PARALLEL DO nza = nza + nz #else do i=1, nz @@ -3129,7 +3146,7 @@ subroutine psb_s_cp_coo_to_coo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -3182,7 +3199,7 @@ subroutine psb_s_cp_coo_from_coo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -3568,7 +3585,7 @@ subroutine psb_s_coo_tril(a,l,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -3864,7 +3881,7 @@ subroutine psb_s_coo_triu(a,u,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -4154,7 +4171,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) use psb_string_mod use psb_ip_reord_mod use psb_sort_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -4172,7 +4189,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' logical :: srt_inp, use_buffers -#if defined(OPENMP) +#if defined(PSB_OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads #endif @@ -4200,7 +4217,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl -#if defined(OPENMP) +#if defined(PSB_OPENMP) maxthreads = omp_get_max_threads() ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. 
Since each thread handles one @@ -4214,7 +4231,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4256,7 +4273,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use psb_string_mod use psb_ip_reord_mod use psb_sort_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -4274,7 +4291,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf character(len=20) :: name = 'psb_fixcoo' logical :: srt_inp, use_buffers real(psb_dpk_) :: t0, t1 -#if defined(OPENMP) +#if defined(PSB_OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) @@ -4289,7 +4306,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Row major order if (nr <= nzin) then ! Avoid strange situations with large indices -#if defined(OPENMP) +#if defined(PSB_OPENMP) ! We are not going to need 'ix2' because of the presence ! of 'idxaux' as auxiliary buffer. allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) @@ -4302,7 +4319,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if !if (use_buffers) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp workshare iaux(:) = 0 !$omp end workshare @@ -4356,7 +4373,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! all the indices are valid ! Check again use_buffers. 
if (use_buffers) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) maxthreads = omp_get_max_threads() allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then @@ -4730,7 +4747,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf call psi_msort_up(nzin,ia(1:),iaux(1:),iret) if (iret == 0) & & call psb_ip_reord(nzin,val,ia,ja,iaux) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(none) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & @@ -4920,7 +4937,7 @@ subroutine psb_s_cp_coo_to_lcoo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -4972,7 +4989,7 @@ subroutine psb_s_cp_coo_from_lcoo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5185,7 +5202,7 @@ function psb_ls_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5252,7 +5269,7 @@ function psb_ls_coo_csnmi(a) result(res) i = a%ia(j) vt(i) = vt(i) + abs(a%val(j)) end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5302,7 +5319,7 @@ function psb_ls_coo_csnm1(a) result(res) i = a%ja(j) vt(i) = vt(i) + abs(a%val(j)) end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5585,7 +5602,7 @@ subroutine psb_ls_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! 
Compute the sum -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5696,7 +5713,7 @@ function psb_ls_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5926,12 +5943,13 @@ subroutine psb_ls_coo_clean_zeros(a, info) integer(psb_ipk_), intent(out) :: info ! integer(psb_lpk_) :: i,j,k, nzin - + info = 0 nzin = a%get_nzeros() j = 0 do i=1, nzin - if (a%val(i) /= szero) then + ! Always keep the diagonal, even if numerically zero + if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then j = j + 1 a%val(j) = a%val(i) a%ia(j) = a%ia(i) @@ -5956,7 +5974,7 @@ subroutine psb_ls_coo_clean_negidx(a,info) end subroutine psb_ls_coo_clean_negidx -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ls_coo_clean_negidx_inner(nzin,ia,ja,val,nzout,info) use psb_error_mod use psb_s_base_mat_mod, psb_protect_name => psb_ls_coo_clean_negidx_inner diff --git a/base/serial/impl/psb_s_csc_impl.F90 b/base/serial/impl/psb_s_csc_impl.F90 index a66b7dc0..73c11ce6 100644 --- a/base/serial/impl/psb_s_csc_impl.F90 +++ b/base/serial/impl/psb_s_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_s_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2189,7 +2189,7 @@ subroutine psb_s_mv_csc_from_coo(a,b,info) use psb_error_mod use psb_s_base_mat_mod use psb_s_csc_mat_mod, psb_protect_name => psb_s_mv_csc_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -2226,7 +2226,7 @@ subroutine psb_s_mv_csc_from_coo(a,b,info) call psb_realloc(nc+1,a%icp,info) call b%free() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL 
default(shared) @@ -2328,7 +2328,7 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2403,35 +2403,36 @@ subroutine psb_s_mv_csc_from_fmt(a,b,info) end subroutine psb_s_mv_csc_from_fmt -subroutine psb_s_csc_clean_zeros(a, info) - use psb_error_mod - use psb_s_csc_mat_mod, psb_protect_name => psb_s_csc_clean_zeros - implicit none - class(psb_s_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! - integer(psb_ipk_) :: i, j, k, nc - integer(psb_ipk_), allocatable :: ilcp(:) - - info = 0 - call a%sync() - nc = a%get_ncols() - ilcp = a%icp - a%icp(1) = 1 - j = a%icp(1) - do i=1, nc - do k = ilcp(i), ilcp(i+1) -1 - if (a%val(k) /= szero) then - a%val(j) = a%val(k) - a%ia(j) = a%ia(k) - j = j + 1 - end if - end do - a%icp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_s_csc_clean_zeros +!!$subroutine psb_s_csc_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_s_csc_mat_mod, psb_protect_name => psb_s_csc_clean_zeros +!!$ implicit none +!!$ class(psb_s_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_ipk_) :: i, j, k, nc +!!$ integer(psb_ipk_), allocatable :: ilcp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nc = a%get_ncols() +!!$ ilcp = a%icp +!!$ a%icp(1) = 1 +!!$ j = a%icp(1) +!!$ do i=1, nc +!!$ do k = ilcp(i), ilcp(i+1) -1 +!!$ ! 
Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= szero).or.(i == a%ia(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ia(j) = a%ia(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%icp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_s_csc_clean_zeros subroutine psb_s_cp_csc_from_fmt(a,b,info) use psb_const_mod @@ -2461,7 +2462,7 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4059,7 @@ subroutine psb_ls_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -4304,35 +4305,36 @@ subroutine psb_ls_cp_csc_from_fmt(a,b,info) end subroutine psb_ls_cp_csc_from_fmt -subroutine psb_ls_csc_clean_zeros(a, info) - use psb_error_mod - use psb_s_csc_mat_mod, psb_protect_name => psb_ls_csc_clean_zeros - implicit none - class(psb_ls_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! 
- integer(psb_lpk_) :: i, j, k, nc - integer(psb_lpk_), allocatable :: ilcp(:) - - info = 0 - call a%sync() - nc = a%get_ncols() - ilcp = a%icp - a%icp(1) = 1 - j = a%icp(1) - do i=1, nc - do k = ilcp(i), ilcp(i+1) -1 - if (a%val(k) /= szero) then - a%val(j) = a%val(k) - a%ia(j) = a%ia(k) - j = j + 1 - end if - end do - a%icp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_ls_csc_clean_zeros +!!$subroutine psb_ls_csc_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_s_csc_mat_mod, psb_protect_name => psb_ls_csc_clean_zeros +!!$ implicit none +!!$ class(psb_ls_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_lpk_) :: i, j, k, nc +!!$ integer(psb_lpk_), allocatable :: ilcp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nc = a%get_ncols() +!!$ ilcp = a%icp +!!$ a%icp(1) = 1 +!!$ j = a%icp(1) +!!$ do i=1, nc +!!$ do k = ilcp(i), ilcp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= szero).or.(i == a%ia(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ia(j) = a%ia(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%icp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_ls_csc_clean_zeros subroutine psb_ls_csc_mold(a,b,info) diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index f3d5c669..f384ef33 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -152,7 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -164,7 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -176,7 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd 
reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -192,7 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -204,7 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -216,7 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -231,7 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -243,7 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -255,7 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -270,7 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -282,7 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -294,7 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -2289,7 +2289,7 @@ subroutine psb_s_csr_tril(a,l,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -2591,7 
+2591,7 @@ subroutine psb_s_csr_triu(a,u,info,& endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -3156,7 +3156,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) use psb_realloc_mod use psb_s_base_mat_mod use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_csr_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -3217,7 +3217,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(shared) reduction(max:info) @@ -3318,7 +3318,7 @@ subroutine psb_s_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3346,7 +3346,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) use psb_error_mod use psb_s_base_mat_mod use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_csr_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -3385,7 +3385,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(shared) reduction(max:info) @@ -3489,7 +3489,7 @@ subroutine psb_s_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) 
then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -3624,37 +3624,38 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info) end select end subroutine psb_s_cp_csr_from_fmt -subroutine psb_s_csr_clean_zeros(a, info) - use psb_error_mod - use psb_s_csr_mat_mod, psb_protect_name => psb_s_csr_clean_zeros - implicit none - class(psb_s_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! - integer(psb_ipk_) :: i, j, k, nr - integer(psb_ipk_), allocatable :: ilrp(:) - - info = 0 - call a%sync() - nr = a%get_nrows() - ilrp = a%irp - a%irp(1) = 1 - j = a%irp(1) - do i=1, nr - do k = ilrp(i), ilrp(i+1) -1 - if (a%val(k) /= szero) then - a%val(j) = a%val(k) - a%ja(j) = a%ja(k) - j = j + 1 - end if - end do - a%irp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_s_csr_clean_zeros - -#if defined(OPENMP) +!!$subroutine psb_s_csr_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_s_csr_mat_mod, psb_protect_name => psb_s_csr_clean_zeros +!!$ implicit none +!!$ class(psb_s_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_ipk_) :: i, j, k, nr +!!$ integer(psb_ipk_), allocatable :: ilrp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nr = a%get_nrows() +!!$ ilrp = a%irp +!!$ a%irp(1) = 1 +!!$ j = a%irp(1) +!!$ do i=1, nr +!!$ do k = ilrp(i), ilrp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= szero).or.(i == a%ja(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ja(j) = a%ja(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%irp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_s_csr_clean_zeros + +#if defined(PSB_OPENMP) subroutine psb_scsrspspmm(a,b,c,info) use psb_s_mat_mod use psb_serial_mod, psb_protect_name => psb_scsrspspmm @@ -3692,7 +3693,7 @@ subroutine psb_scsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. 
nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.5*(nza+nzb)),ma,mb,na,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -3772,8 +3773,8 @@ contains if (nrc > 0 ) then if ((nzc+nrc)>nze) then nze = max(ma*((nzc+j-1)/j),nzc+2*nrc) - call psb_realloc(nze,c%val,info) - if (info == 0) call psb_realloc(nze,c%ja,info) + call psb_ensure_size(nze,c%val,info) + if (info == 0) call psb_ensure_size(nze,c%ja,info) if (info /= 0) return end if @@ -3805,6 +3806,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3817,23 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth,offsets,info) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + allocate(offsets(omp_get_max_threads()),stat=info) + !$omp end single + !$omp end parallel + if (info /= 0) then + write(0,*)'Offsets allocation failed ',info + return + end if - allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3889,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw 
= start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3904,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3943,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4010,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4025,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d @@ -4223,7 +4238,7 @@ subroutine 
psb_scsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.25*(nza+nzb)),ma,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -4261,9 +4276,9 @@ contains nze = min(size(c%val),size(c%ja)) isz = max(ma,na,mb,nb) - call psb_realloc(isz,row,info) - if (info == 0) call psb_realloc(isz,idxs,info) - if (info == 0) call psb_realloc(isz,irow,info) + call psb_realloc(nb,row,info) + if (info == 0) call psb_realloc(na,idxs,info) + if (info == 0) call psb_realloc(nb,irow,info) if (info /= 0) return row = dzero irow = 0 @@ -4288,8 +4303,8 @@ contains if (nrc > 0 ) then if ((nzc+nrc)>nze) then nze = max(ma*((nzc+j-1)/j),nzc+2*nrc) - call psb_realloc(nze,c%val,info) - if (info == 0) call psb_realloc(nze,c%ja,info) + call psb_ensure_size(nze,c%val,info) + if (info == 0) call psb_ensure_size(nze,c%ja,info) if (info /= 0) return end if @@ -4312,6 +4327,266 @@ contains end subroutine psb_scsrspspmm #endif +subroutine psb_s_ecsr_mold(a,b,info) + use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_mold + use psb_error_mod + implicit none + class(psb_s_ecsr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ecsr_mold + +subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_csmv + implicit none + class(psb_s_ecsr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='s_csr_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ecsr_cmp_nerwp + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws +end subroutine psb_s_ecsr_cmp_nerwp + +subroutine psb_s_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_coo + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_cp_ecsr_from_coo + +subroutine psb_s_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_coo + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine 
psb_s_mv_ecsr_from_coo + +subroutine psb_s_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_fmt + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_mv_ecsr_from_fmt + +subroutine psb_s_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_s_base_mat_mod + use psb_realloc_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_fmt + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_cp_ecsr_from_fmt + ! ! ! ls version @@ -6021,7 +6296,7 @@ subroutine psb_ls_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -6273,35 +6548,36 @@ subroutine psb_ls_cp_csr_from_fmt(a,b,info) end subroutine psb_ls_cp_csr_from_fmt -subroutine psb_ls_csr_clean_zeros(a, info) - use psb_error_mod - use psb_s_csr_mat_mod, psb_protect_name => psb_ls_csr_clean_zeros - implicit none - class(psb_ls_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! 
- integer(psb_lpk_) :: i, j, k, nr - integer(psb_lpk_), allocatable :: ilrp(:) - - info = 0 - call a%sync() - nr = a%get_nrows() - ilrp = a%irp - a%irp(1) = 1 - j = a%irp(1) - do i=1, nr - do k = ilrp(i), ilrp(i+1) -1 - if (a%val(k) /= szero) then - a%val(j) = a%val(k) - a%ja(j) = a%ja(k) - j = j + 1 - end if - end do - a%irp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_ls_csr_clean_zeros +!!$subroutine psb_ls_csr_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_s_csr_mat_mod, psb_protect_name => psb_ls_csr_clean_zeros +!!$ implicit none +!!$ class(psb_ls_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_lpk_) :: i, j, k, nr +!!$ integer(psb_lpk_), allocatable :: ilrp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nr = a%get_nrows() +!!$ ilrp = a%irp +!!$ a%irp(1) = 1 +!!$ j = a%irp(1) +!!$ do i=1, nr +!!$ do k = ilrp(i), ilrp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= szero).or.(i == a%ja(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ja(j) = a%ja(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%irp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_ls_csr_clean_zeros subroutine psb_lscsrspspmm(a,b,c,info) use psb_s_mat_mod @@ -6337,7 +6613,7 @@ subroutine psb_lscsrspspmm(a,b,c,info) nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.25*(nza+nzb)),ma,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -6375,9 +6651,9 @@ contains nze = min(size(c%val),size(c%ja)) isz = max(ma,na,mb,nb) - call psb_realloc(isz,row,info) - if (info == 0) call psb_realloc(isz,idxs,info) - if (info == 0) call psb_realloc(isz,irow,info) + call psb_realloc(nb,row,info) + if (info == 0) call psb_realloc(na,idxs,info) + if (info == 0) call psb_realloc(nb,irow,info) if (info /= 0) return row = dzero irow = 0 diff --git a/base/serial/impl/psb_s_mat_impl.F90 
b/base/serial/impl/psb_s_mat_impl.F90 index ce7ce653..a27a24a5 100644 --- a/base/serial/impl/psb_s_mat_impl.F90 +++ b/base/serial/impl/psb_s_mat_impl.F90 @@ -1213,6 +1213,106 @@ subroutine psb_s_b_csclip(a,b,info,& end subroutine psb_s_b_csclip +subroutine psb_s_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_s_mat_mod, psb_protect_name => psb_s_split_nd + implicit none + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat), allocatable :: aclip + type(psb_s_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) 
+ if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_split_nd + +subroutine psb_s_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_s_mat_mod, psb_protect_name => psb_s_merge_nd + implicit none + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + type(psb_s_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. 
+ character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_merge_nd + subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1346,65 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) 
then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_s_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_s_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_s_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. 
- call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1414,79 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_s_base_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_s_base_sparse_mat), intent(in), optional :: mold + + class(psb_s_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) 
+ goto 9999 + end select + else + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_s_cscnv subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1495,12 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) use psb_s_mat_mod, psb_protect_name => psb_s_cscnv_ip implicit none - class(psb_sspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_s_base_sparse_mat), intent(in), optional :: mold - class(psb_s_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1527,55 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) 
then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_s_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_s_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_s_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call 
psb_erractionrestore(err_act) @@ -1394,6 +1585,77 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_s_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_s_base_sparse_mat), intent(in), optional :: mold + class(psb_s_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. 
+ call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_s_cscnv_ip @@ -2849,7 +3111,7 @@ subroutine psb_ls_set_lnrows(m,a) end subroutine psb_ls_set_lnrows -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ls_set_inrows(m,a) use psb_s_mat_mod, psb_protect_name => psb_ls_set_inrows use psb_error_mod @@ -2906,7 +3168,7 @@ subroutine psb_ls_set_lncols(n,a) end subroutine psb_ls_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_ls_set_incols(n,a) use psb_s_mat_mod, psb_protect_name => psb_ls_set_incols use psb_error_mod diff --git a/base/serial/impl/psb_s_rb_idx_tree_impl.F90 b/base/serial/impl/psb_s_rb_idx_tree_impl.F90 index ae624f72..5241225e 100644 --- a/base/serial/impl/psb_s_rb_idx_tree_impl.F90 +++ b/base/serial/impl/psb_s_rb_idx_tree_impl.F90 @@ -267,7 +267,7 @@ subroutine psb_s_rb_idx_tree_scalar_sparse_row_mul(tree, scalar, mat, row_num) end subroutine psb_s_rb_idx_tree_scalar_sparse_row_mul subroutine psb_s_rb_idx_tree_merge(trees, mat) -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif use psb_realloc_mod @@ -294,7 +294,7 @@ subroutine psb_s_rb_idx_tree_merge(trees, mat) call psb_realloc(nnz, mat%val, info) call psb_realloc(nnz, mat%ja, info) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do schedule(static), private(current, previous, j) #endif do i = 1, size(trees) @@ -323,7 +323,7 @@ subroutine psb_s_rb_idx_tree_merge(trees, mat) deallocate(previous) end do end do -#if defined(OPENMP) +#if 
defined(PSB_OPENMP) !$omp end parallel do #endif end subroutine psb_s_rb_idx_tree_merge diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90 index 404027c5..2d68d152 100644 --- a/base/serial/impl/psb_z_base_mat_impl.F90 +++ b/base/serial/impl/psb_z_base_mat_impl.F90 @@ -60,7 +60,6 @@ subroutine psb_z_base_cp_to_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_cp_to_coo @@ -84,7 +83,6 @@ subroutine psb_z_base_cp_from_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_cp_from_coo @@ -344,7 +342,6 @@ subroutine psb_z_base_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_csput_a @@ -420,7 +417,6 @@ subroutine psb_z_base_csgetrow(imin,imax,a,nz,ia,ja,val,info,& ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_csgetrow @@ -993,7 +989,6 @@ subroutine psb_z_base_mold(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_mold @@ -1168,7 +1163,6 @@ subroutine psb_z_base_csmm(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_csmm @@ -1194,7 +1188,6 @@ subroutine psb_z_base_csmv(alpha,a,x,beta,y,info,trans) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) @@ -1221,7 +1214,6 @@ subroutine psb_z_base_inner_cssm(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_inner_cssm @@ -1247,7 +1239,6 @@ subroutine psb_z_base_inner_cssv(alpha,a,x,beta,y,info,trans) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_inner_cssv @@ -1549,7 +1540,6 @@ subroutine psb_z_base_scals(d,a,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_scals @@ -1618,7 +1608,6 @@ subroutine psb_z_base_scal(d,a,info,side) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_scal @@ -1643,7 +1632,6 @@ function psb_z_base_maxval(a) result(res) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end function psb_z_base_maxval @@ -1742,7 +1730,6 @@ subroutine psb_z_base_rowsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_rowsum @@ -1764,7 +1751,6 @@ subroutine psb_z_base_arwsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_arwsum @@ -1786,7 +1772,6 @@ subroutine psb_z_base_colsum(d,a) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_colsum @@ -1808,7 +1793,6 @@ subroutine psb_z_base_aclsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_aclsum @@ -1833,7 +1817,6 @@ subroutine psb_z_base_get_diag(a,d,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_z_base_get_diag @@ -2006,8 +1989,8 @@ subroutine psb_z_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_z_base_vect_mv @@ -2060,8 +2043,8 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2082,6 +2065,7 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) @@ -2161,8 +2145,11 @@ subroutine psb_z_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ @@ -2543,7 +2530,6 @@ subroutine psb_lz_base_cp_to_coo(a,b,info) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_cp_to_coo @@ -2567,7 +2553,6 @@ subroutine psb_lz_base_cp_from_coo(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_cp_from_coo @@ -2827,7 +2812,6 @@ subroutine psb_lz_base_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_csput_a @@ -2904,7 +2888,6 @@ subroutine psb_lz_base_csgetrow(imin,imax,a,nz,ia,ja,val,info,& ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_csgetrow @@ -3486,7 +3469,6 @@ subroutine psb_lz_base_mold(a,b,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_mold @@ -3644,7 +3626,6 @@ subroutine psb_lz_base_scals(d,a,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_scals @@ -3713,7 +3694,6 @@ subroutine psb_lz_base_scal(d,a,info,side) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_scal @@ -3738,7 +3718,6 @@ function psb_lz_base_maxval(a) result(res) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end function psb_lz_base_maxval @@ -3834,7 +3813,6 @@ subroutine psb_lz_base_rowsum(d,a) ! so we throw an error. 
info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_rowsum @@ -3856,7 +3834,6 @@ subroutine psb_lz_base_arwsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_arwsum @@ -3878,7 +3855,6 @@ subroutine psb_lz_base_colsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_colsum @@ -3900,7 +3876,6 @@ subroutine psb_lz_base_aclsum(d,a) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_aclsum @@ -4064,7 +4039,6 @@ subroutine psb_lz_base_get_diag(a,d,info) ! so we throw an error. info = psb_err_missing_override_method_ call psb_errpush(info,name,a_err=a%get_fmt()) - call psb_error_handler(err_act) end subroutine psb_lz_base_get_diag diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index c368ce91..7653699d 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -257,7 +257,7 @@ subroutine psb_z_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined (OPENMP) +#if defined (PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -368,7 +368,7 @@ function psb_z_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined (OPENMP) +#if defined (PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -595,12 +595,13 @@ subroutine psb_z_coo_clean_zeros(a, info) integer(psb_ipk_), intent(out) :: info ! 
integer(psb_ipk_) :: i,j,k, nzin - + info = 0 nzin = a%get_nzeros() j = 0 do i=1, nzin - if (a%val(i) /= zzero) then + ! Always keep the diagonal, even if numerically zero + if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then j = j + 1 a%val(j) = a%val(i) a%ia(j) = a%ia(i) @@ -608,6 +609,7 @@ subroutine psb_z_coo_clean_zeros(a, info) end if end do call a%set_nzeros(j) + call a%fix(info) call a%trim() end subroutine psb_z_coo_clean_zeros @@ -1928,7 +1930,7 @@ function psb_z_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) -#if defined(OPENMP) +#if defined(PSB_OPENMP) res = dzero !$omp parallel do private(i) reduction(max: res) do i=1, nnz @@ -2818,7 +2820,7 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_realloc_mod use psb_sort_mod use psb_z_base_mat_mod, psb_protect_name => psb_z_coo_csput_a -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -2867,29 +2869,42 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (a%is_bld()) then ! Structure here is peculiar, because this function can be called ! either within a parallel region, or outside. - ! Hence the call to set_nzeros done here. - !$omp critical + ! Hence the call to set_nzeros done here. +#if defined(PSB_OPENMP) + !$omp critical(z_coo_csput_a) +#endif nza = a%get_nzeros() nzaold = nza isza = a%get_size() + if (info /= 0) write(0,*) name,' point 0:',info,isza,nza,nz ! Build phase. Must handle reallocations in a sensible way. 
if (isza < (nza+nz)) then + !write(0,*) ' before reallocate in csput ',psb_errstatus_fatal(),info call a%reallocate(max(nza+nz,int(1.5*isza))) + !write(0,*) ' after reallocate in csput ',psb_errstatus_fatal(),info endif isza = a%get_size() if (isza < (nza+nz)) then - info = psb_err_alloc_dealloc_; call psb_errpush(info,name) + info = psb_err_alloc_dealloc_; + write(0,*) name,' point 1:',info,isza,nza,nz,nza+nz + call psb_errpush(info,name) else -#if defined(OPENMP) +#if defined(PSB_OPENMP) nza = nza + nz #endif call a%set_nzeros(nza) end if - !$omp end critical - if (info /= 0) goto 9999 +#if defined(PSB_OPENMP) + if (info /= 0) write(0,*) name,' point 1.5:',info + !$omp end critical(z_coo_csput_a) +#endif + if (info /= 0) then + write(0,*) name,' point 2:',info + goto 9999 + end if call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) -#if !defined(OPENMP) +#if !defined(PSB_OPENMP) nza = nzaold call a%set_nzeros(nza) #endif @@ -2944,14 +2959,16 @@ contains integer(psb_ipk_) :: i,ir,ic info = psb_success_ -#if defined(OPENMP) +#if defined(PSB_OPENMP) + ! Disabling OpenMP parallel do for the time being. + ! Will need to redesign the entire code stack ! The logic here is different from the one used for ! the serial version: each element is stored in data ! structures but the invalid ones are stored as '-1' values. ! These values will be filtered in a future fixing process. - !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & - !$OMP private(ir,ic,i) + ! $ O M P PARALLEL DO schedule(STATIC) & + ! $ O M P shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & + ! $ O M P private(ir,ic,i) do i=1,nz ir = ia(i) ic = ja(i) @@ -2965,7 +2982,7 @@ contains aspk(nza+i) = -1 end if end do - !$OMP END PARALLEL DO + ! 
$ O M P END PARALLEL DO nza = nza + nz #else do i=1, nz @@ -3129,7 +3146,7 @@ subroutine psb_z_cp_coo_to_coo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -3182,7 +3199,7 @@ subroutine psb_z_cp_coo_from_coo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -3568,7 +3585,7 @@ subroutine psb_z_coo_tril(a,l,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -3864,7 +3881,7 @@ subroutine psb_z_coo_triu(a,u,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -4154,7 +4171,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) use psb_string_mod use psb_ip_reord_mod use psb_sort_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -4172,7 +4189,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' logical :: srt_inp, use_buffers -#if defined(OPENMP) +#if defined(PSB_OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads #endif @@ -4200,7 +4217,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) dupl_ = dupl -#if defined(OPENMP) +#if defined(PSB_OPENMP) maxthreads = omp_get_max_threads() ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. 
Since each thread handles one @@ -4214,7 +4231,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4256,7 +4273,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use psb_string_mod use psb_ip_reord_mod use psb_sort_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -4274,7 +4291,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf character(len=20) :: name = 'psb_fixcoo' logical :: srt_inp, use_buffers real(psb_dpk_) :: t0, t1 -#if defined(OPENMP) +#if defined(PSB_OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) @@ -4289,7 +4306,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Row major order if (nr <= nzin) then ! Avoid strange situations with large indices -#if defined(OPENMP) +#if defined(PSB_OPENMP) ! We are not going to need 'ix2' because of the presence ! of 'idxaux' as auxiliary buffer. allocate(ias(nzin),jas(nzin),vs(nzin), stat=info) @@ -4302,7 +4319,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if !if (use_buffers) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp workshare iaux(:) = 0 !$omp end workshare @@ -4356,7 +4373,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! all the indices are valid ! Check again use_buffers. 
if (use_buffers) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) maxthreads = omp_get_max_threads() allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then @@ -4730,7 +4747,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf call psi_msort_up(nzin,ia(1:),iaux(1:),iret) if (iret == 0) & & call psb_ip_reord(nzin,val,ia,ja,iaux) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(none) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & @@ -4920,7 +4937,7 @@ subroutine psb_z_cp_coo_to_lcoo(a,b,info) call b%set_nzeros(nz) call b%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -4972,7 +4989,7 @@ subroutine psb_z_cp_coo_from_lcoo(a,b,info) call a%set_nzeros(nz) call a%reallocate(nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5185,7 +5202,7 @@ function psb_lz_coo_maxval(a) result(res) nnz = a%get_nzeros() if (allocated(a%val)) then nnz = min(nnz,size(a%val)) -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5252,7 +5269,7 @@ function psb_lz_coo_csnmi(a) result(res) i = a%ia(j) vt(i) = vt(i) + abs(a%val(j)) end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5302,7 +5319,7 @@ function psb_lz_coo_csnm1(a) result(res) i = a%ja(j) vt(i) = vt(i) + abs(a%val(j)) end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) reduction(max:res) @@ -5585,7 +5602,7 @@ subroutine psb_lz_coo_spaxpby(alpha,a,beta,b,info) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! 
Compute the sum -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5696,7 +5713,7 @@ function psb_lz_coo_cmpmat(a,b,tol,info) result(res) ! Allocate (temporary) space for the solution call tcoo%allocate(M,N,(nza+nzb)) ! Compute the sum -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_) :: i !$omp parallel do private(i) @@ -5926,12 +5943,13 @@ subroutine psb_lz_coo_clean_zeros(a, info) integer(psb_ipk_), intent(out) :: info ! integer(psb_lpk_) :: i,j,k, nzin - + info = 0 nzin = a%get_nzeros() j = 0 do i=1, nzin - if (a%val(i) /= zzero) then + ! Always keep the diagonal, even if numerically zero + if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then j = j + 1 a%val(j) = a%val(i) a%ia(j) = a%ia(i) @@ -5956,7 +5974,7 @@ subroutine psb_lz_coo_clean_negidx(a,info) end subroutine psb_lz_coo_clean_negidx -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lz_coo_clean_negidx_inner(nzin,ia,ja,val,nzout,info) use psb_error_mod use psb_z_base_mat_mod, psb_protect_name => psb_lz_coo_clean_negidx_inner diff --git a/base/serial/impl/psb_z_csc_impl.F90 b/base/serial/impl/psb_z_csc_impl.F90 index e5516bd9..95de776d 100644 --- a/base/serial/impl/psb_z_csc_impl.F90 +++ b/base/serial/impl/psb_z_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_z_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2189,7 +2189,7 @@ subroutine psb_z_mv_csc_from_coo(a,b,info) use psb_error_mod use psb_z_base_mat_mod use psb_z_csc_mat_mod, psb_protect_name => psb_z_mv_csc_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -2226,7 +2226,7 @@ subroutine psb_z_mv_csc_from_coo(a,b,info) call psb_realloc(nc+1,a%icp,info) call b%free() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL 
default(shared) @@ -2328,7 +2328,7 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2403,35 +2403,36 @@ subroutine psb_z_mv_csc_from_fmt(a,b,info) end subroutine psb_z_mv_csc_from_fmt -subroutine psb_z_csc_clean_zeros(a, info) - use psb_error_mod - use psb_z_csc_mat_mod, psb_protect_name => psb_z_csc_clean_zeros - implicit none - class(psb_z_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! - integer(psb_ipk_) :: i, j, k, nc - integer(psb_ipk_), allocatable :: ilcp(:) - - info = 0 - call a%sync() - nc = a%get_ncols() - ilcp = a%icp - a%icp(1) = 1 - j = a%icp(1) - do i=1, nc - do k = ilcp(i), ilcp(i+1) -1 - if (a%val(k) /= zzero) then - a%val(j) = a%val(k) - a%ia(j) = a%ia(k) - j = j + 1 - end if - end do - a%icp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_z_csc_clean_zeros +!!$subroutine psb_z_csc_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_z_csc_mat_mod, psb_protect_name => psb_z_csc_clean_zeros +!!$ implicit none +!!$ class(psb_z_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_ipk_) :: i, j, k, nc +!!$ integer(psb_ipk_), allocatable :: ilcp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nc = a%get_ncols() +!!$ ilcp = a%icp +!!$ a%icp(1) = 1 +!!$ j = a%icp(1) +!!$ do i=1, nc +!!$ do k = ilcp(i), ilcp(i+1) -1 +!!$ ! 
Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= zzero).or.(i == a%ia(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ia(j) = a%ia(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%icp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_z_csc_clean_zeros subroutine psb_z_cp_csc_from_fmt(a,b,info) use psb_const_mod @@ -2461,7 +2462,7 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4059,7 @@ subroutine psb_lz_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -4304,35 +4305,36 @@ subroutine psb_lz_cp_csc_from_fmt(a,b,info) end subroutine psb_lz_cp_csc_from_fmt -subroutine psb_lz_csc_clean_zeros(a, info) - use psb_error_mod - use psb_z_csc_mat_mod, psb_protect_name => psb_lz_csc_clean_zeros - implicit none - class(psb_lz_csc_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! 
- integer(psb_lpk_) :: i, j, k, nc - integer(psb_lpk_), allocatable :: ilcp(:) - - info = 0 - call a%sync() - nc = a%get_ncols() - ilcp = a%icp - a%icp(1) = 1 - j = a%icp(1) - do i=1, nc - do k = ilcp(i), ilcp(i+1) -1 - if (a%val(k) /= zzero) then - a%val(j) = a%val(k) - a%ia(j) = a%ia(k) - j = j + 1 - end if - end do - a%icp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_lz_csc_clean_zeros +!!$subroutine psb_lz_csc_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_z_csc_mat_mod, psb_protect_name => psb_lz_csc_clean_zeros +!!$ implicit none +!!$ class(psb_lz_csc_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_lpk_) :: i, j, k, nc +!!$ integer(psb_lpk_), allocatable :: ilcp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nc = a%get_ncols() +!!$ ilcp = a%icp +!!$ a%icp(1) = 1 +!!$ j = a%icp(1) +!!$ do i=1, nc +!!$ do k = ilcp(i), ilcp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= zzero).or.(i == a%ia(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ia(j) = a%ia(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%icp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_lz_csc_clean_zeros subroutine psb_lz_csc_mold(a,b,info) diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index 5cf1c72d..dd4be13a 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -152,7 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -164,7 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -176,7 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd 
reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -192,7 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -204,7 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -216,7 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -231,7 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -243,7 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -255,7 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -270,7 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -282,7 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -294,7 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero - !$omp simd + !$omp simd reduction(+:acc) do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -2289,7 +2289,7 @@ subroutine psb_z_csr_tril(a,l,info,& nb = jmax_ endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -2591,7 
+2591,7 @@ subroutine psb_z_csr_triu(a,u,info,& endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) block integer(psb_ipk_), allocatable :: lrws(:),urws(:) integer(psb_ipk_) :: lpnt, upnt, lnz, unz @@ -3156,7 +3156,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) use psb_realloc_mod use psb_z_base_mat_mod use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_csr_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -3217,7 +3217,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(shared) reduction(max:info) @@ -3318,7 +3318,7 @@ subroutine psb_z_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3346,7 +3346,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) use psb_error_mod use psb_z_base_mat_mod use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_csr_from_coo -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -3385,7 +3385,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) call psb_realloc(max(nr+1,nc+1),a%irp,info) call b%free() -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP PARALLEL default(shared) reduction(max:info) @@ -3489,7 +3489,7 @@ subroutine psb_z_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) 
then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -3624,37 +3624,38 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info) end select end subroutine psb_z_cp_csr_from_fmt -subroutine psb_z_csr_clean_zeros(a, info) - use psb_error_mod - use psb_z_csr_mat_mod, psb_protect_name => psb_z_csr_clean_zeros - implicit none - class(psb_z_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! - integer(psb_ipk_) :: i, j, k, nr - integer(psb_ipk_), allocatable :: ilrp(:) - - info = 0 - call a%sync() - nr = a%get_nrows() - ilrp = a%irp - a%irp(1) = 1 - j = a%irp(1) - do i=1, nr - do k = ilrp(i), ilrp(i+1) -1 - if (a%val(k) /= zzero) then - a%val(j) = a%val(k) - a%ja(j) = a%ja(k) - j = j + 1 - end if - end do - a%irp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_z_csr_clean_zeros - -#if defined(OPENMP) +!!$subroutine psb_z_csr_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_z_csr_mat_mod, psb_protect_name => psb_z_csr_clean_zeros +!!$ implicit none +!!$ class(psb_z_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_ipk_) :: i, j, k, nr +!!$ integer(psb_ipk_), allocatable :: ilrp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nr = a%get_nrows() +!!$ ilrp = a%irp +!!$ a%irp(1) = 1 +!!$ j = a%irp(1) +!!$ do i=1, nr +!!$ do k = ilrp(i), ilrp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= zzero).or.(i == a%ja(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ja(j) = a%ja(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%irp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_z_csr_clean_zeros + +#if defined(PSB_OPENMP) subroutine psb_zcsrspspmm(a,b,c,info) use psb_z_mat_mod use psb_serial_mod, psb_protect_name => psb_zcsrspspmm @@ -3692,7 +3693,7 @@ subroutine psb_zcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. 
nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.5*(nza+nzb)),ma,mb,na,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -3772,8 +3773,8 @@ contains if (nrc > 0 ) then if ((nzc+nrc)>nze) then nze = max(ma*((nzc+j-1)/j),nzc+2*nrc) - call psb_realloc(nze,c%val,info) - if (info == 0) call psb_realloc(nze,c%ja,info) + call psb_ensure_size(nze,c%val,info) + if (info == 0) call psb_ensure_size(nze,c%ja,info) if (info /= 0) return end if @@ -3805,6 +3806,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3817,23 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth,offsets,info) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + allocate(offsets(omp_get_max_threads()),stat=info) + !$omp end single + !$omp end parallel + if (info /= 0) then + write(0,*)'Offsets allocation failed ',info + return + end if - allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3889,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw 
= start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3904,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3943,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4010,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4025,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d @@ -4223,7 +4238,7 @@ subroutine 
psb_zcsrspspmm(a,b,c,info) ! Estimate number of nonzeros on output. nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.25*(nza+nzb)),ma,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -4261,9 +4276,9 @@ contains nze = min(size(c%val),size(c%ja)) isz = max(ma,na,mb,nb) - call psb_realloc(isz,row,info) - if (info == 0) call psb_realloc(isz,idxs,info) - if (info == 0) call psb_realloc(isz,irow,info) + call psb_realloc(nb,row,info) + if (info == 0) call psb_realloc(na,idxs,info) + if (info == 0) call psb_realloc(nb,irow,info) if (info /= 0) return row = dzero irow = 0 @@ -4288,8 +4303,8 @@ contains if (nrc > 0 ) then if ((nzc+nrc)>nze) then nze = max(ma*((nzc+j-1)/j),nzc+2*nrc) - call psb_realloc(nze,c%val,info) - if (info == 0) call psb_realloc(nze,c%ja,info) + call psb_ensure_size(nze,c%val,info) + if (info == 0) call psb_ensure_size(nze,c%ja,info) if (info /= 0) return end if @@ -4312,6 +4327,266 @@ contains end subroutine psb_zcsrspspmm #endif +subroutine psb_z_ecsr_mold(a,b,info) + use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_mold + use psb_error_mod + implicit none + class(psb_z_ecsr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ecsr_mold + +subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_csmv + implicit none + class(psb_z_ecsr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='z_ecsr_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ecsr_cmp_nerwp + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws +end subroutine psb_z_ecsr_cmp_nerwp + +subroutine psb_z_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_coo + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_cp_ecsr_from_coo + +subroutine psb_z_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_coo + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine 
psb_z_mv_ecsr_from_coo + +subroutine psb_z_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_fmt + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_mv_ecsr_from_fmt + +subroutine psb_z_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_z_base_mat_mod + use psb_realloc_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_fmt + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_cp_ecsr_from_fmt + ! ! ! lz version @@ -6021,7 +6296,7 @@ subroutine psb_lz_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -6273,35 +6548,36 @@ subroutine psb_lz_cp_csr_from_fmt(a,b,info) end subroutine psb_lz_cp_csr_from_fmt -subroutine psb_lz_csr_clean_zeros(a, info) - use psb_error_mod - use psb_z_csr_mat_mod, psb_protect_name => psb_lz_csr_clean_zeros - implicit none - class(psb_lz_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - ! 
- integer(psb_lpk_) :: i, j, k, nr - integer(psb_lpk_), allocatable :: ilrp(:) - - info = 0 - call a%sync() - nr = a%get_nrows() - ilrp = a%irp - a%irp(1) = 1 - j = a%irp(1) - do i=1, nr - do k = ilrp(i), ilrp(i+1) -1 - if (a%val(k) /= zzero) then - a%val(j) = a%val(k) - a%ja(j) = a%ja(k) - j = j + 1 - end if - end do - a%irp(i+1) = j - end do - call a%trim() - call a%set_host() -end subroutine psb_lz_csr_clean_zeros +!!$subroutine psb_lz_csr_clean_zeros(a, info) +!!$ use psb_error_mod +!!$ use psb_z_csr_mat_mod, psb_protect_name => psb_lz_csr_clean_zeros +!!$ implicit none +!!$ class(psb_lz_csr_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(out) :: info +!!$ ! +!!$ integer(psb_lpk_) :: i, j, k, nr +!!$ integer(psb_lpk_), allocatable :: ilrp(:) +!!$ +!!$ info = 0 +!!$ call a%sync() +!!$ nr = a%get_nrows() +!!$ ilrp = a%irp +!!$ a%irp(1) = 1 +!!$ j = a%irp(1) +!!$ do i=1, nr +!!$ do k = ilrp(i), ilrp(i+1) -1 +!!$ ! Always keep the diagonal, even if numerically zero +!!$ if ((a%val(k) /= zzero).or.(i == a%ja(k))) then +!!$ a%val(j) = a%val(k) +!!$ a%ja(j) = a%ja(k) +!!$ j = j + 1 +!!$ end if +!!$ end do +!!$ a%irp(i+1) = j +!!$ end do +!!$ call a%trim() +!!$ call a%set_host() +!!$end subroutine psb_lz_csr_clean_zeros subroutine psb_lzcsrspspmm(a,b,c,info) use psb_z_mat_mod @@ -6337,7 +6613,7 @@ subroutine psb_lzcsrspspmm(a,b,c,info) nza = a%get_nzeros() nzb = b%get_nzeros() - nzc = 2*(nza+nzb) + nzc = max(nint(0.25*(nza+nzb)),ma,nb) call c%allocate(ma,nb,nzc) call csr_spspmm(a,b,c,info) @@ -6375,9 +6651,9 @@ contains nze = min(size(c%val),size(c%ja)) isz = max(ma,na,mb,nb) - call psb_realloc(isz,row,info) - if (info == 0) call psb_realloc(isz,idxs,info) - if (info == 0) call psb_realloc(isz,irow,info) + call psb_realloc(nb,row,info) + if (info == 0) call psb_realloc(na,idxs,info) + if (info == 0) call psb_realloc(nb,irow,info) if (info /= 0) return row = dzero irow = 0 diff --git a/base/serial/impl/psb_z_mat_impl.F90 
b/base/serial/impl/psb_z_mat_impl.F90 index 2cebf9e7..e1f9310b 100644 --- a/base/serial/impl/psb_z_mat_impl.F90 +++ b/base/serial/impl/psb_z_mat_impl.F90 @@ -1213,6 +1213,106 @@ subroutine psb_z_b_csclip(a,b,info,& end subroutine psb_z_b_csclip +subroutine psb_z_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_z_mat_mod, psb_protect_name => psb_z_split_nd + implicit none + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + type(psb_z_coo_sparse_mat) :: acoo + type(psb_z_csr_sparse_mat), allocatable :: aclip + type(psb_z_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) 
+ if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_split_nd + +subroutine psb_z_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_z_mat_mod, psb_protect_name => psb_z_merge_nd + implicit none + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + type(psb_z_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. 
+ character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_merge' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_merge_nd + subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1346,65 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) 
then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_z_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_z_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_z_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. 
- call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1414,79 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_z_base_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_z_base_sparse_mat), intent(in), optional :: mold + + class(psb_z_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) 
+ goto 9999 + end select + else + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_z_cscnv subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1495,12 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) use psb_z_mat_mod, psb_protect_name => psb_z_cscnv_ip implicit none - class(psb_zspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_z_base_sparse_mat), intent(in), optional :: mold - class(psb_z_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1527,55 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) 
then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_z_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_z_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_z_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call 
psb_erractionrestore(err_act) @@ -1394,6 +1585,77 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_z_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_z_base_sparse_mat), intent(in), optional :: mold + class(psb_z_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. 
+ call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_z_cscnv_ip @@ -2849,7 +3111,7 @@ subroutine psb_lz_set_lnrows(m,a) end subroutine psb_lz_set_lnrows -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lz_set_inrows(m,a) use psb_z_mat_mod, psb_protect_name => psb_lz_set_inrows use psb_error_mod @@ -2906,7 +3168,7 @@ subroutine psb_lz_set_lncols(n,a) end subroutine psb_lz_set_lncols -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_lz_set_incols(n,a) use psb_z_mat_mod, psb_protect_name => psb_lz_set_incols use psb_error_mod diff --git a/base/serial/impl/psb_z_rb_idx_tree_impl.F90 b/base/serial/impl/psb_z_rb_idx_tree_impl.F90 index 88ab214b..42695158 100644 --- a/base/serial/impl/psb_z_rb_idx_tree_impl.F90 +++ b/base/serial/impl/psb_z_rb_idx_tree_impl.F90 @@ -267,7 +267,7 @@ subroutine psb_z_rb_idx_tree_scalar_sparse_row_mul(tree, scalar, mat, row_num) end subroutine psb_z_rb_idx_tree_scalar_sparse_row_mul subroutine psb_z_rb_idx_tree_merge(trees, mat) -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif use psb_realloc_mod @@ -294,7 +294,7 @@ subroutine psb_z_rb_idx_tree_merge(trees, mat) call psb_realloc(nnz, mat%val, info) call psb_realloc(nnz, mat%ja, info) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel do schedule(static), private(current, previous, j) #endif do i = 1, size(trees) @@ -323,7 +323,7 @@ subroutine psb_z_rb_idx_tree_merge(trees, mat) deallocate(previous) end do end do -#if defined(OPENMP) +#if 
defined(PSB_OPENMP) !$omp end parallel do #endif end subroutine psb_z_rb_idx_tree_merge diff --git a/base/serial/psb_cgeprt.f90 b/base/serial/psb_cgeprt.f90 index 985c1eab..e05d673b 100644 --- a/base/serial/psb_cgeprt.f90 +++ b/base/serial/psb_cgeprt.f90 @@ -29,7 +29,7 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -! File: psb_scsprt.f90 +! File: psb_geprt.f90 ! Subroutine: ! Arguments: @@ -131,6 +131,7 @@ subroutine psb_cgeprt2(iout,a,head) ncol = size(a,2) write(iout,*) nrow,ncol + write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))' do i=1,nrow @@ -161,7 +162,7 @@ subroutine psb_cgeprt1(iout,a,head) write(iout,'(a)') '% ' nrow = size(a,1) ncol = 1 - write(iout,*) nrow + write(iout,*) nrow,ncol write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))' diff --git a/base/serial/psb_cnumbmm.f90 b/base/serial/psb_cnumbmm.f90 index c965d4f3..920187b3 100644 --- a/base/serial/psb_cnumbmm.f90 +++ b/base/serial/psb_cnumbmm.f90 @@ -40,7 +40,8 @@ ! ! subroutine psb_cnumbmm(a,b,c) - use psb_base_mod, psb_protect_name => psb_cnumbmm + use psb_mat_mod + use psb_c_serial_mod, only : psb_cbase_numbmm implicit none type(psb_cspmat_type), intent(in) :: a,b @@ -60,7 +61,7 @@ subroutine psb_cnumbmm(a,b,c) select type(aa=>c%a) type is (psb_c_csr_sparse_mat) - call psb_numbmm(a%a,b%a,aa) + call psb_cbase_numbmm(a%a,b%a,aa) class default info = psb_err_invalid_mat_state_ call psb_errpush(info,name) @@ -81,7 +82,6 @@ end subroutine psb_cnumbmm subroutine psb_cbase_numbmm(a,b,c) use psb_mat_mod use psb_string_mod - use psb_serial_mod, psb_protect_name => psb_cbase_numbmm implicit none class(psb_c_base_sparse_mat), intent(in) :: a,b @@ -234,10 +234,10 @@ contains end subroutine psb_cbase_numbmm - - subroutine psb_lcnumbmm(a,b,c) - use psb_base_mod, psb_protect_name => psb_lcnumbmm + use psb_mat_mod + use psb_c_serial_mod, only : psb_lcbase_numbmm + implicit none type(psb_lcspmat_type), intent(in) :: a,b @@ -257,7 +257,7 @@ subroutine psb_lcnumbmm(a,b,c) select type(aa=>c%a) type is 
(psb_lc_csr_sparse_mat) - call psb_numbmm(a%a,b%a,aa) + call psb_lcbase_numbmm(a%a,b%a,aa) class default info = psb_err_invalid_mat_state_ call psb_errpush(info,name) @@ -278,7 +278,6 @@ end subroutine psb_lcnumbmm subroutine psb_lcbase_numbmm(a,b,c) use psb_mat_mod use psb_string_mod - use psb_serial_mod, psb_protect_name => psb_lcbase_numbmm implicit none class(psb_lc_base_sparse_mat), intent(in) :: a,b diff --git a/base/serial/psb_crwextd.f90 b/base/serial/psb_crwextd.f90 index 1b55e4db..9676ad24 100644 --- a/base/serial/psb_crwextd.f90 +++ b/base/serial/psb_crwextd.f90 @@ -40,7 +40,7 @@ ! subroutine psb_crwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_crwextd + use psb_c_serial_mod, only : psb_cbase_rwextd implicit none ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -63,23 +63,23 @@ subroutine psb_crwextd(nr,a,info,b,rowscale) select type(aa=> a%a) type is (psb_c_csr_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale) + call psb_cbase_rwextd(nr,aa,info,b%a,rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_cbase_rwextd(nr,aa,info,rowscale=rowscale) end if type is (psb_c_coo_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale=rowscale) + call psb_cbase_rwextd(nr,aa,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_cbase_rwextd(nr,aa,info,rowscale=rowscale) end if class default call aa%mv_to_coo(actmp,info) if (info == psb_success_) then if (present(b)) then - call psb_rwextd(nr,actmp,info,b%a,rowscale=rowscale) + call psb_cbase_rwextd(nr,actmp,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,actmp,info,rowscale=rowscale) + call psb_cbase_rwextd(nr,actmp,info,rowscale=rowscale) end if end if if (info == psb_success_) call aa%mv_from_coo(actmp,info) @@ -95,9 +95,9 @@ subroutine psb_crwextd(nr,a,info,b,rowscale) return end subroutine psb_crwextd + subroutine 
psb_cbase_rwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_cbase_rwextd implicit none ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -240,7 +240,7 @@ end subroutine psb_cbase_rwextd subroutine psb_lcrwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_lcrwextd + use psb_c_serial_mod, only : psb_lcbase_rwextd implicit none ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -264,23 +264,23 @@ subroutine psb_lcrwextd(nr,a,info,b,rowscale) select type(aa=> a%a) type is (psb_lc_csr_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale) + call psb_lcbase_rwextd(nr,aa,info,b%a,rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_lcbase_rwextd(nr,aa,info,rowscale=rowscale) end if type is (psb_lc_coo_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale=rowscale) + call psb_lcbase_rwextd(nr,aa,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_lcbase_rwextd(nr,aa,info,rowscale=rowscale) end if class default call aa%mv_to_coo(actmp,info) if (info == psb_success_) then if (present(b)) then - call psb_rwextd(nr,actmp,info,b%a,rowscale=rowscale) + call psb_lcbase_rwextd(nr,actmp,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,actmp,info,rowscale=rowscale) + call psb_lcbase_rwextd(nr,actmp,info,rowscale=rowscale) end if end if if (info == psb_success_) call aa%mv_from_coo(actmp,info) @@ -296,9 +296,9 @@ subroutine psb_lcrwextd(nr,a,info,b,rowscale) return end subroutine psb_lcrwextd + subroutine psb_lcbase_rwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_lcbase_rwextd implicit none ! 
Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) diff --git a/base/serial/psb_cspspmm.f90 b/base/serial/psb_cspspmm.f90 index ef56757e..83f22012 100644 --- a/base/serial/psb_cspspmm.f90 +++ b/base/serial/psb_cspspmm.f90 @@ -36,7 +36,8 @@ ! ! subroutine psb_cspspmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_cspspmm + use psb_mat_mod + use psb_c_serial_mod, psb_protect_name => psb_cspspmm implicit none type(psb_cspmat_type), intent(in) :: a,b @@ -115,9 +116,9 @@ subroutine psb_cspspmm(a,b,c,info) end subroutine psb_cspspmm - subroutine psb_lcspspmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_lcspspmm + use psb_mat_mod + use psb_c_serial_mod, psb_protect_name => psb_lcspspmm implicit none type(psb_lcspmat_type), intent(in) :: a,b diff --git a/base/serial/psb_csymbmm.f90 b/base/serial/psb_csymbmm.f90 index 25818791..0f8c18ab 100644 --- a/base/serial/psb_csymbmm.f90 +++ b/base/serial/psb_csymbmm.f90 @@ -40,7 +40,8 @@ ! subroutine psb_csymbmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_csymbmm + use psb_mat_mod + use psb_c_serial_mod, only : psb_cbase_symbmm implicit none type(psb_cspmat_type), intent(in) :: a,b @@ -61,7 +62,7 @@ subroutine psb_csymbmm(a,b,c,info) allocate(ccsr,stat=info) if (info == psb_success_) then - call psb_symbmm(a%a,b%a,ccsr,info) + call psb_cbase_symbmm(a%a,b%a,ccsr,info) else info = psb_err_alloc_dealloc_ end if @@ -83,7 +84,8 @@ end subroutine psb_csymbmm subroutine psb_cbase_symbmm(a,b,c,info) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_cbase_symbmm + use psb_sort_mod + use psb_serial_mod, only : symbmm implicit none class(psb_c_base_sparse_mat), intent(in) :: a,b @@ -256,10 +258,9 @@ contains end subroutine psb_cbase_symbmm - - subroutine psb_lcsymbmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_lcsymbmm + use psb_mat_mod + use psb_c_serial_mod, only : psb_lcbase_symbmm implicit none type(psb_lcspmat_type), intent(in) :: a,b @@ -280,7 +281,7 @@ subroutine 
psb_lcsymbmm(a,b,c,info) allocate(ccsr,stat=info) if (info == psb_success_) then - call psb_symbmm(a%a,b%a,ccsr,info) + call psb_lcbase_symbmm(a%a,b%a,ccsr,info) else info = psb_err_alloc_dealloc_ end if @@ -302,7 +303,7 @@ end subroutine psb_lcsymbmm subroutine psb_lcbase_symbmm(a,b,c,info) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_lcbase_symbmm + use psb_sort_mod implicit none class(psb_lc_base_sparse_mat), intent(in) :: a,b diff --git a/base/serial/psb_dgeprt.f90 b/base/serial/psb_dgeprt.f90 index 07fb32f8..ae4c5b11 100644 --- a/base/serial/psb_dgeprt.f90 +++ b/base/serial/psb_dgeprt.f90 @@ -29,7 +29,7 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -! File: psb_dcsprt.f90 +! File: psb_geprt.f90 ! Subroutine: ! Arguments: @@ -124,13 +124,14 @@ subroutine psb_dgeprt2(iout,a,head) character(len=80) :: frmtv integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol - write(iout,'(a)') '%%MatrixMarket matrix array real general' + write(iout,'(a)') '%%MatrixMarket matrix array complex general' write(iout,'(a)') '% '//trim(head) write(iout,'(a)') '% ' nrow = size(a,1) ncol = size(a,2) write(iout,*) nrow,ncol + write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))' do i=1,nrow @@ -156,12 +157,12 @@ subroutine psb_dgeprt1(iout,a,head) character(len=80) :: frmtv integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol - write(iout,'(a)') '%%MatrixMarket matrix array real general' + write(iout,'(a)') '%%MatrixMarket matrix array complex general' write(iout,'(a)') '% '//trim(head) write(iout,'(a)') '% ' nrow = size(a,1) ncol = 1 - write(iout,*) nrow + write(iout,*) nrow,ncol write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))' diff --git a/base/serial/psb_dnumbmm.f90 b/base/serial/psb_dnumbmm.f90 index c1d3951c..4719b2bc 100644 --- a/base/serial/psb_dnumbmm.f90 +++ b/base/serial/psb_dnumbmm.f90 @@ -40,7 +40,8 @@ ! ! 
subroutine psb_dnumbmm(a,b,c) - use psb_base_mod, psb_protect_name => psb_dnumbmm + use psb_mat_mod + use psb_d_serial_mod, only : psb_dbase_numbmm implicit none type(psb_dspmat_type), intent(in) :: a,b @@ -60,7 +61,7 @@ subroutine psb_dnumbmm(a,b,c) select type(aa=>c%a) type is (psb_d_csr_sparse_mat) - call psb_numbmm(a%a,b%a,aa) + call psb_dbase_numbmm(a%a,b%a,aa) class default info = psb_err_invalid_mat_state_ call psb_errpush(info,name) @@ -81,7 +82,6 @@ end subroutine psb_dnumbmm subroutine psb_dbase_numbmm(a,b,c) use psb_mat_mod use psb_string_mod - use psb_serial_mod, psb_protect_name => psb_dbase_numbmm implicit none class(psb_d_base_sparse_mat), intent(in) :: a,b @@ -234,10 +234,10 @@ contains end subroutine psb_dbase_numbmm - - subroutine psb_ldnumbmm(a,b,c) - use psb_base_mod, psb_protect_name => psb_ldnumbmm + use psb_mat_mod + use psb_d_serial_mod, only : psb_ldbase_numbmm + implicit none type(psb_ldspmat_type), intent(in) :: a,b @@ -257,7 +257,7 @@ subroutine psb_ldnumbmm(a,b,c) select type(aa=>c%a) type is (psb_ld_csr_sparse_mat) - call psb_numbmm(a%a,b%a,aa) + call psb_ldbase_numbmm(a%a,b%a,aa) class default info = psb_err_invalid_mat_state_ call psb_errpush(info,name) @@ -278,7 +278,6 @@ end subroutine psb_ldnumbmm subroutine psb_ldbase_numbmm(a,b,c) use psb_mat_mod use psb_string_mod - use psb_serial_mod, psb_protect_name => psb_ldbase_numbmm implicit none class(psb_ld_base_sparse_mat), intent(in) :: a,b diff --git a/base/serial/psb_drwextd.f90 b/base/serial/psb_drwextd.f90 index 9abc42d2..70c73f83 100644 --- a/base/serial/psb_drwextd.f90 +++ b/base/serial/psb_drwextd.f90 @@ -40,7 +40,7 @@ ! subroutine psb_drwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_drwextd + use psb_d_serial_mod, only : psb_dbase_rwextd implicit none ! 
Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -63,23 +63,23 @@ subroutine psb_drwextd(nr,a,info,b,rowscale) select type(aa=> a%a) type is (psb_d_csr_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale) + call psb_dbase_rwextd(nr,aa,info,b%a,rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_dbase_rwextd(nr,aa,info,rowscale=rowscale) end if type is (psb_d_coo_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale=rowscale) + call psb_dbase_rwextd(nr,aa,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_dbase_rwextd(nr,aa,info,rowscale=rowscale) end if class default call aa%mv_to_coo(actmp,info) if (info == psb_success_) then if (present(b)) then - call psb_rwextd(nr,actmp,info,b%a,rowscale=rowscale) + call psb_dbase_rwextd(nr,actmp,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,actmp,info,rowscale=rowscale) + call psb_dbase_rwextd(nr,actmp,info,rowscale=rowscale) end if end if if (info == psb_success_) call aa%mv_from_coo(actmp,info) @@ -95,9 +95,9 @@ subroutine psb_drwextd(nr,a,info,b,rowscale) return end subroutine psb_drwextd + subroutine psb_dbase_rwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_dbase_rwextd implicit none ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -240,7 +240,7 @@ end subroutine psb_dbase_rwextd subroutine psb_ldrwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_ldrwextd + use psb_d_serial_mod, only : psb_ldbase_rwextd implicit none ! 
Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -264,23 +264,23 @@ subroutine psb_ldrwextd(nr,a,info,b,rowscale) select type(aa=> a%a) type is (psb_ld_csr_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale) + call psb_ldbase_rwextd(nr,aa,info,b%a,rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_ldbase_rwextd(nr,aa,info,rowscale=rowscale) end if type is (psb_ld_coo_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale=rowscale) + call psb_ldbase_rwextd(nr,aa,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_ldbase_rwextd(nr,aa,info,rowscale=rowscale) end if class default call aa%mv_to_coo(actmp,info) if (info == psb_success_) then if (present(b)) then - call psb_rwextd(nr,actmp,info,b%a,rowscale=rowscale) + call psb_ldbase_rwextd(nr,actmp,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,actmp,info,rowscale=rowscale) + call psb_ldbase_rwextd(nr,actmp,info,rowscale=rowscale) end if end if if (info == psb_success_) call aa%mv_from_coo(actmp,info) @@ -296,9 +296,9 @@ subroutine psb_ldrwextd(nr,a,info,b,rowscale) return end subroutine psb_ldrwextd + subroutine psb_ldbase_rwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_ldbase_rwextd implicit none ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) diff --git a/base/serial/psb_dspspmm.f90 b/base/serial/psb_dspspmm.f90 index cec9699a..e3e203d6 100644 --- a/base/serial/psb_dspspmm.f90 +++ b/base/serial/psb_dspspmm.f90 @@ -36,7 +36,8 @@ ! ! 
subroutine psb_dspspmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_dspspmm + use psb_mat_mod + use psb_d_serial_mod, psb_protect_name => psb_dspspmm implicit none type(psb_dspmat_type), intent(in) :: a,b @@ -115,9 +116,9 @@ subroutine psb_dspspmm(a,b,c,info) end subroutine psb_dspspmm - subroutine psb_ldspspmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_ldspspmm + use psb_mat_mod + use psb_d_serial_mod, psb_protect_name => psb_ldspspmm implicit none type(psb_ldspmat_type), intent(in) :: a,b diff --git a/base/serial/psb_dsymbmm.f90 b/base/serial/psb_dsymbmm.f90 index 3dcad00f..d647d270 100644 --- a/base/serial/psb_dsymbmm.f90 +++ b/base/serial/psb_dsymbmm.f90 @@ -40,7 +40,8 @@ ! subroutine psb_dsymbmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_dsymbmm + use psb_mat_mod + use psb_d_serial_mod, only : psb_dbase_symbmm implicit none type(psb_dspmat_type), intent(in) :: a,b @@ -61,7 +62,7 @@ subroutine psb_dsymbmm(a,b,c,info) allocate(ccsr,stat=info) if (info == psb_success_) then - call psb_symbmm(a%a,b%a,ccsr,info) + call psb_dbase_symbmm(a%a,b%a,ccsr,info) else info = psb_err_alloc_dealloc_ end if @@ -83,7 +84,8 @@ end subroutine psb_dsymbmm subroutine psb_dbase_symbmm(a,b,c,info) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_dbase_symbmm + use psb_sort_mod + use psb_serial_mod, only : symbmm implicit none class(psb_d_base_sparse_mat), intent(in) :: a,b @@ -256,10 +258,9 @@ contains end subroutine psb_dbase_symbmm - - subroutine psb_ldsymbmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_ldsymbmm + use psb_mat_mod + use psb_d_serial_mod, only : psb_ldbase_symbmm implicit none type(psb_ldspmat_type), intent(in) :: a,b @@ -280,7 +281,7 @@ subroutine psb_ldsymbmm(a,b,c,info) allocate(ccsr,stat=info) if (info == psb_success_) then - call psb_symbmm(a%a,b%a,ccsr,info) + call psb_ldbase_symbmm(a%a,b%a,ccsr,info) else info = psb_err_alloc_dealloc_ end if @@ -302,7 +303,7 @@ end subroutine psb_ldsymbmm subroutine 
psb_ldbase_symbmm(a,b,c,info) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_ldbase_symbmm + use psb_sort_mod implicit none class(psb_ld_base_sparse_mat), intent(in) :: a,b diff --git a/base/serial/psb_sgeprt.f90 b/base/serial/psb_sgeprt.f90 index 3ebb975b..9f3205fd 100644 --- a/base/serial/psb_sgeprt.f90 +++ b/base/serial/psb_sgeprt.f90 @@ -29,7 +29,7 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -! File: psb_scsprt.f90 +! File: psb_geprt.f90 ! Subroutine: ! Arguments: @@ -124,13 +124,14 @@ subroutine psb_sgeprt2(iout,a,head) character(len=80) :: frmtv integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol - write(iout,'(a)') '%%MatrixMarket matrix array real general' + write(iout,'(a)') '%%MatrixMarket matrix array complex general' write(iout,'(a)') '% '//trim(head) write(iout,'(a)') '% ' nrow = size(a,1) ncol = size(a,2) write(iout,*) nrow,ncol + write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))' do i=1,nrow @@ -156,12 +157,12 @@ subroutine psb_sgeprt1(iout,a,head) character(len=80) :: frmtv integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol - write(iout,'(a)') '%%MatrixMarket matrix array real general' + write(iout,'(a)') '%%MatrixMarket matrix array complex general' write(iout,'(a)') '% '//trim(head) write(iout,'(a)') '% ' nrow = size(a,1) ncol = 1 - write(iout,*) nrow + write(iout,*) nrow,ncol write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))' diff --git a/base/serial/psb_snumbmm.f90 b/base/serial/psb_snumbmm.f90 index ceffb977..99075a8b 100644 --- a/base/serial/psb_snumbmm.f90 +++ b/base/serial/psb_snumbmm.f90 @@ -40,7 +40,8 @@ ! ! 
subroutine psb_snumbmm(a,b,c) - use psb_base_mod, psb_protect_name => psb_snumbmm + use psb_mat_mod + use psb_s_serial_mod, only : psb_sbase_numbmm implicit none type(psb_sspmat_type), intent(in) :: a,b @@ -60,7 +61,7 @@ subroutine psb_snumbmm(a,b,c) select type(aa=>c%a) type is (psb_s_csr_sparse_mat) - call psb_numbmm(a%a,b%a,aa) + call psb_sbase_numbmm(a%a,b%a,aa) class default info = psb_err_invalid_mat_state_ call psb_errpush(info,name) @@ -81,7 +82,6 @@ end subroutine psb_snumbmm subroutine psb_sbase_numbmm(a,b,c) use psb_mat_mod use psb_string_mod - use psb_serial_mod, psb_protect_name => psb_sbase_numbmm implicit none class(psb_s_base_sparse_mat), intent(in) :: a,b @@ -234,10 +234,10 @@ contains end subroutine psb_sbase_numbmm - - subroutine psb_lsnumbmm(a,b,c) - use psb_base_mod, psb_protect_name => psb_lsnumbmm + use psb_mat_mod + use psb_s_serial_mod, only : psb_lsbase_numbmm + implicit none type(psb_lsspmat_type), intent(in) :: a,b @@ -257,7 +257,7 @@ subroutine psb_lsnumbmm(a,b,c) select type(aa=>c%a) type is (psb_ls_csr_sparse_mat) - call psb_numbmm(a%a,b%a,aa) + call psb_lsbase_numbmm(a%a,b%a,aa) class default info = psb_err_invalid_mat_state_ call psb_errpush(info,name) @@ -278,7 +278,6 @@ end subroutine psb_lsnumbmm subroutine psb_lsbase_numbmm(a,b,c) use psb_mat_mod use psb_string_mod - use psb_serial_mod, psb_protect_name => psb_lsbase_numbmm implicit none class(psb_ls_base_sparse_mat), intent(in) :: a,b diff --git a/base/serial/psb_srwextd.f90 b/base/serial/psb_srwextd.f90 index eb7ecf00..3ecd7a8e 100644 --- a/base/serial/psb_srwextd.f90 +++ b/base/serial/psb_srwextd.f90 @@ -40,7 +40,7 @@ ! subroutine psb_srwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_srwextd + use psb_s_serial_mod, only : psb_sbase_rwextd implicit none ! 
Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -63,23 +63,23 @@ subroutine psb_srwextd(nr,a,info,b,rowscale) select type(aa=> a%a) type is (psb_s_csr_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale) + call psb_sbase_rwextd(nr,aa,info,b%a,rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_sbase_rwextd(nr,aa,info,rowscale=rowscale) end if type is (psb_s_coo_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale=rowscale) + call psb_sbase_rwextd(nr,aa,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_sbase_rwextd(nr,aa,info,rowscale=rowscale) end if class default call aa%mv_to_coo(actmp,info) if (info == psb_success_) then if (present(b)) then - call psb_rwextd(nr,actmp,info,b%a,rowscale=rowscale) + call psb_sbase_rwextd(nr,actmp,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,actmp,info,rowscale=rowscale) + call psb_sbase_rwextd(nr,actmp,info,rowscale=rowscale) end if end if if (info == psb_success_) call aa%mv_from_coo(actmp,info) @@ -95,9 +95,9 @@ subroutine psb_srwextd(nr,a,info,b,rowscale) return end subroutine psb_srwextd + subroutine psb_sbase_rwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_sbase_rwextd implicit none ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -240,7 +240,7 @@ end subroutine psb_sbase_rwextd subroutine psb_lsrwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_lsrwextd + use psb_s_serial_mod, only : psb_lsbase_rwextd implicit none ! 
Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -264,23 +264,23 @@ subroutine psb_lsrwextd(nr,a,info,b,rowscale) select type(aa=> a%a) type is (psb_ls_csr_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale) + call psb_lsbase_rwextd(nr,aa,info,b%a,rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_lsbase_rwextd(nr,aa,info,rowscale=rowscale) end if type is (psb_ls_coo_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale=rowscale) + call psb_lsbase_rwextd(nr,aa,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_lsbase_rwextd(nr,aa,info,rowscale=rowscale) end if class default call aa%mv_to_coo(actmp,info) if (info == psb_success_) then if (present(b)) then - call psb_rwextd(nr,actmp,info,b%a,rowscale=rowscale) + call psb_lsbase_rwextd(nr,actmp,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,actmp,info,rowscale=rowscale) + call psb_lsbase_rwextd(nr,actmp,info,rowscale=rowscale) end if end if if (info == psb_success_) call aa%mv_from_coo(actmp,info) @@ -296,9 +296,9 @@ subroutine psb_lsrwextd(nr,a,info,b,rowscale) return end subroutine psb_lsrwextd + subroutine psb_lsbase_rwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_lsbase_rwextd implicit none ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) diff --git a/base/serial/psb_sspspmm.f90 b/base/serial/psb_sspspmm.f90 index 008bcce6..e1ae9af3 100644 --- a/base/serial/psb_sspspmm.f90 +++ b/base/serial/psb_sspspmm.f90 @@ -36,7 +36,8 @@ ! ! 
subroutine psb_sspspmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_sspspmm + use psb_mat_mod + use psb_s_serial_mod, psb_protect_name => psb_sspspmm implicit none type(psb_sspmat_type), intent(in) :: a,b @@ -115,9 +116,9 @@ subroutine psb_sspspmm(a,b,c,info) end subroutine psb_sspspmm - subroutine psb_lsspspmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_lsspspmm + use psb_mat_mod + use psb_s_serial_mod, psb_protect_name => psb_lsspspmm implicit none type(psb_lsspmat_type), intent(in) :: a,b diff --git a/base/serial/psb_ssymbmm.f90 b/base/serial/psb_ssymbmm.f90 index 729dd856..d76b48ea 100644 --- a/base/serial/psb_ssymbmm.f90 +++ b/base/serial/psb_ssymbmm.f90 @@ -40,7 +40,8 @@ ! subroutine psb_ssymbmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_ssymbmm + use psb_mat_mod + use psb_s_serial_mod, only : psb_sbase_symbmm implicit none type(psb_sspmat_type), intent(in) :: a,b @@ -61,7 +62,7 @@ subroutine psb_ssymbmm(a,b,c,info) allocate(ccsr,stat=info) if (info == psb_success_) then - call psb_symbmm(a%a,b%a,ccsr,info) + call psb_sbase_symbmm(a%a,b%a,ccsr,info) else info = psb_err_alloc_dealloc_ end if @@ -83,7 +84,8 @@ end subroutine psb_ssymbmm subroutine psb_sbase_symbmm(a,b,c,info) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_sbase_symbmm + use psb_sort_mod + use psb_serial_mod, only : symbmm implicit none class(psb_s_base_sparse_mat), intent(in) :: a,b @@ -256,10 +258,9 @@ contains end subroutine psb_sbase_symbmm - - subroutine psb_lssymbmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_lssymbmm + use psb_mat_mod + use psb_s_serial_mod, only : psb_lsbase_symbmm implicit none type(psb_lsspmat_type), intent(in) :: a,b @@ -280,7 +281,7 @@ subroutine psb_lssymbmm(a,b,c,info) allocate(ccsr,stat=info) if (info == psb_success_) then - call psb_symbmm(a%a,b%a,ccsr,info) + call psb_lsbase_symbmm(a%a,b%a,ccsr,info) else info = psb_err_alloc_dealloc_ end if @@ -302,7 +303,7 @@ end subroutine psb_lssymbmm subroutine 
psb_lsbase_symbmm(a,b,c,info) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_lsbase_symbmm + use psb_sort_mod implicit none class(psb_ls_base_sparse_mat), intent(in) :: a,b diff --git a/base/serial/psb_zgeprt.f90 b/base/serial/psb_zgeprt.f90 index f7615473..3fc0eb0c 100644 --- a/base/serial/psb_zgeprt.f90 +++ b/base/serial/psb_zgeprt.f90 @@ -29,7 +29,7 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -! File: psb_scsprt.f90 +! File: psb_geprt.f90 ! Subroutine: ! Arguments: @@ -131,6 +131,7 @@ subroutine psb_zgeprt2(iout,a,head) ncol = size(a,2) write(iout,*) nrow,ncol + write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))' do i=1,nrow @@ -161,7 +162,7 @@ subroutine psb_zgeprt1(iout,a,head) write(iout,'(a)') '% ' nrow = size(a,1) ncol = 1 - write(iout,*) nrow + write(iout,*) nrow,ncol write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))' diff --git a/base/serial/psb_znumbmm.f90 b/base/serial/psb_znumbmm.f90 index be4e1026..31b8f6b2 100644 --- a/base/serial/psb_znumbmm.f90 +++ b/base/serial/psb_znumbmm.f90 @@ -40,7 +40,8 @@ ! ! 
subroutine psb_znumbmm(a,b,c) - use psb_base_mod, psb_protect_name => psb_znumbmm + use psb_mat_mod + use psb_z_serial_mod, only : psb_zbase_numbmm implicit none type(psb_zspmat_type), intent(in) :: a,b @@ -60,7 +61,7 @@ subroutine psb_znumbmm(a,b,c) select type(aa=>c%a) type is (psb_z_csr_sparse_mat) - call psb_numbmm(a%a,b%a,aa) + call psb_zbase_numbmm(a%a,b%a,aa) class default info = psb_err_invalid_mat_state_ call psb_errpush(info,name) @@ -81,7 +82,6 @@ end subroutine psb_znumbmm subroutine psb_zbase_numbmm(a,b,c) use psb_mat_mod use psb_string_mod - use psb_serial_mod, psb_protect_name => psb_zbase_numbmm implicit none class(psb_z_base_sparse_mat), intent(in) :: a,b @@ -234,10 +234,10 @@ contains end subroutine psb_zbase_numbmm - - subroutine psb_lznumbmm(a,b,c) - use psb_base_mod, psb_protect_name => psb_lznumbmm + use psb_mat_mod + use psb_z_serial_mod, only : psb_lzbase_numbmm + implicit none type(psb_lzspmat_type), intent(in) :: a,b @@ -257,7 +257,7 @@ subroutine psb_lznumbmm(a,b,c) select type(aa=>c%a) type is (psb_lz_csr_sparse_mat) - call psb_numbmm(a%a,b%a,aa) + call psb_lzbase_numbmm(a%a,b%a,aa) class default info = psb_err_invalid_mat_state_ call psb_errpush(info,name) @@ -278,7 +278,6 @@ end subroutine psb_lznumbmm subroutine psb_lzbase_numbmm(a,b,c) use psb_mat_mod use psb_string_mod - use psb_serial_mod, psb_protect_name => psb_lzbase_numbmm implicit none class(psb_lz_base_sparse_mat), intent(in) :: a,b diff --git a/base/serial/psb_zrwextd.f90 b/base/serial/psb_zrwextd.f90 index f3e07f26..393a3d9d 100644 --- a/base/serial/psb_zrwextd.f90 +++ b/base/serial/psb_zrwextd.f90 @@ -40,7 +40,7 @@ ! subroutine psb_zrwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_zrwextd + use psb_z_serial_mod, only : psb_zbase_rwextd implicit none ! 
Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -63,23 +63,23 @@ subroutine psb_zrwextd(nr,a,info,b,rowscale) select type(aa=> a%a) type is (psb_z_csr_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale) + call psb_zbase_rwextd(nr,aa,info,b%a,rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_zbase_rwextd(nr,aa,info,rowscale=rowscale) end if type is (psb_z_coo_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale=rowscale) + call psb_zbase_rwextd(nr,aa,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_zbase_rwextd(nr,aa,info,rowscale=rowscale) end if class default call aa%mv_to_coo(actmp,info) if (info == psb_success_) then if (present(b)) then - call psb_rwextd(nr,actmp,info,b%a,rowscale=rowscale) + call psb_zbase_rwextd(nr,actmp,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,actmp,info,rowscale=rowscale) + call psb_zbase_rwextd(nr,actmp,info,rowscale=rowscale) end if end if if (info == psb_success_) call aa%mv_from_coo(actmp,info) @@ -95,9 +95,9 @@ subroutine psb_zrwextd(nr,a,info,b,rowscale) return end subroutine psb_zrwextd + subroutine psb_zbase_rwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_zbase_rwextd implicit none ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -240,7 +240,7 @@ end subroutine psb_zbase_rwextd subroutine psb_lzrwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_lzrwextd + use psb_z_serial_mod, only : psb_lzbase_rwextd implicit none ! 
Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) @@ -264,23 +264,23 @@ subroutine psb_lzrwextd(nr,a,info,b,rowscale) select type(aa=> a%a) type is (psb_lz_csr_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale) + call psb_lzbase_rwextd(nr,aa,info,b%a,rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_lzbase_rwextd(nr,aa,info,rowscale=rowscale) end if type is (psb_lz_coo_sparse_mat) if (present(b)) then - call psb_rwextd(nr,aa,info,b%a,rowscale=rowscale) + call psb_lzbase_rwextd(nr,aa,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,aa,info,rowscale=rowscale) + call psb_lzbase_rwextd(nr,aa,info,rowscale=rowscale) end if class default call aa%mv_to_coo(actmp,info) if (info == psb_success_) then if (present(b)) then - call psb_rwextd(nr,actmp,info,b%a,rowscale=rowscale) + call psb_lzbase_rwextd(nr,actmp,info,b%a,rowscale=rowscale) else - call psb_rwextd(nr,actmp,info,rowscale=rowscale) + call psb_lzbase_rwextd(nr,actmp,info,rowscale=rowscale) end if end if if (info == psb_success_) call aa%mv_from_coo(actmp,info) @@ -296,9 +296,9 @@ subroutine psb_lzrwextd(nr,a,info,b,rowscale) return end subroutine psb_lzrwextd + subroutine psb_lzbase_rwextd(nr,a,info,b,rowscale) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_lzbase_rwextd implicit none ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) diff --git a/base/serial/psb_zspspmm.f90 b/base/serial/psb_zspspmm.f90 index a1436ad1..e7b3df44 100644 --- a/base/serial/psb_zspspmm.f90 +++ b/base/serial/psb_zspspmm.f90 @@ -36,7 +36,8 @@ ! ! 
subroutine psb_zspspmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_zspspmm + use psb_mat_mod + use psb_z_serial_mod, psb_protect_name => psb_zspspmm implicit none type(psb_zspmat_type), intent(in) :: a,b @@ -115,9 +116,9 @@ subroutine psb_zspspmm(a,b,c,info) end subroutine psb_zspspmm - subroutine psb_lzspspmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_lzspspmm + use psb_mat_mod + use psb_z_serial_mod, psb_protect_name => psb_lzspspmm implicit none type(psb_lzspmat_type), intent(in) :: a,b diff --git a/base/serial/psb_zsymbmm.f90 b/base/serial/psb_zsymbmm.f90 index ada82326..9c31b0e7 100644 --- a/base/serial/psb_zsymbmm.f90 +++ b/base/serial/psb_zsymbmm.f90 @@ -40,7 +40,8 @@ ! subroutine psb_zsymbmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_zsymbmm + use psb_mat_mod + use psb_z_serial_mod, only : psb_zbase_symbmm implicit none type(psb_zspmat_type), intent(in) :: a,b @@ -61,7 +62,7 @@ subroutine psb_zsymbmm(a,b,c,info) allocate(ccsr,stat=info) if (info == psb_success_) then - call psb_symbmm(a%a,b%a,ccsr,info) + call psb_zbase_symbmm(a%a,b%a,ccsr,info) else info = psb_err_alloc_dealloc_ end if @@ -83,7 +84,8 @@ end subroutine psb_zsymbmm subroutine psb_zbase_symbmm(a,b,c,info) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_zbase_symbmm + use psb_sort_mod + use psb_serial_mod, only : symbmm implicit none class(psb_z_base_sparse_mat), intent(in) :: a,b @@ -256,10 +258,9 @@ contains end subroutine psb_zbase_symbmm - - subroutine psb_lzsymbmm(a,b,c,info) - use psb_base_mod, psb_protect_name => psb_lzsymbmm + use psb_mat_mod + use psb_z_serial_mod, only : psb_lzbase_symbmm implicit none type(psb_lzspmat_type), intent(in) :: a,b @@ -280,7 +281,7 @@ subroutine psb_lzsymbmm(a,b,c,info) allocate(ccsr,stat=info) if (info == psb_success_) then - call psb_symbmm(a%a,b%a,ccsr,info) + call psb_lzbase_symbmm(a%a,b%a,ccsr,info) else info = psb_err_alloc_dealloc_ end if @@ -302,7 +303,7 @@ end subroutine psb_lzsymbmm subroutine 
psb_lzbase_symbmm(a,b,c,info) use psb_mat_mod - use psb_serial_mod, psb_protect_name => psb_lzbase_symbmm + use psb_sort_mod implicit none class(psb_lz_base_sparse_mat), intent(in) :: a,b diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index a3898349..d6706c88 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -33,7 +33,7 @@ subroutine psi_c_exscanv(n,x,info,shift) use psi_c_serial_mod, psb_protect_name => psi_c_exscanv use psb_const_mod use psb_error_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -45,14 +45,15 @@ subroutine psi_c_exscanv(n,x,info,shift) complex(psb_spk_) :: shift_, tp, ts integer(psb_ipk_) :: i logical is_nested, is_parallel - + + info = psb_success_ if (present(shift)) then shift_ = shift else shift_ = czero end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) is_parallel = omp_in_parallel() if (is_parallel) then call inner_c_exscan() @@ -70,7 +71,7 @@ subroutine psi_c_exscanv(n,x,info,shift) end do #endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) contains subroutine inner_c_exscan() ! Note: all these variables are private, but SUMB should *really* be @@ -909,7 +910,8 @@ subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:,:), y(:),alpha,beta ! Locals @@ -994,7 +996,8 @@ subroutine psi_cgthv(n,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:), y(:),alpha,beta ! Locals @@ -1050,7 +1053,8 @@ subroutine psi_cgthzmm(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:,:), y(:,:) ! 
Locals @@ -1068,7 +1072,8 @@ subroutine psi_cgthzmv(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:,:), y(:) ! Locals @@ -1089,7 +1094,8 @@ subroutine psi_cgthzv(n,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: x(:), y(:) ! Locals @@ -1106,7 +1112,8 @@ subroutine psi_csctmm(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:,:), y(:,:) ! Locals @@ -1132,7 +1139,8 @@ subroutine psi_csctmv(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:), y(:,:) ! Locals @@ -1170,7 +1178,8 @@ subroutine psi_csctv(n,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_spk_) :: beta, x(:), y(:) ! 
Locals
@@ -1567,3 +1576,334 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
   return
 
 end subroutine caxpbyv2
+
+!
+! psi_c_upd_xyz: fused update of two vectors over entries 1..m:
+!     y(i) = alpha*x(i) + beta*y(i)
+!     z(i) = gamma*y(i) + delta*z(i)     (with the updated y(i))
+! Every zero/non-zero combination of alpha, beta, gamma and delta has
+! its own loop, in which the terms with a zero coefficient are dropped
+! rather than multiplied by zero.
+! On exit info is psb_success_, or an argument-check error code when
+! m is negative or x, y or z has fewer than max(1,m) entries.
+!
+subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+  integer(psb_ipk_), intent(in) :: m
+  complex(psb_spk_), intent (in) :: x(:)
+  complex(psb_spk_), intent (inout) :: y(:)
+  complex(psb_spk_), intent (inout) :: z(:)
+  complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: i
+  integer(psb_ipk_) :: int_err(5)
+  character name*20
+  name='c_upd_xyz'
+
+  info = psb_success_
+  if (m.lt.0) then
+    info=psb_err_iarg_neg_
+    int_err(1)=1
+    int_err(2)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(x).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=6
+    int_err(2)=1
+    int_err(3)=size(x)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(y).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=7
+    int_err(2)=1
+    int_err(3)=size(y)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(z).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=8
+    int_err(2)=1
+    int_err(3)=size(z)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  endif
+
+  ! Branch labels: a/b/g/d stand for alpha/beta/gamma/delta,
+  ! 0 for a zero coefficient and n for a non-zero one.
+  if (beta == czero) then
+    if (gamma == czero) then
+      if (alpha == czero) then
+        if (delta == czero) then
+          ! a 0 b 0 g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = czero
+            z(i) = czero
+          end do
+        else if (delta /= czero) then
+          ! a 0 b 0 g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = czero
+            z(i) = delta*z(i)
+          end do
+        end if
+      else if (alpha /= czero) then
+        if (delta == czero) then
+          ! a n b 0 g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = czero
+          end do
+        else if (delta /= czero) then
+          ! a n b 0 g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      end if
+
+    else if (gamma /= czero) then
+
+      if (alpha == czero) then
+
+        if (delta == czero) then
+          ! a 0 b 0 g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = czero
+            z(i) = czero ! gamma*y(i)
+          end do
+
+        else if (delta /= czero) then
+          ! a 0 b 0 g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = czero
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= czero) then
+
+        if (delta == czero) then
+          ! a n b 0 g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= czero) then
+          ! a n b 0 g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+
+      end if
+
+    end if
+
+  else if (beta /= czero) then
+
+    if (gamma == czero) then
+      if (alpha == czero) then
+        if (delta == czero) then
+          ! a 0 b n g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = czero
+          end do
+
+        else if (delta /= czero) then
+          ! a 0 b n g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= czero) then
+        if (delta == czero) then
+          ! a n b n g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = czero
+          end do
+
+        else if (delta /= czero) then
+          ! a n b n g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      end if
+    else if (gamma /= czero) then
+      if (alpha == czero) then
+        if (delta == czero) then
+          ! a 0 b n g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= czero) then
+          ! a 0 b n g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= czero) then
+        if (delta == czero) then
+          ! a n b n g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= czero) then
+          ! a n b n g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+      end if
+    end if
+  end if
+
+  return
+
+9999 continue
+  call fcpsb_serror()
+  return
+
+end subroutine psi_c_upd_xyz
+
+!
+! psi_cxyzw: fused update of three vectors over entries 1..m:
+!     y(i) = a*x(i) + b*y(i)
+!     z(i) = c*y(i) + d*z(i)     (with the updated y(i))
+!     w(i) = e*z(i) + f*w(i)     (with the updated z(i))
+! Only the case in which all six coefficients are non-zero is handled;
+! otherwise a message is written to unit 0, the vectors are left
+! untouched and info is still psb_success_.
+! info carries an argument-check error code when m is negative or
+! x, y, z or w has fewer than max(1,m) entries.
+!
+subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+  integer(psb_ipk_), intent(in) :: m
+  complex(psb_spk_), intent (in) :: x(:)
+  complex(psb_spk_), intent (inout) :: y(:)
+  complex(psb_spk_), intent (inout) :: z(:)
+  complex(psb_spk_), intent (inout) :: w(:)
+  complex(psb_spk_), intent (in) :: a,b,c,d,e,f
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: i
+  integer(psb_ipk_) :: int_err(5)
+  character name*20
+  name='c_xyzw'
+
+  info = psb_success_
+  ! NOTE(review): int_err(1) is taken to be the position of the
+  ! offending dummy argument, as in psi_c_upd_xyz (m is 1 there and
+  ! x, y, z are 6, 7, 8); here x, y, z, w are arguments 8 to 11.
+  if (m.lt.0) then
+    info=psb_err_iarg_neg_
+    int_err(1)=1
+    int_err(2)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(x).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=8
+    int_err(2)=1
+    int_err(3)=size(x)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(y).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=9
+    int_err(2)=1
+    int_err(3)=size(y)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(z).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=10
+    int_err(2)=1
+    int_err(3)=size(z)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(w).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=11
+    int_err(2)=1
+    int_err(3)=size(w)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  endif
+
+  if ((a==czero).or.(b==czero).or. &
+       & (c==czero).or.(d==czero).or.&
+       & (e==czero).or.(f==czero)) then
+    write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
+  else
+    !$omp parallel do private(i)
+    do i=1,m
+      y(i) = a*x(i)+b*y(i)
+      z(i) = c*y(i)+d*z(i)
+      w(i) = e*z(i)+f*w(i)
+    end do
+
+  end if
+
+  return
+
+9999 continue
+  call fcpsb_serror()
+  return
+
+end subroutine psi_cxyzw
diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index 1b5b1442..772f28ea 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -33,7 +33,7 @@ subroutine psi_d_exscanv(n,x,info,shift) use psi_d_serial_mod, psb_protect_name => psi_d_exscanv use psb_const_mod use psb_error_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -45,14 +45,15 @@ subroutine psi_d_exscanv(n,x,info,shift) real(psb_dpk_) :: shift_, tp, ts integer(psb_ipk_) :: i logical is_nested, is_parallel - + + info = psb_success_ if (present(shift)) then shift_ = shift else shift_ = dzero end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) is_parallel = omp_in_parallel() if (is_parallel) then call inner_d_exscan() @@ -70,7 +71,7 @@ subroutine psi_d_exscanv(n,x,info,shift) end do #endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) contains subroutine inner_d_exscan() ! Note: all these variables are private, but SUMB should *really* be @@ -909,7 +910,8 @@ subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: x(:,:), y(:),alpha,beta ! Locals @@ -994,7 +996,8 @@ subroutine psi_dgthv(n,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: x(:), y(:),alpha,beta !
Locals @@ -1050,7 +1053,8 @@ subroutine psi_dgthzmm(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: x(:,:), y(:,:) ! Locals @@ -1068,7 +1072,8 @@ subroutine psi_dgthzmv(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: x(:,:), y(:) ! Locals @@ -1089,7 +1094,8 @@ subroutine psi_dgthzv(n,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: x(:), y(:) ! Locals @@ -1106,7 +1112,8 @@ subroutine psi_dsctmm(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:,:), y(:,:) ! Locals @@ -1132,7 +1139,8 @@ subroutine psi_dsctmv(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:), y(:,:) ! Locals @@ -1170,7 +1178,8 @@ subroutine psi_dsctv(n,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_dpk_) :: beta, x(:), y(:) ! 
Locals @@ -1567,3 +1576,300 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine daxpbyv2 + +subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='d_upd_xyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == dzero) then + if (gamma == dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = dzero + end do + else if (delta /= dzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = dzero + end do + else if (delta /= dzero) then + ! 
a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= dzero) then + + if (alpha == dzero) then + + if (delta == dzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = dzero ! gamma*y(i) + end do + + else if (delta /= dzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + + if (delta == dzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= dzero) then + + if (gamma == dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = dzero + end do + + else if (delta /= dzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = dzero + end do + + else if (delta /= dzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! 
a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_d_upd_xyz + +subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (inout) :: w(:) + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='d_xyzw' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==dzero).or.(b==dzero).or. 
& + & (c==dzero).or.(d==dzero).or.& + & (e==dzero).or.(f==dzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_dxyzw diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 9cdcdf0e..10ea49cf 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -33,7 +33,7 @@ subroutine psi_e_exscanv(n,x,info,shift) use psi_e_serial_mod, psb_protect_name => psi_e_exscanv use psb_const_mod use psb_error_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -45,14 +45,15 @@ subroutine psi_e_exscanv(n,x,info,shift) integer(psb_epk_) :: shift_, tp, ts integer(psb_ipk_) :: i logical is_nested, is_parallel - + + info = psb_success_ if (present(shift)) then shift_ = shift else shift_ = ezero end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) is_parallel = omp_in_parallel() if (is_parallel) then call inner_e_exscan() @@ -70,7 +71,7 @@ subroutine psi_e_exscanv(n,x,info,shift) end do #endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) contains subroutine inner_e_exscan() ! Note: all these variables are private, but SUMB should *really* be @@ -909,7 +910,8 @@ subroutine psi_egthmv(n,k,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:,:), y(:),alpha,beta ! Locals @@ -994,7 +996,8 @@ subroutine psi_egthv(n,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:), y(:),alpha,beta ! 
Locals @@ -1050,7 +1053,8 @@ subroutine psi_egthzmm(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:,:), y(:,:) ! Locals @@ -1068,7 +1072,8 @@ subroutine psi_egthzmv(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:,:), y(:) ! Locals @@ -1089,7 +1094,8 @@ subroutine psi_egthzv(n,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: x(:), y(:) ! Locals @@ -1106,7 +1112,8 @@ subroutine psi_esctmm(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: beta, x(:,:), y(:,:) ! Locals @@ -1132,7 +1139,8 @@ subroutine psi_esctmv(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: beta, x(:), y(:,:) ! Locals @@ -1170,7 +1178,8 @@ subroutine psi_esctv(n,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_epk_) :: beta, x(:), y(:) ! 
Locals @@ -1567,3 +1576,300 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine eaxpbyv2 + +subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='e_upd_xyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == ezero) then + if (gamma == ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = ezero + end do + else if (delta /= ezero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = delta*z(i) + end do + end if + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = ezero + end do + else if (delta /= ezero) then + ! 
a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= ezero) then + + if (alpha == ezero) then + + if (delta == ezero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = ezero ! gamma*y(i) + end do + + else if (delta /= ezero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + + if (delta == ezero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= ezero) then + + if (gamma == ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = ezero + end do + + else if (delta /= ezero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = ezero + end do + + else if (delta /= ezero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! 
a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_e_upd_xyz + +subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (inout) :: w(:) + integer(psb_epk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='e_xyzw' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==ezero).or.(b==ezero).or. 
& + & (c==ezero).or.(d==ezero).or.& + & (e==ezero).or.(f==ezero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_exyzw diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index d25617a9..ae6ee65a 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -33,7 +33,7 @@ subroutine psi_i2_exscanv(n,x,info,shift) use psi_i2_serial_mod, psb_protect_name => psi_i2_exscanv use psb_const_mod use psb_error_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -45,14 +45,15 @@ subroutine psi_i2_exscanv(n,x,info,shift) integer(psb_i2pk_) :: shift_, tp, ts integer(psb_ipk_) :: i logical is_nested, is_parallel - + + info = psb_success_ if (present(shift)) then shift_ = shift else shift_ = i2zero end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) is_parallel = omp_in_parallel() if (is_parallel) then call inner_i2_exscan() @@ -70,7 +71,7 @@ subroutine psi_i2_exscanv(n,x,info,shift) end do #endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) contains subroutine inner_i2_exscan() ! Note: all these variables are private, but SUMB should *really* be @@ -909,7 +910,8 @@ subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:,:), y(:),alpha,beta ! Locals @@ -994,7 +996,8 @@ subroutine psi_i2gthv(n,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:), y(:),alpha,beta ! 
Locals @@ -1050,7 +1053,8 @@ subroutine psi_i2gthzmm(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:,:), y(:,:) ! Locals @@ -1068,7 +1072,8 @@ subroutine psi_i2gthzmv(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:,:), y(:) ! Locals @@ -1089,7 +1094,8 @@ subroutine psi_i2gthzv(n,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: x(:), y(:) ! Locals @@ -1106,7 +1112,8 @@ subroutine psi_i2sctmm(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: beta, x(:,:), y(:,:) ! Locals @@ -1132,7 +1139,8 @@ subroutine psi_i2sctmv(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: beta, x(:), y(:,:) ! Locals @@ -1170,7 +1178,8 @@ subroutine psi_i2sctv(n,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_i2pk_) :: beta, x(:), y(:) ! 
Locals @@ -1567,3 +1576,300 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine i2axpbyv2 + +subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='i2_upd_xyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == i2zero) then + if (gamma == i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = i2zero + end do + else if (delta /= i2zero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = delta*z(i) + end do + end if + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = i2zero + end do + else if (delta /= i2zero) then + ! 
a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= i2zero) then + + if (alpha == i2zero) then + + if (delta == i2zero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = i2zero ! gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + + if (delta == i2zero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= i2zero) then + + if (gamma == i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = i2zero + end do + + else if (delta /= i2zero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = i2zero + end do + + else if (delta /= i2zero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! 
a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_i2_upd_xyz + +subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (inout) :: w(:) + integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='i2_xyzw' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==i2zero).or.(b==i2zero).or. 
& + & (c==i2zero).or.(d==i2zero).or.& + & (e==i2zero).or.(f==i2zero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_i2xyzw diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 05c8e60f..25eaca32 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -33,7 +33,7 @@ subroutine psi_m_exscanv(n,x,info,shift) use psi_m_serial_mod, psb_protect_name => psi_m_exscanv use psb_const_mod use psb_error_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -45,14 +45,15 @@ subroutine psi_m_exscanv(n,x,info,shift) integer(psb_mpk_) :: shift_, tp, ts integer(psb_ipk_) :: i logical is_nested, is_parallel - + + info = psb_success_ if (present(shift)) then shift_ = shift else shift_ = mzero end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) is_parallel = omp_in_parallel() if (is_parallel) then call inner_m_exscan() @@ -70,7 +71,7 @@ subroutine psi_m_exscanv(n,x,info,shift) end do #endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) contains subroutine inner_m_exscan() ! Note: all these variables are private, but SUMB should *really* be @@ -909,7 +910,8 @@ subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:,:), y(:),alpha,beta ! Locals @@ -994,7 +996,8 @@ subroutine psi_mgthv(n,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:), y(:),alpha,beta ! 
Locals @@ -1050,7 +1053,8 @@ subroutine psi_mgthzmm(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:,:), y(:,:) ! Locals @@ -1068,7 +1072,8 @@ subroutine psi_mgthzmv(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:,:), y(:) ! Locals @@ -1089,7 +1094,8 @@ subroutine psi_mgthzv(n,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: x(:), y(:) ! Locals @@ -1106,7 +1112,8 @@ subroutine psi_msctmm(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: beta, x(:,:), y(:,:) ! Locals @@ -1132,7 +1139,8 @@ subroutine psi_msctmv(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: beta, x(:), y(:,:) ! Locals @@ -1170,7 +1178,8 @@ subroutine psi_msctv(n,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) integer(psb_mpk_) :: beta, x(:), y(:) ! 
Locals
@@ -1567,3 +1576,329 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
   return
 
 end subroutine maxpbyv2
+
+!
+! psi_m_upd_xyz: fused update of two vectors over the first m entries:
+!      y(i) = alpha*x(i) + beta*y(i)
+!      z(i) = gamma*y(i) + delta*z(i)     (y(i) as just updated)
+! Each of the 16 zero/nonzero combinations of alpha, beta, gamma, delta
+! has its own loop, so a term with a zero coefficient is never evaluated.
+! If m < 0, or x, y or z has fewer than max(1,m) entries, the error is
+! reported through fcpsb_errpush/fcpsb_serror and nothing is computed.
+!
+subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+  integer(psb_ipk_), intent(in) :: m
+  integer(psb_mpk_), intent (in) :: x(:)
+  integer(psb_mpk_), intent (inout) :: y(:)
+  integer(psb_mpk_), intent (inout) :: z(:)
+  integer(psb_mpk_), intent (in) :: alpha, beta, gamma, delta
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: i
+  integer(psb_ipk_) :: int_err(5)
+  character name*20
+  name='m_upd_xyz'
+
+  info = psb_success_
+  if (m.lt.0) then
+    info=psb_err_iarg_neg_
+    int_err(1)=1
+    int_err(2)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(x).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=6
+    int_err(2)=1
+    int_err(3)=size(x)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(y).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=7
+    int_err(2)=1
+    int_err(3)=size(y)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(z).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=8
+    int_err(2)=1
+    int_err(3)=size(z)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  endif
+
+  if (beta == mzero) then
+    if (gamma == mzero) then
+      if (alpha == mzero) then
+        if (delta == mzero) then
+          ! a 0 b 0 g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = mzero
+            z(i) = mzero
+          end do
+        else if (delta /= mzero) then
+          ! a 0 b 0 g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = mzero
+            z(i) = delta*z(i)
+          end do
+        end if
+      else if (alpha /= mzero) then
+        if (delta == mzero) then
+          ! a n b 0 g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = mzero
+          end do
+        else if (delta /= mzero) then
+          ! a n b 0 g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      end if
+
+    else if (gamma /= mzero) then
+
+      if (alpha == mzero) then
+
+        if (delta == mzero) then
+          ! a 0 b 0 g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = mzero
+            z(i) = mzero ! gamma*y(i)
+          end do
+
+        else if (delta /= mzero) then
+          ! a 0 b 0 g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = mzero
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= mzero) then
+
+        if (delta == mzero) then
+          ! a n b 0 g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= mzero) then
+          ! a n b 0 g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+
+      end if
+
+    end if
+
+  else if (beta /= mzero) then
+
+    if (gamma == mzero) then
+      if (alpha == mzero) then
+        if (delta == mzero) then
+          ! a 0 b n g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = mzero
+          end do
+
+        else if (delta /= mzero) then
+          ! a 0 b n g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= mzero) then
+        if (delta == mzero) then
+          ! a n b n g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = mzero
+          end do
+
+        else if (delta /= mzero) then
+          ! a n b n g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      end if
+    else if (gamma /= mzero) then
+      if (alpha == mzero) then
+        if (delta == mzero) then
+          ! a 0 b n g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= mzero) then
+          ! a 0 b n g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= mzero) then
+        if (delta == mzero) then
+          ! a n b n g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= mzero) then
+          ! a n b n g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+      end if
+    end if
+  end if
+
+  return
+
+9999 continue
+  call fcpsb_serror()
+  return
+
+end subroutine psi_m_upd_xyz
+
+!
+! psi_mxyzw: fused update of three vectors over the first m entries:
+!      y(i) = a*x(i) + b*y(i)
+!      z(i) = c*y(i) + d*z(i)     (y(i) as just updated)
+!      w(i) = e*z(i) + f*w(i)     (z(i) as just updated)
+! Only the case where a,b,c,d,e,f are all nonzero is implemented; otherwise
+! a message is written to unit 0 and the vectors are left untouched.
+! If m < 0, or x, y, z or w has fewer than max(1,m) entries, the error is
+! reported through fcpsb_errpush/fcpsb_serror and nothing is computed.
+!
+subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+  integer(psb_ipk_), intent(in) :: m
+  integer(psb_mpk_), intent (in) :: x(:)
+  integer(psb_mpk_), intent (inout) :: y(:)
+  integer(psb_mpk_), intent (inout) :: z(:)
+  integer(psb_mpk_), intent (inout) :: w(:)
+  integer(psb_mpk_), intent (in) :: a,b,c,d,e,f
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: i
+  integer(psb_ipk_) :: int_err(5)
+  character name*20
+  name='m_xyzw'
+
+  info = psb_success_
+  ! int_err(1) is the position of the offending argument in the dummy list
+  ! (x=8, y=9, z=10, w=11), as psi_m_upd_xyz does for its x=6, y=7, z=8.
+  if (m.lt.0) then
+    info=psb_err_iarg_neg_
+    int_err(1)=1
+    int_err(2)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(x).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=8
+    int_err(2)=1
+    int_err(3)=size(x)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(y).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=9
+    int_err(2)=1
+    int_err(3)=size(y)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(z).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=10
+    int_err(2)=1
+    int_err(3)=size(z)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(w).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=11
+    int_err(2)=1
+    int_err(3)=size(w)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  endif
+
+  if ((a==mzero).or.(b==mzero).or.
& + & (c==mzero).or.(d==mzero).or.& + & (e==mzero).or.(f==mzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_mxyzw diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index 26a57e68..6baa8dd7 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -33,7 +33,7 @@ subroutine psi_s_exscanv(n,x,info,shift) use psi_s_serial_mod, psb_protect_name => psi_s_exscanv use psb_const_mod use psb_error_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -45,14 +45,15 @@ subroutine psi_s_exscanv(n,x,info,shift) real(psb_spk_) :: shift_, tp, ts integer(psb_ipk_) :: i logical is_nested, is_parallel - + + info = psb_success_ if (present(shift)) then shift_ = shift else shift_ = szero end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) is_parallel = omp_in_parallel() if (is_parallel) then call inner_s_exscan() @@ -70,7 +71,7 @@ subroutine psi_s_exscanv(n,x,info,shift) end do #endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) contains subroutine inner_s_exscan() ! Note: all these variables are private, but SUMB should *really* be @@ -909,7 +910,8 @@ subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:,:), y(:),alpha,beta ! Locals @@ -994,7 +996,8 @@ subroutine psi_sgthv(n,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:), y(:),alpha,beta ! 
Locals @@ -1050,7 +1053,8 @@ subroutine psi_sgthzmm(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:,:), y(:,:) ! Locals @@ -1068,7 +1072,8 @@ subroutine psi_sgthzmv(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:,:), y(:) ! Locals @@ -1089,7 +1094,8 @@ subroutine psi_sgthzv(n,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: x(:), y(:) ! Locals @@ -1106,7 +1112,8 @@ subroutine psi_ssctmm(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:,:), y(:,:) ! Locals @@ -1132,7 +1139,8 @@ subroutine psi_ssctmv(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:), y(:,:) ! Locals @@ -1170,7 +1178,8 @@ subroutine psi_ssctv(n,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) real(psb_spk_) :: beta, x(:), y(:) ! 
Locals
@@ -1567,3 +1576,329 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
   return
 
 end subroutine saxpbyv2
+
+!
+! psi_s_upd_xyz: fused update of two vectors over the first m entries:
+!      y(i) = alpha*x(i) + beta*y(i)
+!      z(i) = gamma*y(i) + delta*z(i)     (y(i) as just updated)
+! Each of the 16 zero/nonzero combinations of alpha, beta, gamma, delta
+! has its own loop, so a term with a zero coefficient is never evaluated.
+! If m < 0, or x, y or z has fewer than max(1,m) entries, the error is
+! reported through fcpsb_errpush/fcpsb_serror and nothing is computed.
+!
+subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+  integer(psb_ipk_), intent(in) :: m
+  real(psb_spk_), intent (in) :: x(:)
+  real(psb_spk_), intent (inout) :: y(:)
+  real(psb_spk_), intent (inout) :: z(:)
+  real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: i
+  integer(psb_ipk_) :: int_err(5)
+  character name*20
+  name='s_upd_xyz'
+
+  info = psb_success_
+  if (m.lt.0) then
+    info=psb_err_iarg_neg_
+    int_err(1)=1
+    int_err(2)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(x).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=6
+    int_err(2)=1
+    int_err(3)=size(x)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(y).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=7
+    int_err(2)=1
+    int_err(3)=size(y)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(z).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=8
+    int_err(2)=1
+    int_err(3)=size(z)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  endif
+
+  if (beta == szero) then
+    if (gamma == szero) then
+      if (alpha == szero) then
+        if (delta == szero) then
+          ! a 0 b 0 g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = szero
+            z(i) = szero
+          end do
+        else if (delta /= szero) then
+          ! a 0 b 0 g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = szero
+            z(i) = delta*z(i)
+          end do
+        end if
+      else if (alpha /= szero) then
+        if (delta == szero) then
+          ! a n b 0 g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = szero
+          end do
+        else if (delta /= szero) then
+          ! a n b 0 g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      end if
+
+    else if (gamma /= szero) then
+
+      if (alpha == szero) then
+
+        if (delta == szero) then
+          ! a 0 b 0 g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = szero
+            z(i) = szero ! gamma*y(i)
+          end do
+
+        else if (delta /= szero) then
+          ! a 0 b 0 g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = szero
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= szero) then
+
+        if (delta == szero) then
+          ! a n b 0 g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= szero) then
+          ! a n b 0 g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+
+      end if
+
+    end if
+
+  else if (beta /= szero) then
+
+    if (gamma == szero) then
+      if (alpha == szero) then
+        if (delta == szero) then
+          ! a 0 b n g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = szero
+          end do
+
+        else if (delta /= szero) then
+          ! a 0 b n g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= szero) then
+        if (delta == szero) then
+          ! a n b n g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = szero
+          end do
+
+        else if (delta /= szero) then
+          ! a n b n g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      end if
+    else if (gamma /= szero) then
+      if (alpha == szero) then
+        if (delta == szero) then
+          ! a 0 b n g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= szero) then
+          ! a 0 b n g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= szero) then
+        if (delta == szero) then
+          ! a n b n g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= szero) then
+          ! a n b n g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+      end if
+    end if
+  end if
+
+  return
+
+9999 continue
+  call fcpsb_serror()
+  return
+
+end subroutine psi_s_upd_xyz
+
+!
+! psi_sxyzw: fused update of three vectors over the first m entries:
+!      y(i) = a*x(i) + b*y(i)
+!      z(i) = c*y(i) + d*z(i)     (y(i) as just updated)
+!      w(i) = e*z(i) + f*w(i)     (z(i) as just updated)
+! Only the case where a,b,c,d,e,f are all nonzero is implemented; otherwise
+! a message is written to unit 0 and the vectors are left untouched.
+! If m < 0, or x, y, z or w has fewer than max(1,m) entries, the error is
+! reported through fcpsb_errpush/fcpsb_serror and nothing is computed.
+!
+subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+  integer(psb_ipk_), intent(in) :: m
+  real(psb_spk_), intent (in) :: x(:)
+  real(psb_spk_), intent (inout) :: y(:)
+  real(psb_spk_), intent (inout) :: z(:)
+  real(psb_spk_), intent (inout) :: w(:)
+  real(psb_spk_), intent (in) :: a,b,c,d,e,f
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: i
+  integer(psb_ipk_) :: int_err(5)
+  character name*20
+  name='s_xyzw'
+
+  info = psb_success_
+  ! int_err(1) is the position of the offending argument in the dummy list
+  ! (x=8, y=9, z=10, w=11), as psi_s_upd_xyz does for its x=6, y=7, z=8.
+  if (m.lt.0) then
+    info=psb_err_iarg_neg_
+    int_err(1)=1
+    int_err(2)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(x).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=8
+    int_err(2)=1
+    int_err(3)=size(x)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(y).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=9
+    int_err(2)=1
+    int_err(3)=size(y)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(z).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=10
+    int_err(2)=1
+    int_err(3)=size(z)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(w).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=11
+    int_err(2)=1
+    int_err(3)=size(w)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  endif
+
+  if ((a==szero).or.(b==szero).or.
& + & (c==szero).or.(d==szero).or.& + & (e==szero).or.(f==szero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_sxyzw diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 0b15b2d6..dcd02540 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -33,7 +33,7 @@ subroutine psi_z_exscanv(n,x,info,shift) use psi_z_serial_mod, psb_protect_name => psi_z_exscanv use psb_const_mod use psb_error_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -45,14 +45,15 @@ subroutine psi_z_exscanv(n,x,info,shift) complex(psb_dpk_) :: shift_, tp, ts integer(psb_ipk_) :: i logical is_nested, is_parallel - + + info = psb_success_ if (present(shift)) then shift_ = shift else shift_ = zzero end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) is_parallel = omp_in_parallel() if (is_parallel) then call inner_z_exscan() @@ -70,7 +71,7 @@ subroutine psi_z_exscanv(n,x,info,shift) end do #endif -#if defined(OPENMP) +#if defined(PSB_OPENMP) contains subroutine inner_z_exscan() ! Note: all these variables are private, but SUMB should *really* be @@ -909,7 +910,8 @@ subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:,:), y(:),alpha,beta ! Locals @@ -994,7 +996,8 @@ subroutine psi_zgthv(n,idx,alpha,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:), y(:),alpha,beta ! 
Locals @@ -1050,7 +1053,8 @@ subroutine psi_zgthzmm(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:,:), y(:,:) ! Locals @@ -1068,7 +1072,8 @@ subroutine psi_zgthzmv(n,k,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:,:), y(:) ! Locals @@ -1089,7 +1094,8 @@ subroutine psi_zgthzv(n,idx,x,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: x(:), y(:) ! Locals @@ -1106,7 +1112,8 @@ subroutine psi_zsctmm(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:,:), y(:,:) ! Locals @@ -1132,7 +1139,8 @@ subroutine psi_zsctmv(n,k,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, k, idx(:) + integer(psb_mpk_) :: n, k + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:), y(:,:) ! Locals @@ -1170,7 +1178,8 @@ subroutine psi_zsctv(n,idx,x,beta,y) use psb_const_mod implicit none - integer(psb_ipk_) :: n, idx(:) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) complex(psb_dpk_) :: beta, x(:), y(:) ! 
Locals
@@ -1567,3 +1576,329 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
   return
 
 end subroutine zaxpbyv2
+
+!
+! psi_z_upd_xyz: fused update of two vectors over the first m entries:
+!      y(i) = alpha*x(i) + beta*y(i)
+!      z(i) = gamma*y(i) + delta*z(i)     (y(i) as just updated)
+! Each of the 16 zero/nonzero combinations of alpha, beta, gamma, delta
+! has its own loop, so a term with a zero coefficient is never evaluated.
+! If m < 0, or x, y or z has fewer than max(1,m) entries, the error is
+! reported through fcpsb_errpush/fcpsb_serror and nothing is computed.
+!
+subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+  integer(psb_ipk_), intent(in) :: m
+  complex(psb_dpk_), intent (in) :: x(:)
+  complex(psb_dpk_), intent (inout) :: y(:)
+  complex(psb_dpk_), intent (inout) :: z(:)
+  complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: i
+  integer(psb_ipk_) :: int_err(5)
+  character name*20
+  name='z_upd_xyz'
+
+  info = psb_success_
+  if (m.lt.0) then
+    info=psb_err_iarg_neg_
+    int_err(1)=1
+    int_err(2)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(x).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=6
+    int_err(2)=1
+    int_err(3)=size(x)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(y).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=7
+    int_err(2)=1
+    int_err(3)=size(y)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(z).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=8
+    int_err(2)=1
+    int_err(3)=size(z)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  endif
+
+  if (beta == zzero) then
+    if (gamma == zzero) then
+      if (alpha == zzero) then
+        if (delta == zzero) then
+          ! a 0 b 0 g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = zzero
+            z(i) = zzero
+          end do
+        else if (delta /= zzero) then
+          ! a 0 b 0 g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = zzero
+            z(i) = delta*z(i)
+          end do
+        end if
+      else if (alpha /= zzero) then
+        if (delta == zzero) then
+          ! a n b 0 g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = zzero
+          end do
+        else if (delta /= zzero) then
+          ! a n b 0 g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      end if
+
+    else if (gamma /= zzero) then
+
+      if (alpha == zzero) then
+
+        if (delta == zzero) then
+          ! a 0 b 0 g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = zzero
+            z(i) = zzero ! gamma*y(i)
+          end do
+
+        else if (delta /= zzero) then
+          ! a 0 b 0 g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = zzero
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= zzero) then
+
+        if (delta == zzero) then
+          ! a n b 0 g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= zzero) then
+          ! a n b 0 g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+
+      end if
+
+    end if
+
+  else if (beta /= zzero) then
+
+    if (gamma == zzero) then
+      if (alpha == zzero) then
+        if (delta == zzero) then
+          ! a 0 b n g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = zzero
+          end do
+
+        else if (delta /= zzero) then
+          ! a 0 b n g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= zzero) then
+        if (delta == zzero) then
+          ! a n b n g 0 d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = zzero
+          end do
+
+        else if (delta /= zzero) then
+          ! a n b n g 0 d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = delta*z(i)
+          end do
+
+        end if
+
+      end if
+    else if (gamma /= zzero) then
+      if (alpha == zzero) then
+        if (delta == zzero) then
+          ! a 0 b n g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= zzero) then
+          ! a 0 b n g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = beta*y(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+
+      else if (alpha /= zzero) then
+        if (delta == zzero) then
+          ! a n b n g n d 0
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = gamma*y(i)
+          end do
+
+        else if (delta /= zzero) then
+          ! a n b n g n d n
+          !$omp parallel do private(i)
+          do i=1,m
+            y(i) = alpha*x(i)+beta*y(i)
+            z(i) = gamma*y(i)+delta*z(i)
+          end do
+
+        end if
+      end if
+    end if
+  end if
+
+  return
+
+9999 continue
+  call fcpsb_serror()
+  return
+
+end subroutine psi_z_upd_xyz
+
+!
+! psi_zxyzw: fused update of three vectors over the first m entries:
+!      y(i) = a*x(i) + b*y(i)
+!      z(i) = c*y(i) + d*z(i)     (y(i) as just updated)
+!      w(i) = e*z(i) + f*w(i)     (z(i) as just updated)
+! Only the case where a,b,c,d,e,f are all nonzero is implemented; otherwise
+! a message is written to unit 0 and the vectors are left untouched.
+! If m < 0, or x, y, z or w has fewer than max(1,m) entries, the error is
+! reported through fcpsb_errpush/fcpsb_serror and nothing is computed.
+!
+subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+  integer(psb_ipk_), intent(in) :: m
+  complex(psb_dpk_), intent (in) :: x(:)
+  complex(psb_dpk_), intent (inout) :: y(:)
+  complex(psb_dpk_), intent (inout) :: z(:)
+  complex(psb_dpk_), intent (inout) :: w(:)
+  complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: i
+  integer(psb_ipk_) :: int_err(5)
+  character name*20
+  name='z_xyzw'
+
+  info = psb_success_
+  ! int_err(1) is the position of the offending argument in the dummy list
+  ! (x=8, y=9, z=10, w=11), as psi_z_upd_xyz does for its x=6, y=7, z=8.
+  if (m.lt.0) then
+    info=psb_err_iarg_neg_
+    int_err(1)=1
+    int_err(2)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(x).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=8
+    int_err(2)=1
+    int_err(3)=size(x)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(y).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=9
+    int_err(2)=1
+    int_err(3)=size(y)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(z).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=10
+    int_err(2)=1
+    int_err(3)=size(z)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  else if (size(w).lt.max(1,m)) then
+    info=psb_err_iarg_not_gtia_ii_
+    int_err(1)=11
+    int_err(2)=1
+    int_err(3)=size(w)
+    int_err(4)=m
+    call fcpsb_errpush(info,name,int_err)
+    goto 9999
+  endif
+
+  if ((a==zzero).or.(b==zzero).or.
& + & (c==zzero).or.(d==zzero).or.& + & (e==zzero).or.(f==zzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_zxyzw diff --git a/base/serial/sort/psb_c_hsort_impl.f90 b/base/serial/sort/psb_c_hsort_impl.f90 index c68ec73b..2400fcf7 100644 --- a/base/serial/sort/psb_c_hsort_impl.f90 +++ b/base/serial/sort/psb_c_hsort_impl.f90 @@ -899,7 +899,9 @@ subroutine psi_c_idx_heap_get_first(key,index,last,heap,idxs,dir,info) endif key = heap(1) + index = idxs(1) heap(1) = heap(last) + idxs(1) = idxs(last) last = last - 1 select case(dir) diff --git a/base/serial/sort/psb_z_hsort_impl.f90 b/base/serial/sort/psb_z_hsort_impl.f90 index 199f5663..e796f831 100644 --- a/base/serial/sort/psb_z_hsort_impl.f90 +++ b/base/serial/sort/psb_z_hsort_impl.f90 @@ -899,7 +899,9 @@ subroutine psi_z_idx_heap_get_first(key,index,last,heap,idxs,dir,info) endif key = heap(1) + index = idxs(1) heap(1) = heap(last) + idxs(1) = idxs(last) last = last - 1 select case(dir) diff --git a/base/tools/psb_c_glob_transpose.F90 b/base/tools/psb_c_glob_transpose.F90 index 4f6804fc..37a4e100 100644 --- a/base/tools/psb_c_glob_transpose.F90 +++ b/base/tools/psb_c_glob_transpose.F90 @@ -94,12 +94,12 @@ ! ! 
subroutine psb_lc_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_base_mod, psb_protect_name => psb_lc_coo_glob_transpose Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_lc_coo_sparse_mat), intent(inout) :: ain @@ -392,12 +392,12 @@ subroutine psb_lc_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) end subroutine psb_lc_coo_glob_transpose subroutine psb_c_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_base_mod, psb_protect_name => psb_c_coo_glob_transpose Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_c_coo_sparse_mat), intent(inout) :: ain diff --git a/base/tools/psb_c_par_csr_spspmm.f90 b/base/tools/psb_c_par_csr_spspmm.f90 index d5684b11..2dbd00f0 100644 --- a/base/tools/psb_c_par_csr_spspmm.f90 +++ b/base/tools/psb_c_par_csr_spspmm.f90 @@ -62,7 +62,11 @@ ! Error code. ! Subroutine psb_c_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) - use psb_base_mod, psb_protect_name => psb_c_par_csr_spspmm + use psb_mat_mod + use psb_comm_mod + use psb_penv_mod + use psb_c_tools_mod, psb_protect_name => psb_c_par_csr_spspmm + use psb_c_serial_mod, only : psb_ccsrspspmm, psb_cbase_rwextd Implicit None type(psb_c_csr_sparse_mat),intent(in) :: acsr @@ -132,7 +136,7 @@ Subroutine psb_c_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) call desc_c%indxmap%g2lip(ltcsr%ja(1:nnz),info) end if call ltcsr%mv_to_ifmt(tcsr,info) - if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr) + if (info == psb_success_) call psb_cbase_rwextd(ncol,bcsr,info,b=tcsr) if (info == psb_success_) call tcsr%free() if(info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') @@ -146,7 +150,7 @@ Subroutine psb_c_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) & 'starting spspmm 3' if (debug_level >= psb_debug_outer_) 
write(debug_unit,*) me,' ',trim(name),& & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() - call psb_spspmm(acsr,bcsr,ccsr,info) + call psb_ccsrspspmm(acsr,bcsr,ccsr,info) call psb_erractionrestore(err_act) return @@ -158,7 +162,11 @@ Subroutine psb_c_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) End Subroutine psb_c_par_csr_spspmm Subroutine psb_lc_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) - use psb_base_mod, psb_protect_name => psb_lc_par_csr_spspmm + use psb_mat_mod + use psb_comm_mod + use psb_penv_mod + use psb_c_tools_mod, psb_protect_name => psb_lc_par_csr_spspmm + use psb_c_serial_mod, only : psb_lccsrspspmm, psb_lcbase_rwextd Implicit None type(psb_lc_csr_sparse_mat),intent(in) :: acsr @@ -226,7 +234,7 @@ Subroutine psb_lc_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) else call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) end if - if (info == psb_success_) call psb_rwextd(nacol,bcsr,info,b=tcsr1) + if (info == psb_success_) call psb_lcbase_rwextd(nacol,bcsr,info,b=tcsr1) if (info == psb_success_) call tcsr1%free() if(info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') @@ -241,7 +249,7 @@ Subroutine psb_lc_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) & 'starting spspmm 3' if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() - call psb_spspmm(acsr,bcsr,ccsr,info) + call psb_lccsrspspmm(acsr,bcsr,ccsr,info) call psb_erractionrestore(err_act) return diff --git a/base/tools/psb_c_remap.F90 b/base/tools/psb_c_remap.F90 index 881b2ad0..ccab6c92 100644 --- a/base/tools/psb_c_remap.F90 +++ b/base/tools/psb_c_remap.F90 @@ -55,10 +55,12 @@ subroutine psb_c_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ! 
locals type(psb_ctxt_type) :: ctxt, newctxt - integer(psb_ipk_) :: np, me, err_act + integer(psb_mpk_) :: np, me, nrm, mipd, i + integer(psb_ipk_) :: err_act integer(psb_ipk_) :: rnp, rme - integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc - integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) + integer(psb_ipk_) :: ipdest, id1, id2, imd, nsrc + integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:) + integer(psb_mpk_), allocatable :: ids(:), misrc(:) type(psb_lc_coo_sparse_mat) :: acoo_snd, acoo_rcv integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name @@ -84,28 +86,29 @@ subroutine psb_c_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & endif !!$ write(0,*) ' Remapping from ',np,' onto ', np_remap - + mipd = ipd if (desc_in%get_fmt() == 'BLOCK') then ! ! Should we spread the processes in the new context, ! or should we keep them close? ! - if (.true.) then - allocate(ids(0:np_remap-1)) - if (np_remap <= np/2) then + if (.true.) then + nrm = np_remap + allocate(ids(0:nrm-1)) + if (nrm <= np/2) then ids(0) = 0 - do ipdest=1,np_remap -1 - ids(ipdest) = ids(ipdest-1) + np/np_remap + do ipdest=1,nrm -1 + ids(ipdest) = ids(ipdest-1) + np/nrm end do !!$ write(0,*) ' IDS ',ids(:) else - do ipdest = 0, np_remap-1 + do ipdest = 0, nrm-1 ids(ipdest) = ipdest end do end if - call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids) + call psb_init(newctxt,np=nrm,basectxt=ctxt,ids=ids) else - call psb_init(newctxt,np=np_remap,basectxt=ctxt) + call psb_init(newctxt,np=nrm,basectxt=ctxt) end if call psb_info(newctxt,rme,rnp) @@ -140,12 +143,12 @@ subroutine psb_c_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ipdest = ( ((me-imd*id1)/id2) + imd) end if if (allocated(ids)) then - ipd = ids(ipdest) + mipd = ids(ipdest) else - ipd = ipdest + mipd = ipdest end if !!$ write(0,*) ' Sending my data from ',me,' to ', & -!!$ & ipd, 'out of ',rnp,rnp-1 +!!$ & mipd, 'out of ',rnp,rnp-1 ! ! 
Compute local rows for all new @@ -158,13 +161,14 @@ subroutine psb_c_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & if (rme>=0) then ! if (rme < imd) then - isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] + misrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] else - isrc = [ (i, i= imd*id1+((rme-imd))*id2,& + misrc = [ (i, i= imd*id1+((rme-imd))*id2,& & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ] end if -!!$ write(0,*) me,rme,imd,' ISRC: ',isrc(:) - nsrc = size(isrc) +!!$ write(0,*) me,rme,imd,' ISRC: ',misrc(:) + isrc = misrc + nsrc = size(misrc) !!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& !!$ & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows() else @@ -187,24 +191,24 @@ subroutine psb_c_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & integer(psb_ipk_) :: nrl, ncl, nzl, nzp call a_in%cp_to(acoo_snd) nzsnd = acoo_snd%get_nzeros() - call psb_snd(ctxt,nzsnd,ipd) - call psb_snd(ctxt,desc_in%get_local_rows(),ipd) + call psb_snd(ctxt,nzsnd,mipd) + call psb_snd(ctxt,desc_in%get_local_rows(),mipd) ! Convert to global numbering call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info) call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info) - call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd) - call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd) - call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),mipd) + call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),mipd) + call psb_snd(ctxt,acoo_snd%val(1:nzsnd),mipd) if (rme>=0) then ! 
prepare to receive - nzsrc = isrc - nrsrc = isrc + nzsrc = misrc + nrsrc = misrc nzl = 0 do ip=1, nsrc - call psb_rcv(ctxt,nzsrc(ip),isrc(ip)) - call psb_rcv(ctxt,nrsrc(ip),isrc(ip)) + call psb_rcv(ctxt,nzsrc(ip),misrc(ip)) + call psb_rcv(ctxt,nrsrc(ip),misrc(ip)) nzl = nzl + nzsrc(ip) end do !!$ write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc) @@ -213,9 +217,9 @@ subroutine psb_c_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ncl = acoo_rcv%get_ncols() nzp = 0 do ip=1, nsrc - call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip)) - call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip)) - call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),misrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),misrc(ip)) + call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),misrc(ip)) nzp = nzp + nzsrc(ip) end do call acoo_rcv%set_nzeros(nzp) diff --git a/base/tools/psb_c_remote_mat.F90 b/base/tools/psb_c_remote_mat.F90 index ae2eaaf2..bb40edf3 100644 --- a/base/tools/psb_c_remote_mat.F90 +++ b/base/tools/psb_c_remote_mat.F90 @@ -73,11 +73,11 @@ Subroutine psb_lc_remote_mat(a,desc_a,b,info) use psb_base_mod, psb_protect_name => psb_lc_remote_mat -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_c_remote_vect.F90 b/base/tools/psb_c_remote_vect.F90 index bd5286fa..5cacd9a3 100644 --- a/base/tools/psb_c_remote_vect.F90 +++ b/base/tools/psb_c_remote_vect.F90 @@ -66,11 +66,11 @@ subroutine psb_c_remote_vect(n,v,iv,desc_a,x,ix, info) use psb_base_mod, psb_protect_name => psb_c_remote_vect -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_ipk_), intent(in) :: n diff --git a/base/tools/psb_callc.f90 b/base/tools/psb_callc.f90 index 272ece8b..82348a78 100644 --- a/base/tools/psb_callc.f90 +++ 
b/base/tools/psb_callc.f90 @@ -116,7 +116,7 @@ subroutine psb_calloc_vect(x, desc_a,info, dupl, bldmode) end if call x%set_dupl(dupl_) call x%set_remote_build(bldmode_) - call x%set_nrmv(0) + call x%set_nrmv(izero) if (x%is_remote_build()) then nrmt_ = max(100,(desc_a%get_local_cols()-desc_a%get_local_rows())) call psb_ensure_size(nrmt_,x%rmtv,info) diff --git a/base/tools/psb_ccdbldext.F90 b/base/tools/psb_ccdbldext.F90 index a9ead509..1e7d630e 100644 --- a/base/tools/psb_ccdbldext.F90 +++ b/base/tools/psb_ccdbldext.F90 @@ -64,11 +64,11 @@ Subroutine psb_ccdbldext(a,desc_a,novr,desc_ov,info, extype) use psb_base_mod, psb_protect_name => psb_ccdbldext use psi_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_cd_inloc.f90 b/base/tools/psb_cd_inloc.f90 index 30c3e9b3..4d0ff7f6 100644 --- a/base/tools/psb_cd_inloc.f90 +++ b/base/tools/psb_cd_inloc.f90 @@ -43,6 +43,7 @@ ! info - integer. Eventually returns an error code subroutine psb_cd_inloc(v, ctxt, desc, info, globalcheck,idx,usehash) use psb_base_mod + use psb_desc_mod use psi_mod use psb_repl_map_mod use psb_list_map_mod @@ -125,7 +126,7 @@ subroutine psb_cd_inloc(v, ctxt, desc, info, globalcheck,idx,usehash) if (me == psb_root_) then exch(1)=m exch(2)=n - exch(3)=psb_cd_get_large_threshold() + exch(3)=psb_cd_get_hash_threshold() call psb_bcast(ctxt,exch(1:3),root=psb_root_) else call psb_bcast(ctxt,exch(1:3),root=psb_root_) @@ -140,7 +141,7 @@ subroutine psb_cd_inloc(v, ctxt, desc, info, globalcheck,idx,usehash) call psb_errpush(err,name,l_err=l_err) goto 9999 endif - call psb_cd_set_large_threshold(exch(3)) + call psb_cd_set_hash_threshold(exch(3)) endif if (debug_level >= psb_debug_ext_) & & write(debug_unit,*) me,' ',trim(name),': doing global checks' @@ -163,7 +164,6 @@ subroutine psb_cd_inloc(v, ctxt, desc, info, globalcheck,idx,usehash) ! 3. any overlap? ! Checks 2 and 3 are controlled by globalcheck ! 
- if (check_.or.(.not.islarge)) then if (debug_size) & & write(debug_unit,*) me,' ',trim(name),': Going for global checks' @@ -248,7 +248,6 @@ subroutine psb_cd_inloc(v, ctxt, desc, info, globalcheck,idx,usehash) if (check_) then ! Sort, eliminate duplicates, then ! scramble back into original position. - ix(1) = -1 if (present(idx)) then if (size(idx) >= loc_row) then !$omp parallel do private(i) @@ -256,8 +255,7 @@ subroutine psb_cd_inloc(v, ctxt, desc, info, globalcheck,idx,usehash) ix(i) = idx(i) end do end if - end if - if (ix(1) == -1) then + else !$omp parallel do private(i) do i=1, loc_row ix(i) = i diff --git a/base/tools/psb_cd_remap.F90 b/base/tools/psb_cd_remap.F90 index 32a2e94e..882af13d 100644 --- a/base/tools/psb_cd_remap.F90 +++ b/base/tools/psb_cd_remap.F90 @@ -51,7 +51,8 @@ subroutine psb_cd_remap(np_remap, desc_in, desc_out, info) !locals type(psb_ctxt_type) :: ctxt, newctxt - integer(psb_ipk_) :: np, me, err_act + integer(psb_mpk_) :: np, me, nprm + integer(psb_ipk_) :: err_act integer(psb_ipk_) :: rnp, rme integer(psb_ipk_) :: ipdest, id1, id2, imd, i integer(psb_ipk_), allocatable :: newnl(:) @@ -82,7 +83,8 @@ subroutine psb_cd_remap(np_remap, desc_in, desc_out, info) if (desc_in%get_fmt() == 'BLOCK') then ! OK - call psb_init(newctxt,np=np_remap,basectxt=ctxt) + nprm = np_remap + call psb_init(newctxt,np=nprm,basectxt=ctxt) call psb_info(newctxt,rme,rnp) write(0,*) 'Old context: ',me,np,' New context: ',rme,rnp call psb_bcast(ctxt,rnp) diff --git a/base/tools/psb_cdals.f90 b/base/tools/psb_cdals.f90 index 1387b1a8..92ad3e94 100644 --- a/base/tools/psb_cdals.f90 +++ b/base/tools/psb_cdals.f90 @@ -45,6 +45,7 @@ ! info - integer. Error code (if any). 
subroutine psb_cdals(m, n, parts, ctxt, desc, info) use psb_base_mod + use psb_desc_mod use psi_mod use psb_repl_map_mod use psb_list_map_mod @@ -99,7 +100,7 @@ subroutine psb_cdals(m, n, parts, ctxt, desc, info) & write(debug_unit,*) me,' ',trim(name),': doing global checks' !global check on m and n parameters if (me == psb_root_) then - exch(1)=m; exch(2)=n; exch(3)=psb_cd_get_large_threshold() + exch(1)=m; exch(2)=n; exch(3)=psb_cd_get_hash_threshold() call psb_bcast(ctxt,exch(1:3),root=psb_root_) else call psb_bcast(ctxt,exch(1:3),root=psb_root_) @@ -112,7 +113,7 @@ subroutine psb_cdals(m, n, parts, ctxt, desc, info) call psb_errpush(err,name,m_err=(/2/)) goto 9999 endif - call psb_cd_set_large_threshold(exch(3)) + call psb_cd_set_hash_threshold(exch(3)) endif call psb_nullify_desc(desc) diff --git a/base/tools/psb_cdalv.f90 b/base/tools/psb_cdalv.f90 index fcc10c79..2f740dc8 100644 --- a/base/tools/psb_cdalv.f90 +++ b/base/tools/psb_cdalv.f90 @@ -46,6 +46,7 @@ subroutine psb_cdalv(v, ctxt, desc, info, flag) use psb_base_mod use psi_mod + use psb_desc_mod use psb_repl_map_mod use psb_glist_map_mod use psb_hash_map_mod @@ -102,7 +103,7 @@ subroutine psb_cdalv(v, ctxt, desc, info, flag) if (me == psb_root_) then exch(1)=m exch(2)=n - exch(3)=psb_cd_get_large_threshold() + exch(3)=psb_cd_get_hash_threshold() call psb_bcast(ctxt,exch(1:3),root=psb_root_) else call psb_bcast(ctxt,exch(1:3),root=psb_root_) @@ -117,7 +118,7 @@ subroutine psb_cdalv(v, ctxt, desc, info, flag) call psb_errpush(err,name,l_err=l_err) goto 9999 endif - call psb_cd_set_large_threshold(exch(3)) + call psb_cd_set_hash_threshold(exch(3)) endif call psb_nullify_desc(desc) diff --git a/base/tools/psb_cdins.F90 b/base/tools/psb_cdins.F90 index 4de000db..d29aeef8 100644 --- a/base/tools/psb_cdins.F90 +++ b/base/tools/psb_cdins.F90 @@ -45,7 +45,7 @@ ! ila(:) - integer(psb_ipk_), optional The row indices in local numbering ! jla(:) - integer(psb_ipk_), optional The col indices in local numbering ! 
-#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_cdinsrc(nz,ia,ja,desc_a,info,ila,jla) use psb_base_mod, psb_protect_name => psb_cdinsrc use psi_mod @@ -182,7 +182,7 @@ end subroutine psb_lcdinsrc ! mask(:) - logical, optional, target ! lidx(:) - integer(psb_ipk_), optional User-defined local col indices ! -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_cdinsc(nz,ja,desc,info,jla,mask,lidx) use psb_base_mod, psb_protect_name => psb_cdinsc use psi_mod diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 0c5f14ab..db8af75a 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_cspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_c_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. 
+ end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,7 +177,49 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) end if - + if (bld_and_) then +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_c_coo_sparse_mat) :: acoo +!!$ type(psb_c_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_c_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_csphalo.F90 b/base/tools/psb_csphalo.F90 index 19d7e1dc..4c56cc5e 100644 --- a/base/tools/psb_csphalo.F90 +++ b/base/tools/psb_csphalo.F90 @@ -74,11 +74,11 @@ Subroutine psb_csphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, 
psb_protect_name => psb_csphalo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -100,7 +100,7 @@ Subroutine psb_csphalo(a,desc_a,blk,info,rowcnv,colcnv,& integer(psb_mpk_) :: icomm, minfo integer(psb_mpk_), allocatable :: brvindx(:), & & rvsz(:), bsdindx(:),sdsz(:) -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! If globals are 8 bytes but locals are 4, things get tricky integer(psb_ipk_), allocatable :: liasnd(:), ljasnd(:) integer(psb_lpk_), allocatable :: iasnd(:), jasnd(:), iarcv(:), jarcv(:) @@ -268,7 +268,7 @@ Subroutine psb_csphalo(a,desc_a,blk,info,rowcnv,colcnv,& call psb_ensure_size(max(iszs,1),iasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),jasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),valsnd,info) -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! If globals are 8 bytes but locals are not, things get tricky if (info == psb_success_) call psb_ensure_size(max(iszs,1),liasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),ljasnd,info) @@ -540,11 +540,11 @@ Subroutine psb_lcsphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_lcsphalo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -883,11 +883,11 @@ Subroutine psb_lc_csr_halo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,data,outcol_glob,col_desc) use psb_base_mod, psb_protect_name => psb_lc_csr_halo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -1243,11 +1243,11 @@ Subroutine psb_c_lc_csr_halo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,data,outcol_glob,col_desc) use psb_base_mod, psb_protect_name => psb_c_lc_csr_halo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif 
Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_cspins.F90 b/base/tools/psb_cspins.F90 index e5f2731d..6ed5c629 100644 --- a/base/tools/psb_cspins.F90 +++ b/base/tools/psb_cspins.F90 @@ -51,7 +51,7 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_cspins use psi_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -78,6 +78,9 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_lpk_), allocatable :: lila(:),ljla(:) complex(psb_spk_), allocatable :: lval(:) character(len=20) :: name + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: ins_phase1=-1, ins_phase2=-1, ins_phase3=-1, ins_phase4=-1 + integer(psb_ipk_), save :: ins_phase11=-1, ins_phase12=-1 info = psb_success_ name = 'psb_cspins' @@ -120,6 +123,19 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) else local_ = .false. endif + if ((do_timings).and.(ins_phase1==-1)) & + & ins_phase1 = psb_get_timer_idx("SPINS: and send ") + if ((do_timings).and.(ins_phase2==-1)) & + & ins_phase2 = psb_get_timer_idx("SPINS: and cmp ad") + if ((do_timings).and.(ins_phase3==-1)) & + & ins_phase3 = psb_get_timer_idx("SPINS: and rcv") + if ((do_timings).and.(ins_phase4==-1)) & + & ins_phase4 = psb_get_timer_idx("SPINS: and cmp and") + if ((do_timings).and.(ins_phase11==-1)) & + & ins_phase11 = psb_get_timer_idx("SPINS: noand exch ") + if ((do_timings).and.(ins_phase12==-1)) & + & ins_phase12 = psb_get_timer_idx("SPINS: noand cmp") + if (desc_a%is_bld()) then @@ -134,11 +150,11 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) block - logical :: is_in_parallel + logical :: is_in_parallel is_in_parallel = omp_in_parallel() - if (is_in_parallel) then + if (.false..and.is_in_parallel) then !$omp parallel 
private(ila,jla,nrow,ncol,nnl,k) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) !$omp critical(spins) @@ -148,7 +164,7 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins 1',i_err=(/info/)) goto 9998 end if nrow = desc_a%get_local_rows() @@ -189,22 +205,25 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after csput',psb_errstatus_fatal() !$omp end parallel else + !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) !write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal() if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info - if (info /= psb_success_) then + if ((info /= psb_success_).or.psb_errstatus_fatal()) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins 2 ',i_err=(/info/)) goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() !write(0,*) me,' Before csput',psb_errstatus_fatal() - if (a%is_bld()) then + if (a%is_bld()) then + !write(0,*) me,' before csput ',psb_errstatus_fatal(),info,nz call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) - if (info /= psb_success_) then + !write(0,*) me,' after csput ',psb_errstatus_fatal(),info,nz + if ((info /= psb_success_).or.psb_errstatus_fatal()) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') goto 9999 @@ -237,9 +256,17 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) end if end block #else + if (do_timings) call psb_tic(ins_phase1) !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) 
+ if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='g2l',i_err=(/info/)) + goto 9999 + end if + if (do_timings) call psb_toc(ins_phase1) + if (do_timings) call psb_tic(ins_phase2) if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) @@ -247,20 +274,25 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins',i_err=(/info/)) goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() + if (do_timings) call psb_toc(ins_phase2) + !write(0,*) me,' Before csput',psb_errstatus_fatal() if (a%is_bld()) then + if (do_timings) call psb_tic(ins_phase3) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') goto 9999 end if - + if (do_timings) call psb_toc(ins_phase3) + if (do_timings) call psb_tic(ins_phase4) + if (a%is_remote_build()) then nnl = count(ila(1:nz)<0) if (nnl > 0) then @@ -279,7 +311,8 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & 1_psb_lpk_,desc_a%get_global_rows(),info) end if end if - + if (do_timings) call psb_toc(ins_phase4) + else info = psb_err_invalid_a_and_cd_state_ call psb_errpush(info,name) @@ -287,6 +320,12 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) end if #endif if (info /= 0) goto 9999 + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='unknown',i_err=(/info/)) + goto 9999 + end if endif else if (desc_a%is_asb()) then @@ -299,16 +338,16 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel 
private(ila,jla,nrow,ncol,nnl,k) #endif if (local_) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp workshare #endif ila(1:nz) = ia(1:nz) jla(1:nz) = ja(1:nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp end workshare #endif else @@ -341,7 +380,7 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & 1_psb_lpk_,desc_a%get_global_rows(),info) end if end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp end parallel #endif @@ -458,7 +497,7 @@ subroutine psb_cspins_csr_lirp(nr,irp,ja,val,irw,a,desc_a,info,rebuild,local) end subroutine psb_cspins_csr_lirp -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_cspins_csr_iirp(nr,irw,irp,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_cspins_csr_iirp use psi_mod diff --git a/base/tools/psb_d_glob_transpose.F90 b/base/tools/psb_d_glob_transpose.F90 index caf99400..3c323dbd 100644 --- a/base/tools/psb_d_glob_transpose.F90 +++ b/base/tools/psb_d_glob_transpose.F90 @@ -94,12 +94,12 @@ ! ! 
subroutine psb_ld_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_base_mod, psb_protect_name => psb_ld_coo_glob_transpose Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ld_coo_sparse_mat), intent(inout) :: ain @@ -392,12 +392,12 @@ subroutine psb_ld_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) end subroutine psb_ld_coo_glob_transpose subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_base_mod, psb_protect_name => psb_d_coo_glob_transpose Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_d_coo_sparse_mat), intent(inout) :: ain diff --git a/base/tools/psb_d_par_csr_spspmm.f90 b/base/tools/psb_d_par_csr_spspmm.f90 index f9d110f7..3ed62f05 100644 --- a/base/tools/psb_d_par_csr_spspmm.f90 +++ b/base/tools/psb_d_par_csr_spspmm.f90 @@ -62,7 +62,11 @@ ! Error code. ! Subroutine psb_d_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) - use psb_base_mod, psb_protect_name => psb_d_par_csr_spspmm + use psb_mat_mod + use psb_comm_mod + use psb_penv_mod + use psb_d_tools_mod, psb_protect_name => psb_d_par_csr_spspmm + use psb_d_serial_mod, only : psb_dcsrspspmm, psb_dbase_rwextd Implicit None type(psb_d_csr_sparse_mat),intent(in) :: acsr @@ -132,7 +136,7 @@ Subroutine psb_d_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) call desc_c%indxmap%g2lip(ltcsr%ja(1:nnz),info) end if call ltcsr%mv_to_ifmt(tcsr,info) - if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr) + if (info == psb_success_) call psb_dbase_rwextd(ncol,bcsr,info,b=tcsr) if (info == psb_success_) call tcsr%free() if(info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') @@ -146,7 +150,7 @@ Subroutine psb_d_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) & 'starting spspmm 3' if (debug_level >= psb_debug_outer_) 
write(debug_unit,*) me,' ',trim(name),& & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() - call psb_spspmm(acsr,bcsr,ccsr,info) + call psb_dcsrspspmm(acsr,bcsr,ccsr,info) call psb_erractionrestore(err_act) return @@ -158,7 +162,11 @@ Subroutine psb_d_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) End Subroutine psb_d_par_csr_spspmm Subroutine psb_ld_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) - use psb_base_mod, psb_protect_name => psb_ld_par_csr_spspmm + use psb_mat_mod + use psb_comm_mod + use psb_penv_mod + use psb_d_tools_mod, psb_protect_name => psb_ld_par_csr_spspmm + use psb_d_serial_mod, only : psb_ldcsrspspmm, psb_ldbase_rwextd Implicit None type(psb_ld_csr_sparse_mat),intent(in) :: acsr @@ -226,7 +234,7 @@ Subroutine psb_ld_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) else call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) end if - if (info == psb_success_) call psb_rwextd(nacol,bcsr,info,b=tcsr1) + if (info == psb_success_) call psb_ldbase_rwextd(nacol,bcsr,info,b=tcsr1) if (info == psb_success_) call tcsr1%free() if(info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') @@ -241,7 +249,7 @@ Subroutine psb_ld_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) & 'starting spspmm 3' if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() - call psb_spspmm(acsr,bcsr,ccsr,info) + call psb_ldcsrspspmm(acsr,bcsr,ccsr,info) call psb_erractionrestore(err_act) return diff --git a/base/tools/psb_d_remap.F90 b/base/tools/psb_d_remap.F90 index 2157b56b..dc321918 100644 --- a/base/tools/psb_d_remap.F90 +++ b/base/tools/psb_d_remap.F90 @@ -55,10 +55,12 @@ subroutine psb_d_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ! 
locals type(psb_ctxt_type) :: ctxt, newctxt - integer(psb_ipk_) :: np, me, err_act + integer(psb_mpk_) :: np, me, nrm, mipd, i + integer(psb_ipk_) :: err_act integer(psb_ipk_) :: rnp, rme - integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc - integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) + integer(psb_ipk_) :: ipdest, id1, id2, imd, nsrc + integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:) + integer(psb_mpk_), allocatable :: ids(:), misrc(:) type(psb_ld_coo_sparse_mat) :: acoo_snd, acoo_rcv integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name @@ -84,28 +86,29 @@ subroutine psb_d_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & endif !!$ write(0,*) ' Remapping from ',np,' onto ', np_remap - + mipd = ipd if (desc_in%get_fmt() == 'BLOCK') then ! ! Should we spread the processes in the new context, ! or should we keep them close? ! - if (.true.) then - allocate(ids(0:np_remap-1)) - if (np_remap <= np/2) then + if (.true.) then + nrm = np_remap + allocate(ids(0:nrm-1)) + if (nrm <= np/2) then ids(0) = 0 - do ipdest=1,np_remap -1 - ids(ipdest) = ids(ipdest-1) + np/np_remap + do ipdest=1,nrm -1 + ids(ipdest) = ids(ipdest-1) + np/nrm end do !!$ write(0,*) ' IDS ',ids(:) else - do ipdest = 0, np_remap-1 + do ipdest = 0, nrm-1 ids(ipdest) = ipdest end do end if - call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids) + call psb_init(newctxt,np=nrm,basectxt=ctxt,ids=ids) else - call psb_init(newctxt,np=np_remap,basectxt=ctxt) + call psb_init(newctxt,np=nrm,basectxt=ctxt) end if call psb_info(newctxt,rme,rnp) @@ -140,12 +143,12 @@ subroutine psb_d_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ipdest = ( ((me-imd*id1)/id2) + imd) end if if (allocated(ids)) then - ipd = ids(ipdest) + mipd = ids(ipdest) else - ipd = ipdest + mipd = ipdest end if !!$ write(0,*) ' Sending my data from ',me,' to ', & -!!$ & ipd, 'out of ',rnp,rnp-1 +!!$ & mipd, 'out of ',rnp,rnp-1 ! ! 
Compute local rows for all new @@ -158,13 +161,14 @@ subroutine psb_d_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & if (rme>=0) then ! if (rme < imd) then - isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] + misrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] else - isrc = [ (i, i= imd*id1+((rme-imd))*id2,& + misrc = [ (i, i= imd*id1+((rme-imd))*id2,& & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ] end if -!!$ write(0,*) me,rme,imd,' ISRC: ',isrc(:) - nsrc = size(isrc) +!!$ write(0,*) me,rme,imd,' ISRC: ',misrc(:) + isrc = misrc + nsrc = size(misrc) !!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& !!$ & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows() else @@ -187,24 +191,24 @@ subroutine psb_d_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & integer(psb_ipk_) :: nrl, ncl, nzl, nzp call a_in%cp_to(acoo_snd) nzsnd = acoo_snd%get_nzeros() - call psb_snd(ctxt,nzsnd,ipd) - call psb_snd(ctxt,desc_in%get_local_rows(),ipd) + call psb_snd(ctxt,nzsnd,mipd) + call psb_snd(ctxt,desc_in%get_local_rows(),mipd) ! Convert to global numbering call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info) call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info) - call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd) - call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd) - call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),mipd) + call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),mipd) + call psb_snd(ctxt,acoo_snd%val(1:nzsnd),mipd) if (rme>=0) then ! 
prepare to receive - nzsrc = isrc - nrsrc = isrc + nzsrc = misrc + nrsrc = misrc nzl = 0 do ip=1, nsrc - call psb_rcv(ctxt,nzsrc(ip),isrc(ip)) - call psb_rcv(ctxt,nrsrc(ip),isrc(ip)) + call psb_rcv(ctxt,nzsrc(ip),misrc(ip)) + call psb_rcv(ctxt,nrsrc(ip),misrc(ip)) nzl = nzl + nzsrc(ip) end do !!$ write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc) @@ -213,9 +217,9 @@ subroutine psb_d_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ncl = acoo_rcv%get_ncols() nzp = 0 do ip=1, nsrc - call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip)) - call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip)) - call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),misrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),misrc(ip)) + call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),misrc(ip)) nzp = nzp + nzsrc(ip) end do call acoo_rcv%set_nzeros(nzp) diff --git a/base/tools/psb_d_remote_mat.F90 b/base/tools/psb_d_remote_mat.F90 index 35116dc5..bab23d59 100644 --- a/base/tools/psb_d_remote_mat.F90 +++ b/base/tools/psb_d_remote_mat.F90 @@ -73,11 +73,11 @@ Subroutine psb_ld_remote_mat(a,desc_a,b,info) use psb_base_mod, psb_protect_name => psb_ld_remote_mat -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_d_remote_vect.F90 b/base/tools/psb_d_remote_vect.F90 index 4a409fa5..440c8cc5 100644 --- a/base/tools/psb_d_remote_vect.F90 +++ b/base/tools/psb_d_remote_vect.F90 @@ -66,11 +66,11 @@ subroutine psb_d_remote_vect(n,v,iv,desc_a,x,ix, info) use psb_base_mod, psb_protect_name => psb_d_remote_vect -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_ipk_), intent(in) :: n diff --git a/base/tools/psb_dallc.f90 b/base/tools/psb_dallc.f90 index 108e2000..7b7b21f7 100644 --- a/base/tools/psb_dallc.f90 +++ 
b/base/tools/psb_dallc.f90 @@ -116,7 +116,7 @@ subroutine psb_dalloc_vect(x, desc_a,info, dupl, bldmode) end if call x%set_dupl(dupl_) call x%set_remote_build(bldmode_) - call x%set_nrmv(0) + call x%set_nrmv(izero) if (x%is_remote_build()) then nrmt_ = max(100,(desc_a%get_local_cols()-desc_a%get_local_rows())) call psb_ensure_size(nrmt_,x%rmtv,info) diff --git a/base/tools/psb_dcdbldext.F90 b/base/tools/psb_dcdbldext.F90 index fdafb500..a5d059a3 100644 --- a/base/tools/psb_dcdbldext.F90 +++ b/base/tools/psb_dcdbldext.F90 @@ -64,11 +64,11 @@ Subroutine psb_dcdbldext(a,desc_a,novr,desc_ov,info, extype) use psb_base_mod, psb_protect_name => psb_dcdbldext use psi_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 3132f249..236568a1 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_dspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_d_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... 
type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,7 +177,49 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) end if - + if (bld_and_) then +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_d_coo_sparse_mat) :: acoo +!!$ type(psb_d_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_d_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) 
then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_dsphalo.F90 b/base/tools/psb_dsphalo.F90 index d5e383ef..c793eb2a 100644 --- a/base/tools/psb_dsphalo.F90 +++ b/base/tools/psb_dsphalo.F90 @@ -74,11 +74,11 @@ Subroutine psb_dsphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_dsphalo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -100,7 +100,7 @@ Subroutine psb_dsphalo(a,desc_a,blk,info,rowcnv,colcnv,& integer(psb_mpk_) :: icomm, minfo integer(psb_mpk_), allocatable :: brvindx(:), & & rvsz(:), bsdindx(:),sdsz(:) -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! If globals are 8 bytes but locals are 4, things get tricky integer(psb_ipk_), allocatable :: liasnd(:), ljasnd(:) integer(psb_lpk_), allocatable :: iasnd(:), jasnd(:), iarcv(:), jarcv(:) @@ -268,7 +268,7 @@ Subroutine psb_dsphalo(a,desc_a,blk,info,rowcnv,colcnv,& call psb_ensure_size(max(iszs,1),iasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),jasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),valsnd,info) -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! 
If globals are 8 bytes but locals are not, things get tricky if (info == psb_success_) call psb_ensure_size(max(iszs,1),liasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),ljasnd,info) @@ -540,11 +540,11 @@ Subroutine psb_ldsphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_ldsphalo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -883,11 +883,11 @@ Subroutine psb_ld_csr_halo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,data,outcol_glob,col_desc) use psb_base_mod, psb_protect_name => psb_ld_csr_halo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -1243,11 +1243,11 @@ Subroutine psb_d_ld_csr_halo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,data,outcol_glob,col_desc) use psb_base_mod, psb_protect_name => psb_d_ld_csr_halo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_dspins.F90 b/base/tools/psb_dspins.F90 index cdeaa931..a9cbbe4b 100644 --- a/base/tools/psb_dspins.F90 +++ b/base/tools/psb_dspins.F90 @@ -51,7 +51,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_dspins use psi_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -78,6 +78,9 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_lpk_), allocatable :: lila(:),ljla(:) real(psb_dpk_), allocatable :: lval(:) character(len=20) :: name + logical, parameter :: do_timings=.false. 
+ integer(psb_ipk_), save :: ins_phase1=-1, ins_phase2=-1, ins_phase3=-1, ins_phase4=-1 + integer(psb_ipk_), save :: ins_phase11=-1, ins_phase12=-1 info = psb_success_ name = 'psb_dspins' @@ -120,6 +123,19 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) else local_ = .false. endif + if ((do_timings).and.(ins_phase1==-1)) & + & ins_phase1 = psb_get_timer_idx("SPINS: and send ") + if ((do_timings).and.(ins_phase2==-1)) & + & ins_phase2 = psb_get_timer_idx("SPINS: and cmp ad") + if ((do_timings).and.(ins_phase3==-1)) & + & ins_phase3 = psb_get_timer_idx("SPINS: and rcv") + if ((do_timings).and.(ins_phase4==-1)) & + & ins_phase4 = psb_get_timer_idx("SPINS: and cmp and") + if ((do_timings).and.(ins_phase11==-1)) & + & ins_phase11 = psb_get_timer_idx("SPINS: noand exch ") + if ((do_timings).and.(ins_phase12==-1)) & + & ins_phase12 = psb_get_timer_idx("SPINS: noand cmp") + if (desc_a%is_bld()) then @@ -134,11 +150,11 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) block - logical :: is_in_parallel + logical :: is_in_parallel is_in_parallel = omp_in_parallel() - if (is_in_parallel) then + if (.false..and.is_in_parallel) then !$omp parallel private(ila,jla,nrow,ncol,nnl,k) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) 
!$omp critical(spins) @@ -148,7 +164,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins 1',i_err=(/info/)) goto 9998 end if nrow = desc_a%get_local_rows() @@ -189,22 +205,25 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after csput',psb_errstatus_fatal() !$omp end parallel else + !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) !write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal() if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info - if (info /= psb_success_) then + if ((info /= psb_success_).or.psb_errstatus_fatal()) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins 2 ',i_err=(/info/)) goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() !write(0,*) me,' Before csput',psb_errstatus_fatal() - if (a%is_bld()) then + if (a%is_bld()) then + !write(0,*) me,' before csput ',psb_errstatus_fatal(),info,nz call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) - if (info /= psb_success_) then + !write(0,*) me,' after csput ',psb_errstatus_fatal(),info,nz + if ((info /= psb_success_).or.psb_errstatus_fatal()) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') goto 9999 @@ -237,9 +256,17 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) end if end block #else + if (do_timings) call psb_tic(ins_phase1) !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) 
+ if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='g2l',i_err=(/info/)) + goto 9999 + end if + if (do_timings) call psb_toc(ins_phase1) + if (do_timings) call psb_tic(ins_phase2) if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) @@ -247,20 +274,25 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins',i_err=(/info/)) goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() + if (do_timings) call psb_toc(ins_phase2) + !write(0,*) me,' Before csput',psb_errstatus_fatal() if (a%is_bld()) then + if (do_timings) call psb_tic(ins_phase3) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') goto 9999 end if - + if (do_timings) call psb_toc(ins_phase3) + if (do_timings) call psb_tic(ins_phase4) + if (a%is_remote_build()) then nnl = count(ila(1:nz)<0) if (nnl > 0) then @@ -279,7 +311,8 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & 1_psb_lpk_,desc_a%get_global_rows(),info) end if end if - + if (do_timings) call psb_toc(ins_phase4) + else info = psb_err_invalid_a_and_cd_state_ call psb_errpush(info,name) @@ -287,6 +320,12 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) end if #endif if (info /= 0) goto 9999 + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='unknown',i_err=(/info/)) + goto 9999 + end if endif else if (desc_a%is_asb()) then @@ -299,16 +338,16 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel 
private(ila,jla,nrow,ncol,nnl,k) #endif if (local_) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp workshare #endif ila(1:nz) = ia(1:nz) jla(1:nz) = ja(1:nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp end workshare #endif else @@ -341,7 +380,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & 1_psb_lpk_,desc_a%get_global_rows(),info) end if end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp end parallel #endif @@ -458,7 +497,7 @@ subroutine psb_dspins_csr_lirp(nr,irp,ja,val,irw,a,desc_a,info,rebuild,local) end subroutine psb_dspins_csr_lirp -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_dspins_csr_iirp(nr,irw,irp,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_dspins_csr_iirp use psi_mod diff --git a/base/tools/psb_e_remote_vect.F90 b/base/tools/psb_e_remote_vect.F90 index 9fb15ff9..9b190667 100644 --- a/base/tools/psb_e_remote_vect.F90 +++ b/base/tools/psb_e_remote_vect.F90 @@ -66,11 +66,11 @@ subroutine psb_e_remote_vect(n,v,iv,desc_a,x,ix, info) use psb_base_mod, psb_protect_name => psb_e_remote_vect -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_ipk_), intent(in) :: n diff --git a/base/tools/psb_i2_remote_vect.F90 b/base/tools/psb_i2_remote_vect.F90 index 3f6bffbd..11f0cb7c 100644 --- a/base/tools/psb_i2_remote_vect.F90 +++ b/base/tools/psb_i2_remote_vect.F90 @@ -66,11 +66,11 @@ subroutine psb_i2_remote_vect(n,v,iv,desc_a,x,ix, info) use psb_base_mod, psb_protect_name => psb_i2_remote_vect -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_ipk_), intent(in) :: n diff --git a/base/tools/psb_iallc.f90 b/base/tools/psb_iallc.f90 index 7ed69ed6..21d4d8a5 100644 --- a/base/tools/psb_iallc.f90 +++ b/base/tools/psb_iallc.f90 @@ -116,7 +116,7 @@ subroutine psb_ialloc_vect(x, 
desc_a,info, dupl, bldmode) end if call x%set_dupl(dupl_) call x%set_remote_build(bldmode_) - call x%set_nrmv(0) + call x%set_nrmv(izero) if (x%is_remote_build()) then nrmt_ = max(100,(desc_a%get_local_cols()-desc_a%get_local_rows())) call psb_ensure_size(nrmt_,x%rmtv,info) diff --git a/base/tools/psb_icdasb.F90 b/base/tools/psb_icdasb.F90 index 31d92133..c2d9c27a 100644 --- a/base/tools/psb_icdasb.F90 +++ b/base/tools/psb_icdasb.F90 @@ -45,11 +45,11 @@ subroutine psb_icdasb(desc,info,ext_hv,mold) use psb_base_mod, psb_protect_name => psb_icdasb use psi_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif implicit none -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif !...Parameters.... @@ -67,7 +67,7 @@ subroutine psb_icdasb(desc,info,ext_hv,mold) integer(psb_mpk_) :: icomm integer(psb_ipk_) :: np,me logical :: ext_hv_ - logical, parameter :: do_timings=.true. + logical, parameter :: do_timings=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 integer(psb_ipk_), save :: idx_phase11=-1, idx_phase12=-1, idx_phase13=-1 integer(psb_ipk_), save :: idx_total=-1 diff --git a/base/tools/psb_lallc.f90 b/base/tools/psb_lallc.f90 index 53857029..a781e55a 100644 --- a/base/tools/psb_lallc.f90 +++ b/base/tools/psb_lallc.f90 @@ -116,7 +116,7 @@ subroutine psb_lalloc_vect(x, desc_a,info, dupl, bldmode) end if call x%set_dupl(dupl_) call x%set_remote_build(bldmode_) - call x%set_nrmv(0) + call x%set_nrmv(izero) if (x%is_remote_build()) then nrmt_ = max(100,(desc_a%get_local_cols()-desc_a%get_local_rows())) call psb_ensure_size(nrmt_,x%rmtv,info) diff --git a/base/tools/psb_m_remote_vect.F90 b/base/tools/psb_m_remote_vect.F90 index 01b5aeb3..54f5ef9c 100644 --- a/base/tools/psb_m_remote_vect.F90 +++ b/base/tools/psb_m_remote_vect.F90 @@ -66,11 +66,11 @@ subroutine psb_m_remote_vect(n,v,iv,desc_a,x,ix, info) use psb_base_mod, psb_protect_name => psb_m_remote_vect -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H 
+#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_ipk_), intent(in) :: n diff --git a/base/tools/psb_s_glob_transpose.F90 b/base/tools/psb_s_glob_transpose.F90 index c7dc818f..8b7bff59 100644 --- a/base/tools/psb_s_glob_transpose.F90 +++ b/base/tools/psb_s_glob_transpose.F90 @@ -94,12 +94,12 @@ ! ! subroutine psb_ls_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_base_mod, psb_protect_name => psb_ls_coo_glob_transpose Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_ls_coo_sparse_mat), intent(inout) :: ain @@ -392,12 +392,12 @@ subroutine psb_ls_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) end subroutine psb_ls_coo_glob_transpose subroutine psb_s_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_base_mod, psb_protect_name => psb_s_coo_glob_transpose Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_s_coo_sparse_mat), intent(inout) :: ain diff --git a/base/tools/psb_s_par_csr_spspmm.f90 b/base/tools/psb_s_par_csr_spspmm.f90 index 549aeba4..4e70478f 100644 --- a/base/tools/psb_s_par_csr_spspmm.f90 +++ b/base/tools/psb_s_par_csr_spspmm.f90 @@ -62,7 +62,11 @@ ! Error code. ! 
Subroutine psb_s_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) - use psb_base_mod, psb_protect_name => psb_s_par_csr_spspmm + use psb_mat_mod + use psb_comm_mod + use psb_penv_mod + use psb_s_tools_mod, psb_protect_name => psb_s_par_csr_spspmm + use psb_s_serial_mod, only : psb_scsrspspmm, psb_sbase_rwextd Implicit None type(psb_s_csr_sparse_mat),intent(in) :: acsr @@ -132,7 +136,7 @@ Subroutine psb_s_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) call desc_c%indxmap%g2lip(ltcsr%ja(1:nnz),info) end if call ltcsr%mv_to_ifmt(tcsr,info) - if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr) + if (info == psb_success_) call psb_sbase_rwextd(ncol,bcsr,info,b=tcsr) if (info == psb_success_) call tcsr%free() if(info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') @@ -146,7 +150,7 @@ Subroutine psb_s_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) & 'starting spspmm 3' if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() - call psb_spspmm(acsr,bcsr,ccsr,info) + call psb_scsrspspmm(acsr,bcsr,ccsr,info) call psb_erractionrestore(err_act) return @@ -158,7 +162,11 @@ Subroutine psb_s_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) End Subroutine psb_s_par_csr_spspmm Subroutine psb_ls_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) - use psb_base_mod, psb_protect_name => psb_ls_par_csr_spspmm + use psb_mat_mod + use psb_comm_mod + use psb_penv_mod + use psb_s_tools_mod, psb_protect_name => psb_ls_par_csr_spspmm + use psb_s_serial_mod, only : psb_lscsrspspmm, psb_lsbase_rwextd Implicit None type(psb_ls_csr_sparse_mat),intent(in) :: acsr @@ -226,7 +234,7 @@ Subroutine psb_ls_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) else call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) end if - if (info == psb_success_) call psb_rwextd(nacol,bcsr,info,b=tcsr1) + if (info == 
psb_success_) call psb_lsbase_rwextd(nacol,bcsr,info,b=tcsr1) if (info == psb_success_) call tcsr1%free() if(info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') @@ -241,7 +249,7 @@ Subroutine psb_ls_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) & 'starting spspmm 3' if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() - call psb_spspmm(acsr,bcsr,ccsr,info) + call psb_lscsrspspmm(acsr,bcsr,ccsr,info) call psb_erractionrestore(err_act) return diff --git a/base/tools/psb_s_remap.F90 b/base/tools/psb_s_remap.F90 index 899c1b26..b7cf7369 100644 --- a/base/tools/psb_s_remap.F90 +++ b/base/tools/psb_s_remap.F90 @@ -55,10 +55,12 @@ subroutine psb_s_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ! locals type(psb_ctxt_type) :: ctxt, newctxt - integer(psb_ipk_) :: np, me, err_act + integer(psb_mpk_) :: np, me, nrm, mipd, i + integer(psb_ipk_) :: err_act integer(psb_ipk_) :: rnp, rme - integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc - integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) + integer(psb_ipk_) :: ipdest, id1, id2, imd, nsrc + integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:) + integer(psb_mpk_), allocatable :: ids(:), misrc(:) type(psb_ls_coo_sparse_mat) :: acoo_snd, acoo_rcv integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name @@ -84,28 +86,29 @@ subroutine psb_s_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & endif !!$ write(0,*) ' Remapping from ',np,' onto ', np_remap - + mipd = ipd if (desc_in%get_fmt() == 'BLOCK') then ! ! Should we spread the processes in the new context, ! or should we keep them close? ! - if (.true.) then - allocate(ids(0:np_remap-1)) - if (np_remap <= np/2) then + if (.true.) 
then + nrm = np_remap + allocate(ids(0:nrm-1)) + if (nrm <= np/2) then ids(0) = 0 - do ipdest=1,np_remap -1 - ids(ipdest) = ids(ipdest-1) + np/np_remap + do ipdest=1,nrm -1 + ids(ipdest) = ids(ipdest-1) + np/nrm end do !!$ write(0,*) ' IDS ',ids(:) else - do ipdest = 0, np_remap-1 + do ipdest = 0, nrm-1 ids(ipdest) = ipdest end do end if - call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids) + call psb_init(newctxt,np=nrm,basectxt=ctxt,ids=ids) else - call psb_init(newctxt,np=np_remap,basectxt=ctxt) + call psb_init(newctxt,np=nrm,basectxt=ctxt) end if call psb_info(newctxt,rme,rnp) @@ -140,12 +143,12 @@ subroutine psb_s_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ipdest = ( ((me-imd*id1)/id2) + imd) end if if (allocated(ids)) then - ipd = ids(ipdest) + mipd = ids(ipdest) else - ipd = ipdest + mipd = ipdest end if !!$ write(0,*) ' Sending my data from ',me,' to ', & -!!$ & ipd, 'out of ',rnp,rnp-1 +!!$ & mipd, 'out of ',rnp,rnp-1 ! ! Compute local rows for all new @@ -158,13 +161,14 @@ subroutine psb_s_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & if (rme>=0) then ! 
if (rme < imd) then - isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] + misrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] else - isrc = [ (i, i= imd*id1+((rme-imd))*id2,& + misrc = [ (i, i= imd*id1+((rme-imd))*id2,& & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ] end if -!!$ write(0,*) me,rme,imd,' ISRC: ',isrc(:) - nsrc = size(isrc) +!!$ write(0,*) me,rme,imd,' ISRC: ',misrc(:) + isrc = misrc + nsrc = size(misrc) !!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& !!$ & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows() else @@ -187,24 +191,24 @@ subroutine psb_s_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & integer(psb_ipk_) :: nrl, ncl, nzl, nzp call a_in%cp_to(acoo_snd) nzsnd = acoo_snd%get_nzeros() - call psb_snd(ctxt,nzsnd,ipd) - call psb_snd(ctxt,desc_in%get_local_rows(),ipd) + call psb_snd(ctxt,nzsnd,mipd) + call psb_snd(ctxt,desc_in%get_local_rows(),mipd) ! Convert to global numbering call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info) call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info) - call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd) - call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd) - call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),mipd) + call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),mipd) + call psb_snd(ctxt,acoo_snd%val(1:nzsnd),mipd) if (rme>=0) then ! 
prepare to receive - nzsrc = isrc - nrsrc = isrc + nzsrc = misrc + nrsrc = misrc nzl = 0 do ip=1, nsrc - call psb_rcv(ctxt,nzsrc(ip),isrc(ip)) - call psb_rcv(ctxt,nrsrc(ip),isrc(ip)) + call psb_rcv(ctxt,nzsrc(ip),misrc(ip)) + call psb_rcv(ctxt,nrsrc(ip),misrc(ip)) nzl = nzl + nzsrc(ip) end do !!$ write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc) @@ -213,9 +217,9 @@ subroutine psb_s_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ncl = acoo_rcv%get_ncols() nzp = 0 do ip=1, nsrc - call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip)) - call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip)) - call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),misrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),misrc(ip)) + call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),misrc(ip)) nzp = nzp + nzsrc(ip) end do call acoo_rcv%set_nzeros(nzp) diff --git a/base/tools/psb_s_remote_mat.F90 b/base/tools/psb_s_remote_mat.F90 index df64266b..713f8736 100644 --- a/base/tools/psb_s_remote_mat.F90 +++ b/base/tools/psb_s_remote_mat.F90 @@ -73,11 +73,11 @@ Subroutine psb_ls_remote_mat(a,desc_a,b,info) use psb_base_mod, psb_protect_name => psb_ls_remote_mat -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_s_remote_vect.F90 b/base/tools/psb_s_remote_vect.F90 index a8464663..d103b694 100644 --- a/base/tools/psb_s_remote_vect.F90 +++ b/base/tools/psb_s_remote_vect.F90 @@ -66,11 +66,11 @@ subroutine psb_s_remote_vect(n,v,iv,desc_a,x,ix, info) use psb_base_mod, psb_protect_name => psb_s_remote_vect -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_ipk_), intent(in) :: n diff --git a/base/tools/psb_sallc.f90 b/base/tools/psb_sallc.f90 index 951d8128..d318e45f 100644 --- a/base/tools/psb_sallc.f90 +++ 
b/base/tools/psb_sallc.f90 @@ -116,7 +116,7 @@ subroutine psb_salloc_vect(x, desc_a,info, dupl, bldmode) end if call x%set_dupl(dupl_) call x%set_remote_build(bldmode_) - call x%set_nrmv(0) + call x%set_nrmv(izero) if (x%is_remote_build()) then nrmt_ = max(100,(desc_a%get_local_cols()-desc_a%get_local_rows())) call psb_ensure_size(nrmt_,x%rmtv,info) diff --git a/base/tools/psb_scdbldext.F90 b/base/tools/psb_scdbldext.F90 index 40ac778f..bdced541 100644 --- a/base/tools/psb_scdbldext.F90 +++ b/base/tools/psb_scdbldext.F90 @@ -64,11 +64,11 @@ Subroutine psb_scdbldext(a,desc_a,novr,desc_ov,info, extype) use psb_base_mod, psb_protect_name => psb_scdbldext use psi_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index cfa316eb..110097c5 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_sspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_s_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... 
type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,7 +177,49 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) end if - + if (bld_and_) then +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_s_coo_sparse_mat) :: acoo +!!$ type(psb_s_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_s_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) 
then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_ssphalo.F90 b/base/tools/psb_ssphalo.F90 index be0b340a..81e9616f 100644 --- a/base/tools/psb_ssphalo.F90 +++ b/base/tools/psb_ssphalo.F90 @@ -74,11 +74,11 @@ Subroutine psb_ssphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_ssphalo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -100,7 +100,7 @@ Subroutine psb_ssphalo(a,desc_a,blk,info,rowcnv,colcnv,& integer(psb_mpk_) :: icomm, minfo integer(psb_mpk_), allocatable :: brvindx(:), & & rvsz(:), bsdindx(:),sdsz(:) -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! If globals are 8 bytes but locals are 4, things get tricky integer(psb_ipk_), allocatable :: liasnd(:), ljasnd(:) integer(psb_lpk_), allocatable :: iasnd(:), jasnd(:), iarcv(:), jarcv(:) @@ -268,7 +268,7 @@ Subroutine psb_ssphalo(a,desc_a,blk,info,rowcnv,colcnv,& call psb_ensure_size(max(iszs,1),iasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),jasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),valsnd,info) -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! 
If globals are 8 bytes but locals are not, things get tricky if (info == psb_success_) call psb_ensure_size(max(iszs,1),liasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),ljasnd,info) @@ -540,11 +540,11 @@ Subroutine psb_lssphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_lssphalo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -883,11 +883,11 @@ Subroutine psb_ls_csr_halo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,data,outcol_glob,col_desc) use psb_base_mod, psb_protect_name => psb_ls_csr_halo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -1243,11 +1243,11 @@ Subroutine psb_s_ls_csr_halo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,data,outcol_glob,col_desc) use psb_base_mod, psb_protect_name => psb_s_ls_csr_halo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_sspins.F90 b/base/tools/psb_sspins.F90 index 39e4ad79..377c6e23 100644 --- a/base/tools/psb_sspins.F90 +++ b/base/tools/psb_sspins.F90 @@ -51,7 +51,7 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_sspins use psi_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -78,6 +78,9 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_lpk_), allocatable :: lila(:),ljla(:) real(psb_spk_), allocatable :: lval(:) character(len=20) :: name + logical, parameter :: do_timings=.false. 
+ integer(psb_ipk_), save :: ins_phase1=-1, ins_phase2=-1, ins_phase3=-1, ins_phase4=-1 + integer(psb_ipk_), save :: ins_phase11=-1, ins_phase12=-1 info = psb_success_ name = 'psb_sspins' @@ -120,6 +123,19 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) else local_ = .false. endif + if ((do_timings).and.(ins_phase1==-1)) & + & ins_phase1 = psb_get_timer_idx("SPINS: and send ") + if ((do_timings).and.(ins_phase2==-1)) & + & ins_phase2 = psb_get_timer_idx("SPINS: and cmp ad") + if ((do_timings).and.(ins_phase3==-1)) & + & ins_phase3 = psb_get_timer_idx("SPINS: and rcv") + if ((do_timings).and.(ins_phase4==-1)) & + & ins_phase4 = psb_get_timer_idx("SPINS: and cmp and") + if ((do_timings).and.(ins_phase11==-1)) & + & ins_phase11 = psb_get_timer_idx("SPINS: noand exch ") + if ((do_timings).and.(ins_phase12==-1)) & + & ins_phase12 = psb_get_timer_idx("SPINS: noand cmp") + if (desc_a%is_bld()) then @@ -134,11 +150,11 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) block - logical :: is_in_parallel + logical :: is_in_parallel is_in_parallel = omp_in_parallel() - if (is_in_parallel) then + if (.false..and.is_in_parallel) then !$omp parallel private(ila,jla,nrow,ncol,nnl,k) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) 
!$omp critical(spins) @@ -148,7 +164,7 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins 1',i_err=(/info/)) goto 9998 end if nrow = desc_a%get_local_rows() @@ -189,22 +205,25 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after csput',psb_errstatus_fatal() !$omp end parallel else + !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) !write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal() if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info - if (info /= psb_success_) then + if ((info /= psb_success_).or.psb_errstatus_fatal()) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins 2 ',i_err=(/info/)) goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() !write(0,*) me,' Before csput',psb_errstatus_fatal() - if (a%is_bld()) then + if (a%is_bld()) then + !write(0,*) me,' before csput ',psb_errstatus_fatal(),info,nz call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) - if (info /= psb_success_) then + !write(0,*) me,' after csput ',psb_errstatus_fatal(),info,nz + if ((info /= psb_success_).or.psb_errstatus_fatal()) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') goto 9999 @@ -237,9 +256,17 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) end if end block #else + if (do_timings) call psb_tic(ins_phase1) !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) 
+ if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='g2l',i_err=(/info/)) + goto 9999 + end if + if (do_timings) call psb_toc(ins_phase1) + if (do_timings) call psb_tic(ins_phase2) if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) @@ -247,20 +274,25 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins',i_err=(/info/)) goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() + if (do_timings) call psb_toc(ins_phase2) + !write(0,*) me,' Before csput',psb_errstatus_fatal() if (a%is_bld()) then + if (do_timings) call psb_tic(ins_phase3) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') goto 9999 end if - + if (do_timings) call psb_toc(ins_phase3) + if (do_timings) call psb_tic(ins_phase4) + if (a%is_remote_build()) then nnl = count(ila(1:nz)<0) if (nnl > 0) then @@ -279,7 +311,8 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & 1_psb_lpk_,desc_a%get_global_rows(),info) end if end if - + if (do_timings) call psb_toc(ins_phase4) + else info = psb_err_invalid_a_and_cd_state_ call psb_errpush(info,name) @@ -287,6 +320,12 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) end if #endif if (info /= 0) goto 9999 + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='unknown',i_err=(/info/)) + goto 9999 + end if endif else if (desc_a%is_asb()) then @@ -299,16 +338,16 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel 
private(ila,jla,nrow,ncol,nnl,k) #endif if (local_) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp workshare #endif ila(1:nz) = ia(1:nz) jla(1:nz) = ja(1:nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp end workshare #endif else @@ -341,7 +380,7 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & 1_psb_lpk_,desc_a%get_global_rows(),info) end if end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp end parallel #endif @@ -458,7 +497,7 @@ subroutine psb_sspins_csr_lirp(nr,irp,ja,val,irw,a,desc_a,info,rebuild,local) end subroutine psb_sspins_csr_lirp -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_sspins_csr_iirp(nr,irw,irp,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_sspins_csr_iirp use psi_mod diff --git a/base/tools/psb_z_glob_transpose.F90 b/base/tools/psb_z_glob_transpose.F90 index 9bc92da3..df86635b 100644 --- a/base/tools/psb_z_glob_transpose.F90 +++ b/base/tools/psb_z_glob_transpose.F90 @@ -94,12 +94,12 @@ ! ! 
subroutine psb_lz_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_base_mod, psb_protect_name => psb_lz_coo_glob_transpose Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_lz_coo_sparse_mat), intent(inout) :: ain @@ -392,12 +392,12 @@ subroutine psb_lz_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) end subroutine psb_lz_coo_glob_transpose subroutine psb_z_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif use psb_base_mod, psb_protect_name => psb_z_coo_glob_transpose Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif type(psb_z_coo_sparse_mat), intent(inout) :: ain diff --git a/base/tools/psb_z_par_csr_spspmm.f90 b/base/tools/psb_z_par_csr_spspmm.f90 index 4b88ffab..5ccf58d3 100644 --- a/base/tools/psb_z_par_csr_spspmm.f90 +++ b/base/tools/psb_z_par_csr_spspmm.f90 @@ -62,7 +62,11 @@ ! Error code. ! Subroutine psb_z_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) - use psb_base_mod, psb_protect_name => psb_z_par_csr_spspmm + use psb_mat_mod + use psb_comm_mod + use psb_penv_mod + use psb_z_tools_mod, psb_protect_name => psb_z_par_csr_spspmm + use psb_z_serial_mod, only : psb_zcsrspspmm, psb_zbase_rwextd Implicit None type(psb_z_csr_sparse_mat),intent(in) :: acsr @@ -132,7 +136,7 @@ Subroutine psb_z_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) call desc_c%indxmap%g2lip(ltcsr%ja(1:nnz),info) end if call ltcsr%mv_to_ifmt(tcsr,info) - if (info == psb_success_) call psb_rwextd(ncol,bcsr,info,b=tcsr) + if (info == psb_success_) call psb_zbase_rwextd(ncol,bcsr,info,b=tcsr) if (info == psb_success_) call tcsr%free() if(info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') @@ -146,7 +150,7 @@ Subroutine psb_z_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) & 'starting spspmm 3' if (debug_level >= psb_debug_outer_) 
write(debug_unit,*) me,' ',trim(name),& & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() - call psb_spspmm(acsr,bcsr,ccsr,info) + call psb_zcsrspspmm(acsr,bcsr,ccsr,info) call psb_erractionrestore(err_act) return @@ -158,7 +162,11 @@ Subroutine psb_z_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) End Subroutine psb_z_par_csr_spspmm Subroutine psb_lz_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) - use psb_base_mod, psb_protect_name => psb_lz_par_csr_spspmm + use psb_mat_mod + use psb_comm_mod + use psb_penv_mod + use psb_z_tools_mod, psb_protect_name => psb_lz_par_csr_spspmm + use psb_z_serial_mod, only : psb_lzcsrspspmm, psb_lzbase_rwextd Implicit None type(psb_lz_csr_sparse_mat),intent(in) :: acsr @@ -226,7 +234,7 @@ Subroutine psb_lz_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) else call desc_c%indxmap%g2lip(tcsr1%ja(1:nnz),info) end if - if (info == psb_success_) call psb_rwextd(nacol,bcsr,info,b=tcsr1) + if (info == psb_success_) call psb_lzbase_rwextd(nacol,bcsr,info,b=tcsr1) if (info == psb_success_) call tcsr1%free() if(info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='Extend am3') @@ -241,7 +249,7 @@ Subroutine psb_lz_par_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) & 'starting spspmm 3' if (debug_level >= psb_debug_outer_) write(debug_unit,*) me,' ',trim(name),& & 'starting spspmm ',acsr%get_nrows(),acsr%get_ncols(),bcsr%get_nrows(),bcsr%get_ncols() - call psb_spspmm(acsr,bcsr,ccsr,info) + call psb_lzcsrspspmm(acsr,bcsr,ccsr,info) call psb_erractionrestore(err_act) return diff --git a/base/tools/psb_z_remap.F90 b/base/tools/psb_z_remap.F90 index f9c5c39c..661ae3cc 100644 --- a/base/tools/psb_z_remap.F90 +++ b/base/tools/psb_z_remap.F90 @@ -55,10 +55,12 @@ subroutine psb_z_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ! 
locals type(psb_ctxt_type) :: ctxt, newctxt - integer(psb_ipk_) :: np, me, err_act + integer(psb_mpk_) :: np, me, nrm, mipd, i + integer(psb_ipk_) :: err_act integer(psb_ipk_) :: rnp, rme - integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc - integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) + integer(psb_ipk_) :: ipdest, id1, id2, imd, nsrc + integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:) + integer(psb_mpk_), allocatable :: ids(:), misrc(:) type(psb_lz_coo_sparse_mat) :: acoo_snd, acoo_rcv integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name @@ -84,28 +86,29 @@ subroutine psb_z_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & endif !!$ write(0,*) ' Remapping from ',np,' onto ', np_remap - + mipd = ipd if (desc_in%get_fmt() == 'BLOCK') then ! ! Should we spread the processes in the new context, ! or should we keep them close? ! - if (.true.) then - allocate(ids(0:np_remap-1)) - if (np_remap <= np/2) then + if (.true.) then + nrm = np_remap + allocate(ids(0:nrm-1)) + if (nrm <= np/2) then ids(0) = 0 - do ipdest=1,np_remap -1 - ids(ipdest) = ids(ipdest-1) + np/np_remap + do ipdest=1,nrm -1 + ids(ipdest) = ids(ipdest-1) + np/nrm end do !!$ write(0,*) ' IDS ',ids(:) else - do ipdest = 0, np_remap-1 + do ipdest = 0, nrm-1 ids(ipdest) = ipdest end do end if - call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids) + call psb_init(newctxt,np=nrm,basectxt=ctxt,ids=ids) else - call psb_init(newctxt,np=np_remap,basectxt=ctxt) + call psb_init(newctxt,np=nrm,basectxt=ctxt) end if call psb_info(newctxt,rme,rnp) @@ -140,12 +143,12 @@ subroutine psb_z_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ipdest = ( ((me-imd*id1)/id2) + imd) end if if (allocated(ids)) then - ipd = ids(ipdest) + mipd = ids(ipdest) else - ipd = ipdest + mipd = ipdest end if !!$ write(0,*) ' Sending my data from ',me,' to ', & -!!$ & ipd, 'out of ',rnp,rnp-1 +!!$ & mipd, 'out of ',rnp,rnp-1 ! ! 
Compute local rows for all new @@ -158,13 +161,14 @@ subroutine psb_z_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & if (rme>=0) then ! if (rme < imd) then - isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] + misrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ] else - isrc = [ (i, i= imd*id1+((rme-imd))*id2,& + misrc = [ (i, i= imd*id1+((rme-imd))*id2,& & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ] end if -!!$ write(0,*) me,rme,imd,' ISRC: ',isrc(:) - nsrc = size(isrc) +!!$ write(0,*) me,rme,imd,' ISRC: ',misrc(:) + isrc = misrc + nsrc = size(misrc) !!$ write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),& !!$ & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows() else @@ -187,24 +191,24 @@ subroutine psb_z_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & integer(psb_ipk_) :: nrl, ncl, nzl, nzp call a_in%cp_to(acoo_snd) nzsnd = acoo_snd%get_nzeros() - call psb_snd(ctxt,nzsnd,ipd) - call psb_snd(ctxt,desc_in%get_local_rows(),ipd) + call psb_snd(ctxt,nzsnd,mipd) + call psb_snd(ctxt,desc_in%get_local_rows(),mipd) ! Convert to global numbering call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info) call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info) - call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd) - call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd) - call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd) + call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),mipd) + call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),mipd) + call psb_snd(ctxt,acoo_snd%val(1:nzsnd),mipd) if (rme>=0) then ! 
prepare to receive - nzsrc = isrc - nrsrc = isrc + nzsrc = misrc + nrsrc = misrc nzl = 0 do ip=1, nsrc - call psb_rcv(ctxt,nzsrc(ip),isrc(ip)) - call psb_rcv(ctxt,nrsrc(ip),isrc(ip)) + call psb_rcv(ctxt,nzsrc(ip),misrc(ip)) + call psb_rcv(ctxt,nrsrc(ip),misrc(ip)) nzl = nzl + nzsrc(ip) end do !!$ write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc) @@ -213,9 +217,9 @@ subroutine psb_z_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, & ncl = acoo_rcv%get_ncols() nzp = 0 do ip=1, nsrc - call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip)) - call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip)) - call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),misrc(ip)) + call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),misrc(ip)) + call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),misrc(ip)) nzp = nzp + nzsrc(ip) end do call acoo_rcv%set_nzeros(nzp) diff --git a/base/tools/psb_z_remote_mat.F90 b/base/tools/psb_z_remote_mat.F90 index 5461c5d5..a9dc1721 100644 --- a/base/tools/psb_z_remote_mat.F90 +++ b/base/tools/psb_z_remote_mat.F90 @@ -73,11 +73,11 @@ Subroutine psb_lz_remote_mat(a,desc_a,b,info) use psb_base_mod, psb_protect_name => psb_lz_remote_mat -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_z_remote_vect.F90 b/base/tools/psb_z_remote_vect.F90 index ed705bb5..9670598a 100644 --- a/base/tools/psb_z_remote_vect.F90 +++ b/base/tools/psb_z_remote_vect.F90 @@ -66,11 +66,11 @@ subroutine psb_z_remote_vect(n,v,iv,desc_a,x,ix, info) use psb_base_mod, psb_protect_name => psb_z_remote_vect -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif integer(psb_ipk_), intent(in) :: n diff --git a/base/tools/psb_zallc.f90 b/base/tools/psb_zallc.f90 index be4d9089..b43e57ca 100644 --- a/base/tools/psb_zallc.f90 +++ 
b/base/tools/psb_zallc.f90 @@ -116,7 +116,7 @@ subroutine psb_zalloc_vect(x, desc_a,info, dupl, bldmode) end if call x%set_dupl(dupl_) call x%set_remote_build(bldmode_) - call x%set_nrmv(0) + call x%set_nrmv(izero) if (x%is_remote_build()) then nrmt_ = max(100,(desc_a%get_local_cols()-desc_a%get_local_rows())) call psb_ensure_size(nrmt_,x%rmtv,info) diff --git a/base/tools/psb_zcdbldext.F90 b/base/tools/psb_zcdbldext.F90 index 32d0b51a..29b93c5b 100644 --- a/base/tools/psb_zcdbldext.F90 +++ b/base/tools/psb_zcdbldext.F90 @@ -64,11 +64,11 @@ Subroutine psb_zcdbldext(a,desc_a,novr,desc_ov,info, extype) use psb_base_mod, psb_protect_name => psb_zcdbldext use psi_mod -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index aeeef94d..2cb53368 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_zspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_z_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... 
type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,7 +177,49 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) end if - + if (bld_and_) then +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_z_coo_sparse_mat) :: acoo +!!$ type(psb_z_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_z_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) 
then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_zsphalo.F90 b/base/tools/psb_zsphalo.F90 index 5e24a93c..6d814b39 100644 --- a/base/tools/psb_zsphalo.F90 +++ b/base/tools/psb_zsphalo.F90 @@ -74,11 +74,11 @@ Subroutine psb_zsphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_zsphalo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -100,7 +100,7 @@ Subroutine psb_zsphalo(a,desc_a,blk,info,rowcnv,colcnv,& integer(psb_mpk_) :: icomm, minfo integer(psb_mpk_), allocatable :: brvindx(:), & & rvsz(:), bsdindx(:),sdsz(:) -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! If globals are 8 bytes but locals are 4, things get tricky integer(psb_ipk_), allocatable :: liasnd(:), ljasnd(:) integer(psb_lpk_), allocatable :: iasnd(:), jasnd(:), iarcv(:), jarcv(:) @@ -268,7 +268,7 @@ Subroutine psb_zsphalo(a,desc_a,blk,info,rowcnv,colcnv,& call psb_ensure_size(max(iszs,1),iasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),jasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),valsnd,info) -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) ! 
If globals are 8 bytes but locals are not, things get tricky if (info == psb_success_) call psb_ensure_size(max(iszs,1),liasnd,info) if (info == psb_success_) call psb_ensure_size(max(iszs,1),ljasnd,info) @@ -540,11 +540,11 @@ Subroutine psb_lzsphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_lzsphalo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -883,11 +883,11 @@ Subroutine psb_lz_csr_halo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,data,outcol_glob,col_desc) use psb_base_mod, psb_protect_name => psb_lz_csr_halo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif @@ -1243,11 +1243,11 @@ Subroutine psb_z_lz_csr_halo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,data,outcol_glob,col_desc) use psb_base_mod, psb_protect_name => psb_z_lz_csr_halo -#ifdef MPI_MOD +#ifdef PSB_MPI_MOD use mpi #endif Implicit None -#ifdef MPI_H +#ifdef PSB_MPI_H include 'mpif.h' #endif diff --git a/base/tools/psb_zspins.F90 b/base/tools/psb_zspins.F90 index 0c0ff91f..f5181030 100644 --- a/base/tools/psb_zspins.F90 +++ b/base/tools/psb_zspins.F90 @@ -51,7 +51,7 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_zspins use psi_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -78,6 +78,9 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) integer(psb_lpk_), allocatable :: lila(:),ljla(:) complex(psb_dpk_), allocatable :: lval(:) character(len=20) :: name + logical, parameter :: do_timings=.false. 
+ integer(psb_ipk_), save :: ins_phase1=-1, ins_phase2=-1, ins_phase3=-1, ins_phase4=-1 + integer(psb_ipk_), save :: ins_phase11=-1, ins_phase12=-1 info = psb_success_ name = 'psb_zspins' @@ -120,6 +123,19 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) else local_ = .false. endif + if ((do_timings).and.(ins_phase1==-1)) & + & ins_phase1 = psb_get_timer_idx("SPINS: and send ") + if ((do_timings).and.(ins_phase2==-1)) & + & ins_phase2 = psb_get_timer_idx("SPINS: and cmp ad") + if ((do_timings).and.(ins_phase3==-1)) & + & ins_phase3 = psb_get_timer_idx("SPINS: and rcv") + if ((do_timings).and.(ins_phase4==-1)) & + & ins_phase4 = psb_get_timer_idx("SPINS: and cmp and") + if ((do_timings).and.(ins_phase11==-1)) & + & ins_phase11 = psb_get_timer_idx("SPINS: noand exch ") + if ((do_timings).and.(ins_phase12==-1)) & + & ins_phase12 = psb_get_timer_idx("SPINS: noand cmp") + if (desc_a%is_bld()) then @@ -134,11 +150,11 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) block - logical :: is_in_parallel + logical :: is_in_parallel is_in_parallel = omp_in_parallel() - if (is_in_parallel) then + if (.false..and.is_in_parallel) then !$omp parallel private(ila,jla,nrow,ncol,nnl,k) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) 
!$omp critical(spins) @@ -148,7 +164,7 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins 1',i_err=(/info/)) goto 9998 end if nrow = desc_a%get_local_rows() @@ -189,22 +205,25 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after csput',psb_errstatus_fatal() !$omp end parallel else + !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) !write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal() if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info - if (info /= psb_success_) then + if ((info /= psb_success_).or.psb_errstatus_fatal()) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins 2 ',i_err=(/info/)) goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() !write(0,*) me,' Before csput',psb_errstatus_fatal() - if (a%is_bld()) then + if (a%is_bld()) then + !write(0,*) me,' before csput ',psb_errstatus_fatal(),info,nz call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) - if (info /= psb_success_) then + !write(0,*) me,' after csput ',psb_errstatus_fatal(),info,nz + if ((info /= psb_success_).or.psb_errstatus_fatal()) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') goto 9999 @@ -237,9 +256,17 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) end if end block #else + if (do_timings) call psb_tic(ins_phase1) !write(0,*) me,' Before g2l ',psb_errstatus_fatal() call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) 
+ if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='g2l',i_err=(/info/)) + goto 9999 + end if + if (do_timings) call psb_toc(ins_phase1) + if (do_timings) call psb_tic(ins_phase2) if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& & mask=(ila(1:nz)>0)) @@ -247,20 +274,25 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) !write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info if (info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_ai_,name,& - & a_err='psb_cdins',i_err=(/info/)) + & a_err='g2l_ins',i_err=(/info/)) goto 9999 end if nrow = desc_a%get_local_rows() ncol = desc_a%get_local_cols() + if (do_timings) call psb_toc(ins_phase2) + !write(0,*) me,' Before csput',psb_errstatus_fatal() if (a%is_bld()) then + if (do_timings) call psb_tic(ins_phase3) call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='a%csput') goto 9999 end if - + if (do_timings) call psb_toc(ins_phase3) + if (do_timings) call psb_tic(ins_phase4) + if (a%is_remote_build()) then nnl = count(ila(1:nz)<0) if (nnl > 0) then @@ -279,7 +311,8 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & 1_psb_lpk_,desc_a%get_global_rows(),info) end if end if - + if (do_timings) call psb_toc(ins_phase4) + else info = psb_err_invalid_a_and_cd_state_ call psb_errpush(info,name) @@ -287,6 +320,12 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) end if #endif if (info /= 0) goto 9999 + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_ + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='unknown',i_err=(/info/)) + goto 9999 + end if endif else if (desc_a%is_asb()) then @@ -299,16 +338,16 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & a_err='allocate',i_err=(/info/)) goto 9999 end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp parallel 
private(ila,jla,nrow,ncol,nnl,k) #endif if (local_) then -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp workshare #endif ila(1:nz) = ia(1:nz) jla(1:nz) = ja(1:nz) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp end workshare #endif else @@ -341,7 +380,7 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) & 1_psb_lpk_,desc_a%get_global_rows(),info) end if end if -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$omp end parallel #endif @@ -458,7 +497,7 @@ subroutine psb_zspins_csr_lirp(nr,irp,ja,val,irw,a,desc_a,info,rebuild,local) end subroutine psb_zspins_csr_lirp -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine psb_zspins_csr_iirp(nr,irw,irp,ja,val,a,desc_a,info,rebuild,local) use psb_base_mod, psb_protect_name => psb_zspins_csr_iirp use psi_mod diff --git a/cbind/CMakeLists.txt b/cbind/CMakeLists.txt new file mode 100644 index 00000000..28497625 --- /dev/null +++ b/cbind/CMakeLists.txt @@ -0,0 +1,95 @@ +set(PSB_cbind_source_files + base/psb_d_tools_cbind_mod.F90 + base/psb_s_tools_cbind_mod.F90 + base/psb_d_psblas_cbind_mod.f90 + base/psb_objhandle_mod.F90 + base/psb_base_psblas_cbind_mod.f90 + base/psb_z_psblas_cbind_mod.f90 + base/psb_c_comm_cbind_mod.f90 + base/psb_z_serial_cbind_mod.F90 + base/psb_d_serial_cbind_mod.F90 + base/psb_c_tools_cbind_mod.F90 + base/psb_c_serial_cbind_mod.F90 + base/psb_base_string_cbind_mod.f90 + base/psb_base_tools_cbind_mod.F90 + base/psb_z_comm_cbind_mod.f90 + base/psb_s_serial_cbind_mod.F90 + base/psb_base_cbind_mod.f90 + base/psb_s_comm_cbind_mod.f90 + base/psb_s_psblas_cbind_mod.f90 + base/psb_c_psblas_cbind_mod.f90 + base/psb_d_comm_cbind_mod.f90 + base/psb_z_tools_cbind_mod.F90 + base/psb_cpenv_mod.f90 + util/psb_c_util_cbind_mod.f90 + util/psb_s_util_cbind_mod.f90 + util/psb_util_cbind_mod.f90 + util/psb_d_util_cbind_mod.f90 + util/psb_z_util_cbind_mod.f90 + krylov/psb_ckrylov_cbind_mod.f90 + krylov/psb_base_krylov_cbind_mod.f90 + 
krylov/psb_skrylov_cbind_mod.f90 + krylov/psb_dkrylov_cbind_mod.f90 + krylov/psb_zkrylov_cbind_mod.f90 + prec/psb_dprec_cbind_mod.f90 + prec/psb_cprec_cbind_mod.f90 + prec/psb_prec_cbind_mod.f90 + prec/psb_sprec_cbind_mod.f90 + prec/psb_zprec_cbind_mod.f90 +) +foreach(file IN LISTS PSB_cbind_source_files) + list(APPEND cbind_source_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() + +list(APPEND PSB_cbind_source_C_files + base/psb_c_dcomm.c + base/psb_c_scomm.c + base/psb_c_zcomm.c + base/psb_c_ccomm.c + base/psb_c_dbase.c + base/psb_c_base.c + base/psb_c_zbase.c + base/psb_c_cbase.c + base/psb_c_sbase.c + prec/psb_c_dprec.c + prec/psb_c_cprec.c + prec/psb_c_zprec.c + prec/psb_c_sprec.c + test/pdegen/pdegen3dc.c + + ) + +list(APPEND PSB_cbind_header_C_files + base/psb_c_sbase.h + base/psb_c_base.h + base/psb_c_dcomm.h + base/psb_c_dbase.h + base/psb_c_scomm.h + base/psb_c_ccomm.h + base/psb_base_cbind.h + base/psb_c_cbase.h + base/psb_c_zbase.h + base/psb_c_zcomm.h + util/psb_c_zutil.h + util/psb_c_dutil.h + util/psb_c_sutil.h + util/psb_c_cutil.h + util/psb_util_cbind.h + krylov/psb_krylov_cbind.h + prec/psb_c_sprec.h + prec/psb_c_cprec.h + prec/psb_prec_cbind.h + prec/psb_c_dprec.h + prec/psb_c_zprec.h +) + +#if (SERIAL_MPI) +# list(APPEND PSB_base_source_C_files modules/fakempi.c) +#endif() + +foreach(file IN LISTS PSB_cbind_source_C_files) + list(APPEND cbind_source_C_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() +foreach(file IN LISTS PSB_cbind_header_C_files) + list(APPEND cbind_header_C_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() diff --git a/cbind/Makefile b/cbind/Makefile index acc82fa9..9724c711 100644 --- a/cbind/Makefile +++ b/cbind/Makefile @@ -33,5 +33,5 @@ clean: cd util && $(MAKE) clean veryclean: clean - cd test/pargen && $(MAKE) clean + cd test/pdegen && $(MAKE) clean /bin/rm -f $(HERE)/$(LIBNAME) $(LIBMOD) *$(.mod) *.h diff --git a/cbind/base/psb_c_base.h b/cbind/base/psb_c_base.h index febb2ad3..708a0e0f 100644 --- 
a/cbind/base/psb_c_base.h +++ b/cbind/base/psb_c_base.h @@ -1,4 +1,3 @@ - #ifndef PSB_C_BASE__ #define PSB_C_BASE__ #ifdef __cplusplus @@ -7,42 +6,19 @@ extern "C" { #endif #include +#ifdef __cplusplus +#include +#else #include +#endif #include #include #include #include #include - - typedef int32_t psb_m_t; - -#if defined(IPK4) && defined(LPK4) - typedef int32_t psb_i_t; - typedef int32_t psb_l_t; -#elif defined(IPK4) && defined(LPK8) - typedef int32_t psb_i_t; - typedef int64_t psb_l_t; -#elif defined(IPK8) && defined(LPK8) - typedef int64_t psb_i_t; - typedef int64_t psb_l_t; -#else -#endif - typedef int64_t psb_e_t; - - typedef float psb_s_t; - typedef double psb_d_t; - -#ifdef __cplusplus - typedef std::complex psb_c_t; - typedef std::complex psb_z_t; -#else - typedef float complex psb_c_t; - typedef float complex psb_z_t; -#endif - -#define PSB_ERR_ERROR -1 -#define PSB_ERR_SUCCESS 0 +#include "psb_config.h" +#include "psb_types.h" typedef struct PSB_C_DESCRIPTOR { @@ -68,6 +44,7 @@ extern "C" { /* Environment routines */ void psb_c_init(psb_c_ctxt *cctxt); + void psb_c_init_from_fint(psb_c_ctxt *cctxt, psb_i_t f_comm); void psb_c_exit(psb_c_ctxt cctxt); void psb_c_exit_ctxt(psb_c_ctxt cctxt); void psb_c_abort(psb_c_ctxt cctxt); diff --git a/cbind/base/psb_c_tools_cbind_mod.F90 b/cbind/base/psb_c_tools_cbind_mod.F90 index 8f64cbb0..b7895de2 100644 --- a/cbind/base/psb_c_tools_cbind_mod.F90 +++ b/cbind/base/psb_c_tools_cbind_mod.F90 @@ -292,7 +292,7 @@ contains function psb_c_cspasb_opt(mh,cdh,afmt,upd) bind(c) result(res) -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB use psb_c_rsb_mat_mod #endif implicit none @@ -301,7 +301,7 @@ contains character(c_char) :: afmt(*) integer(psb_c_ipk_) :: info,n character(len=5) :: fafmt -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB type(psb_c_rsb_sparse_mat) :: arsb #endif @@ -313,7 +313,7 @@ contains call stringc2f(afmt,fafmt) select case(fafmt) -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB case('RSB') call 
psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& & upd=upd,mold=arsb) diff --git a/cbind/base/psb_cpenv_mod.f90 b/cbind/base/psb_cpenv_mod.f90 index e0e7b8a2..6bc67bfb 100644 --- a/cbind/base/psb_cpenv_mod.f90 +++ b/cbind/base/psb_cpenv_mod.f90 @@ -50,6 +50,28 @@ contains end subroutine psb_c_init + ! Get MPI_Fint from C, psb_c_object_type and start a psb_ctxt_type + ! context from it. + subroutine psb_c_init_from_fint(cctxt,fint) bind(c) + use psb_base_mod, only : psb_init, psb_ctxt_type + implicit none + + type(psb_c_object_type) :: cctxt + integer(psb_c_mpk_), value :: fint + type(psb_ctxt_type), pointer :: ctxt + integer :: info + + ! Local variables + integer(psb_mpk_) :: fmctxt + + allocate(ctxt,stat=info) + if (info /= 0) return + fmctxt = fint + call psb_init(ctxt,extcomm=fmctxt) + cctxt%item = c_loc(ctxt) + + end subroutine psb_c_init_from_fint + function psb_c2f_ctxt(cctxt) result(res) implicit none type(psb_c_object_type), value :: cctxt @@ -70,12 +92,13 @@ contains integer(psb_c_ipk_) :: info ! 
Local variables + integer(psb_c_mpk_) :: mctxt type(psb_ctxt_type), pointer :: ctxt ctxt => psb_c2f_ctxt(cctxt) - call ctxt%get_i_ctxt(ictxt,info) - + call ctxt%get_i_ctxt(mctxt,info) + ictxt = mctxt end subroutine function psb_c_cmp_ctxt(cctxt1, cctxt2) bind(c,name="psb_c_cmp_ctxt") result(res) @@ -177,6 +200,7 @@ contains type(psb_c_object_type), value :: cctxt integer(psb_c_ipk_), value :: n, root integer(psb_c_mpk_) :: v(*) + integer(psb_c_mpk_) :: mroot type(psb_ctxt_type), pointer :: ctxt ctxt => psb_c2f_ctxt(cctxt) @@ -186,8 +210,9 @@ contains return end if if (n==0) return + mroot=root - call psb_bcast(ctxt,v(1:n),root=root) + call psb_bcast(ctxt,v(1:n),root=mroot) end subroutine psb_c_mbcast subroutine psb_c_ibcast(cctxt,n,v,root) bind(c) @@ -197,6 +222,7 @@ contains integer(psb_c_ipk_), value :: n, root integer(psb_c_ipk_) :: v(*) type(psb_ctxt_type), pointer :: ctxt + integer(psb_c_mpk_) :: mroot ctxt => psb_c2f_ctxt(cctxt) @@ -205,8 +231,9 @@ contains return end if if (n==0) return + mroot=root - call psb_bcast(ctxt,v(1:n),root=root) + call psb_bcast(ctxt,v(1:n),root=mroot) end subroutine psb_c_ibcast subroutine psb_c_lbcast(cctxt,n,v,root) bind(c) @@ -216,6 +243,7 @@ contains integer(psb_c_ipk_), value :: n, root integer(psb_c_lpk_) :: v(*) type(psb_ctxt_type), pointer :: ctxt + integer(psb_c_mpk_) :: mroot ctxt => psb_c2f_ctxt(cctxt) if (n < 0) then @@ -223,8 +251,9 @@ contains return end if if (n==0) return + mroot=root - call psb_bcast(ctxt,v(1:n),root=root) + call psb_bcast(ctxt,v(1:n),root=mroot) end subroutine psb_c_lbcast subroutine psb_c_ebcast(cctxt,n,v,root) bind(c) @@ -234,6 +263,7 @@ contains integer(psb_c_ipk_), value :: n, root integer(psb_c_epk_) :: v(*) type(psb_ctxt_type), pointer :: ctxt + integer(psb_c_mpk_) :: mroot ctxt => psb_c2f_ctxt(cctxt) if (n < 0) then @@ -241,8 +271,9 @@ contains return end if if (n==0) return + mroot=root - call psb_bcast(ctxt,v(1:n),root=root) + call psb_bcast(ctxt,v(1:n),root=mroot) end subroutine 
psb_c_ebcast subroutine psb_c_sbcast(cctxt,n,v,root) bind(c) @@ -252,6 +283,7 @@ contains integer(psb_c_ipk_), value :: n, root real(c_float) :: v(*) type(psb_ctxt_type), pointer :: ctxt + integer(psb_c_mpk_) :: mroot ctxt => psb_c2f_ctxt(cctxt) if (n < 0) then @@ -259,8 +291,9 @@ contains return end if if (n==0) return + mroot=root - call psb_bcast(ctxt,v(1:n),root=root) + call psb_bcast(ctxt,v(1:n),root=mroot) end subroutine psb_c_sbcast subroutine psb_c_dbcast(cctxt,n,v,root) bind(c) @@ -270,6 +303,7 @@ contains integer(psb_c_ipk_), value :: n, root real(c_double) :: v(*) type(psb_ctxt_type), pointer :: ctxt + integer(psb_c_mpk_) :: mroot ctxt => psb_c2f_ctxt(cctxt) if (n < 0) then @@ -277,8 +311,9 @@ contains return end if if (n==0) return + mroot=root - call psb_bcast(ctxt,v(1:n),root=root) + call psb_bcast(ctxt,v(1:n),root=mroot) end subroutine psb_c_dbcast @@ -289,6 +324,7 @@ contains integer(psb_c_ipk_), value :: n, root complex(c_float_complex) :: v(*) type(psb_ctxt_type), pointer :: ctxt + integer(psb_c_mpk_) :: mroot ctxt => psb_c2f_ctxt(cctxt) if (n < 0) then @@ -296,8 +332,9 @@ contains return end if if (n==0) return + mroot=root - call psb_bcast(ctxt,v(1:n),root=root) + call psb_bcast(ctxt,v(1:n),root=mroot) end subroutine psb_c_cbcast subroutine psb_c_zbcast(cctxt,n,v,root) bind(c) @@ -307,6 +344,7 @@ contains integer(psb_c_ipk_), value :: n, root complex(c_double_complex) :: v(*) type(psb_ctxt_type), pointer :: ctxt + integer(psb_c_mpk_) :: mroot ctxt => psb_c2f_ctxt(cctxt) if (n < 0) then @@ -314,8 +352,9 @@ contains return end if if (n==0) return + mroot=root - call psb_bcast(ctxt,v(1:n),root=root) + call psb_bcast(ctxt,v(1:n),root=mroot) end subroutine psb_c_zbcast subroutine psb_c_hbcast(cctxt,v,root) bind(c) @@ -326,6 +365,7 @@ contains character(c_char) :: v(*) integer(psb_ipk_) :: iam, np, n type(psb_ctxt_type), pointer :: ctxt + integer(psb_c_mpk_) :: mroot ctxt => psb_c2f_ctxt(cctxt) call psb_info(ctxt,iam,np) @@ -337,8 +377,9 @@ contains n 
= n + 1 end do end if - call psb_bcast(ctxt,n,root=root) - call psb_bcast(ctxt,v(1:n),root=root) + mroot=root + call psb_bcast(ctxt,n,root=mroot) + call psb_bcast(ctxt,v(1:n),root=mroot) end subroutine psb_c_hbcast function psb_c_f2c_errmsg(cmesg,len) bind(c) result(res) diff --git a/cbind/base/psb_d_tools_cbind_mod.F90 b/cbind/base/psb_d_tools_cbind_mod.F90 index 67ae8b86..2de6990c 100644 --- a/cbind/base/psb_d_tools_cbind_mod.F90 +++ b/cbind/base/psb_d_tools_cbind_mod.F90 @@ -292,7 +292,7 @@ contains function psb_c_dspasb_opt(mh,cdh,afmt,upd) bind(c) result(res) -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB use psb_d_rsb_mat_mod #endif implicit none @@ -301,7 +301,7 @@ contains character(c_char) :: afmt(*) integer(psb_c_ipk_) :: info,n character(len=5) :: fafmt -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB type(psb_d_rsb_sparse_mat) :: arsb #endif @@ -313,7 +313,7 @@ contains call stringc2f(afmt,fafmt) select case(fafmt) -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB case('RSB') call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& & upd=upd,mold=arsb) diff --git a/cbind/base/psb_s_tools_cbind_mod.F90 b/cbind/base/psb_s_tools_cbind_mod.F90 index 91d9b322..517ad361 100644 --- a/cbind/base/psb_s_tools_cbind_mod.F90 +++ b/cbind/base/psb_s_tools_cbind_mod.F90 @@ -292,7 +292,7 @@ contains function psb_c_sspasb_opt(mh,cdh,afmt,upd) bind(c) result(res) -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB use psb_s_rsb_mat_mod #endif implicit none @@ -301,7 +301,7 @@ contains character(c_char) :: afmt(*) integer(psb_c_ipk_) :: info,n character(len=5) :: fafmt -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB type(psb_s_rsb_sparse_mat) :: arsb #endif @@ -313,7 +313,7 @@ contains call stringc2f(afmt,fafmt) select case(fafmt) -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB case('RSB') call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& & upd=upd,mold=arsb) diff --git a/cbind/base/psb_z_tools_cbind_mod.F90 b/cbind/base/psb_z_tools_cbind_mod.F90 index 
59d4cca8..3e94b715 100644 --- a/cbind/base/psb_z_tools_cbind_mod.F90 +++ b/cbind/base/psb_z_tools_cbind_mod.F90 @@ -292,7 +292,7 @@ contains function psb_c_zspasb_opt(mh,cdh,afmt,upd) bind(c) result(res) -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB use psb_z_rsb_mat_mod #endif implicit none @@ -301,7 +301,7 @@ contains character(c_char) :: afmt(*) integer(psb_c_ipk_) :: info,n character(len=5) :: fafmt -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB type(psb_z_rsb_sparse_mat) :: arsb #endif @@ -313,7 +313,7 @@ contains call stringc2f(afmt,fafmt) select case(fafmt) -#ifdef HAVE_LIBRSB +#ifdef PSB_HAVE_LIBRSB case('RSB') call psb_spasb(double_spmat_pool(mh)%item,descriptor_pool(cdh)%item,info,& & upd=upd,mold=arsb) diff --git a/cbind/krylov/psb_ckrylov_cbind_mod.f90 b/cbind/krylov/psb_ckrylov_cbind_mod.f90 index 56cd51ab..758ecd02 100644 --- a/cbind/krylov/psb_ckrylov_cbind_mod.f90 +++ b/cbind/krylov/psb_ckrylov_cbind_mod.f90 @@ -8,7 +8,7 @@ contains & ah,ph,bh,xh,cdh,options) bind(c) result(res) use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_objhandle_mod use psb_prec_cbind_mod use psb_base_string_cbind_mod @@ -33,7 +33,7 @@ contains & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res) use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_objhandle_mod use psb_prec_cbind_mod use psb_base_string_cbind_mod diff --git a/cbind/krylov/psb_dkrylov_cbind_mod.f90 b/cbind/krylov/psb_dkrylov_cbind_mod.f90 index 43b3ca8c..b1119067 100644 --- a/cbind/krylov/psb_dkrylov_cbind_mod.f90 +++ b/cbind/krylov/psb_dkrylov_cbind_mod.f90 @@ -8,7 +8,7 @@ contains & ah,ph,bh,xh,cdh,options) bind(c) result(res) use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_objhandle_mod use psb_prec_cbind_mod use psb_base_string_cbind_mod @@ -33,7 +33,7 @@ contains & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res) use psb_base_mod use psb_prec_mod - use 
psb_krylov_mod + use psb_linsolve_mod use psb_objhandle_mod use psb_prec_cbind_mod use psb_base_string_cbind_mod diff --git a/cbind/krylov/psb_skrylov_cbind_mod.f90 b/cbind/krylov/psb_skrylov_cbind_mod.f90 index 60d41d14..41bb9506 100644 --- a/cbind/krylov/psb_skrylov_cbind_mod.f90 +++ b/cbind/krylov/psb_skrylov_cbind_mod.f90 @@ -8,7 +8,7 @@ contains & ah,ph,bh,xh,cdh,options) bind(c) result(res) use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_objhandle_mod use psb_prec_cbind_mod use psb_base_string_cbind_mod @@ -33,7 +33,7 @@ contains & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res) use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_objhandle_mod use psb_prec_cbind_mod use psb_base_string_cbind_mod diff --git a/cbind/krylov/psb_zkrylov_cbind_mod.f90 b/cbind/krylov/psb_zkrylov_cbind_mod.f90 index 22e74386..37f24be7 100644 --- a/cbind/krylov/psb_zkrylov_cbind_mod.f90 +++ b/cbind/krylov/psb_zkrylov_cbind_mod.f90 @@ -8,7 +8,7 @@ contains & ah,ph,bh,xh,cdh,options) bind(c) result(res) use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_objhandle_mod use psb_prec_cbind_mod use psb_base_string_cbind_mod @@ -33,7 +33,7 @@ contains & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res) use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_objhandle_mod use psb_prec_cbind_mod use psb_base_string_cbind_mod diff --git a/cbind/test/pargen/Makefile b/cbind/test/pdegen/Makefile similarity index 50% rename from cbind/test/pargen/Makefile rename to cbind/test/pdegen/Makefile index 2e74497f..cd7c8d68 100644 --- a/cbind/test/pargen/Makefile +++ b/cbind/test/pdegen/Makefile @@ -9,7 +9,7 @@ FINCLUDES=$(FMFLAG). $(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR) CINCLUDES=-I. 
-I$(HERE) -I$(INCLUDEDIR) PSBC_LIBS= -L$(LIBDIR) -lpsb_cbind -PSB_LIBS=-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -L$(LIBDIR) +PSB_LIBS=-lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -L$(LIBDIR) # # Compilers and such @@ -17,17 +17,13 @@ PSB_LIBS=-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -L$(LIBDIR) EXEDIR=./runs -all: ppdec build +all: pdegen3dc -ppdec: ppdec.o - $(MPFC) ppdec.o -o ppdec $(PSBC_LIBS) $(PSB_LIBS) $(PSBLDLIBS) -lm -lgfortran - /bin/mv ppdec $(EXEDIR) +pdegen3dc: pdegen3dc.o + $(FLINK) pdegen3dc.o -o pdegen3dc $(PSBC_LIBS) $(PSB_LIBS)\ + $(PSBLDLIBS) -lm + /bin/mv pdegen3dc $(EXEDIR) -build: build.o - $(MPFC) build.o -o build $(PSBC_LIBS) $(PSB_LIBS) $(PSBLDLIBS) -lm -lgfortran - /bin/mv build $(EXEDIR) -# \ -# -lifcore -lifcoremt -lguide -limf -lirc -lintlc -lcxaguard -L/opt/intel/fc/10.0.023/lib/ -lm .f90.o: $(MPFC) $(FCOPT) $(FINCLUDES) $(FDEFINES) -c $< @@ -36,11 +32,11 @@ build: build.o clean: - /bin/rm -f ppdec.o build.o $(EXEDIR)/ppdec + /bin/rm -f pdegen3dc.o $(EXEDIR)/pdegen3dc verycleanlib: (cd ../..; make veryclean) lib: (cd ../../; make library) tests: all - cd runs ; ./ppdec < ppde.inp + cd runs ; ./pdegen3dc < pdegen3d.inp diff --git a/cbind/test/pargen/ppdec.c b/cbind/test/pdegen/pdegen3dc.c similarity index 100% rename from cbind/test/pargen/ppdec.c rename to cbind/test/pdegen/pdegen3dc.c diff --git a/cbind/test/pargen/runs/ppde.inp b/cbind/test/pdegen/runs/pdegen3d.inp similarity index 100% rename from cbind/test/pargen/runs/ppde.inp rename to cbind/test/pdegen/runs/pdegen3d.inp diff --git a/cbind/util/psb_util_cbind_mod.f90 b/cbind/util/psb_util_cbind_mod.f90 index 1ded8136..0322cfc4 100644 --- a/cbind/util/psb_util_cbind_mod.f90 +++ b/cbind/util/psb_util_cbind_mod.f90 @@ -18,11 +18,12 @@ contains implicit none integer(psb_c_ipk_) :: idx - integer(psb_c_ipk_), value :: modes, base + integer(psb_c_ipk_), value :: modes + integer(psb_c_mpk_), value :: base integer(psb_c_ipk_) :: ijk(modes) integer(psb_c_ipk_) :: sizes(modes) - 
integer(psb_ipk_) :: fijk(modes), fsizes(modes) + integer(psb_mpk_) :: fijk(modes), fsizes(modes) fijk(1:modes) = ijk(1:modes) fsizes(1:modes) = sizes(1:modes) @@ -37,11 +38,12 @@ contains implicit none integer(psb_c_lpk_) :: idx - integer(psb_c_ipk_), value :: modes, base + integer(psb_c_ipk_), value :: modes + integer(psb_c_mpk_), value :: base integer(psb_c_ipk_) :: ijk(modes) integer(psb_c_ipk_) :: sizes(modes) - integer(psb_ipk_) :: fijk(modes), fsizes(modes) + integer(psb_mpk_) :: fijk(modes), fsizes(modes) fijk(1:modes) = ijk(1:modes) fsizes(1:modes) = sizes(1:modes) @@ -56,15 +58,17 @@ contains integer(psb_c_ipk_) :: res integer(psb_c_ipk_), value :: idx - integer(psb_c_ipk_), value :: modes, base + integer(psb_c_ipk_), value :: modes + integer(psb_c_mpk_), value :: base integer(psb_c_ipk_) :: ijk(modes) integer(psb_c_ipk_) :: sizes(modes) - integer(psb_ipk_) :: fijk(modes), fsizes(modes) + integer(psb_mpk_) :: fijk(modes), fsizes(modes) res = -1 fsizes(1:modes) = sizes(1:modes) + call idx2ijk(fijk,idx,fsizes,base=base) ijk(1:modes) = fijk(1:modes) @@ -79,11 +83,12 @@ contains integer(psb_c_ipk_) :: res integer(psb_c_lpk_), value :: idx - integer(psb_c_ipk_), value :: modes, base + integer(psb_c_ipk_), value :: modes + integer(psb_c_mpk_), value :: base integer(psb_c_ipk_) :: ijk(modes) integer(psb_c_ipk_) :: sizes(modes) - integer(psb_ipk_) :: fijk(modes), fsizes(modes) + integer(psb_mpk_) :: fijk(modes), fsizes(modes) res = -1 diff --git a/cmake/CapitalizeString.cmake b/cmake/CapitalizeString.cmake new file mode 100644 index 00000000..ae50b474 --- /dev/null +++ b/cmake/CapitalizeString.cmake @@ -0,0 +1,7 @@ +function(CapitalizeString string output_variable) + string(TOUPPER "${string}" _upper_string) + string(TOLOWER "${string}" _lower_string) + string(SUBSTRING "${_upper_string}" 0 1 _start) + string(SUBSTRING "${_lower_string}" 1 -1 _end) + set(${output_variable} "${_start}${_end}" PARENT_SCOPE) +endfunction() diff --git 
a/cmake/CheckOutOfSourceBuild.cmake b/cmake/CheckOutOfSourceBuild.cmake new file mode 100644 index 00000000..ad176952 --- /dev/null +++ b/cmake/CheckOutOfSourceBuild.cmake @@ -0,0 +1,21 @@ +#-------------------------- +# Prohibit in-source builds +#-------------------------- +if ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}") + message(FATAL_ERROR "ERROR! " + "CMAKE_CURRENT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}" + " == CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}" + "\nThis archive does not support in-source builds:\n" + "You must now delete the CMakeCache.txt file and the CMakeFiles/ directory under " + "the 'src' source directory or you will not be able to configure correctly!" + "\nYou must now run something like:\n" + " $ rm -r CMakeCache.txt CMakeFiles/" + "\n" + "Please create a directory outside the ${CMAKE_PROJECT_NAME} source tree and build under that outside directory " + "in a manner such as\n" + " $ mkdir build\n" + " $ cd build\n" + " $ CC=gcc FC=gfortran cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/path/to/install/dir /path/to/psblas3/src/dir \n" + "\nsubstituting the appropriate syntax for your shell (the above line assumes the bash shell)."
+ ) +endif() diff --git a/cmake/FindMETIS.cmake b/cmake/FindMETIS.cmake new file mode 100644 index 00000000..21f95f57 --- /dev/null +++ b/cmake/FindMETIS.cmake @@ -0,0 +1,95 @@ +if (METIS_INCLUDES AND METIS_LIBRARIES) + set(METIS_FIND_QUIETLY TRUE) +endif (METIS_INCLUDES AND METIS_LIBRARIES) + +if( DEFINED ENV{METISDIR} ) + if( NOT DEFINED METIS_ROOT ) + set(METIS_ROOT "$ENV{METISDIR}") + endif() +endif() + +if( (DEFINED ENV{METIS_ROOT}) OR (DEFINED METIS_ROOT) ) + if( NOT DEFINED METIS_ROOT) + set(METIS_ROOT "$ENV{METIS_ROOT}") + endif() + set(METIS_HINTS "${METIS_ROOT}") +endif() + +find_path(METIS_INCLUDES + NAMES + metis.h + HINTS + ${METIS_HINTS} + PATHS + "${INCLUDE_INSTALL_DIR}" + /usr/local/opt + /usr/local + PATH_SUFFIXES + include + ) + +if(METIS_INCLUDES) + foreach(include IN_LISTS METIS_INCLUDES) + get_filename_component(mts_include_dir "${include}" DIRECTORY) + get_filename_component(mts_abs_include_dir "${mts_include_dir}" ABSOLUTE) + get_filename_component(new_mts_hint "${include}/.." ABSOLUTE ) + list(APPEND METIS_HINTS "${new_mts_hint}") + break() + endforeach() +endif() + +if(METIS_HINTS) + list(REMOVE_DUPLICATES METIS_HINTS) +endif() + +macro(_metis_check_version) + file(READ "${METIS_INCLUDES}/metis.h" _metis_version_header) + + string(REGEX MATCH "define[ \t]+METIS_VER_MAJOR[ \t]+([0-9]+)" _metis_major_version_match "${_metis_version_header}") + set(METIS_MAJOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+METIS_VER_MINOR[ \t]+([0-9]+)" _metis_minor_version_match "${_metis_version_header}") + set(METIS_MINOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+METIS_VER_SUBMINOR[ \t]+([0-9]+)" _metis_subminor_version_match "${_metis_version_header}") + set(METIS_SUBMINOR_VERSION "${CMAKE_MATCH_1}") + if(NOT METIS_MAJOR_VERSION) + message(STATUS "Could not determine Metis version.
Assuming version 4.0.0") + set(METIS_VERSION 4.0.0) + else() + set(METIS_VERSION ${METIS_MAJOR_VERSION}.${METIS_MINOR_VERSION}.${METIS_SUBMINOR_VERSION}) + endif() + if(${METIS_VERSION} VERSION_LESS ${METIS_FIND_VERSION}) + set(METIS_VERSION_OK FALSE) + else() + set(METIS_VERSION_OK TRUE) + endif() + + if(NOT METIS_VERSION_OK) + message(STATUS "Metis version ${METIS_VERSION} found in ${METIS_INCLUDES}, " + "but at least version ${METIS_FIND_VERSION} is required") + endif(NOT METIS_VERSION_OK) +endmacro(_metis_check_version) + +if(METIS_INCLUDES AND METIS_FIND_VERSION) + _metis_check_version() +else() + set(METIS_VERSION_OK TRUE) +endif() + + +find_library(METIS_LIBRARIES metis + HINTS + ${METIS_HINTS} + PATHS + "${LIB_INSTALL_DIR}" + /usr/local/ + /usr/local/opt + PATH_SUFFIXES + lib + lib64 + metis/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(METIS DEFAULT_MSG + METIS_INCLUDES METIS_LIBRARIES METIS_VERSION_OK) + +mark_as_advanced(METIS_INCLUDES METIS_LIBRARIES) diff --git a/cmake/makeDist.cmake b/cmake/makeDist.cmake new file mode 100644 index 00000000..f0b46566 --- /dev/null +++ b/cmake/makeDist.cmake @@ -0,0 +1,79 @@ +# CMake file to be called in script mode (${CMAKE_COMMAND} -P ) to +# Generate a source archive release asset from add_custom_command +# +# See SourceDistTarget.cmake + +if(NOT CMAKE_ARGV3) + message(FATAL_ERROR "Must pass the top level src dir to ${CMAKE_ARGV2} as the first argument") +endif() + +if(NOT CMAKE_ARGV4) + message(FATAL_ERROR "Must pass the top level binary dir to ${CMAKE_ARGV2} as the second argument") +endif() + +find_package(Git) +if(NOT GIT_FOUND) + message( FATAL_ERROR "You can't create a source archive release asset without git!") +endif() + +execute_process(COMMAND "${GIT_EXECUTABLE}" describe --always + RESULT_VARIABLE git_status + OUTPUT_VARIABLE git_version + WORKING_DIRECTORY "${CMAKE_ARGV3}" + OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT (git_status STREQUAL "0")) + message( FATAL_ERROR "git
describe --always failed with exit status: ${git_status} and message: +${git_version}") +endif() + +set(archive "PSBLAS-${git_version}") +set(l_archive "psblas-${git_version}") +set(release_asset "${CMAKE_ARGV4}/${archive}.tar.gz") +execute_process( + COMMAND "${GIT_EXECUTABLE}" archive "--prefix=${archive}/" -o "${release_asset}" "${git_version}" + RESULT_VARIABLE git_status + OUTPUT_VARIABLE git_output + WORKING_DIRECTORY "${CMAKE_ARGV3}" + OUTPUT_STRIP_TRAILING_WHITESPACE) + +if(NOT (git_status STREQUAL "0")) + message( FATAL_ERROR "git archive ... failed with exit status: ${git_status} and message: +${git_output}") +else() + message( STATUS "Source code release asset created from `git archive`: ${release_asset}") +endif() + +file(SHA256 "${release_asset}" tarball_sha256) +set(sha256_checksum "${tarball_sha256} ${archive}.tar.gz") +configure_file("${CMAKE_ARGV3}/cmake/psblas-VER-SHA256.txt.in" + "${CMAKE_ARGV4}/${l_archive}-SHA256.txt" + @ONLY) +message( STATUS + "SHA 256 checksum of release tarball written out as: ${CMAKE_ARGV4}/${l_archive}-SHA256.txt" ) + +find_program(GPG_EXECUTABLE + gpg + DOC "Location of GnuPG (gpg) executable") + +if(GPG_EXECUTABLE) + execute_process( + COMMAND "${GPG_EXECUTABLE}" --armor --detach-sign --comment "@gpg_comment@" "${CMAKE_ARGV4}/${l_archive}-SHA256.txt" + RESULT_VARIABLE gpg_status + OUTPUT_VARIABLE gpg_output + WORKING_DIRECTORY "${CMAKE_ARGV4}") + if(NOT (gpg_status STREQUAL "0")) + message( WARNING "GPG signing of ${CMAKE_ARGV4}/${l_archive}-SHA256.txt appears to have failed +with status: ${gpg_status} and output: ${gpg_output}") + else() + configure_file("${CMAKE_ARGV3}/cmake/psblas-VER-SHA256.txt.asc.in" + "${CMAKE_ARGV4}/${l_archive}-GPG.comment" + @ONLY) + file(READ "${CMAKE_ARGV4}/${l_archive}-GPG.comment" gpg_comment) + configure_file("${CMAKE_ARGV4}/${l_archive}-SHA256.txt.asc" + "${CMAKE_ARGV4}/${l_archive}-SHA256.txt.asc.out" + @ONLY) + file(RENAME "${CMAKE_ARGV4}/${l_archive}-SHA256.txt.asc.out" +
"${CMAKE_ARGV4}/${l_archive}-SHA256.txt.asc") + message(STATUS "GPG signed SHA256 checksum created: ${CMAKE_ARGV4}/${l_archive}-SHA256.txt.asc") + endif() +endif() diff --git a/cmake/pkg/psblasConfig.cmake.in b/cmake/pkg/psblasConfig.cmake.in new file mode 100644 index 00000000..3fe60345 --- /dev/null +++ b/cmake/pkg/psblasConfig.cmake.in @@ -0,0 +1,16 @@ +# Config file for the INSTALLED package +# Allow other CMake projects to find this package if it is installed +# Requires the use of the standard CMake module CMakePackageConfigHelpers + +set ( @CMAKE_PROJECT_NAME@_VERSION @VERSION@ ) + +###@COMPILER_CONSISTENCY_CHECK@ + +@PACKAGE_INIT@ + +# Provide the targets +set_and_check ( @PACKAGE_NAME@_CONFIG_INSTALL_DIR "@PACKAGE_EXPORT_INSTALL_DIR@" ) +include ( "${@PACKAGE_NAME@_CONFIG_INSTALL_DIR}/@PACKAGE_NAME@-targets.cmake" ) + +# Make the module files available via include +set_and_check ( @CMAKE_PROJECT_NAME@_INCLUDE_DIRS "@PACKAGE_INSTALL_MOD_DIR@" ) diff --git a/cmake/psblas-VER-SHA256.txt.asc.in b/cmake/psblas-VER-SHA256.txt.asc.in new file mode 100644 index 00000000..22cd9211 --- /dev/null +++ b/cmake/psblas-VER-SHA256.txt.asc.in @@ -0,0 +1,12 @@ +Mac users can use GPGTools - https://gpgtools.org +Comment: Download Izaak Beekman's GPG public key from your +Comment: trusted key server or from +Comment: https://izaakbeekman.com/izaak.pubkey.txt +Comment: Next add it to your GPG keyring, e.g., +Comment: `curl https://izaakbeekman.com/izaak.pubkey.txt | gpg --import` +Comment: Make sure you have verified that the release archive's +Comment: SHA256 checksum matches the provided +Comment: psblas-@git_version@-SHA256.txt and ensure that this file +Comment: and it's signature are in the same directory. 
Then +Comment: verify with: +Comment: `gpg --verify psblas-@git_version@-SHA256.txt.asc` diff --git a/cmake/psblas-VER-SHA256.txt.in b/cmake/psblas-VER-SHA256.txt.in new file mode 100644 index 00000000..1b3888c4 --- /dev/null +++ b/cmake/psblas-VER-SHA256.txt.in @@ -0,0 +1,3 @@ +# To verify cryptographic checksums `shasum -c psblas-@git_version@-SHA256.txt` on Mac OS X, or +# `sha256sum -c psblas-@git_version@-SHA256.txt` on Linux. +@sha256_checksum@ diff --git a/cmake/psblasConfig.cmake.in b/cmake/psblasConfig.cmake.in new file mode 100644 index 00000000..12ebe66d --- /dev/null +++ b/cmake/psblasConfig.cmake.in @@ -0,0 +1,26 @@ +# Config file for the installed package + +set(psblas_VERSION "@psblas_VERSION@") + +# Include directories +set(psblas_INCLUDE_DIRS "@CMAKE_INSTALL_INCLUDEDIR@") + +include(CMakeFindDependencyMacro) + + +set(PSB_CMAKE_INSTALL_PREFIX "@PSB_CMAKE_INSTALL_PREFIX@") +set(PSB_CMAKE_INSTALL_LIBDIR "@PSB_CMAKE_INSTALL_LIBDIR@") +set(PSB_CMAKE_INSTALL_INCLUDEDIR "@PSB_CMAKE_INSTALL_INCLUDEDIR@") +set(PSB_CMAKE_INSTALL_MODULDIR "@PSB_CMAKE_INSTALL_MODULDIR@") + + +set(PSB_IPK_SIZE "@IPK_SIZE@") +set(PSB_LPK_SIZE "@LPK_SIZE@") +# Define IPKDEF and LPKDEF based on the sizes +set(PSB_IPKDEF "#define PSB_IPK@IPK_SIZE@") +set(PSB_LPKDEF "#define PSB_LPK@LPK_SIZE@") + + + +# Provide the targets +include("${CMAKE_CURRENT_LIST_DIR}/@CMAKE_PROJECT_NAME@Targets.cmake") diff --git a/cmake/setVersion.cmake b/cmake/setVersion.cmake new file mode 100644 index 00000000..f79ee619 --- /dev/null +++ b/cmake/setVersion.cmake @@ -0,0 +1,90 @@ +include(CMakeParseArguments) + +# Function to parse version info from git and/or .VERSION file +function(set_version) + set(options "") + set(oneValueArgs VERSION_VARIABLE GIT_DESCRIBE_VAR CUSTOM_VERSION_FILE CUSTOM_VERSION_REGEX ) + set(multiValueArgs "") + cmake_parse_arguments(set_version "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Algorithm: + # 1. 
Get first line of .VERSION file, which will be set via `git archive` so long as + # + # 2. If not a packaged release check if this is an active git repo + # 3. Get version info from `git describe` + # 4. First the most recent tag is fetched if available + # 5. Then the full `git describe` output is fetched + + + if(NOT set_version_CUSTOM_VERSION_REGEX) + set(_VERSION_REGEX "[vV]*[0-9]+\\.[0-9]+\\.[0-9]+") + else() + set(_VERSION_REGEX ${set_version_CUSTOM_VERSION_REGEX}) + endif() + if(NOT set_version_CUSTOM_VERSION_FILE) + set(_VERSION_FILE "${CMAKE_SOURCE_DIR}/.VERSION") + else() + set(_VERSION_FILE "${set_version_CUSTOM_VERSION_FILE}") + endif() + + file(STRINGS "${_VERSION_FILE}" first_line + LIMIT_COUNT 1 + ) + + string(REGEX MATCH ${_VERSION_REGEX} + _package_version "${first_line}") + + if((NOT (_package_version MATCHES ${_VERSION_REGEX})) AND (EXISTS "${CMAKE_SOURCE_DIR}/.git")) + message( STATUS "Build from git repository detected") + find_package(Git) + if(GIT_FOUND) + set(GIT_FOUND "${GIT_FOUND}" PARENT_SCOPE) + execute_process(COMMAND "${GIT_EXECUTABLE}" describe --abbrev=0 + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + RESULT_VARIABLE _git_status + OUTPUT_VARIABLE _git_output + OUTPUT_STRIP_TRAILING_WHITESPACE) + if((_git_status STREQUAL "0") AND (_git_output MATCHES ${_VERSION_REGEX})) + set(_package_version "${_git_output}") + endif() + execute_process(COMMAND "${GIT_EXECUTABLE}" describe --always + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + RESULT_VARIABLE _git_status + OUTPUT_VARIABLE _full_git_describe + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT (_git_status STREQUAL "0")) + set(_full_git_describe NOTFOUND) + endif() + else() + message( WARNING "Could not find git executable!") + endif() + endif() + + if(NOT (_package_version MATCHES ${_VERSION_REGEX})) + message( WARNING "Could not extract version from git, falling back on ${_VERSION_FILE}.") + file(STRINGS "${_VERSION_FILE}" _package_version + REGEX ${_VERSION_REGEX} + ) + endif() + + if(NOT
_full_git_describe) + set(_full_git_describe ${_package_version}) + endif() + + # Strip leading "v" character from package version tags so that + # the version string can be passed to the CMake `project` command + string(REPLACE "v" "" _package_version "${_package_version}") + string(REPLACE "V" "" _package_version "${_package_version}") + + if(set_version_VERSION_VARIABLE) + set(${set_version_VERSION_VARIABLE} ${_package_version} PARENT_SCOPE) + else() + set(PROJECT_VERSION ${_package_version} PARENT_SCOPE) + endif() + if(set_version_GIT_DESCRIBE_VAR) + set(${set_version_GIT_DESCRIBE_VAR} ${_full_git_describe} PARENT_SCOPE) + else() + set(FULL_GIT_DESCRIBE ${_full_git_describe} PARENT_SCOPE) + endif() + +endfunction() diff --git a/cmake/uninstall.cmake.in b/cmake/uninstall.cmake.in new file mode 100644 index 00000000..dd395930 --- /dev/null +++ b/cmake/uninstall.cmake.in @@ -0,0 +1,23 @@ +# Adapted from http://www.cmake.org/Wiki/CMake_FAQ#Can_I_do_.22make_uninstall.22_with_CMake.3F May 1, 2014 + +if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") + message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt") +endif(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") + +file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files) +string(REGEX REPLACE "\n" ";" files "${files}") +foreach(file ${files}) + message(STATUS "Uninstalling $ENV{DESTDIR}${file}") + if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") + execute_process( + COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}" + OUTPUT_VARIABLE rm_out + RESULT_VARIABLE rm_retval + ) + if(NOT "${rm_retval}" STREQUAL 0) + message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") + endif(NOT "${rm_retval}" STREQUAL 0) + else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") + message(STATUS "File $ENV{DESTDIR}${file} does not exist.") + endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") +endforeach(file) diff
--git a/config/ax_c_openacc.m4 b/config/ax_c_openacc.m4 new file mode 100644 index 00000000..52f645a9 --- /dev/null +++ b/config/ax_c_openacc.m4 @@ -0,0 +1,104 @@ +# AC_OPENACC +# --------- +# Check which options need to be passed to the C compiler to support Openacc. +# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these +# options. +# The options are necessary at compile time (so the #pragmas are understood) +# and at link time (so the appropriate library is linked with). +# This macro takes care to not produce redundant options if $CC $CFLAGS already +# supports Openacc. +# +# For each candidate option, we do a compile test first, then a link test; +# if the compile test succeeds but the link test fails, that means we have +# found the correct option but it doesn't work because the libraries are +# broken. (This can happen, for instance, with SunPRO C and a bad combination +# of operating system patches.) +# +# Several of the options in our candidate list can be misinterpreted by +# compilers that don't use them to activate Openacc support; for example, +# many compilers understand "-openacc" to mean "write output to a file +# named 'penmp'" rather than "enable Openacc". We can't completely avoid +# the possibility of clobbering files named 'penmp' or 'mp' in configure's +# working directory; therefore, this macro will bomb out if any such file +# already exists when it's invoked. 
+AC_DEFUN([AX_C_OPENACC], +[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl +[AC_ARG_ENABLE([openacc], + [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl +[ + OPENACC_[]_AC_LANG_PREFIX[]FLAGS= + if test "$enable_openacc" != no; then + AC_LANG_PUSH([C]) + AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found' + dnl Try these flags: + dnl (on by default) '' + dnl GCC >= 4.2 -fopenacc + dnl SunPRO C -xopenacc + dnl Intel C -openacc + dnl SGI C, PGI C -mp + dnl Tru64 Compaq C -omp + dnl IBM XL C (AIX, Linux) -qsmp=omp + dnl Cray CCE -homp + dnl NEC SX -Popenacc + dnl Lahey Fortran (Linux) --openacc + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option" + AC_COMPILE_IFELSE([ +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include <openacc.h> + int main (void) { acc_init (0); return 0;} +], + [AC_LINK_IFELSE([ +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include <openacc.h> + int main (void) { acc_init (0); return 0;} +], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])]) + _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS + + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then + break + fi + done + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then + ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported' + elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then + ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed' + fi + dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we + dnl started probing for Openacc support, so if they exist now, they were + dnl created by the probe loop and it's safe to delete them.
+ rm -f penmp mp]) + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \ + test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then + OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" + fi + AC_LANG_POP([C]) + fi +]) + +# _AC_OPENACC_SAFE_WD +# ------------------ +# AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at +# configure time for files that AC_OPENACC clobbers. +AC_DEFUN([_AX_OPENACC_SAFE_WD], +[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl +[m4_if(sysval, [0], [], [m4_fatal(m4_normalize( + [AX_OPENACC clobbers files named 'mp' and 'penmp'. + To use AX_OPENACC you must not have either of these files + at the top level of your source tree.]))])]dnl +[if test -e penmp || test -e mp; then + AC_MSG_ERROR(m4_normalize( + [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'. + Aborting configure because one of these files already exists.])) +fi]) + diff --git a/config/ax_cxx_openacc.m4 b/config/ax_cxx_openacc.m4 new file mode 100644 index 00000000..5a2ad278 --- /dev/null +++ b/config/ax_cxx_openacc.m4 @@ -0,0 +1,104 @@ +# AC_OPENACC +# --------- +# Check which options need to be passed to the C compiler to support Openacc. +# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these +# options. +# The options are necessary at compile time (so the #pragmas are understood) +# and at link time (so the appropriate library is linked with). +# This macro takes care to not produce redundant options if $CC $CFLAGS already +# supports Openacc. +# +# For each candidate option, we do a compile test first, then a link test; +# if the compile test succeeds but the link test fails, that means we have +# found the correct option but it doesn't work because the libraries are +# broken. (This can happen, for instance, with SunPRO C and a bad combination +# of operating system patches.) 
+# +# Several of the options in our candidate list can be misinterpreted by +# compilers that don't use them to activate Openacc support; for example, +# many compilers understand "-openacc" to mean "write output to a file +# named 'penmp'" rather than "enable Openacc". We can't completely avoid +# the possibility of clobbering files named 'penmp' or 'mp' in configure's +# working directory; therefore, this macro will bomb out if any such file +# already exists when it's invoked. +AC_DEFUN([AX_CXX_OPENACC], +[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl +[AC_ARG_ENABLE([openacc], + [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl +[ + OPENACC_[]_AC_LANG_PREFIX[]FLAGS= + if test "$enable_openacc" != no; then + AC_LANG_PUSH([C++]) + AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found' + dnl Try these flags: + dnl (on by default) '' + dnl GCC >= 4.2 -fopenacc + dnl SunPRO C -xopenacc + dnl Intel C -openacc + dnl SGI C, PGI C -mp + dnl Tru64 Compaq C -omp + dnl IBM XL C (AIX, Linux) -qsmp=omp + dnl Cray CCE -homp + dnl NEC SX -Popenacc + dnl Lahey Fortran (Linux) --openacc + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option" + AC_COMPILE_IFELSE([ +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include <openacc.h> + int main (void) { acc_init (acc_get_device_type()); return 0;} +], + [AC_LINK_IFELSE([ +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include <openacc.h> + int main (void) { acc_init (acc_get_device_type()); return 0;} +], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])]) + _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS + + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then + break + fi + done + if test
"$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then + ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported' + elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then + ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed' + fi + dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we + dnl started probing for Openacc support, so if they exist now, they were + dnl created by the probe loop and it's safe to delete them. + rm -f penmp mp]) + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \ + test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then + OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" + fi + AC_LANG_POP([C++]) + fi +]) + +dnl _AC_OPENACC_SAFE_WD +dnl ------------------ +dnl AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at +dnl configure time for files that AC_OPENACC clobbers. +dnl AC_DEFUN([_AX_OPENACC_SAFE_WD], +dnl [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl +dnl [m4_if(sysval, [0], [], [m4_fatal(m4_normalize( +dnl [AX_OPENACC clobbers files named 'mp' and 'penmp'. +dnl To use AX_OPENACC you must not have either of these files +dnl at the top level of your source tree.]))])]dnl +dnl [if test -e penmp || test -e mp; then +dnl AC_MSG_ERROR(m4_normalize( +dnl [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'. +dnl Aborting configure because one of these files already exists.])) +dnl fi]) + diff --git a/config/ax_fc_openacc.m4 b/config/ax_fc_openacc.m4 new file mode 100644 index 00000000..59775b2b --- /dev/null +++ b/config/ax_fc_openacc.m4 @@ -0,0 +1,108 @@ +# AC_OPENACC +# --------- +# Check which options need to be passed to the C compiler to support Openacc. +# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these +# options. +# The options are necessary at compile time (so the #pragmas are understood) +# and at link time (so the appropriate library is linked with).
+# This macro takes care to not produce redundant options if $FC $FCFLAGS already +# supports OpenACC. +# +# For each candidate option, we do a compile test first, then a link test; +# a candidate is accepted only when both succeed. If the compile test fails, +# or it succeeds but the link test fails (recorded as 'unsupported'), the +# next candidate is tried. If no candidate is accepted the final result is +# 'unsupported'; an accepted empty option is reported as 'none needed'. +# +# Several of the options in our candidate list can be misinterpreted by +# compilers that don't use them to activate OpenACC support; for example, +# a compiler may read "-openacc" as "write output to a file named 'penacc'". +# NOTE(review): the guard in _AX_OPENACC_SAFE_WD and the cleanup in this macro +# still name 'penmp' and 'mp', presumably inherited from an OpenMP macro; +# confirm and update both places together. This macro will bomb out if any +# guarded file already exists when it's invoked. +AC_DEFUN([AX_FC_OPENACC], +[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl +[AC_ARG_ENABLE([openacc], + [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl +[ + AC_LANG_PUSH([Fortran]) + OPENACC_[]_AC_LANG_PREFIX[]FLAGS= + if test "$enable_openacc" != no; then + AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found' + dnl Candidate options, tried in this order: + dnl '' (nothing extra needed) + dnl -fopenacc + dnl -openacc + dnl -acc + dnl A candidate is accepted only if the test program compiles and links. + dnl A failed compile leaves the cache value as it was; a failed link + dnl records 'unsupported'. Either way the next candidate must be tried, + dnl so the loop below stops only once the value is neither 'not found' + dnl nor 'unsupported'. + dnl The test program calls acc_init from the openacc module. + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option" + AC_COMPILE_IFELSE([ + program main + use openacc + implicit none + integer tid, np + tid = 42 + call acc_init(0) + end +], + [AC_LINK_IFELSE([ + program main + use openacc + 
implicit none + integer tid, np + tid = 42 + call acc_init(0) + end +], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])]) + _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS + + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then + break + fi + done + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then + ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported' + elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then + ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed' + fi + dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we + dnl started probing for Openacc support, so if they exist now, they were + dnl created by the probe loop and it's safe to delete them. + rm -f penmp mp]) + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \ + test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then + OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" + fi + fi + AC_LANG_POP([Fortran]) +]) + +# _AX_OPENACC_SAFE_WD +# ------------------- +# AC_REQUIREd by AX_FC_OPENACC. Checks both at autoconf time and at +# configure time for the files 'penmp' and 'mp' that AX_FC_OPENACC removes. +AC_DEFUN([_AX_OPENACC_SAFE_WD], +[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl +[m4_if(sysval, [0], [], [m4_fatal(m4_normalize( + [AX_OPENACC clobbers files named 'mp' and 'penmp'. + To use AX_OPENACC you must not have either of these files + at the top level of your source tree.]))])]dnl +[if test -e penmp || test -e mp; then + AC_MSG_ERROR(m4_normalize( + [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'. 
+ Aborting configure because one of these files already exists.])) +fi]) + diff --git a/config/pac.m4 b/config/pac.m4 index 69d2f863..9a9a1b0f 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -380,7 +380,7 @@ AC_ARG_ENABLE(serial, AS_HELP_STRING([--enable-serial], [Specify whether to enable a fake mpi library to run in serial mode. ]), [ -pac_cv_serial_mpi="yes"; +pac_cv_serial_mpi="$enableval"; ] dnl , dnl [pac_cv_serial_mpi="no";] @@ -388,7 +388,6 @@ dnl [pac_cv_serial_mpi="no";] if test x"$pac_cv_serial_mpi" == x"yes" ; then AC_MSG_RESULT([yes.]) else - pac_cv_serial_mpi="no"; AC_MSG_RESULT([no.]) fi ] @@ -410,12 +409,8 @@ AC_DEFUN([PAC_ARG_OPENMP], AC_ARG_ENABLE(openmp, AS_HELP_STRING([--enable-openmp], [Specify whether to enable openmp. ]), -[ -pac_cv_openmp="yes"; -] -dnl , -dnl [pac_cv_openmp="no";] - ) +[pac_cv_openmp="$enableval";]) +dnl AC_MSG_NOTICE([Result from test: "x$pac_cv_openmp"]) if test x"$pac_cv_openmp" == x"yes" ; then AC_MSG_RESULT([yes.]) AC_LANG_PUSH([Fortran]) @@ -455,7 +450,7 @@ AC_ARG_ENABLE(long-integers, AS_HELP_STRING([--enable-long-integers], [Specify usage of 64 bits integers. ]), [ -pac_cv_long_integers="yes"; +pac_cv_long_integers="$enableval"; ] dnl , dnl [pac_cv_long_integers="no";] @@ -2018,3 +2013,338 @@ CPPFLAGS="$SAVE_CPPFLAGS"; ])dnl +dnl @synopsis PAC_ARG_WITH_LIBRSB +dnl +dnl Test for --with-librsb="pathname". +dnl +dnl Defines the path to LIBRSB build dir. +dnl +dnl note: Renamed after PAC_ARG_WITH_LIBS as in the Trilinos package. 
+dnl +dnl Example use: +dnl +dnl PAC_ARG_WITH_LIBRSB +dnl +dnl Reads --with-librsb=DIR and checks for the header DIR/rsb.h. When the header is found, sets the LIBRSB_* variables, RSBLIBNAME and LRSB, and adds -DHAVE_RSB to FDEFINES and CDEFINES. +dnl LIBS and CPPFLAGS are restored before the macro returns. +dnl @author Salvatore Filippone +dnl + +AC_DEFUN([PAC_ARG_WITH_LIBRSB], + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + + AC_ARG_WITH(librsb, + AS_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example, + --with-librsb=/opt/packages/librsb]), + [pac_cv_librsb_dir=$withval], + [pac_cv_librsb_dir='']) + + if test "x$pac_cv_librsb_dir" != "x"; then + LIBS="-L$pac_cv_librsb_dir $LIBS" + RSB_INCLUDES="-I$pac_cv_librsb_dir" + # CPPFLAGS="$GPU_INCLUDES $RSB_INCLUDES $CPPFLAGS" + RSB_LIBDIR="-L$pac_cv_librsb_dir" + fi + #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir]) + AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h], + [pac_rsb_header_ok=yes], + [pac_rsb_header_ok=no; RSB_INCLUDES=""]) + + if test "x$pac_rsb_header_ok" == "xyes" ; then + RSB_LIBS="-lrsb $RSB_LIBDIR" + # LIBS="$GPU_LIBS $RSB_LIBS -lm $LIBS"; + # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + # AC_TRY_LINK_FUNC(spgpuCreate, + # [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + # [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + # AC_MSG_RESULT($pac_gpu_lib_ok) + # if test "x$pac_cv_have_spgpu" == "xyes" ; then + # AC_MSG_NOTICE([Have found SPGPU]) + RSBLIBNAME="librsb.a"; + LIBRSB_DIR="$pac_cv_librsb_dir"; + # SPGPU_DEFINES="-DHAVE_SPGPU"; + LIBRSB_INCDIR="$LIBRSB_DIR"; + LIBRSB_INCLUDES="-I$LIBRSB_INCDIR"; + LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR"; + # RSB_DIR="$pac_cv_rsb_dir"; + LIBRSB_DEFINES="-DHAVE_RSB"; + LRSB=-lpsb_rsb + # Attach the Fortran define prefix directly to the define, as PAC_CHECK_SPGPU does, + # so that a non-empty psblas_cv_define_prepend yields one well-formed option. + FDEFINES="$psblas_cv_define_prepend$LIBRSB_DEFINES $FDEFINES"; + CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_RSB $CDEFINES"; + fi +# fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +]) +dnl + + +dnl @synopsis PAC_CHECK_SPGPU +dnl +dnl Will try to find the spgpu library and headers. 
+dnl +dnl Will use $CC +dnl +dnl Takes no arguments. When CUDA is available (PAC_CHECK_CUDA is run first if pac_cv_have_cuda is still empty), reads --with-spgpu=DIR, +dnl checks for the core.h header and then links a call to spgpuCreate against -lspgpu and the CUDA libraries. +dnl On success sets pac_cv_have_spgpu=yes, SPGPULIBNAME, LGPU and the SPGPU_* and CUDA_* variables, +dnl and adds -DHAVE_GPU, -DHAVE_SPGPU and -DHAVE_CUDA to FDEFINES and -DHAVE_SPGPU and -DHAVE_CUDA to CDEFINES. +dnl LIBS and CPPFLAGS are restored before the macro returns. +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_CHECK_SPGPU], + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + if test "x$pac_cv_have_cuda" == "x"; then + PAC_CHECK_CUDA() + fi +dnl AC_MSG_NOTICE([From CUDA: $pac_cv_have_cuda ]) + if test "x$pac_cv_have_cuda" == "xyes"; then + AC_ARG_WITH(spgpu, AS_HELP_STRING([--with-spgpu=DIR], [Specify the directory for SPGPU library and includes.]), + [pac_cv_spgpudir=$withval], + [pac_cv_spgpudir='']) + + AC_LANG([C]) + if test "x$pac_cv_spgpudir" != "x"; then + LIBS="-L$pac_cv_spgpudir/lib $LIBS" + GPU_INCLUDES="-I$pac_cv_spgpudir/include" + CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + GPU_LIBDIR="-L$pac_cv_spgpudir/lib" + fi + AC_MSG_NOTICE([spgpu dir $pac_cv_spgpudir]) + AC_CHECK_HEADER([core.h], + [pac_gpu_header_ok=yes], + [pac_gpu_header_ok=no; GPU_INCLUDES=""]) + + if test "x$pac_gpu_header_ok" == "xyes" ; then + GPU_LIBS="-lspgpu $GPU_LIBDIR" + LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + AC_TRY_LINK_FUNC(spgpuCreate, + [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + AC_MSG_RESULT($pac_gpu_lib_ok) + if test "x$pac_cv_have_spgpu" == "xyes" ; then + AC_MSG_NOTICE([Have found SPGPU]) + SPGPULIBNAME="libpsbgpu.a"; + SPGPU_DIR="$pac_cv_spgpudir"; + SPGPU_DEFINES="-DHAVE_SPGPU"; + SPGPU_INCDIR="$SPGPU_DIR/include"; + SPGPU_INCLUDES="-I$SPGPU_INCDIR"; + SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib"; + LGPU=-lpsb_gpu + CUDA_DIR="$pac_cv_cuda_dir"; + CUDA_DEFINES="-DHAVE_CUDA"; + 
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib -L$pac_cv_cuda_dir/../math_libs/lib64" + FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES"; + CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ; + fi + fi +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + + + +dnl @synopsis PAC_ARG_CUDA +dnl +dnl Test for --enable-cuda +dnl +dnl When the option is given, stores its value in pac_cv_cuda; otherwise pac_cv_cuda is not assigned here. +dnl NOTE(review): AC_MSG_CHECKING is issued without an AC_MSG_RESULT in this macro; confirm that the caller prints the result. +dnl Example use: +dnl +dnl PAC_ARG_CUDA +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_ARG_CUDA], +[AC_MSG_CHECKING([whether we want cuda ]) +AC_ARG_ENABLE(cuda, +AS_HELP_STRING([--enable-cuda], +[Specify whether to enable cuda. ]), +[ +pac_cv_cuda="$enableval"; +] +) +] +) + + +dnl @synopsis PAC_CHECK_CUDA +dnl +dnl Will try to find the cuda library and headers. +dnl +dnl Will use $CC +dnl +dnl Takes no arguments. Reads --with-cudadir=DIR; when it is given, derives CUDA_DIR, CUDA_INCLUDES, CUDA_DEFINES, CUDA_LIBDIR and CUDA_NVCC from it. +dnl Then checks for the cuda_runtime.h header and, if it is found, links a call to cudaMemcpy +dnl against -lcusparse -lcublas -lcudart. +dnl The link result is stored in pac_cv_have_cuda (yes or no). +dnl LIBS and CPPFLAGS are restored before the macro returns. 
+dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_CHECK_CUDA], +[AC_ARG_WITH(cudadir, AS_HELP_STRING([--with-cudadir=DIR], [Specify the CUDA install directory.]), + [pac_cv_cuda_dir=$withval], + [pac_cv_cuda_dir='']) + +AC_LANG([C]) +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib -L$pac_cv_cuda_dir/../math_libs/lib64" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +AC_MSG_NOTICE([cuda dir $pac_cv_cuda_dir]) +AC_CHECK_HEADER([cuda_runtime.h], + [pac_cuda_header_ok=yes], + [pac_cuda_header_ok=no; pac_cv_have_cuda=no; CUDA_INCLUDES=""]) +dnl Recording pac_cv_have_cuda=no on a missing header marks the probe as done, so callers that test for an empty value do not repeat it. +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for cudaMemcpy in $CUDA_LIBS]) + AC_TRY_LINK_FUNC(cudaMemcpy, + [pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; ], + [pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS=""]) + AC_MSG_RESULT($pac_cuda_lib_ok) + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + +dnl @synopsis PAC_ARG_WITH_CUDACC +dnl +dnl Test for --with-cudacc="set_of_cc". +dnl +dnl Defines the CC to compile for +dnl +dnl +dnl Example use: +dnl +dnl PAC_ARG_WITH_CUDACC +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_ARG_WITH_CUDACC], +[ +AC_ARG_WITH(cudacc, +AS_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, for example, + --with-cudacc=50,60,70,75]), +[pac_cv_cudacc=$withval], +[pac_cv_cudacc='']) +]) + + +dnl @synopsis PAC_CHECK_CUDA_VERSION +dnl +dnl Will try to find the cuda version +dnl +dnl Will use $CC +dnl +dnl When CUDA is available, the detected version is stored in pac_cv_cuda_version. 
+dnl When CUDA is available (PAC_CHECK_CUDA is run first if pac_cv_have_cuda is still empty), links a small C program +dnl that prints the CUDA_VERSION macro and runs it. +dnl The printed value is stored in pac_cv_cuda_version; it is set to unknown when the program cannot be linked. +dnl LIBS and CPPFLAGS are restored before the macro returns. +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_CHECK_CUDA_VERSION], +[AC_LANG_PUSH([C]) +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_have_cuda" == "x"; then + PAC_CHECK_CUDA() +fi +if test "x$pac_cv_have_cuda" == "xyes"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib -L$pac_cv_cuda_dir/../math_libs/lib64" + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for CUDA version]) + AC_LINK_IFELSE([AC_LANG_SOURCE([ +#include <stdio.h> +#include <cuda.h> + +int main(int argc, char **argv) +{ + printf("%d",CUDA_VERSION); + return(0); +} ])], + [pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`;], + [pac_cv_cuda_version="unknown";]) + + AC_MSG_RESULT($pac_cv_cuda_version) + fi +AC_LANG_POP([C]) +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + + + +dnl @synopsis PAC_ARG_OPENACC +dnl +dnl Test for --enable-openacc +dnl +dnl Stores the option value in pac_cv_openacc and reports it; any value other than yes +dnl (including an absent option) is normalised to no. +dnl Example use: +dnl +dnl PAC_ARG_OPENACC +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_ARG_OPENACC], +[AC_MSG_CHECKING([whether we want openacc ]) +AC_ARG_ENABLE(openacc, +AS_HELP_STRING([--enable-openacc], +[Specify whether to enable openacc. 
]), +[ +pac_cv_openacc="$enableval"; +] +dnl , +dnl [pac_cv_openacc="no";] + ) +if test x"$pac_cv_openacc" == x"yes" ; then + AC_MSG_RESULT([yes.]) +# AC_LANG_PUSH([Fortran]) +# AC_OPENACC() +# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS"; +# AC_LANG_POP() +# AC_LANG_PUSH([C]) +# AC_OPENACC() +# pac_cv_openacc_ccopt="$OPENACC_CFLAGS"; +# AC_LANG_POP() +# AC_LANG_PUSH([C++]) +# AC_OPENACC() +# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS"; +# AC_LANG_POP() +else + pac_cv_openacc="no"; + AC_MSG_RESULT([no.]) +fi +] +) + diff --git a/configure b/configure index 752fe192..4f83aa19 100755 --- a/configure +++ b/configure @@ -1,11 +1,11 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for PSBLAS 3.7.0. +# Generated by GNU Autoconf 2.72 for PSBLAS 3.9.0. # # Report bugs to . # # -# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, +# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation, # Inc. # # @@ -17,7 +17,6 @@ # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh -as_nop=: if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 then : emulate sh @@ -26,12 +25,13 @@ then : # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else $as_nop - case `(set -o) 2>/dev/null` in #( +else case e in #( + e) case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; +esac ;; esac fi @@ -103,7 +103,7 @@ IFS=$as_save_IFS ;; esac -# We did not find ourselves, most probably we were run as `sh COMMAND' +# We did not find ourselves, most probably we were run as 'sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 @@ -133,15 +133,14 @@ case $- in # (((( esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail -# out after a failed `exec'. +# out after a failed 'exec'. 
printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then - as_bourne_compatible="as_nop=: -if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 + as_bourne_compatible="if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 then : emulate sh NULLCMD=: @@ -149,12 +148,13 @@ then : # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST -else \$as_nop - case \`(set -o) 2>/dev/null\` in #( +else case e in #( + e) case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( *) : ;; +esac ;; esac fi " @@ -172,8 +172,9 @@ as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } if ( set x; as_fn_ret_success y && test x = \"\$1\" ) then : -else \$as_nop - exitcode=1; echo positional parameters were not saved. +else case e in #( + e) exitcode=1; echo positional parameters were not saved. 
;; +esac fi test x\$exitcode = x0 || exit 1 blah=\$(echo \$(echo blah)) @@ -182,19 +183,19 @@ test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && - test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 -test \$(( 1 + 1 )) = 2 || exit 1" + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1" if (eval "$as_required") 2>/dev/null then : as_have_required=yes -else $as_nop - as_have_required=no +else case e in #( + e) as_have_required=no ;; +esac fi if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null then : -else $as_nop - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +else case e in #( + e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do @@ -227,12 +228,13 @@ IFS=$as_save_IFS if $as_found then : -else $as_nop - if { test -f "$SHELL" || test -f "$SHELL.exe"; } && +else case e in #( + e) if { test -f "$SHELL" || test -f "$SHELL.exe"; } && as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null then : CONFIG_SHELL=$SHELL as_have_required=yes -fi +fi ;; +esac fi @@ -254,7 +256,7 @@ case $- in # (((( esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail -# out after a failed `exec'. +# out after a failed 'exec'. printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi @@ -274,7 +276,8 @@ $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." 
fi exit 1 -fi +fi ;; +esac fi fi SHELL=${CONFIG_SHELL-/bin/sh} @@ -313,14 +316,6 @@ as_fn_exit () as_fn_set_status $1 exit $1 } # as_fn_exit -# as_fn_nop -# --------- -# Do nothing but, unlike ":", preserve the value of $?. -as_fn_nop () -{ - return $? -} -as_nop=as_fn_nop # as_fn_mkdir_p # ------------- @@ -389,11 +384,12 @@ then : { eval $1+=\$2 }' -else $as_nop - as_fn_append () +else case e in #( + e) as_fn_append () { eval $1=\$$1\$2 - } + } ;; +esac fi # as_fn_append # as_fn_arith ARG... @@ -407,21 +403,14 @@ then : { as_val=$(( $* )) }' -else $as_nop - as_fn_arith () +else case e in #( + e) as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` - } + } ;; +esac fi # as_fn_arith -# as_fn_nop -# --------- -# Do nothing but, unlike ":", preserve the value of $?. -as_fn_nop () -{ - return $? -} -as_nop=as_fn_nop # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- @@ -495,6 +484,8 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits /[$]LINENO/= ' <$as_myself | sed ' + t clear + :clear s/[$]LINENO.*/&-/ t lineno b @@ -543,7 +534,6 @@ esac as_echo='printf %s\n' as_echo_n='printf %s' - rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -555,9 +545,9 @@ if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -pR'. + # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. + # In both cases, we have to default to 'cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then @@ -582,10 +572,12 @@ as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. -as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" +as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" +as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated # Sed expression to map a string onto a valid variable name. -as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" +as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" +as_tr_sh="eval sed '$as_sed_sh'" # deprecated test -n "$DJDIR" || exec 7<&0 /dev/null && - as_fn_error $? "invalid feature name: \`$ac_useropt'" + as_fn_error $? "invalid feature name: '$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -976,7 +1022,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: \`$ac_useropt'" + as_fn_error $? "invalid feature name: '$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1189,7 +1235,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: \`$ac_useropt'" + as_fn_error $? "invalid package name: '$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1205,7 +1251,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: \`$ac_useropt'" + as_fn_error $? 
"invalid package name: '$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1235,8 +1281,8 @@ do | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; - -*) as_fn_error $? "unrecognized option: \`$ac_option' -Try \`$0 --help' for more information" + -*) as_fn_error $? "unrecognized option: '$ac_option' +Try '$0 --help' for more information" ;; *=*) @@ -1244,7 +1290,7 @@ Try \`$0 --help' for more information" # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) - as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + as_fn_error $? "invalid variable name: '$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; @@ -1294,7 +1340,7 @@ do as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done -# There might be people who depend on the old broken behavior: `$host' +# There might be people who depend on the old broken behavior: '$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias @@ -1362,7 +1408,7 @@ if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" fi -ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_msg="sources are in $srcdir, but 'cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" pwd)` @@ -1390,7 +1436,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures PSBLAS 3.7.0 to adapt to many kinds of systems. +'configure' configures PSBLAS 3.9.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1404,11 +1450,11 @@ Configuration: --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit - -q, --quiet, --silent do not print \`checking ...' messages + -q, --quiet, --silent do not print 'checking ...' messages --cache-file=FILE cache test results in FILE [disabled] - -C, --config-cache alias for \`--cache-file=config.cache' + -C, --config-cache alias for '--cache-file=config.cache' -n, --no-create do not create output files - --srcdir=DIR find the sources in DIR [configure dir or \`..'] + --srcdir=DIR find the sources in DIR [configure dir or '..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX @@ -1416,10 +1462,10 @@ Installation directories: --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] -By default, \`make install' will install all the files in -\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify -an installation prefix other than \`$ac_default_prefix' using \`--prefix', -for instance \`--prefix=\$HOME'. +By default, 'make install' will install all the files in +'$ac_default_prefix/bin', '$ac_default_prefix/lib' etc. You can specify +an installation prefix other than '$ac_default_prefix' using '--prefix', +for instance '--prefix=\$HOME'. For better control, use the options below. @@ -1457,7 +1503,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of PSBLAS 3.7.0:";; + short | recursive ) echo "Configuration of PSBLAS 3.9.0:";; esac cat <<\_ACEOF @@ -1475,6 +1521,9 @@ Optional Features: --disable-silent-rules verbose build output (undo: "make V=0") --enable-openmp Specify whether to enable openmp. --disable-openmp do not use OpenMP + --enable-cuda Specify whether to enable cuda. + --enable-openacc Specify whether to enable openacc. 
+ --disable-openacc do not use Openacc Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1485,6 +1534,8 @@ Optional Packages: to [CXXOPT] --with-fcopt additional [FCOPT] flags to be added: will prepend to [FCOPT] + --with-extra-opt additional [EXTRA_OPT] flags to be added: will + prepend to [EXTRA_OPT] --with-libs List additional link flags here. For example, --with-libs=-lspecial_system_lib or --with-libs=-L/path/to/libs @@ -1492,6 +1543,8 @@ Optional Packages: to [CLIBS] --with-flibs additional [FLIBS] flags to be added: will prepend to [FLIBS] + --with-extra-nvcc additional [EXTRA_NVCC] flags to be added: will + prepend to [EXTRA_NVCC] --with-library-path additional [LIBRARYPATH] flags to be added: will prepend to [LIBRARYPATH] --with-include-path additional [INCLUDEPATH] flags to be added: will @@ -1505,12 +1558,6 @@ Optional Packages: --with-blas= use BLAS library --with-blasdir= search for BLAS library in --with-lapack= use LAPACK library - --with-rsb Specify Recursive Sparse BLAS library linkage info - (that is, the output of librsb-config --static - --ldflags, or a directory where the usual - bin/include/lib subdirs with a regular RSB - installation resides, or nothing to make the - configure script invoke librsb-config) --with-metis=LIBNAME Specify the library name for METIS library. Default: "-lmetis" --with-metisincfile=DIR Specify the name for METIS include file. @@ -1523,6 +1570,17 @@ Optional Packages: --with-amddir=DIR Specify the directory for AMD library and includes. --with-amdincdir=DIR Specify the directory for AMD includes. --with-amdlibdir=DIR Specify the directory for AMD library. + --with-cudadir=DIR Specify the CUDA install directory. 
+ --with-cudacc A comma-separated list of CCs to compile to, for + example, --with-cudacc=50,60,70,75 + --with-extraopenacc additional [EXTRAOPENACC] flags to be added: will + prepend to [EXTRAOPENACC] + --with-ccopenacc additional [CCOPENACC] flags to be added: will + prepend to [CCOPENACC] + --with-cxxopenacc additional [CXXOPENACC] flags to be added: will + prepend to [CXXOPENACC] + --with-fcopenacc additional [FCOPENACC] flags to be added: will + prepend to [FCOPENACC] Some influential environment variables: FC Fortran compiler command @@ -1540,7 +1598,7 @@ Some influential environment variables: MPICXX MPI C++ compiler command MPIFC MPI Fortran compiler command -Use these variables to override the choices made by `configure' or to help +Use these variables to override the choices made by 'configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to . @@ -1607,10 +1665,10 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -PSBLAS configure 3.7.0 -generated by GNU Autoconf 2.71 +PSBLAS configure 3.9.0 +generated by GNU Autoconf 2.72 -Copyright (C) 2021 Free Software Foundation, Inc. +Copyright (C) 2023 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. 
_ACEOF @@ -1649,11 +1707,12 @@ printf "%s\n" "$ac_try_echo"; } >&5 } && test -s conftest.$ac_objext then : ac_retval=0 -else $as_nop - printf "%s\n" "$as_me: failed program was:" >&5 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 + ac_retval=1 ;; +esac fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval @@ -1688,11 +1747,12 @@ printf "%s\n" "$ac_try_echo"; } >&5 } && test -s conftest.$ac_objext then : ac_retval=0 -else $as_nop - printf "%s\n" "$as_me: failed program was:" >&5 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 + ac_retval=1 ;; +esac fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval @@ -1727,11 +1787,12 @@ printf "%s\n" "$ac_try_echo"; } >&5 } && test -s conftest.$ac_objext then : ac_retval=0 -else $as_nop - printf "%s\n" "$as_me: failed program was:" >&5 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 + ac_retval=1 ;; +esac fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval @@ -1769,11 +1830,12 @@ printf "%s\n" "$ac_try_echo"; } >&5 } then : ac_retval=0 -else $as_nop - printf "%s\n" "$as_me: failed program was:" >&5 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 + ac_retval=1 ;; +esac fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would @@ -1796,15 +1858,15 @@ printf %s "checking for $2... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 -else $as_nop - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ /* Define $2 to an innocuous variant, in case declares $2. For example, HP-UX 11i declares gettimeofday. */ #define $2 innocuous_$2 /* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $2 (); below. */ + which can conflict with char $2 (void); below. */ #include #undef $2 @@ -1815,7 +1877,7 @@ else $as_nop #ifdef __cplusplus extern "C" #endif -char $2 (); +char $2 (void); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. */ @@ -1834,11 +1896,13 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : eval "$3=yes" -else $as_nop - eval "$3=no" +else case e in #( + e) eval "$3=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext + conftest$ac_exeext conftest.$ac_ext ;; +esac fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -1878,11 +1942,12 @@ printf "%s\n" "$ac_try_echo"; } >&5 } then : ac_retval=0 -else $as_nop - printf "%s\n" "$as_me: failed program was:" >&5 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 + ac_retval=1 ;; +esac fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would @@ -1905,15 +1970,15 @@ printf %s "checking for $2... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 -else $as_nop - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Define $2 to an innocuous variant, in case declares $2. For example, HP-UX 11i declares gettimeofday. */ #define $2 innocuous_$2 /* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $2 (); below. 
*/ + which can conflict with char $2 (void); below. */ #include #undef $2 @@ -1924,7 +1989,7 @@ else $as_nop #ifdef __cplusplus extern "C" #endif -char $2 (); +char $2 (void); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. */ @@ -1943,11 +2008,13 @@ _ACEOF if ac_fn_cxx_try_link "$LINENO" then : eval "$3=yes" -else $as_nop - eval "$3=no" +else case e in #( + e) eval "$3=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext + conftest$ac_exeext conftest.$ac_ext ;; +esac fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -1987,11 +2054,12 @@ printf "%s\n" "$ac_try_echo"; } >&5 } then : ac_retval=0 -else $as_nop - printf "%s\n" "$as_me: failed program was:" >&5 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 + ac_retval=1 ;; +esac fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would @@ -2003,238 +2071,6 @@ fi } # ac_fn_fc_try_link -# ac_fn_c_try_run LINENO -# ---------------------- -# Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that -# executables *can* be run. -ac_fn_c_try_run () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 - test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf "%s\n" "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } -then : - ac_retval=0 -else $as_nop - printf "%s\n" "$as_me: program exited with status $ac_status" >&5 - printf "%s\n" "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=$ac_status -fi - rm -rf conftest.dSYM conftest_ipa8_conftest.oo - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_run - -# ac_fn_c_compute_int LINENO EXPR VAR INCLUDES -# -------------------------------------------- -# Tries to find the compile-time value of EXPR in a program that includes -# INCLUDES, setting VAR accordingly. Returns whether the value could be -# computed -ac_fn_c_compute_int () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if test "$cross_compiling" = yes; then - # Depending upon the size, compute the lo and hi bounds. -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main (void) -{ -static int test_array [1 - 2 * !(($2) >= 0)]; -test_array [0] = 0; -return test_array [0]; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_lo=0 ac_mid=0 - while :; do - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$4 -int -main (void) -{ -static int test_array [1 - 2 * !(($2) <= $ac_mid)]; -test_array [0] = 0; -return test_array [0]; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_hi=$ac_mid; break -else $as_nop - as_fn_arith $ac_mid + 1 && ac_lo=$as_val - if test $ac_lo -le $ac_mid; then - ac_lo= ac_hi= - break - fi - as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - done -else $as_nop - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main (void) -{ -static int test_array [1 - 2 * !(($2) < 0)]; -test_array [0] = 0; -return test_array [0]; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_hi=-1 ac_mid=-1 - while :; do - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -int -main (void) -{ -static int test_array [1 - 2 * !(($2) >= $ac_mid)]; -test_array [0] = 0; -return test_array [0]; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_lo=$ac_mid; break -else $as_nop - as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val - if test $ac_mid -le $ac_hi; then - ac_lo= ac_hi= - break - fi - as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - done -else $as_nop - ac_lo= ac_hi= -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -# Binary search between lo and hi bounds. -while test "x$ac_lo" != "x$ac_hi"; do - as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$4 -int -main (void) -{ -static int test_array [1 - 2 * !(($2) <= $ac_mid)]; -test_array [0] = 0; -return test_array [0]; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_hi=$ac_mid -else $as_nop - as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -done -case $ac_lo in #(( -?*) eval "$3=\$ac_lo"; ac_retval=0 ;; -'') ac_retval=1 ;; -esac - else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$4 -static long int longval (void) { return $2; } -static unsigned long int ulongval (void) { return $2; } -#include -#include -int -main (void) -{ - - FILE *f = fopen ("conftest.val", "w"); - if (! f) - return 1; - if (($2) < 0) - { - long int i = longval (); - if (i != ($2)) - return 1; - fprintf (f, "%ld", i); - } - else - { - unsigned long int i = ulongval (); - if (i != ($2)) - return 1; - fprintf (f, "%lu", i); - } - /* Do not output a trailing newline, as this causes \r\n confusion - on some platforms. */ - return ferror (f) || fclose (f) != 0; - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO" -then : - echo >>conftest.val; read $3 &6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 -else $as_nop - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> @@ -2256,10 +2092,12 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : eval "$3=yes" -else $as_nop - eval "$3=no" +else case e in #( + e) eval "$3=no" ;; +esac fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -2291,8 +2129,8 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
-It was created by PSBLAS $as_me 3.7.0, which was -generated by GNU Autoconf 2.71. Invocation command line was +It was created by PSBLAS $as_me 3.9.0, which was +generated by GNU Autoconf 2.72. Invocation command line was $ $0$ac_configure_args_raw @@ -2538,10 +2376,10 @@ esac printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" \ - || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file -See \`config.log' for more details" "$LINENO" 5; } +See 'config.log' for more details" "$LINENO" 5; } fi done @@ -2577,9 +2415,7 @@ struct stat; /* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ struct buf { int x; }; struct buf * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; +static char *e (char **p, int i) { return p[i]; } @@ -2593,6 +2429,21 @@ static char *f (char * (*g) (char **, int), char **p, ...) return s; } +/* C89 style stringification. */ +#define noexpand_stringify(a) #a +const char *stringified = noexpand_stringify(arbitrary+token=sequence); + +/* C89 style token pasting. Exercises some of the corner cases that + e.g. old MSVC gets wrong, but not very hard. */ +#define noexpand_concat(a,b) a##b +#define expand_concat(a,b) noexpand_concat(a,b) +extern int vA; +extern int vbee; +#define aye A +#define bee B +int *pvA = &expand_concat(v,aye); +int *pvbee = &noexpand_concat(v,bee); + /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not \xHH hex character constants. 
These do not provoke an error unfortunately, instead are silently treated @@ -2620,16 +2471,19 @@ ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); # Test code for whether the C compiler supports C99 (global declarations) ac_c_conftest_c99_globals=' -// Does the compiler advertise C99 conformance? +/* Does the compiler advertise C99 conformance? */ #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L # error "Compiler does not advertise C99 conformance" #endif +// See if C++-style comments work. + #include extern int puts (const char *); extern int printf (const char *, ...); extern int dprintf (int, const char *, ...); extern void *malloc (size_t); +extern void free (void *); // Check varargs macros. These examples are taken from C99 6.10.3.5. // dprintf is used instead of fprintf to avoid needing to declare @@ -2679,7 +2533,6 @@ typedef const char *ccp; static inline int test_restrict (ccp restrict text) { - // See if C++-style comments work. // Iterate through items via the restricted pointer. // Also check for declarations in for loops. for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) @@ -2745,6 +2598,8 @@ ac_c_conftest_c99_main=' ia->datasize = 10; for (int i = 0; i < ia->datasize; ++i) ia->data[i] = i * 1.234; + // Work around memory leak warnings. + free (ia); // Check named initializers. struct named_init ni = { @@ -2766,7 +2621,7 @@ ac_c_conftest_c99_main=' # Test code for whether the C compiler supports C11 (global declarations) ac_c_conftest_c11_globals=' -// Does the compiler advertise C11 conformance? +/* Does the compiler advertise C11 conformance? */ #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L # error "Compiler does not advertise C11 conformance" #endif @@ -3174,8 +3029,9 @@ IFS=$as_save_IFS if $as_found then : -else $as_nop - as_fn_error $? "cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 +else case e in #( + e) as_fn_error $? 
"cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 ;; +esac fi @@ -3203,12 +3059,12 @@ for ac_var in $ac_precious_vars; do eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 -printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 -printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: '$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) @@ -3217,18 +3073,18 @@ printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 -printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: '$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 -printf "%s\n" "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: 
ignoring whitespace changes in '$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 -printf "%s\n" "$as_me: former value: \`$ac_old_val'" >&2;} - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 -printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: '$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: '$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: '$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: '$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. @@ -3244,11 +3100,11 @@ printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;} fi done if $ac_cache_corrupted; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error $? "run \`${MAKE-make} distclean' and/or \`rm $cache_file' + as_fn_error $? "run '${MAKE-make} distclean' and/or 'rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## @@ -3265,7 +3121,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu # VERSION is the file containing the PSBLAS version code # FIXME -psblas_cv_version="3.7.0" +psblas_cv_version="3.9.0" # A sample source file @@ -3280,7 +3136,7 @@ psblas_cv_version="3.7.0" documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. 
Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -3294,7 +3150,7 @@ printf "%s\n" "$as_me: documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -3329,8 +3185,8 @@ if test -z "$INSTALL"; then if test ${ac_cv_path_install+y} then : printf %s "(cached) " >&6 -else $as_nop - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +else case e in #( + e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS @@ -3384,7 +3240,8 @@ esac IFS=$as_save_IFS rm -rf conftest.one conftest.two conftest.dir - + ;; +esac fi if test ${ac_cv_path_install+y}; then INSTALL=$ac_cv_path_install @@ -3440,7 +3297,7 @@ ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu if test -n "$ac_tool_prefix"; then - for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran + for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang ifx ifort ifc nagfor gfortran do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -3449,8 +3306,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_FC+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$FC"; then +else case e in #( + e) if test -n "$FC"; then ac_cv_prog_FC="$FC" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3472,7 +3329,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi FC=$ac_cv_prog_FC if test -n "$FC"; then @@ -3489,7 +3347,7 @@ fi fi if test -z "$FC"; then ac_ct_FC=$FC - for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran + for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang ifx ifort ifc nagfor gfortran do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -3498,8 +3356,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_FC+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$ac_ct_FC"; then +else case e in #( + e) if test -n "$ac_ct_FC"; then ac_cv_prog_ac_ct_FC="$ac_ct_FC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3521,7 +3379,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi ac_ct_FC=$ac_cv_prog_ac_ct_FC if test -n "$ac_ct_FC"; then @@ -3615,8 +3474,8 @@ printf "%s\n" "$ac_try_echo"; } >&5 printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then : - # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. -# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' + # Autoconf-2.13 could set the ac_cv_exeext variable to 'no'. +# So ignore a value of 'no', otherwise this would lead to 'EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. @@ -3636,7 +3495,7 @@ do ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not - # safe: cross compilers may not add the suffix if given an `-o' + # safe: cross compilers may not add the suffix if given an '-o' # argument, so we may need to know it at that point already. 
# Even if this section looks crufty: it has the advantage of # actually working. @@ -3647,8 +3506,9 @@ do done test "$ac_cv_exeext" = no && ac_cv_exeext= -else $as_nop - ac_file='' +else case e in #( + e) ac_file='' ;; +esac fi if test -z "$ac_file" then : @@ -3657,13 +3517,14 @@ printf "%s\n" "no" >&6; } printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} as_fn_error 77 "Fortran compiler cannot create executables -See \`config.log' for more details" "$LINENO" 5; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } +See 'config.log' for more details" "$LINENO" 5; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Fortran compiler default output file name" >&5 printf %s "checking for Fortran compiler default output file name... " >&6; } @@ -3687,10 +3548,10 @@ printf "%s\n" "$ac_try_echo"; } >&5 printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then : - # If both `conftest.exe' and `conftest' are `present' (well, observable) -# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will -# work properly (i.e., refer to `conftest.exe'), while it won't with -# `rm'. + # If both 'conftest.exe' and 'conftest' are 'present' (well, observable) +# catch 'conftest.exe'. For instance with Cygwin, 'ls conftest' will +# work properly (i.e., refer to 'conftest.exe'), while it won't with +# 'rm'. 
for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in @@ -3700,11 +3561,12 @@ for ac_file in conftest.exe conftest conftest.*; do * ) break;; esac done -else $as_nop - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +else case e in #( + e) { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link -See \`config.log' for more details" "$LINENO" 5; } +See 'config.log' for more details" "$LINENO" 5; } ;; +esac fi rm -f conftest conftest$ac_cv_exeext { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 @@ -3753,26 +3615,27 @@ printf "%s\n" "$ac_try_echo"; } >&5 if test "$cross_compiling" = maybe; then cross_compiling=yes else - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} as_fn_error 77 "cannot run Fortran compiled programs. -If you meant to cross compile, use \`--host'. -See \`config.log' for more details" "$LINENO" 5; } +If you meant to cross compile, use '--host'. +See 'config.log' for more details" "$LINENO" 5; } fi fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 printf "%s\n" "$cross_compiling" >&6; } -rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +rm -f conftest.$ac_ext conftest$ac_cv_exeext \ + conftest.o conftest.obj conftest.out ac_clean_files=$ac_clean_files_save { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 printf %s "checking for suffix of object files... 
" >&6; } if test ${ac_cv_objext+y} then : printf %s "(cached) " >&6 -else $as_nop - cat > conftest.$ac_ext <<_ACEOF +else case e in #( + e) cat > conftest.$ac_ext <<_ACEOF program main end @@ -3798,22 +3661,24 @@ then : break;; esac done -else $as_nop - printf "%s\n" "$as_me: failed program was:" >&5 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile -See \`config.log' for more details" "$LINENO" 5; } +See 'config.log' for more details" "$LINENO" 5; } ;; +esac fi -rm -f conftest.$ac_cv_objext conftest.$ac_ext +rm -f conftest.$ac_cv_objext conftest.$ac_ext ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 printf "%s\n" "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT -# If we don't use `.F' as extension, the preprocessor is not run on the +# If we don't use '.F' as extension, the preprocessor is not run on the # input file. (Note that this only needs to work for GNU compilers.) ac_save_ext=$ac_ext ac_ext=F @@ -3822,8 +3687,8 @@ printf %s "checking whether the compiler supports GNU Fortran... 
" >&6; } if test ${ac_cv_fc_compiler_gnu+y} then : printf %s "(cached) " >&6 -else $as_nop - cat > conftest.$ac_ext <<_ACEOF +else case e in #( + e) cat > conftest.$ac_ext <<_ACEOF program main #ifndef __GNUC__ choke me @@ -3834,12 +3699,14 @@ _ACEOF if ac_fn_fc_try_compile "$LINENO" then : ac_compiler_gnu=yes -else $as_nop - ac_compiler_gnu=no +else case e in #( + e) ac_compiler_gnu=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_fc_compiler_gnu=$ac_compiler_gnu - + ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_compiler_gnu" >&5 printf "%s\n" "$ac_cv_fc_compiler_gnu" >&6; } @@ -3854,8 +3721,8 @@ printf %s "checking whether $FC accepts -g... " >&6; } if test ${ac_cv_prog_fc_g+y} then : printf %s "(cached) " >&6 -else $as_nop - FCFLAGS=-g +else case e in #( + e) FCFLAGS=-g cat > conftest.$ac_ext <<_ACEOF program main @@ -3864,11 +3731,13 @@ _ACEOF if ac_fn_fc_try_compile "$LINENO" then : ac_cv_prog_fc_g=yes -else $as_nop - ac_cv_prog_fc_g=no +else case e in #( + e) ac_cv_prog_fc_g=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - + ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_g" >&5 printf "%s\n" "$ac_cv_prog_fc_g" >&6; } @@ -3916,7 +3785,7 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then - for ac_prog in xlc pgcc icc gcc cc + for ac_prog in xlc pgcc clang icx icc gcc cc do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -3925,8 +3794,8 @@ printf %s "checking for $ac_word... 
" >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$CC"; then +else case e in #( + e) if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3948,7 +3817,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi CC=$ac_cv_prog_CC if test -n "$CC"; then @@ -3965,7 +3835,7 @@ fi fi if test -z "$CC"; then ac_ct_CC=$CC - for ac_prog in xlc pgcc icc gcc cc + for ac_prog in xlc pgcc clang icx icc gcc cc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -3974,8 +3844,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$ac_ct_CC"; then +else case e in #( + e) if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3997,7 +3867,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then @@ -4026,10 +3897,10 @@ esac fi -test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH -See \`config.log' for more details" "$LINENO" 5; } +See 'config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 @@ -4061,8 +3932,8 @@ printf %s "checking whether the compiler supports GNU C... 
" >&6; } if test ${ac_cv_c_compiler_gnu+y} then : printf %s "(cached) " >&6 -else $as_nop - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int @@ -4079,12 +3950,14 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_compiler_gnu=yes -else $as_nop - ac_compiler_gnu=no +else case e in #( + e) ac_compiler_gnu=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu - + ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } @@ -4102,8 +3975,8 @@ printf %s "checking whether $CC accepts -g... " >&6; } if test ${ac_cv_prog_cc_g+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_save_c_werror_flag=$ac_c_werror_flag +else case e in #( + e) ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" @@ -4121,8 +3994,8 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_g=yes -else $as_nop - CFLAGS="" +else case e in #( + e) CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -4137,8 +4010,8 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : -else $as_nop - ac_c_werror_flag=$ac_save_c_werror_flag +else case e in #( + e) ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4155,12 +4028,15 @@ if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_g=yes fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag + ac_c_werror_flag=$ac_save_c_werror_flag ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 printf "%s\n" "$ac_cv_prog_cc_g" >&6; } @@ -4187,8 +4063,8 @@ printf %s "checking for $CC option to enable C11 features... " >&6; } if test ${ac_cv_prog_cc_c11+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_cv_prog_cc_c11=no +else case e in #( + e) ac_cv_prog_cc_c11=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4205,25 +4081,28 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c11" != "xno" && break done rm -f conftest.$ac_ext -CC=$ac_save_CC +CC=$ac_save_CC ;; +esac fi if test "x$ac_cv_prog_cc_c11" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } -else $as_nop - if test "x$ac_cv_prog_cc_c11" = x +else case e in #( + e) if test "x$ac_cv_prog_cc_c11" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } - CC="$CC $ac_cv_prog_cc_c11" + CC="$CC $ac_cv_prog_cc_c11" ;; +esac fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 - ac_prog_cc_stdc=c11 + ac_prog_cc_stdc=c11 ;; +esac fi fi if test x$ac_prog_cc_stdc = xno @@ -4233,8 +4112,8 @@ printf %s "checking for $CC option to enable C99 features... " >&6; } if test ${ac_cv_prog_cc_c99+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_cv_prog_cc_c99=no +else case e in #( + e) ac_cv_prog_cc_c99=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4251,25 +4130,28 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c99" != "xno" && break done rm -f conftest.$ac_ext -CC=$ac_save_CC +CC=$ac_save_CC ;; +esac fi if test "x$ac_cv_prog_cc_c99" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } -else $as_nop - if test "x$ac_cv_prog_cc_c99" = x +else case e in #( + e) if test "x$ac_cv_prog_cc_c99" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } - CC="$CC $ac_cv_prog_cc_c99" + CC="$CC $ac_cv_prog_cc_c99" ;; +esac fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 - ac_prog_cc_stdc=c99 + ac_prog_cc_stdc=c99 ;; +esac fi fi if test x$ac_prog_cc_stdc = xno @@ -4279,8 +4161,8 @@ printf %s "checking for $CC option to enable C89 features... " >&6; } if test ${ac_cv_prog_cc_c89+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_cv_prog_cc_c89=no +else case e in #( + e) ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4297,25 +4179,28 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext -CC=$ac_save_CC +CC=$ac_save_CC ;; +esac fi if test "x$ac_cv_prog_cc_c89" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } -else $as_nop - if test "x$ac_cv_prog_cc_c89" = x +else case e in #( + e) if test "x$ac_cv_prog_cc_c89" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } - CC="$CC $ac_cv_prog_cc_c89" + CC="$CC $ac_cv_prog_cc_c89" ;; +esac fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 - ac_prog_cc_stdc=c89 + ac_prog_cc_stdc=c89 ;; +esac fi fi @@ -4340,8 +4225,8 @@ printf %s "checking whether $CC understands -c and -o together... " >&6; } if test ${am_cv_prog_cc_c_o+y} then : printf %s "(cached) " >&6 -else $as_nop - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int @@ -4371,7 +4256,8 @@ _ACEOF fi done rm -f core conftest* - unset am_i + unset am_i ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 printf "%s\n" "$am_cv_prog_cc_c_o" >&6; } @@ -4412,7 +4298,7 @@ if test -z "$CXX"; then CXX=$CCC else if test -n "$ac_tool_prefix"; then - for ac_prog in CC xlc++ icpc g++ + for ac_prog in CC xlc++ clang++ icpx icpc g++ do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -4421,8 +4307,8 @@ printf %s "checking for $ac_word... 
" >&6; } if test ${ac_cv_prog_CXX+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$CXX"; then +else case e in #( + e) if test -n "$CXX"; then ac_cv_prog_CXX="$CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -4444,7 +4330,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi CXX=$ac_cv_prog_CXX if test -n "$CXX"; then @@ -4461,7 +4348,7 @@ fi fi if test -z "$CXX"; then ac_ct_CXX=$CXX - for ac_prog in CC xlc++ icpc g++ + for ac_prog in CC xlc++ clang++ icpx icpc g++ do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -4470,8 +4357,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CXX+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$ac_ct_CXX"; then +else case e in #( + e) if test -n "$ac_ct_CXX"; then ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -4493,7 +4380,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi ac_ct_CXX=$ac_cv_prog_ac_ct_CXX if test -n "$ac_ct_CXX"; then @@ -4553,8 +4441,8 @@ printf %s "checking whether the compiler supports GNU C++... " >&6; } if test ${ac_cv_cxx_compiler_gnu+y} then : printf %s "(cached) " >&6 -else $as_nop - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int @@ -4571,12 +4459,14 @@ _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : ac_compiler_gnu=yes -else $as_nop - ac_compiler_gnu=no +else case e in #( + e) ac_compiler_gnu=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_cxx_compiler_gnu=$ac_compiler_gnu - + ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 printf "%s\n" "$ac_cv_cxx_compiler_gnu" >&6; } @@ -4594,8 +4484,8 @@ printf %s "checking whether $CXX accepts -g... 
" >&6; } if test ${ac_cv_prog_cxx_g+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_save_cxx_werror_flag=$ac_cxx_werror_flag +else case e in #( + e) ac_save_cxx_werror_flag=$ac_cxx_werror_flag ac_cxx_werror_flag=yes ac_cv_prog_cxx_g=no CXXFLAGS="-g" @@ -4613,8 +4503,8 @@ _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : ac_cv_prog_cxx_g=yes -else $as_nop - CXXFLAGS="" +else case e in #( + e) CXXFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -4629,8 +4519,8 @@ _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : -else $as_nop - ac_cxx_werror_flag=$ac_save_cxx_werror_flag +else case e in #( + e) ac_cxx_werror_flag=$ac_save_cxx_werror_flag CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -4647,12 +4537,15 @@ if ac_fn_cxx_try_compile "$LINENO" then : ac_cv_prog_cxx_g=yes fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - ac_cxx_werror_flag=$ac_save_cxx_werror_flag + ac_cxx_werror_flag=$ac_save_cxx_werror_flag ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 printf "%s\n" "$ac_cv_prog_cxx_g" >&6; } @@ -4679,8 +4572,8 @@ printf %s "checking for $CXX option to enable C++11 features... " >&6; } if test ${ac_cv_prog_cxx_cxx11+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_cv_prog_cxx_cxx11=no +else case e in #( + e) ac_cv_prog_cxx_cxx11=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4697,25 +4590,28 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cxx_cxx11" != "xno" && break done rm -f conftest.$ac_ext -CXX=$ac_save_CXX +CXX=$ac_save_CXX ;; +esac fi if test "x$ac_cv_prog_cxx_cxx11" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } -else $as_nop - if test "x$ac_cv_prog_cxx_cxx11" = x +else case e in #( + e) if test "x$ac_cv_prog_cxx_cxx11" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx11" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx11" >&5 printf "%s\n" "$ac_cv_prog_cxx_cxx11" >&6; } - CXX="$CXX $ac_cv_prog_cxx_cxx11" + CXX="$CXX $ac_cv_prog_cxx_cxx11" ;; +esac fi ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx11 - ac_prog_cxx_stdcxx=cxx11 + ac_prog_cxx_stdcxx=cxx11 ;; +esac fi fi if test x$ac_prog_cxx_stdcxx = xno @@ -4725,8 +4621,8 @@ printf %s "checking for $CXX option to enable C++98 features... " >&6; } if test ${ac_cv_prog_cxx_cxx98+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_cv_prog_cxx_cxx98=no +else case e in #( + e) ac_cv_prog_cxx_cxx98=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4743,25 +4639,28 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cxx_cxx98" != "xno" && break done rm -f conftest.$ac_ext -CXX=$ac_save_CXX +CXX=$ac_save_CXX ;; +esac fi if test "x$ac_cv_prog_cxx_cxx98" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } -else $as_nop - if test "x$ac_cv_prog_cxx_cxx98" = x +else case e in #( + e) if test "x$ac_cv_prog_cxx_cxx98" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx98" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx98" >&5 printf "%s\n" "$ac_cv_prog_cxx_cxx98" >&6; } - CXX="$CXX $ac_cv_prog_cxx_cxx98" + CXX="$CXX $ac_cv_prog_cxx_cxx98" ;; +esac fi ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx98 - ac_prog_cxx_stdcxx=cxx98 + ac_prog_cxx_stdcxx=cxx98 ;; +esac fi fi @@ -4796,7 +4695,7 @@ printf %s "checking whether we want serial mpi stubs... " >&6; } if test ${enable_serial+y} then : enableval=$enable_serial; -pac_cv_serial_mpi="yes"; +pac_cv_serial_mpi="$enableval"; fi @@ -4805,7 +4704,6 @@ if test x"$pac_cv_serial_mpi" == x"yes" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5 printf "%s\n" "yes." >&6; } else - pac_cv_serial_mpi="no"; { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5 printf "%s\n" "no." >&6; } fi @@ -4814,10 +4712,11 @@ fi #Note : we miss the name of the Intel C compiler if test x"$pac_cv_serial_mpi" == x"yes" ; then - FAKEMPI="fakempi.o"; + FAKEMPI="psb_fakempi.o"; MPIFC="$FC"; MPICC="$CC"; MPICXX="$CXX"; + CSERIALMPI="#define PSB_SERIAL_MPI" else ac_ext=c ac_cpp='$CPP $CPPFLAGS' @@ -4827,7 +4726,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu if test "X$MPICC" = "X" ; then # This is our MPICC compiler preference: it will override ACX_MPI's first try. 
- for ac_prog in mpxlc mpiicc mpcc mpicc cc + for ac_prog in mpxlc mpiicx mpiicc mpcc mpicc cc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -4836,8 +4735,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_MPICC+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$MPICC"; then +else case e in #( + e) if test -n "$MPICC"; then ac_cv_prog_MPICC="$MPICC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -4859,7 +4758,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi MPICC=$ac_cv_prog_MPICC if test -n "$MPICC"; then @@ -4874,6 +4774,9 @@ fi test -n "$MPICC" && break done +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: test with $MPICC" >&5 +printf "%s\n" "$as_me: test with $MPICC" >&6;} fi @@ -4890,8 +4793,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_MPICC+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$MPICC"; then +else case e in #( + e) if test -n "$MPICC"; then ac_cv_prog_MPICC="$MPICC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -4913,7 +4816,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi MPICC=$ac_cv_prog_MPICC if test -n "$MPICC"; then @@ -4949,16 +4853,22 @@ printf %s "checking for MPI_Init in -lmpi... " >&6; } if test ${ac_cv_lib_mpi_MPI_Init+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmpi $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char MPI_Init (); + builtin and then its argument prototype would still apply. 
+ The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char MPI_Init (void); int main (void) { @@ -4970,12 +4880,14 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_mpi_MPI_Init=yes -else $as_nop - ac_cv_lib_mpi_MPI_Init=no +else case e in #( + e) ac_cv_lib_mpi_MPI_Init=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpi_MPI_Init" >&5 printf "%s\n" "$ac_cv_lib_mpi_MPI_Init" >&6; } @@ -4991,18 +4903,24 @@ printf %s "checking for MPI_Init in -lmpich... " >&6; } if test ${ac_cv_lib_mpich_MPI_Init+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmpich $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char MPI_Init (); -int -main (void) + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char MPI_Init (void); +int +main (void) { return MPI_Init (); ; @@ -5012,12 +4930,14 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_mpich_MPI_Init=yes -else $as_nop - ac_cv_lib_mpich_MPI_Init=no +else case e in #( + e) ac_cv_lib_mpich_MPI_Init=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpich_MPI_Init" >&5 printf "%s\n" "$ac_cv_lib_mpich_MPI_Init" >&6; } @@ -5038,9 +4958,10 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi @@ -5061,11 +4982,6 @@ printf "%s\n" "#define HAVE_MPI 1" >>confdefs.h fi -ac_ext=${ac_fc_srcext-f} -ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' -ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_fc_compiler_gnu - ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -5074,7 +4990,7 @@ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu if test "X$MPICXX" = "X" ; then # This is our MPICC compiler preference: it will override ACX_MPI's first try. - for ac_prog in mpxlc++ mpiicpc mpicxx + for ac_prog in mpxlc++ mpiicpx mpiicpc mpicxx do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -5083,8 +4999,8 @@ printf %s "checking for $ac_word... 
" >&6; } if test ${ac_cv_prog_MPICXX+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$MPICXX"; then +else case e in #( + e) if test -n "$MPICXX"; then ac_cv_prog_MPICXX="$MPICXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5106,7 +5022,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi MPICXX=$ac_cv_prog_MPICXX if test -n "$MPICXX"; then @@ -5137,8 +5054,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_MPICXX+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$MPICXX"; then +else case e in #( + e) if test -n "$MPICXX"; then ac_cv_prog_MPICXX="$MPICXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5160,7 +5077,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi MPICXX=$ac_cv_prog_MPICXX if test -n "$MPICXX"; then @@ -5196,8 +5114,8 @@ printf %s "checking for MPI_Init in -lmpi... " >&6; } if test ${ac_cv_lib_mpi_MPI_Init+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmpi $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -5216,12 +5134,14 @@ _ACEOF if ac_fn_cxx_try_link "$LINENO" then : ac_cv_lib_mpi_MPI_Init=yes -else $as_nop - ac_cv_lib_mpi_MPI_Init=no +else case e in #( + e) ac_cv_lib_mpi_MPI_Init=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpi_MPI_Init" >&5 printf "%s\n" "$ac_cv_lib_mpi_MPI_Init" >&6; } @@ -5237,8 +5157,8 @@ printf %s "checking for MPI_Init in -lmpich... 
" >&6; } if test ${ac_cv_lib_mpich_MPI_Init+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmpich $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -5257,12 +5177,14 @@ _ACEOF if ac_fn_cxx_try_link "$LINENO" then : ac_cv_lib_mpich_MPI_Init=yes -else $as_nop - ac_cv_lib_mpich_MPI_Init=no +else case e in #( + e) ac_cv_lib_mpich_MPI_Init=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpich_MPI_Init" >&5 printf "%s\n" "$ac_cv_lib_mpich_MPI_Init" >&6; } @@ -5283,9 +5205,10 @@ _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi @@ -5313,7 +5236,7 @@ ac_compiler_gnu=$ac_cv_fc_compiler_gnu if test "X$MPIFC" = "X" ; then # This is our MPIFC compiler preference: it will override ACX_MPI's first try. - for ac_prog in mpxlf2003_r mpxlf2003 mpxlf95_r mpxlf90 mpiifort mpf95 mpf90 mpifort mpif95 mpif90 ftn + for ac_prog in mpxlf2003_r mpxlf2003 mpxlf95_r mpxlf90 mpiifx mpiifort mpf95 mpf90 mpifort mpif95 mpif90 ftn do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -5322,8 +5245,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_MPIFC+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$MPIFC"; then +else case e in #( + e) if test -n "$MPIFC"; then ac_cv_prog_MPIFC="$MPIFC" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5345,7 +5268,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi MPIFC=$ac_cv_prog_MPIFC if test -n "$MPIFC"; then @@ -5377,8 +5301,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_MPIFC+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$MPIFC"; then +else case e in #( + e) if test -n "$MPIFC"; then ac_cv_prog_MPIFC="$MPIFC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5400,7 +5324,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi MPIFC=$ac_cv_prog_MPIFC if test -n "$MPIFC"; then @@ -5434,9 +5359,10 @@ then : MPILIBS=" " { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -5448,8 +5374,8 @@ printf %s "checking for MPI_Init in -lfmpi... " >&6; } if test ${ac_cv_lib_fmpi_MPI_Init+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lfmpi $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -5459,12 +5385,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_fmpi_MPI_Init=yes -else $as_nop - ac_cv_lib_fmpi_MPI_Init=no +else case e in #( + e) ac_cv_lib_fmpi_MPI_Init=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_fmpi_MPI_Init" >&5 printf "%s\n" "$ac_cv_lib_fmpi_MPI_Init" >&6; } @@ -5480,8 +5408,8 @@ printf %s "checking for MPI_Init in -lmpichf90... 
" >&6; } if test ${ac_cv_lib_mpichf90_MPI_Init+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmpichf90 $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -5491,12 +5419,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_mpichf90_MPI_Init=yes -else $as_nop - ac_cv_lib_mpichf90_MPI_Init=no +else case e in #( + e) ac_cv_lib_mpichf90_MPI_Init=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpichf90_MPI_Init" >&5 printf "%s\n" "$ac_cv_lib_mpichf90_MPI_Init" >&6; } @@ -5513,8 +5443,8 @@ printf %s "checking for MPI_Init in -lmpi... " >&6; } if test ${ac_cv_lib_mpi_MPI_Init+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmpi $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -5524,12 +5454,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_mpi_MPI_Init=yes -else $as_nop - ac_cv_lib_mpi_MPI_Init=no +else case e in #( + e) ac_cv_lib_mpi_MPI_Init=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpi_MPI_Init" >&5 printf "%s\n" "$ac_cv_lib_mpi_MPI_Init" >&6; } @@ -5545,8 +5477,8 @@ printf %s "checking for MPI_Init in -lmpich... 
" >&6; } if test ${ac_cv_lib_mpich_MPI_Init+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmpich $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -5556,12 +5488,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_mpich_MPI_Init=yes -else $as_nop - ac_cv_lib_mpich_MPI_Init=no +else case e in #( + e) ac_cv_lib_mpich_MPI_Init=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpich_MPI_Init" >&5 printf "%s\n" "$ac_cv_lib_mpich_MPI_Init" >&6; } @@ -5584,10 +5518,11 @@ if ac_fn_fc_try_compile "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } -else $as_nop - MPILIBS="" +else case e in #( + e) MPILIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } +printf "%s\n" "no" >&6; } ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi @@ -5649,10 +5584,11 @@ CCOPT="${withval} ${CCOPT}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPT = ${CCOPT}" >&5 printf "%s\n" "CCOPT = ${CCOPT}" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - + ;; +esac fi @@ -5668,10 +5604,11 @@ CXXOPT="${withval} ${CXXOPT}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPT = ${CXXOPT}" >&5 printf "%s\n" "CXXOPT = ${CXXOPT}" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - + ;; +esac fi @@ -5687,10 +5624,31 @@ FCOPT="${withval} ${FCOPT}" { printf "%s\n" 
"$as_me:${as_lineno-$LINENO}: result: FCOPT = ${FCOPT}" >&5 printf "%s\n" "FCOPT = ${FCOPT}" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } + ;; +esac +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRA_OPT flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional EXTRA_OPT flags should be added (should be invoked only once)... " >&6; } + +# Check whether --with-extra-opt was given. +if test ${with_extra_opt+y} +then : + withval=$with_extra_opt; +EXTRA_OPT="${withval} ${EXTRA_OPT}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRA_OPT = ${EXTRA_OPT}" >&5 +printf "%s\n" "EXTRA_OPT = ${EXTRA_OPT}" >&6; } + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; +esac fi @@ -5706,10 +5664,11 @@ LIBS="${withval} ${LIBS}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: LIBS = ${LIBS}" >&5 printf "%s\n" "LIBS = ${LIBS}" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - + ;; +esac fi @@ -5726,10 +5685,11 @@ CLIBS="${withval} ${CLIBS}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CLIBS = ${CLIBS}" >&5 printf "%s\n" "CLIBS = ${CLIBS}" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - + ;; +esac fi @@ -5745,10 +5705,31 @@ FLIBS="${withval} ${FLIBS}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FLIBS = ${FLIBS}" >&5 printf "%s\n" "FLIBS = ${FLIBS}" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf 
"%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } + ;; +esac +fi + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRA_NVCC flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional EXTRA_NVCC flags should be added (should be invoked only once)... " >&6; } + +# Check whether --with-extra-nvcc was given. +if test ${with_extra_nvcc+y} +then : + withval=$with_extra_nvcc; +EXTRA_NVCC="${withval} ${EXTRA_NVCC}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRA_NVCC = ${EXTRA_NVCC}" >&5 +printf "%s\n" "EXTRA_NVCC = ${EXTRA_NVCC}" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; +esac fi @@ -5765,10 +5746,11 @@ LIBRARYPATH="${withval} ${LIBRARYPATH}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: LIBRARYPATH = ${LIBRARYPATH}" >&5 printf "%s\n" "LIBRARYPATH = ${LIBRARYPATH}" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - + ;; +esac fi @@ -5784,10 +5766,11 @@ INCLUDEPATH="${withval} ${INCLUDEPATH}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: INCLUDEPATH = ${INCLUDEPATH}" >&5 printf "%s\n" "INCLUDEPATH = ${INCLUDEPATH}" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - + ;; +esac fi @@ -5803,17 +5786,18 @@ MODULE_PATH="${withval} ${MODULE_PATH}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: MODULE_PATH = ${MODULE_PATH}" >&5 printf "%s\n" "MODULE_PATH = ${MODULE_PATH}" >&6; } -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - + ;; 
+esac fi # we just gave the user the chance to append values to these variables -############################################################################### + if test -n "$ac_tool_prefix"; then @@ -5824,8 +5808,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_RANLIB+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$RANLIB"; then +else case e in #( + e) if test -n "$RANLIB"; then ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5847,7 +5831,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi RANLIB=$ac_cv_prog_RANLIB if test -n "$RANLIB"; then @@ -5869,8 +5854,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_RANLIB+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$ac_ct_RANLIB"; then +else case e in #( + e) if test -n "$ac_ct_RANLIB"; then ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5892,7 +5877,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB if test -n "$ac_ct_RANLIB"; then @@ -5919,8 +5905,167 @@ else fi -am__api_version='1.16' +am__api_version='1.17' + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether sleep supports fractional seconds" >&5 +printf %s "checking whether sleep supports fractional seconds... " >&6; } +if test ${am_cv_sleep_fractional_seconds+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if sleep 0.001 2>/dev/null +then : + am_cv_sleep_fractional_seconds=yes +else case e in #( + e) am_cv_sleep_fractional_seconds=no ;; +esac +fi + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_sleep_fractional_seconds" >&5 +printf "%s\n" "$am_cv_sleep_fractional_seconds" >&6; } + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking filesystem timestamp resolution" >&5 +printf %s "checking filesystem timestamp resolution... 
" >&6; } +if test ${am_cv_filesystem_timestamp_resolution+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) # Default to the worst case. +am_cv_filesystem_timestamp_resolution=2 + +# Only try to go finer than 1 sec if sleep can do it. +# Don't try 1 sec, because if 0.01 sec and 0.1 sec don't work, +# - 1 sec is not much of a win compared to 2 sec, and +# - it takes 2 seconds to perform the test whether 1 sec works. +# +# Instead, just use the default 2s on platforms that have 1s resolution, +# accept the extra 1s delay when using $sleep in the Automake tests, in +# exchange for not incurring the 2s delay for running the test for all +# packages. +# +am_try_resolutions= +if test "$am_cv_sleep_fractional_seconds" = yes; then + # Even a millisecond often causes a bunch of false positives, + # so just try a hundredth of a second. The time saved between .001 and + # .01 is not terribly consequential. + am_try_resolutions="0.01 0.1 $am_try_resolutions" +fi + +# In order to catch current-generation FAT out, we must *modify* files +# that already exist; the *creation* timestamp is finer. Use names +# that make ls -t sort them differently when they have equal +# timestamps than when they have distinct timestamps, keeping +# in mind that ls -t prints the *newest* file first. +rm -f conftest.ts? +: > conftest.ts1 +: > conftest.ts2 +: > conftest.ts3 + +# Make sure ls -t actually works. Do 'set' in a subshell so we don't +# clobber the current shell's arguments. (Outer-level square brackets +# are removed by m4; they're present so that m4 does not expand +# ; be careful, easy to get confused.) +if ( + set X `ls -t conftest.ts[12]` && + { + test "$*" != "X conftest.ts1 conftest.ts2" || + test "$*" != "X conftest.ts2 conftest.ts1"; + } +); then :; else + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. 
Such a system could not be considered "sane". + printf "%s\n" ""Bad output from ls -t: \"`ls -t conftest.ts[12]`\""" >&5 + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "ls -t produces unexpected output. +Make sure there is not a broken ls alias in your environment. +See 'config.log' for more details" "$LINENO" 5; } +fi + +for am_try_res in $am_try_resolutions; do + # Any one fine-grained sleep might happen to cross the boundary + # between two values of a coarser actual resolution, but if we do + # two fine-grained sleeps in a row, at least one of them will fall + # entirely within a coarse interval. + echo alpha > conftest.ts1 + sleep $am_try_res + echo beta > conftest.ts2 + sleep $am_try_res + echo gamma > conftest.ts3 + + # We assume that 'ls -t' will make use of high-resolution + # timestamps if the operating system supports them at all. + if (set X `ls -t conftest.ts?` && + test "$2" = conftest.ts3 && + test "$3" = conftest.ts2 && + test "$4" = conftest.ts1); then + # + # Ok, ls -t worked. If we're at a resolution of 1 second, we're done, + # because we don't need to test make. + make_ok=true + if test $am_try_res != 1; then + # But if we've succeeded so far with a subsecond resolution, we + # have one more thing to check: make. It can happen that + # everything else supports the subsecond mtimes, but make doesn't; + # notably on macOS, which ships make 3.81 from 2006 (the last one + # released under GPLv2). https://bugs.gnu.org/68808 + # + # We test $MAKE if it is defined in the environment, else "make". + # It might get overridden later, but our hope is that in practice + # it does not matter: it is the system "make" which is (by far) + # the most likely to be broken, whereas if the user overrides it, + # probably they did so with a better, or at least not worse, make. 
+ # https://lists.gnu.org/archive/html/automake/2024-06/msg00051.html + # + # Create a Makefile (real tab character here): + rm -f conftest.mk + echo 'conftest.ts1: conftest.ts2' >conftest.mk + echo ' touch conftest.ts2' >>conftest.mk + # + # Now, running + # touch conftest.ts1; touch conftest.ts2; make + # should touch ts1 because ts2 is newer. This could happen by luck, + # but most often, it will fail if make's support is insufficient. So + # test for several consecutive successes. + # + # (We reuse conftest.ts[12] because we still want to modify existing + # files, not create new ones, per above.) + n=0 + make=${MAKE-make} + until test $n -eq 3; do + echo one > conftest.ts1 + sleep $am_try_res + echo two > conftest.ts2 # ts2 should now be newer than ts1 + if $make -f conftest.mk | grep 'up to date' >/dev/null; then + make_ok=false + break # out of $n loop + fi + n=`expr $n + 1` + done + fi + # + if $make_ok; then + # Everything we know to check worked out, so call this resolution good. + am_cv_filesystem_timestamp_resolution=$am_try_res + break # out of $am_try_res loop + fi + # Otherwise, we'll go on to check the next resolution. + fi +done +rm -f conftest.ts? +# (end _am_filesystem_timestamp_resolution) + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_filesystem_timestamp_resolution" >&5 +printf "%s\n" "$am_cv_filesystem_timestamp_resolution" >&6; } +# This check should not be cached, as it may vary across builds of +# different projects. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 printf %s "checking whether build environment is sane... " >&6; } # Reject unsafe characters in $srcdir or the absolute working directory @@ -5941,49 +6086,45 @@ esac # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). 
-if ( - am_has_slept=no - for am_try in 1 2; do - echo "timestamp, slept: $am_has_slept" > conftest.file - set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` - if test "$*" = "X"; then - # -L didn't work. - set X `ls -t "$srcdir/configure" conftest.file` - fi - if test "$*" != "X $srcdir/configure conftest.file" \ - && test "$*" != "X conftest.file $srcdir/configure"; then - - # If neither matched, then we have a broken ls. This can happen - # if, for instance, CONFIG_SHELL is bash and it inherits a - # broken ls alias from the environment. This has actually - # happened. Such a system could not be considered "sane". - as_fn_error $? "ls -t appears to fail. Make sure there is not a broken - alias in your environment" "$LINENO" 5 - fi - if test "$2" = conftest.file || test $am_try -eq 2; then - break - fi - # Just in case. - sleep 1 - am_has_slept=yes - done - test "$2" = conftest.file - ) -then - # Ok. - : -else - as_fn_error $? "newly created file is older than distributed files! +am_build_env_is_sane=no +am_has_slept=no +rm -f conftest.file +for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + if ( + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + test "$2" = conftest.file + ); then + am_build_env_is_sane=yes + break + fi + # Just in case. + sleep "$am_cv_filesystem_timestamp_resolution" + am_has_slept=yes +done + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_build_env_is_sane" >&5 +printf "%s\n" "$am_build_env_is_sane" >&6; } +if test "$am_build_env_is_sane" = no; then + as_fn_error $? "newly created file is older than distributed files! Check your system clock" "$LINENO" 5 fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } + # If we didn't sleep, we still need to ensure time stamps of config.status and # generated files are strictly newer. 
am_sleep_pid= -if grep 'slept: no' conftest.file >/dev/null 2>&1; then - ( sleep 1 ) & +if test -e conftest.file || grep 'slept: no' conftest.file >/dev/null 2>&1 +then : + +else case e in #( + e) ( sleep "$am_cv_filesystem_timestamp_resolution" ) & am_sleep_pid=$! + ;; +esac fi rm -f conftest.file @@ -5994,7 +6135,7 @@ test "$program_prefix" != NONE && test "$program_suffix" != NONE && program_transform_name="s&\$&$program_suffix&;$program_transform_name" # Double any \ or $. -# By default was `s,x,x', remove it if useless. +# By default was 's,x,x', remove it if useless. ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' program_transform_name=`printf "%s\n" "$program_transform_name" | sed "$ac_script"` @@ -6033,8 +6174,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_STRIP+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$STRIP"; then +else case e in #( + e) if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -6056,7 +6197,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then @@ -6078,8 +6220,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_STRIP+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$ac_ct_STRIP"; then +else case e in #( + e) if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -6101,7 +6243,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then @@ -6137,8 +6280,8 @@ if test -z "$MKDIR_P"; then if test ${ac_cv_path_mkdir+y} then : printf %s "(cached) " >&6 -else $as_nop - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +else case e in #( + e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin do IFS=$as_save_IFS @@ -6152,7 +6295,7 @@ do as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext" || continue case `"$as_dir$ac_prog$ac_exec_ext" --version 2>&1` in #( 'mkdir ('*'coreutils) '* | \ - 'BusyBox '* | \ + *'BusyBox '* | \ 'mkdir (fileutils) '4.1*) ac_cv_path_mkdir=$as_dir$ac_prog$ac_exec_ext break 3;; @@ -6161,18 +6304,17 @@ do done done IFS=$as_save_IFS - + ;; +esac fi test -d ./--version && rmdir ./--version if test ${ac_cv_path_mkdir+y}; then MKDIR_P="$ac_cv_path_mkdir -p" else - # As a last resort, use the slow shell script. Don't cache a - # value for MKDIR_P within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the value is a relative name. - MKDIR_P="$ac_install_sh -d" + # As a last resort, use plain mkdir -p, + # in the hope it doesn't have the bugs of ancient mkdir. + MKDIR_P='mkdir -p' fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 @@ -6187,8 +6329,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_AWK+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -n "$AWK"; then +else case e in #( + e) if test -n "$AWK"; then ac_cv_prog_AWK="$AWK" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -6210,7 +6352,8 @@ done done IFS=$as_save_IFS -fi +fi ;; +esac fi AWK=$ac_cv_prog_AWK if test -n "$AWK"; then @@ -6232,8 +6375,8 @@ ac_make=`printf "%s\n" "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` if eval test \${ac_cv_prog_make_${ac_make}_set+y} then : printf %s "(cached) " >&6 -else $as_nop - cat >conftest.make <<\_ACEOF +else case e in #( + e) cat >conftest.make <<\_ACEOF SHELL = /bin/sh all: @echo '@@@%%%=$(MAKE)=@@@%%%' @@ -6245,7 +6388,8 @@ case `${MAKE-make} -f conftest.make 2>/dev/null` in *) eval ac_cv_prog_make_${ac_make}_set=no;; esac -rm -f conftest.make +rm -f conftest.make ;; +esac fi if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 @@ -6330,25 +6474,21 @@ else fi +AM_DEFAULT_VERBOSITY=1 # Check whether --enable-silent-rules was given. if test ${enable_silent_rules+y} then : enableval=$enable_silent_rules; fi -case $enable_silent_rules in # ((( - yes) AM_DEFAULT_VERBOSITY=0;; - no) AM_DEFAULT_VERBOSITY=1;; - *) AM_DEFAULT_VERBOSITY=1;; -esac am_make=${MAKE-make} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 printf %s "checking whether $am_make supports nested variables... 
" >&6; } if test ${am_cv_make_support_nested_variables+y} then : printf %s "(cached) " >&6 -else $as_nop - if printf "%s\n" 'TRUE=$(BAR$(V)) +else case e in #( + e) if printf "%s\n" 'TRUE=$(BAR$(V)) BAR0=false BAR1=true V=1 @@ -6358,19 +6498,50 @@ am__doit: am_cv_make_support_nested_variables=yes else am_cv_make_support_nested_variables=no -fi +fi ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } -if test $am_cv_make_support_nested_variables = yes; then - AM_V='$(V)' - AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' -else - AM_V=$AM_DEFAULT_VERBOSITY - AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY -fi AM_BACKSLASH='\' +am__rm_f_notfound= +if (rm -f && rm -fr && rm -rf) 2>/dev/null +then : + +else case e in #( + e) am__rm_f_notfound='""' ;; +esac +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xargs -n works" >&5 +printf %s "checking xargs -n works... " >&6; } +if test ${am_cv_xargs_n_works+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test "`echo 1 2 3 | xargs -n2 echo`" = "1 2 +3" +then : + am_cv_xargs_n_works=yes +else case e in #( + e) am_cv_xargs_n_works=no ;; +esac +fi ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_xargs_n_works" >&5 +printf "%s\n" "$am_cv_xargs_n_works" >&6; } +if test "$am_cv_xargs_n_works" = yes +then : + am__xargs_n='xargs -n' +else case e in #( + e) am__xargs_n='am__xargs_n () { shift; sed "s/ /\\n/g" | while read am__xargs_n_arg; do "" "$am__xargs_n_arg"; done; }' + ;; +esac +fi + if test "`cd $srcdir && pwd`" != "`pwd`"; then # Use -I$(srcdir) only when $(srcdir) != ., so that make's output # is not polluted with repeated "-I." @@ -6393,7 +6564,7 @@ fi # Define the identity of the package. PACKAGE='psblas' - VERSION='3.7.0' + VERSION='3.9.0' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -6446,8 +6617,8 @@ printf %s "checking dependency style of $depcc... 
" >&6; } if test ${am_cv_CC_dependencies_compiler_type+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then +else case e in #( + e) if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up @@ -6534,7 +6705,7 @@ else $as_nop # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. - # When given -MP, icc 7.0 and 7.1 complain thusly: + # When given -MP, icc 7.0 and 7.1 complain thus: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported @@ -6551,7 +6722,8 @@ else $as_nop else am_cv_CC_dependencies_compiler_type=none fi - + ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; } @@ -6575,8 +6747,8 @@ printf %s "checking dependency style of $depcc... " >&6; } if test ${am_cv_CXX_dependencies_compiler_type+y} then : printf %s "(cached) " >&6 -else $as_nop - if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then +else case e in #( + e) if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up @@ -6663,7 +6835,7 @@ else $as_nop # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. 
- # When given -MP, icc 7.0 and 7.1 complain thusly: + # When given -MP, icc 7.0 and 7.1 complain thus: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported @@ -6680,7 +6852,8 @@ else $as_nop else am_cv_CXX_dependencies_compiler_type=none fi - + ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 printf "%s\n" "$am_cv_CXX_dependencies_compiler_type" >&6; } @@ -6712,47 +6885,9 @@ fi -# POSIX will say in a future version that running "rm -f" with no argument -# is OK; and we want to be able to make that assumption in our Makefile -# recipes. So use an aggressive probe to check that the usage we want is -# actually supported "in the wild" to an acceptable degree. -# See automake bug#10828. -# To make any issue more visible, cause the running configure to be aborted -# by default if the 'rm' program in use doesn't match our expectations; the -# user can still override this though. -if rm -f && rm -fr && rm -rf; then : OK; else - cat >&2 <<'END' -Oops! - -Your 'rm' program seems unable to run without file operands specified -on the command line, even when the '-f' option is present. This is contrary -to the behaviour of most rm programs out there, and not conforming with -the upcoming POSIX standard: - -Please tell bug-automake@gnu.org about your system, including the value -of your $PATH and any error possibly output before this message. This -can help us improve future automake versions. -END - if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then - echo 'Configuration will proceed anyway, since you have set the' >&2 - echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 - echo >&2 - else - cat >&2 <<'END' -Aborting the configuration process, to ensure you take notice of the issue. -You can download and install GNU coreutils to get an 'rm' implementation -that behaves properly: . 
-If you want to complete the configuration process using your problematic -'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM -to "yes", and re-run configure. - -END - as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 - fi -fi @@ -6788,12 +6923,13 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } psblas_cv_fc="gcc" -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -6829,12 +6965,13 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } psblas_cv_fc="cray" -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -6847,12 +6984,12 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu if test x"$psblas_cv_fc" == "x" ; then if eval "$MPIFC -qversion 2>&1 | grep XL 2>/dev/null" ; then - psblas_cv_fc="xlf" + psblas_cv_fc="xlf"; # Some configurations of the XLF want "-WF," prepended to -D.. flags. # TODO : discover the exact conditions when the usage of -WF is needed. 
psblas_cv_define_prepend="-WF," if eval "$MPIFC -qversion 2>&1 | grep -e\"Version: 10\.\" 2>/dev/null"; then - FDEFINES="$psblas_cv_define_prepend-DXLF_10 $FDEFINES" + FDEFINES="$psblas_cv_define_prepend-DXLF_10 $FDEFINES"; sed -e's/(0-9*).*/$1/p' fi # Note : there could be problems with old xlf compiler versions ( <10.1 ) @@ -6861,20 +6998,52 @@ if test x"$psblas_cv_fc" == "x" ; then elif eval "$MPIFC -V 2>&1 | grep Sun 2>/dev/null" ; then # Sun compiler detection - psblas_cv_fc="sun" - elif eval "$MPIFC -V 2>&1 | grep Portland 2>/dev/null" ; then - # Portland group compiler detection - - psblas_cv_fc="pg" + psblas_cv_fc="sun"; + elif eval "$MPIFC --version 2>&1 | grep flang-new 2>&1 1>/dev/null" ; then + # LLVM compiler + psblas_cv_fc="flang-new"; + psblas_flang_version=`flang-new --version |grep flang| sed -e 's/^ *flang.* version *//gi'`; + psblas_flang_shv=`flang-new --version |grep flang| sed -e 's/^ *flang.* version *//gi' | sed -e's/\./ /g' | awk '{print $1}'`; + psblas_cv_define_prepend=""; + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" + psblas_shvs=`echo $psblas_flang_shv|sed -e's/^0-9*//g'`; + if test x"$psblas_shvs" != x""; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Running with LLVM $psblas_flang_version ($psblas_flang_shv). " >&5 +printf "%s\n" "$as_me: Running with LLVM $psblas_flang_version ($psblas_flang_shv). " >&6;} + if (( $psblas_flang_shv < 20 )) ; then + as_fn_error $? "The minimum supported LLVM version is version 20, bailing out. " "$LINENO" 5 + else + psblas_cv_fc="flang"; + fi + else + as_fn_error $? "Unrecognized LLVM version. The minimum supported LLVM version is version 20, bailing out. 
" "$LINENO" 5 + fi + elif eval "$MPIFC --version 2>&1 | grep flang 2>&1 1>/dev/null" ; then + # LLVM compiler + psblas_flang_version=`flang --version |grep flang| sed -e 's/^ *flang.* version *//gi'`; + psblas_flang_shv=`flang --version |grep flang| sed -e 's/^ *flang.* version *//gi' | sed -e's/\./ /g' | awk '{print $1}'`; + psblas_cv_fc="flang"; + psblas_cv_define_prepend=""; + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" + psblas_shvs=`echo $psblas_flang_shv|sed -e's/^0-9*//g'`; + if test x"$psblas_shvs" != x""; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Running with LLVM $psblas_flang_version ($psblas_flang_shv). " >&5 +printf "%s\n" "$as_me: Running with LLVM $psblas_flang_version ($psblas_flang_shv). " >&6;} + if (( $psblas_flang_shv < 20 )) ; then + as_fn_error $? "The minimum supported LLVM version is version 20, bailing out. " "$LINENO" 5 + fi + else + as_fn_error $? "Unrecognized LLVM version. The minimum supported LLVM version is version 20, bailing out. " "$LINENO" 5 + fi elif eval "$MPIFC -V 2>&1 | grep Intel.*Fortran.*Compiler 2>/dev/null" ; then # Intel compiler identification - psblas_cv_fc="ifc" + psblas_cv_fc="ifc"; elif eval "$MPIFC -v 2>&1 | grep NAG 2>/dev/null" ; then - psblas_cv_fc="nag" - FC="$MPIFC" + psblas_cv_fc="nag"; + FC="$MPIFC"; else - psblas_cv_fc="" + psblas_cv_fc=""; # unsupported MPI Fortran compiler { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Unknown Fortran compiler, proceeding with fingers crossed !" >&5 printf "%s\n" "$as_me: Unknown Fortran compiler, proceeding with fingers crossed !" >&6;} @@ -6906,15 +7075,16 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Sorry, we require GNU Fortran version 4.9 or later." 
>&5 printf "%s\n" "$as_me: Sorry, we require GNU Fortran version 4.9 or later." >&6;} echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Bailing out." "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -6926,126 +7096,57 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu fi - ############################################################################### # Linking, symbol mangling, and misc tests ############################################################################### +ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu -# Note : This is functional to Make.inc rules and structure (see below). -ac_ext=c +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Fortran name-mangling scheme" >&5 +printf %s "checking for Fortran name-mangling scheme... 
" >&6; } +if test ${ac_cv_fc_mangling+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat > conftest.$ac_ext <<_ACEOF + subroutine foobar() + return + end + subroutine foo_bar() + return + end +_ACEOF +if ac_fn_fc_try_compile "$LINENO" +then : + mv conftest.$ac_objext cfortran_test.$ac_objext + + ac_save_LIBS=$LIBS + LIBS="cfortran_test.$ac_objext $LIBS $FCLIBS" + + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu - -ac_header= ac_cache= -for ac_item in $ac_header_c_list -do - if test $ac_cache; then - ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" - if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then - printf "%s\n" "#define $ac_item 1" >> confdefs.h - fi - ac_header= ac_cache= - elif test $ac_header; then - ac_cache=$ac_item - else - ac_header=$ac_item - fi -done - - - - - - - - -if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes -then : - -printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h - -fi -# The cast to long int works around a bug in the HP C Compiler -# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects -# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. -# This bug is HP SR number 8606223364. -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5 -printf %s "checking size of void *... 
" >&6; } -if test ${ac_cv_sizeof_void_p+y} -then : - printf %s "(cached) " >&6 -else $as_nop - if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" "ac_cv_sizeof_void_p" "$ac_includes_default" -then : - -else $as_nop - if test "$ac_cv_type_void_p" = yes; then - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error 77 "cannot compute sizeof (void *) -See \`config.log' for more details" "$LINENO" 5; } - else - ac_cv_sizeof_void_p=0 - fi -fi - -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_void_p" >&5 -printf "%s\n" "$ac_cv_sizeof_void_p" >&6; } - - - -printf "%s\n" "#define SIZEOF_VOID_P $ac_cv_sizeof_void_p" >>confdefs.h - - -# Define for platforms with 64 bit (void * ) pointers -if test X"$ac_cv_sizeof_void_p" == X"8" ; then - CDEFINES="-DPtr64Bits $CDEFINES" -fi -ac_ext=${ac_fc_srcext-f} -ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' -ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_fc_compiler_gnu - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Fortran name-mangling scheme" >&5 -printf %s "checking for Fortran name-mangling scheme... 
" >&6; } -if test ${ac_cv_fc_mangling+y} -then : - printf %s "(cached) " >&6 -else $as_nop - cat > conftest.$ac_ext <<_ACEOF - subroutine foobar() - return - end - subroutine foo_bar() - return - end -_ACEOF -if ac_fn_fc_try_compile "$LINENO" -then : - mv conftest.$ac_objext cfortran_test.$ac_objext - - ac_save_LIBS=$LIBS - LIBS="cfortran_test.$ac_objext $LIBS $FCLIBS" - - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - ac_success=no - for ac_foobar in foobar FOOBAR; do - for ac_underscore in "" "_"; do - ac_func="$ac_foobar$ac_underscore" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ + ac_success=no + for ac_foobar in foobar FOOBAR; do + for ac_underscore in "" "_"; do + ac_func="$ac_foobar$ac_underscore" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char $ac_func (); + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (void); int main (void) { @@ -7092,8 +7193,14 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char $ac_func (); + builtin and then its argument prototype would still apply. 
+ The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (void); int main (void) { @@ -7136,14 +7243,16 @@ ac_compiler_gnu=$ac_cv_fc_compiler_gnu LIBS=$ac_save_LIBS rm -rf conftest* rm -f cfortran_test* -else $as_nop - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +else case e in #( + e) { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} as_fn_error $? "cannot compile a simple Fortran program -See \`config.log' for more details" "$LINENO" 5; } +See 'config.log' for more details" "$LINENO" 5; } ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - + ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_mangling" >&5 printf "%s\n" "$ac_cv_fc_mangling" >&6; } @@ -7163,40 +7272,6 @@ pac_fc_sec_under=${pac_fc_under#*,} pac_fc_sec_under=${pac_fc_sec_under# } pac_fc_under=${pac_fc_under%%,*} pac_fc_under=${pac_fc_under# } -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking defines for C/Fortran name interfaces" >&5 -printf %s "checking defines for C/Fortran name interfaces... 
" >&6; } -if test "x$pac_fc_case" == "xlower case"; then - if test "x$pac_fc_under" == "xunderscore"; then - if test "x$pac_fc_sec_under" == "xno extra underscore"; then - pac_f_c_names="-DLowerUnderscore" - elif test "x$pac_fc_sec_under" == "xextra underscore"; then - pac_f_c_names="-DLowerDoubleUnderscore" - else - pac_f_c_names="-DUNKNOWN" - fi - elif test "x$pac_fc_under" == "xno underscore"; then - pac_f_c_names="-DLowerCase" - else - pac_f_c_names="-DUNKNOWN" - fi -elif test "x$pac_fc_case" == "xupper case"; then - if test "x$pac_fc_under" == "xunderscore"; then - if test "x$pac_fc_sec_under" == "xno extra underscore"; then - pac_f_c_names="-DUpperUnderscore" - elif test "x$pac_fc_sec_under" == "xextra underscore"; then - pac_f_c_names="-DUpperDoubleUnderscore" - else - pac_f_c_names="-DUNKNOWN" - fi - elif test "x$pac_fc_under" == "xno underscore"; then - pac_f_c_names="-DUpperCase" - else - pac_f_c_names="-DUNKNOWN" - fi -else - pac_f_c_names="-DUNKNOWN" -fi -CDEFINES="$pac_f_c_names $CDEFINES" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_f_c_names " >&5 printf "%s\n" " $pac_f_c_names " >&6; } @@ -7278,7 +7353,7 @@ if test "X$FCOPT" == "X" ; then if test "X$psblas_cv_fc" == "Xgcc" ; then # note that no space should be placed around the equality symbol in assignations # Note : 'native' is valid _only_ on GCC/x86 (32/64 bits) - FCOPT="-g -O3 -frecursive $FCOPT" + FCOPT="-g -O3 $FCOPT" elif test "X$psblas_cv_fc" == X"xlf" ; then # XL compiler : consider using -qarch=auto FCOPT="-O3 -qarch=auto -qlanglvl=extended -qxlf2003=polymorphic:autorealloc $FCOPT" @@ -7289,6 +7364,9 @@ if test "X$FCOPT" == "X" ; then elif test "X$psblas_cv_fc" == X"pg" ; then # other compilers .. FCOPT="-fast $FCOPT" + elif test "X$psblas_cv_fc" == X"flang" ; then + # other compilers .. + FCOPT="-O3" # NOTE : PG & Sun use -fast instead -O3 elif test "X$psblas_cv_fc" == X"sun" ; then # other compilers .. 
@@ -7306,48 +7384,10 @@ fi if test "X$psblas_cv_fc" == X"nag" ; then # Add needed options FCOPT="$FCOPT -dcfuns -f2003 -wmismatch=mpi_scatterv,mpi_alltoallv,mpi_gatherv,mpi_allgatherv" - EXTRA_OPT="-mismatch_all" + EXTRA_OPT="$EXTRA_OPT -mismatch_all" fi if test "X$psblas_cv_fc" == "Xgcc" ; then - FCOPT="-frecursive $FCOPT" - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for version 10 or later of GNU Fortran" >&5 -printf %s "checking for version 10 or later of GNU Fortran... " >&6; } - ac_ext=${ac_fc_srcext-f} -ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' -ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_fc_compiler_gnu - - ac_exeext='' - ac_ext='F90' - ac_fc=${MPIFC-$FC}; - cat > conftest.$ac_ext <<_ACEOF - - program main -#if ( __GNUC__ >= 10 ) - print *, "ok" -#else - this program will fail -#endif - end -_ACEOF -if ac_fn_fc_try_compile "$LINENO" -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } - FCOPT="-fallow-argument-mismatch $FCOPT" -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - + FCOPT="-frecursive $FCOPT" fi @@ -7375,7 +7415,7 @@ fi ############################################################################## BASEMODNAME=psb_base_mod PRECMODNAME=psb_prec_mod -METHDMODNAME=psb_krylov_mod +METHDMODNAME=psb_linsolve_mod UTILMODNAME=psb_util_mod if test "X$psblas_cv_fc" == X"cray" @@ -7385,7 +7425,7 @@ then FIFLAG="-I" BASEMODNAME=PSB_BASE_MOD PRECMODNAME=PSB_PREC_MOD - METHDMODNAME=PSB_KRYLOV_MOD + METHDMODNAME=PSB_LINSOLVE_MOD 
UTILMODNAME=PSB_UTIL_MOD else @@ -7395,8 +7435,8 @@ printf %s "checking fortran 90 modules extension... " >&6; } if test ${ax_cv_f90_modext+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_ext=${ac_fc_srcext-f} +else case e in #( + e) ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu @@ -7427,8 +7467,9 @@ then : fi fi -else $as_nop - ax_cv_f90_modext=unknown +else case e in #( + e) ax_cv_f90_modext=unknown ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext cd .. @@ -7439,7 +7480,8 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu - + ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_f90_modext" >&5 printf "%s\n" "$ax_cv_f90_modext" >&6; } @@ -7449,8 +7491,8 @@ printf %s "checking fortran 90 modules inclusion flag... 
" >&6; } if test ${ax_cv_f90_modflag+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_ext=${ac_fc_srcext-f} +else case e in #( + e) ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu @@ -7509,7 +7551,8 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu - + ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_f90_modflag" >&5 printf "%s\n" "$ax_cv_f90_modflag" >&6; } @@ -7531,8 +7574,8 @@ fi ############################################################################### # Custom test : do we have a module or include for MPI Fortran interface? if test x"$pac_cv_serial_mpi" == x"yes" ; then - FDEFINES="$psblas_cv_define_prepend-DSERIAL_MPI $psblas_cv_define_prepend-DMPI_MOD $FDEFINES"; -else + FDEFINES="$psblas_cv_define_prepend-DPSB_SERIAL_MPI $psblas_cv_define_prepend-DPSB_MPI_MOD $FDEFINES"; + else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking MPI Fortran 2008 interface" >&5 printf %s "checking MPI Fortran 2008 interface... 
" >&6; } ac_ext=${ac_fc_srcext-f} @@ -7555,13 +7598,14 @@ then : printf "%s\n" "yes" >&6; } pac_cv_mpi_f08="yes"; : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } pac_cv_mpi_f08="no"; echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -7572,7 +7616,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu if test x"$pac_cv_mpi_f08" == x"yes" ; then - FDEFINES="$psblas_cv_define_prepend-DMPI_MOD $FDEFINES"; + FDEFINES="$psblas_cv_define_prepend-DPSB_MPI_MOD $FDEFINES"; else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Fortran MPI mod" >&5 printf %s "checking for Fortran MPI mod... " >&6; } @@ -7594,13 +7638,14 @@ if ac_fn_fc_try_compile "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - FDEFINES="$psblas_cv_define_prepend-DMPI_MOD $FDEFINES" -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + FDEFINES="$psblas_cv_define_prepend-DPSB_MPI_MOD $FDEFINES" +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 - FDEFINES="$psblas_cv_define_prepend-DMPI_H $FDEFINES" + FDEFINES="$psblas_cv_define_prepend-DPSB_MPI_H $FDEFINES" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -7621,9 +7666,10 @@ printf %s "checking what size in bytes we want for local indices and data... 
" > if test ${with_ipk+y} then : withval=$with_ipk; pac_cv_ipk_size=$withval; -else $as_nop - pac_cv_ipk_size=4; - +else case e in #( + e) pac_cv_ipk_size=4; + ;; +esac fi if test x"$pac_cv_ipk_size" == x"4" || test x"$pac_cv_ipk_size" == x"8" ; then @@ -7644,9 +7690,10 @@ printf %s "checking what size in bytes we want for global indices and data... " if test ${with_lpk+y} then : withval=$with_lpk; pac_cv_lpk_size=$withval; -else $as_nop - pac_cv_lpk_size=8; - +else case e in #( + e) pac_cv_lpk_size=8; + ;; +esac fi if test x"$pac_cv_lpk_size" == x"4" || test x"$pac_cv_lpk_size" == x"8"; then @@ -7659,26 +7706,28 @@ printf "%s\n" "Unsupported value for LPK: $pac_cv_lpk_size, defaulting to 8." >& fi -# Defaults for IPK/LPK +# Defaults for PSB_IPK/PSB_LPK if test x"$pac_cv_ipk_size" == x"" ; then pac_cv_ipk_size=4 fi if test x"$pac_cv_lpk_size" == x"" ; then pac_cv_lpk_size=8 fi +PSB_IPKDEF="#define PSB_IPK$pac_cv_ipk_size" +PSB_LPKDEF="#define PSB_LPK$pac_cv_lpk_size" # Enforce sensible combination if (( $pac_cv_lpk_size < $pac_cv_ipk_size )); then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Invalid combination of size specs IPK ${pac_cv_ipk_size} LPK ${pac_cv_lpk_size}. " >&5 -printf "%s\n" "$as_me: Invalid combination of size specs IPK ${pac_cv_ipk_size} LPK ${pac_cv_lpk_size}. " >&6;}; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Invalid combination of size specs PSB_IPK ${pac_cv_ipk_size} PSB_LPK ${pac_cv_lpk_size}. " >&5 +printf "%s\n" "$as_me: Invalid combination of size specs PSB_IPK ${pac_cv_ipk_size} PSB_LPK ${pac_cv_lpk_size}. 
" >&6;}; { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Forcing equal values" >&5 printf "%s\n" "$as_me: Forcing equal values" >&6;} pac_cv_lpk_size=$pac_cv_ipk_size; fi -FDEFINES="$psblas_cv_define_prepend-DIPK${pac_cv_ipk_size} $FDEFINES"; -FDEFINES="$psblas_cv_define_prepend-DLPK${pac_cv_lpk_size} $FDEFINES"; -CDEFINES="-DIPK${pac_cv_ipk_size} -DLPK${pac_cv_lpk_size} $CDEFINES" +FDEFINES="$psblas_cv_define_prepend-DPSB_IPK${pac_cv_ipk_size} $FDEFINES"; +FDEFINES="$psblas_cv_define_prepend-DPSB_LPK${pac_cv_lpk_size} $FDEFINES"; FLINK="$MPIFC" +CLINK="$MPICC" if test -e penmp || test -e mp; then as_fn_error $? "AC_OPENMP clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5 fi @@ -7687,10 +7736,7 @@ printf %s "checking whether we want openmp ... " >&6; } # Check whether --enable-openmp was given. if test ${enable_openmp+y} then : - enableval=$enable_openmp; -pac_cv_openmp="yes"; - - + enableval=$enable_openmp; pac_cv_openmp="$enableval"; fi if test x"$pac_cv_openmp" == x"yes" ; then @@ -7714,8 +7760,8 @@ printf %s "checking for $FC option to support OpenMP... 
" >&6; } if test ${ac_cv_prog_fc_openmp+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_cv_prog_fc_openmp='not found' +else case e in #( + e) ac_cv_prog_fc_openmp='not found' for ac_option in '' -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ -Popenmp --openmp; do @@ -7725,7 +7771,7 @@ else $as_nop program main implicit none -!$ integer tid +!\$ integer tid tid = 42 call omp_set_num_threads(2) end @@ -7737,7 +7783,7 @@ then : program main implicit none -!$ integer tid +!\$ integer tid tid = 42 call omp_set_num_threads(2) end @@ -7746,8 +7792,9 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_prog_fc_openmp=$ac_option -else $as_nop - ac_cv_prog_fc_openmp='unsupported' +else case e in #( + e) ac_cv_prog_fc_openmp='unsupported' ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -7764,7 +7811,8 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext elif test "$ac_cv_prog_fc_openmp" = ''; then ac_cv_prog_fc_openmp='none needed' fi - rm -f penmp mp + rm -f penmp mp ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_openmp" >&5 printf "%s\n" "$ac_cv_prog_fc_openmp" >&6; } @@ -7801,8 +7849,8 @@ printf %s "checking for $CC option to support OpenMP... 
" >&6; } if test ${ac_cv_prog_c_openmp+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_cv_prog_c_openmp='not found' +else case e in #( + e) ac_cv_prog_c_openmp='not found' for ac_option in '' -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ -Popenmp --openmp; do @@ -7833,8 +7881,9 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_prog_c_openmp=$ac_option -else $as_nop - ac_cv_prog_c_openmp='unsupported' +else case e in #( + e) ac_cv_prog_c_openmp='unsupported' ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -7851,7 +7900,8 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext elif test "$ac_cv_prog_c_openmp" = ''; then ac_cv_prog_c_openmp='none needed' fi - rm -f penmp mp + rm -f penmp mp ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 printf "%s\n" "$ac_cv_prog_c_openmp" >&6; } @@ -7888,8 +7938,8 @@ printf %s "checking for $CXX option to support OpenMP... 
" >&6; } if test ${ac_cv_prog_cxx_openmp+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_cv_prog_cxx_openmp='not found' +else case e in #( + e) ac_cv_prog_cxx_openmp='not found' for ac_option in '' -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ -Popenmp --openmp; do @@ -7920,8 +7970,9 @@ _ACEOF if ac_fn_cxx_try_link "$LINENO" then : ac_cv_prog_cxx_openmp=$ac_option -else $as_nop - ac_cv_prog_cxx_openmp='unsupported' +else case e in #( + e) ac_cv_prog_cxx_openmp='unsupported' ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -7938,7 +7989,8 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext elif test "$ac_cv_prog_cxx_openmp" = ''; then ac_cv_prog_cxx_openmp='none needed' fi - rm -f penmp mp + rm -f penmp mp ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_openmp" >&5 printf "%s\n" "$ac_cv_prog_cxx_openmp" >&6; } @@ -7964,12 +8016,14 @@ fi if test x"$pac_cv_openmp" == x"yes" ; then - FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES"; - CDEFINES="-DOPENMP $CDEFINES"; + FDEFINES="$psblas_cv_define_prepend-DPSB_OPENMP $FDEFINES"; + CDEFINES="$CDEFINES"; + CHAVE_OPENMP="#define PSB_OPENMP" FCOPT="$FCOPT $pac_cv_openmp_fcopt"; CCOPT="$CCOPT $pac_cv_openmp_ccopt"; CXXOPT="$CXXOPT $pac_cv_openmp_cxxopt"; FLINK="$FLINK $pac_cv_openmp_fcopt"; + CLINK="$CLINK $pac_cv_openmp_fcopt"; fi # # Tests for support of various Fortran features; some of them are critical, @@ -8046,14 +8100,15 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for TR15581. 
Please get a Fortran compiler that supports it, e.g. GNU Fortran 4.8." "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8094,14 +8149,15 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for EXTENDS. Please get a Fortran compiler that supports it, e.g. GNU Fortran 4.8." "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8158,14 +8214,15 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for CLASS and type bound procedures. Please get a Fortran compiler that supports them, e.g. GNU Fortran 4.8." "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8208,14 +8265,15 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for SOURCE= allocation. Please get a Fortran compiler that supports it, e.g. 
GNU Fortran 4.8." "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8248,14 +8306,15 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for MOVE_ALLOC. Please get a Fortran compiler that supports it, e.g. GNU Fortran 4.8." "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8287,14 +8346,15 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for ISO_C_BINDING. Please get a Fortran compiler that supports it, e.g. GNU Fortran 4.8." "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8337,14 +8397,15 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for SAME_TYPE_AS. Please get a Fortran compiler that supports it, e.g. GNU Fortran 4.8." 
"$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8385,14 +8446,15 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for EXTENDS_TYPE_OF. Please get a Fortran compiler that supports it, e.g. GNU Fortran 4.8." "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8435,14 +8497,15 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for MOLD= allocation. Please get a Fortran compiler that supports it, e.g. GNU Fortran 4.8." "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8474,13 +8537,14 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? 
"Sorry, cannot build PSBLAS without support for VOLATILE" "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8512,13 +8576,14 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? "Sorry, cannot build PSBLAS without support for ISO_FORTRAN_ENV" "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8565,13 +8630,14 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 as_fn_error $? 
"Sorry, cannot build PSBLAS without support for FINAL" "$LINENO" 5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8619,13 +8685,14 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 - FDEFINES="$psblas_cv_define_prepend-DHAVE_BUGGY_GENERICS $FDEFINES" - + FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_BUGGY_GENERICS $FDEFINES" + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8660,13 +8727,14 @@ if ac_fn_fc_try_compile "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - FDEFINES="$psblas_cv_define_prepend-DHAVE_FLUSH_STMT $FDEFINES" -else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_FLUSH_STMT $FDEFINES" +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 - + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c @@ -8711,7 +8779,7 @@ if test "X$RANLIB" == "X" ; then fi # This should be portable -AR="${AR} -cur" +AR="${AR} -cDr" ############################################################################### @@ -8781,8 +8849,9 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : pac_blas_ok=yes -else $as_nop - BLAS_LIBS="" +else case e in #( + e) BLAS_LIBS="" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -8806,16 +8875,22 @@ printf %s "checking for ATL_xerbla in -latlas... 
" >&6; } if test ${ac_cv_lib_atlas_ATL_xerbla+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-latlas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char ATL_xerbla (); + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char ATL_xerbla (void); int main (void) { @@ -8827,12 +8902,14 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_atlas_ATL_xerbla=yes -else $as_nop - ac_cv_lib_atlas_ATL_xerbla=no +else case e in #( + e) ac_cv_lib_atlas_ATL_xerbla=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_atlas_ATL_xerbla" >&5 printf "%s\n" "$ac_cv_lib_atlas_ATL_xerbla" >&6; } @@ -8848,8 +8925,8 @@ printf %s "checking for sgemm in -lf77blas... 
" >&6; } if test ${ac_cv_lib_f77blas_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lf77blas -latlas $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -8859,12 +8936,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_f77blas_sgemm=yes -else $as_nop - ac_cv_lib_f77blas_sgemm=no +else case e in #( + e) ac_cv_lib_f77blas_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_f77blas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_f77blas_sgemm" >&6; } @@ -8881,16 +8960,22 @@ printf %s "checking for cblas_dgemm in -lcblas... " >&6; } if test ${ac_cv_lib_cblas_cblas_dgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lcblas -lf77blas -latlas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char cblas_dgemm (); + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char cblas_dgemm (void); int main (void) { @@ -8902,12 +8987,14 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cblas_cblas_dgemm=yes -else $as_nop - ac_cv_lib_cblas_cblas_dgemm=no +else case e in #( + e) ac_cv_lib_cblas_cblas_dgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cblas_cblas_dgemm" >&5 printf "%s\n" "$ac_cv_lib_cblas_cblas_dgemm" >&6; } @@ -8935,16 +9022,22 @@ printf %s "checking for ATL_xerbla in -lsatlas... " >&6; } if test ${ac_cv_lib_satlas_ATL_xerbla+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lsatlas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char ATL_xerbla (); + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char ATL_xerbla (void); int main (void) { @@ -8956,12 +9049,14 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_satlas_ATL_xerbla=yes -else $as_nop - ac_cv_lib_satlas_ATL_xerbla=no +else case e in #( + e) ac_cv_lib_satlas_ATL_xerbla=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_satlas_ATL_xerbla" >&5 printf "%s\n" "$ac_cv_lib_satlas_ATL_xerbla" >&6; } @@ -8977,8 +9072,8 @@ printf %s "checking for sgemm in -lsatlas... " >&6; } if test ${ac_cv_lib_satlas_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lsatlas -lsatlas $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -8988,12 +9083,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_satlas_sgemm=yes -else $as_nop - ac_cv_lib_satlas_sgemm=no +else case e in #( + e) ac_cv_lib_satlas_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_satlas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_satlas_sgemm" >&6; } @@ -9010,16 +9107,22 @@ printf %s "checking for cblas_dgemm in -lsatlas... " >&6; } if test ${ac_cv_lib_satlas_cblas_dgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lsatlas -lsatlas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -char cblas_dgemm (); + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char cblas_dgemm (void); int main (void) { @@ -9031,12 +9134,14 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_satlas_cblas_dgemm=yes -else $as_nop - ac_cv_lib_satlas_cblas_dgemm=no +else case e in #( + e) ac_cv_lib_satlas_cblas_dgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_satlas_cblas_dgemm" >&5 printf "%s\n" "$ac_cv_lib_satlas_cblas_dgemm" >&6; } @@ -9065,8 +9170,8 @@ printf %s "checking for sgemm in -lblas... " >&6; } if test ${ac_cv_lib_blas_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lblas $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9076,12 +9181,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_blas_sgemm=yes -else $as_nop - ac_cv_lib_blas_sgemm=no +else case e in #( + e) ac_cv_lib_blas_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_blas_sgemm" >&6; } @@ -9092,8 +9199,8 @@ printf %s "checking for dgemm in -ldgemm... 
" >&6; } if test ${ac_cv_lib_dgemm_dgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-ldgemm -lblas $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9103,12 +9210,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_dgemm_dgemm=yes -else $as_nop - ac_cv_lib_dgemm_dgemm=no +else case e in #( + e) ac_cv_lib_dgemm_dgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dgemm_dgemm" >&5 printf "%s\n" "$ac_cv_lib_dgemm_dgemm" >&6; } @@ -9119,8 +9228,8 @@ printf %s "checking for sgemm in -lsgemm... " >&6; } if test ${ac_cv_lib_sgemm_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lsgemm -lblas $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9130,12 +9239,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_sgemm_sgemm=yes -else $as_nop - ac_cv_lib_sgemm_sgemm=no +else case e in #( + e) ac_cv_lib_sgemm_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_sgemm_sgemm" >&5 printf "%s\n" "$ac_cv_lib_sgemm_sgemm" >&6; } @@ -9163,8 +9274,8 @@ printf %s "checking for sgemm in -lopenblas... 
" >&6; } if test ${ac_cv_lib_openblas_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lopenblas $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9174,12 +9285,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_openblas_sgemm=yes -else $as_nop - ac_cv_lib_openblas_sgemm=no +else case e in #( + e) ac_cv_lib_openblas_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_openblas_sgemm" >&6; } @@ -9196,14 +9309,14 @@ if test $pac_blas_ok = no; then if test x"$ac_cv_fc_compiler_gnu" = xyes; then # 64 bit if test $host_cpu = x86_64; then - as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_gf_lp64_$sgemm" | $as_tr_sh` + as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_gf_lp64_$sgemm" | sed "$as_sed_sh"` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $sgemm in -lmkl_gf_lp64" >&5 printf %s "checking for $sgemm in -lmkl_gf_lp64... 
" >&6; } if eval test \${$as_ac_Lib+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmkl_gf_lp64 -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9213,12 +9326,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : eval "$as_ac_Lib=yes" -else $as_nop - eval "$as_ac_Lib=no" +else case e in #( + e) eval "$as_ac_Lib=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi eval ac_res=\$$as_ac_Lib { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -9230,14 +9345,14 @@ fi # 32 bit elif test $host_cpu = i686; then - as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_gf_$sgemm" | $as_tr_sh` + as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_gf_$sgemm" | sed "$as_sed_sh"` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $sgemm in -lmkl_gf" >&5 printf %s "checking for $sgemm in -lmkl_gf... 
" >&6; } if eval test \${$as_ac_Lib+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmkl_gf -lmkl_gf -lmkl_sequential -lmkl_core -lpthread $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9247,12 +9362,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : eval "$as_ac_Lib=yes" -else $as_nop - eval "$as_ac_Lib=no" +else case e in #( + e) eval "$as_ac_Lib=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi eval ac_res=\$$as_ac_Lib { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -9267,14 +9384,14 @@ fi else # 64-bit if test $host_cpu = x86_64; then - as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_intel_lp64_$sgemm" | $as_tr_sh` + as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_intel_lp64_$sgemm" | sed "$as_sed_sh"` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $sgemm in -lmkl_intel_lp64" >&5 printf %s "checking for $sgemm in -lmkl_intel_lp64... 
" >&6; } if eval test \${$as_ac_Lib+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmkl_intel_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9284,12 +9401,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : eval "$as_ac_Lib=yes" -else $as_nop - eval "$as_ac_Lib=no" +else case e in #( + e) eval "$as_ac_Lib=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi eval ac_res=\$$as_ac_Lib { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -9301,14 +9420,14 @@ fi # 32-bit elif test $host_cpu = i686; then - as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_intel_$sgemm" | $as_tr_sh` + as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_intel_$sgemm" | sed "$as_sed_sh"` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $sgemm in -lmkl_intel" >&5 printf %s "checking for $sgemm in -lmkl_intel... 
" >&6; } if eval test \${$as_ac_Lib+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmkl_intel -lmkl_intel -lmkl_sequential -lmkl_core -lpthread $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9318,12 +9437,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : eval "$as_ac_Lib=yes" -else $as_nop - eval "$as_ac_Lib=no" +else case e in #( + e) eval "$as_ac_Lib=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi eval ac_res=\$$as_ac_Lib { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -9338,14 +9459,14 @@ fi fi # Old versions of MKL if test $pac_blas_ok = no; then - as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_$sgemm" | $as_tr_sh` + as_ac_Lib=`printf "%s\n" "ac_cv_lib_mkl_$sgemm" | sed "$as_sed_sh"` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $sgemm in -lmkl" >&5 printf %s "checking for $sgemm in -lmkl... " >&6; } if eval test \${$as_ac_Lib+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lmkl -lguide -lpthread $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9355,12 +9476,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : eval "$as_ac_Lib=yes" -else $as_nop - eval "$as_ac_Lib=no" +else case e in #( + e) eval "$as_ac_Lib=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi eval ac_res=\$$as_ac_Lib { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -9399,8 +9522,8 @@ printf %s "checking for sgemm in -lcxml... 
" >&6; } if test ${ac_cv_lib_cxml_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lcxml $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9410,12 +9533,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_cxml_sgemm=yes -else $as_nop - ac_cv_lib_cxml_sgemm=no +else case e in #( + e) ac_cv_lib_cxml_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cxml_sgemm" >&5 printf "%s\n" "$ac_cv_lib_cxml_sgemm" >&6; } @@ -9433,8 +9558,8 @@ printf %s "checking for sgemm in -ldxml... " >&6; } if test ${ac_cv_lib_dxml_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-ldxml $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9444,12 +9569,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_dxml_sgemm=yes -else $as_nop - ac_cv_lib_dxml_sgemm=no +else case e in #( + e) ac_cv_lib_dxml_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dxml_sgemm" >&5 printf "%s\n" "$ac_cv_lib_dxml_sgemm" >&6; } @@ -9469,8 +9596,8 @@ printf %s "checking for acosp in -lsunmath... 
" >&6; } if test ${ac_cv_lib_sunmath_acosp+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lsunmath $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9480,12 +9607,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_sunmath_acosp=yes -else $as_nop - ac_cv_lib_sunmath_acosp=no +else case e in #( + e) ac_cv_lib_sunmath_acosp=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_sunmath_acosp" >&5 printf "%s\n" "$ac_cv_lib_sunmath_acosp" >&6; } @@ -9496,8 +9625,8 @@ printf %s "checking for sgemm in -lsunperf... " >&6; } if test ${ac_cv_lib_sunperf_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lsunperf -lsunmath $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9507,12 +9636,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_sunperf_sgemm=yes -else $as_nop - ac_cv_lib_sunperf_sgemm=no +else case e in #( + e) ac_cv_lib_sunperf_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_sunperf_sgemm" >&5 printf "%s\n" "$ac_cv_lib_sunperf_sgemm" >&6; } @@ -9535,8 +9666,8 @@ printf %s "checking for sgemm in -lscs... 
" >&6; } if test ${ac_cv_lib_scs_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lscs $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9546,12 +9677,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_scs_sgemm=yes -else $as_nop - ac_cv_lib_scs_sgemm=no +else case e in #( + e) ac_cv_lib_scs_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_scs_sgemm" >&5 printf "%s\n" "$ac_cv_lib_scs_sgemm" >&6; } @@ -9564,14 +9697,14 @@ fi # BLAS in SGIMATH library? if test $pac_blas_ok = no; then - as_ac_Lib=`printf "%s\n" "ac_cv_lib_complib.sgimath_$sgemm" | $as_tr_sh` + as_ac_Lib=`printf "%s\n" "ac_cv_lib_complib.sgimath_$sgemm" | sed "$as_sed_sh"` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $sgemm in -lcomplib.sgimath" >&5 printf %s "checking for $sgemm in -lcomplib.sgimath... " >&6; } if eval test \${$as_ac_Lib+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lcomplib.sgimath $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9581,12 +9714,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : eval "$as_ac_Lib=yes" -else $as_nop - eval "$as_ac_Lib=no" +else case e in #( + e) eval "$as_ac_Lib=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi eval ac_res=\$$as_ac_Lib { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -9600,14 +9735,14 @@ fi # BLAS in IBM ESSL library? 
(requires generic BLAS lib, too) if test $pac_blas_ok = no; then - as_ac_Lib=`printf "%s\n" "ac_cv_lib_blas_$sgemm" | $as_tr_sh` + as_ac_Lib=`printf "%s\n" "ac_cv_lib_blas_$sgemm" | sed "$as_sed_sh"` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $sgemm in -lblas" >&5 printf %s "checking for $sgemm in -lblas... " >&6; } if eval test \${$as_ac_Lib+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lblas $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9617,12 +9752,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : eval "$as_ac_Lib=yes" -else $as_nop - eval "$as_ac_Lib=no" +else case e in #( + e) eval "$as_ac_Lib=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi eval ac_res=\$$as_ac_Lib { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -9634,8 +9771,8 @@ printf %s "checking for sgemm in -lessl... " >&6; } if test ${ac_cv_lib_essl_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lessl -lblas $FLIBS $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9645,12 +9782,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_essl_sgemm=yes -else $as_nop - ac_cv_lib_essl_sgemm=no +else case e in #( + e) ac_cv_lib_essl_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_essl_sgemm" >&5 printf "%s\n" "$ac_cv_lib_essl_sgemm" >&6; } @@ -9674,8 +9813,8 @@ printf %s "checking for sgemm in -lblas... 
" >&6; } if test ${ac_cv_lib_blas_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lblas $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9685,12 +9824,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_blas_sgemm=yes -else $as_nop - ac_cv_lib_blas_sgemm=no +else case e in #( + e) ac_cv_lib_blas_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_blas_sgemm" >&6; } @@ -9700,8 +9841,9 @@ then : LIBS="-lblas $LIBS" -else $as_nop - pac_blas_ok=yes;BLAS_LIBS="-lblas" +else case e in #( + e) pac_blas_ok=yes;BLAS_LIBS="-lblas" ;; +esac fi fi @@ -9716,8 +9858,9 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : pac_blas_ok=yes -else $as_nop - BLAS_LIBS="" +else case e in #( + e) BLAS_LIBS="" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -9735,8 +9878,8 @@ printf %s "checking for sgemm in -lblas... 
" >&6; } if test ${ac_cv_lib_blas_sgemm+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-lblas $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9746,12 +9889,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : ac_cv_lib_blas_sgemm=yes -else $as_nop - ac_cv_lib_blas_sgemm=no +else case e in #( + e) ac_cv_lib_blas_sgemm=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_blas_sgemm" >&6; } @@ -9895,14 +10040,14 @@ ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu - as_ac_Lib=`printf "%s\n" "ac_cv_lib_$lapack""_cheev" | $as_tr_sh` + as_ac_Lib=`printf "%s\n" "ac_cv_lib_$lapack""_cheev" | sed "$as_sed_sh"` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cheev in -l$lapack" >&5 printf %s "checking for cheev in -l$lapack... 
" >&6; } if eval test \${$as_ac_Lib+y} then : printf %s "(cached) " >&6 -else $as_nop - ac_check_lib_save_LIBS=$LIBS +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS LIBS="-l$lapack $FLIBS $LIBS" cat > conftest.$ac_ext <<_ACEOF program main @@ -9912,12 +10057,14 @@ _ACEOF if ac_fn_fc_try_link "$LINENO" then : eval "$as_ac_Lib=yes" -else $as_nop - eval "$as_ac_Lib=no" +else case e in #( + e) eval "$as_ac_Lib=no" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS +LIBS=$ac_check_lib_save_LIBS ;; +esac fi eval ac_res=\$$as_ac_Lib { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -9941,7 +10088,7 @@ done # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: if test x"$pac_lapack_ok" = xyes; then - FDEFINES="$psblas_cv_define_prepend-DHAVE_LAPACK $FDEFINES" + FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_LAPACK $FDEFINES" : else pac_lapack_ok=no @@ -9999,38 +10146,43 @@ fi #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) -# Check whether --with-rsb was given. 
-if test ${with_rsb+y} +ac_header= ac_cache= +for ac_item in $ac_header_c_list +do + if test $ac_cache; then + ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" + if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then + printf "%s\n" "#define $ac_item 1" >> confdefs.h + fi + ac_header= ac_cache= + elif test $ac_header; then + ac_cache=$ac_item + else + ac_header=$ac_item + fi +done + + + + + + + + +if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes then : - withval=$with_rsb; if test x"$withval" = xno; then -want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi -else $as_nop - want_rsb_libs="" -fi -if test x"$want_rsb_libs" != x ; then - if test x"$want_rsb_libs" = xyes ; then - want_rsb_libs="`librsb-config --static --ldflags`" - else - if test -d "$want_rsb_libs" ; then - want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" - else - true; - # we assume want_rsb_libs are linkage parameters - fi - fi - FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" -fi -RSB_LIBS="$want_rsb_libs" -LIBS="$RSB_LIBS ${LIBS}" +printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h +fi # Check whether --with-metis was given. 
if test ${with_metis+y} then : withval=$with_metis; psblas_cv_metis=$withval -else $as_nop - psblas_cv_metis='-lmetis' +else case e in #( + e) psblas_cv_metis='-lmetis' ;; +esac fi @@ -10038,8 +10190,9 @@ fi if test ${with_metisincfile+y} then : withval=$with_metisincfile; psblas_cv_metisincfile=$withval -else $as_nop - psblas_cv_metisincfile='metis.h' +else case e in #( + e) psblas_cv_metisincfile='metis.h' ;; +esac fi @@ -10047,8 +10200,9 @@ fi if test ${with_metisdir+y} then : withval=$with_metisdir; psblas_cv_metisdir=$withval -else $as_nop - psblas_cv_metisdir='' +else case e in #( + e) psblas_cv_metisdir='' ;; +esac fi @@ -10056,8 +10210,9 @@ fi if test ${with_metisincdir+y} then : withval=$with_metisincdir; psblas_cv_metisincdir=$withval -else $as_nop - psblas_cv_metisincdir='' +else case e in #( + e) psblas_cv_metisincdir='' ;; +esac fi @@ -10065,8 +10220,9 @@ fi if test ${with_metislibdir+y} then : withval=$with_metislibdir; psblas_cv_metislibdir=$withval -else $as_nop - psblas_cv_metislibdir='' +else case e in #( + e) psblas_cv_metislibdir='' ;; +esac fi @@ -10097,16 +10253,17 @@ fi printf "%s\n" "$as_me: metis dir $psblas_cv_metisdir" >&6;} for ac_header in limits.h "$psblas_cv_metisincfile" do : - as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | $as_tr_sh` + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | sed "$as_sed_sh"` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes" then : cat >>confdefs.h <<_ACEOF -#define `printf "%s\n" "HAVE_$ac_header" | $as_tr_cpp` 1 +#define `printf "%s\n" "HAVE_$ac_header" | sed "$as_sed_cpp"` 1 _ACEOF pac_metis_header_ok=yes -else $as_nop - pac_metis_header_ok=no; METIS_INCLUDES="" +else case e in #( + e) pac_metis_header_ok=no; METIS_INCLUDES="" ;; +esac fi done @@ -10119,16 +10276,17 @@ if test "x$pac_metis_header_ok" == "xno" ; then printf %s "checking for metis_h in $METIS_INCLUDES... 
" >&6; } for ac_header in limits.h "$psblas_cv_metisincfile" do : - as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | $as_tr_sh` + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | sed "$as_sed_sh"` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes" then : cat >>confdefs.h <<_ACEOF -#define `printf "%s\n" "HAVE_$ac_header" | $as_tr_cpp` 1 +#define `printf "%s\n" "HAVE_$ac_header" | sed "$as_sed_cpp"` 1 _ACEOF pac_metis_header_ok=yes -else $as_nop - pac_metis_header_ok=no; METIS_INCLUDES="" +else case e in #( + e) pac_metis_header_ok=no; METIS_INCLUDES="" ;; +esac fi done @@ -10139,16 +10297,17 @@ if test "x$pac_metis_header_ok" == "xno" ; then CPPFLAGS="$METIS_INCLUDES $SAVE_CPPFLAGS" for ac_header in limits.h "$psblas_cv_metisincfile" do : - as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | $as_tr_sh` + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | sed "$as_sed_sh"` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes" then : cat >>confdefs.h <<_ACEOF -#define `printf "%s\n" "HAVE_$ac_header" | $as_tr_cpp` 1 +#define `printf "%s\n" "HAVE_$ac_header" | sed "$as_sed_cpp"` 1 _ACEOF pac_metis_header_ok=yes -else $as_nop - pac_metis_header_ok=no; METIS_INCLUDES="" +else case e in #( + e) pac_metis_header_ok=no; METIS_INCLUDES="" ;; +esac fi done @@ -10175,8 +10334,9 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : pac_cv_metis_idx=`./conftest${ac_exeext} | sed 's/^ *//'` -else $as_nop - pac_cv_metis_idx="unknown" +else case e in #( + e) pac_cv_metis_idx="unknown" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -10212,8 +10372,9 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : pac_cv_metis_real=`./conftest${ac_exeext} | sed 's/^ *//'` -else $as_nop - pac_cv_metis_real="unknown" +else case e in #( + e) 
pac_cv_metis_real="unknown" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -10243,8 +10404,14 @@ printf %s "checking for METIS_PartGraphKway in $METIS_LIBS... " >&6; } /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char METIS_PartGraphKway (); + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char METIS_PartGraphKway (void); int main (void) { @@ -10256,8 +10423,9 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : psblas_cv_have_metis=yes;pac_metis_lib_ok=yes; -else $as_nop - psblas_cv_have_metis=no;pac_metis_lib_ok=no; METIS_LIBS="" +else case e in #( + e) psblas_cv_have_metis=no;pac_metis_lib_ok=no; METIS_LIBS="" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -10275,8 +10443,14 @@ printf %s "checking for METIS_PartGraphKway in $METIS_LIBS... " >&6; } /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char METIS_PartGraphKway (); + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char METIS_PartGraphKway (void); int main (void) { @@ -10288,8 +10462,9 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : psblas_cv_have_metis=yes;pac_metis_lib_ok=yes; -else $as_nop - psblas_cv_have_metis=no;pac_metis_lib_ok=no; METIS_LIBS="" +else case e in #( + e) psblas_cv_have_metis=no;pac_metis_lib_ok=no; METIS_LIBS="" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -10308,8 +10483,14 @@ printf %s "checking for METIS_PartGraphKway in $METIS_LIBS... " >&6; } /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char METIS_PartGraphKway (); + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char METIS_PartGraphKway (void); int main (void) { @@ -10321,8 +10502,9 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : psblas_cv_have_metis=yes;pac_metis_lib_ok="yes"; -else $as_nop - psblas_cv_have_metis=no;pac_metis_lib_ok="no"; METIS_LIBS="" +else case e in #( + e) psblas_cv_have_metis=no;pac_metis_lib_ok="no"; METIS_LIBS="" ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -10339,8 +10521,14 @@ printf %s "checking for METIS_SetDefaultOptions in $LIBS... " >&6; } /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char METIS_SetDefaultOptions (); + builtin and then its argument prototype would still apply. 
+ The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char METIS_SetDefaultOptions (void); int main (void) { @@ -10352,9 +10540,10 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : psblas_cv_have_metis=yes;pac_metis_lib_ok=yes; -else $as_nop - psblas_cv_have_metis=no;pac_metis_lib_ok="no. Unusable METIS version, sorry."; METIS_LIBS="" - +else case e in #( + e) psblas_cv_have_metis=no;pac_metis_lib_ok="no. Unusable METIS version, sorry."; METIS_LIBS="" + ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -10367,8 +10556,8 @@ LIBS="$SAVE_LIBS"; CPPFLAGS="$SAVE_CPPFLAGS"; -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking Compatibility between metis and LPK" >&5 -printf %s "checking Compatibility between metis and LPK... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking Compatibility between metis and PSB_LPK" >&5 +printf %s "checking Compatibility between metis and PSB_LPK... " >&6; } if test "x$pac_cv_lpk_size" == "x4" ; then if test "x$pac_cv_metis_idx" == "x64" ; then psblas_cv_have_metis="no"; @@ -10393,9 +10582,12 @@ printf "%s\n" "$as_me: Unknown METIS REAL bitsize." 
>&6;} $psblas_cv_have_metis = "no"; fi if test "x$psblas_cv_have_metis" == "xyes" ; then - FDEFINES="$psblas_cv_define_prepend-DHAVE_METIS $psblas_cv_define_prepend-DMETIS_$pac_cv_metis_idx $psblas_cv_define_prepend-DMETIS_REAL_$pac_cv_metis_real $FDEFINES" - CDEFINES="-DHAVE_METIS_ $psblas_cv_metis_includes $CDEFINES -DMETIS_$pac_cv_metis_idx -DMETIS_REAL_$pac_cv_metis_real" + FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_METIS $psblas_cv_define_prepend-DPSB_METIS_$pac_cv_metis_idx $psblas_cv_define_prepend-DPSB_METIS_REAL_$pac_cv_metis_real $FDEFINES" + CDEFINES="$psblas_cv_metis_includes $CDEFINES" METISINCFILE=$psblas_cv_metisincfile + CHAVEMETIS="#define PSB_HAVE_METIS" + CINTMETIS="#define PSB_METIS_$pac_cv_metis_idx" + CREALMETIS="#define PSB_METIS_REAL_$pac_cv_metis_real" fi @@ -10403,8 +10595,9 @@ fi if test ${with_amd+y} then : withval=$with_amd; psblas_cv_amd=$withval -else $as_nop - psblas_cv_amd='-lamd' +else case e in #( + e) psblas_cv_amd='-lamd' ;; +esac fi @@ -10412,204 +10605,974 @@ fi if test ${with_amddir+y} then : withval=$with_amddir; psblas_cv_amddir=$withval -else $as_nop - psblas_cv_amddir='' +else case e in #( + e) psblas_cv_amddir='' ;; +esac +fi + + +# Check whether --with-amdincdir was given. +if test ${with_amdincdir+y} +then : + withval=$with_amdincdir; psblas_cv_amdincdir=$withval +else case e in #( + e) psblas_cv_amdincdir='' ;; +esac +fi + + +# Check whether --with-amdlibdir was given. 
+if test ${with_amdlibdir+y} +then : + withval=$with_amdlibdir; psblas_cv_amdlibdir=$withval +else case e in #( + e) psblas_cv_amdlibdir='' ;; +esac +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$psblas_cv_amddir" != "x"; then + AMD_LIBDIR="-L$psblas_cv_amddir" + LIBS="-L$psblas_cv_amddir $LIBS" + AMD_INCLUDES="-I$psblas_cv_amddir" + CPPFLAGS="$AMD_INCLUDES $CPPFLAGS" +fi +if test "x$psblas_cv_amdincdir" != "x"; then + AMD_INCLUDES="-I$psblas_cv_amdincdir" + CPPFLAGS="$AMD_INCLUDES $CPPFLAGS" +fi +if test "x$psblas_cv_amdlibdir" != "x"; then + LIBS="-L$psblas_cv_amdlibdir $LIBS" + AMD_LIBDIR="-L$psblas_cv_amdlibdir" +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: amd dir $psblas_cv_amddir" >&5 +printf "%s\n" "$as_me: amd dir $psblas_cv_amddir" >&6;} +ac_fn_c_check_header_compile "$LINENO" "amd.h" "ac_cv_header_amd_h" "$ac_includes_default" +if test "x$ac_cv_header_amd_h" = xyes +then : + pac_amd_header_ok=yes +else case e in #( + e) pac_amd_header_ok=no; AMD_INCLUDES="" ;; +esac +fi + +if test "x$pac_amd_header_ok" == "xno" ; then + unset ac_cv_header_amd_h + AMD_INCLUDES="-I$psblas_cv_amddir/include -I$psblas_cv_amddir/Include " + CPPFLAGS="$AMD_INCLUDES $SAVE_CPPFLAGS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for amd_h in $AMD_INCLUDES" >&5 +printf %s "checking for amd_h in $AMD_INCLUDES... 
" >&6; } + ac_fn_c_check_header_compile "$LINENO" "amd.h" "ac_cv_header_amd_h" "$ac_includes_default" +if test "x$ac_cv_header_amd_h" = xyes +then : + pac_amd_header_ok=yes +else case e in #( + e) pac_amd_header_ok=no; AMD_INCLUDES="" ;; +esac +fi + +fi +if test "x$pac_amd_header_ok" == "xno" ; then + unset ac_cv_header_amd_h + AMD_INCLUDES="-I$psblas_cv_amddir/UFconfig -I$psblas_cv_amddir/AMD/Include -I$psblas_cv_amddir/AMD/Include" + CPPFLAGS="$AMD_INCLUDES $SAVE_CPPFLAGS" + ac_fn_c_check_header_compile "$LINENO" "amd.h" "ac_cv_header_amd_h" "$ac_includes_default" +if test "x$ac_cv_header_amd_h" = xyes +then : + pac_amd_header_ok=yes +else case e in #( + e) pac_amd_header_ok=no; AMD_INCLUDES="" ;; +esac +fi + +fi + + +if test "x$pac_amd_header_ok" == "xyes" ; then + psblas_cv_amd_includes="$AMD_INCLUDES" + if test "x$AMD_LIBDIR" == "x" ; then + AMD_LIBS="$psblas_cv_amd" + else + AMD_LIBS="$psblas_cv_amd $AMD_LIBDIR" + fi + LIBS="$AMD_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for amd_order in $AMD_LIBS" >&5 +printf %s "checking for amd_order in $AMD_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char amd_order (void); +int +main (void) +{ +return amd_order (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + psblas_cv_have_amd=yes;pac_amd_lib_ok=yes; +else case e in #( + e) psblas_cv_have_amd=no;pac_amd_lib_ok=no; AMD_LIBS="" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_amd_lib_ok" >&5 +printf "%s\n" "$pac_amd_lib_ok" >&6; } + if test "x$pac_amd_lib_ok" == "xno" ; then + AMD_LIBDIR="-L$psblas_cv_amddir/Lib -L$psblas_cv_amddir/lib" + AMD_LIBS="$psblas_cv_amd $AMD_LIBDIR" + LIBS="$AMD_LIBS -lm $SAVE_LIBS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for amd_order in $AMD_LIBS" >&5 +printf %s "checking for amd_order in $AMD_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char amd_order (void); +int +main (void) +{ +return amd_order (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + psblas_cv_have_amd=yes;pac_amd_lib_ok=yes; +else case e in #( + e) psblas_cv_have_amd=no;pac_amd_lib_ok=no; AMD_LIBS="" ;; +esac fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_amd_lib_ok" >&5 +printf "%s\n" "$pac_amd_lib_ok" >&6; } + fi + if test "x$pac_amd_lib_ok" == "xno" ; then + AMD_LIBDIR="-L$psblas_cv_amddir/AMD/Lib -L$psblas_cv_amddir/AMD/Lib" + AMD_LIBS="$psblas_cv_amd $AMD_LIBDIR" + LIBS="$AMD_LIBS -lm $SAVE_LIBS" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for amd_order in $AMD_LIBS" >&5 +printf %s "checking for amd_order in $AMD_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char amd_order (void); +int +main (void) +{ +return amd_order (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + psblas_cv_have_amd=yes;pac_amd_lib_ok=yes; +else case e in #( + e) psblas_cv_have_amd=no;pac_amd_lib_ok=no; AMD_LIBS="" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_amd_lib_ok" >&5 +printf "%s\n" "$pac_amd_lib_ok" >&6; } + fi +fi +LIBS="$SAVE_LIBS"; +CPPFLAGS="$SAVE_CPPFLAGS"; + +if test "x$psblas_cv_have_amd" == "xyes" ; then + FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_AMD $FDEFINES" + CDEFINES="$psblas_cv_amd_includes $CDEFINES" + CHAVEAMD="#define PSB_HAVE_AMD" +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want cuda " >&5 +printf %s "checking whether we want cuda ... " >&6; } +# Check whether --enable-cuda was given. +if test ${enable_cuda+y} +then : + enableval=$enable_cuda; +pac_cv_cuda="$enableval"; + + +fi + + + +if test "x$pac_cv_cuda" == "xyes"; then +if test "x$pac_cv_ipk_size" != "x4"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5 +printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;} + pac_cv_cuda="no"; + PSB_HAVE_CUDA="no"; + CUDA_CC=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; + CUDEFINES=""; + CUDA_DEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; + FCUDEFINES=""; + CCUDEFINES=""; + CXXCUDEFINES=""; +else + + +# Check whether --with-cudadir was given. 
+if test ${with_cudadir+y} +then : + withval=$with_cudadir; pac_cv_cuda_dir=$withval +else case e in #( + e) pac_cv_cuda_dir='' ;; +esac +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib -L$pac_cv_cuda_dir/../math_libs/lib64" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else case e in #( + e) pac_cuda_header_ok=no; CUDA_INCLUDES="" ;; +esac +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
+ The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char cudaMemcpy (void); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else case e in #( + e) pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + + + if test "x$pac_cv_have_cuda" == "xyes"; then + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_have_cuda" == "x"; then + +# Check whether --with-cudadir was given. 
+if test ${with_cudadir+y} +then : + withval=$with_cudadir; pac_cv_cuda_dir=$withval +else case e in #( + e) pac_cv_cuda_dir='' ;; +esac +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib -L$pac_cv_cuda_dir/../math_libs/lib64" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else case e in #( + e) pac_cuda_header_ok=no; CUDA_INCLUDES="" ;; +esac +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
+ The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char cudaMemcpy (void); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else case e in #( + e) pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + +fi +if test "x$pac_cv_have_cuda" == "xyes"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for CUDA version" >&5 +printf %s "checking for CUDA version... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include + +int main(int argc, char **argv) +{ + printf("%d",CUDA_VERSION); + return(0); +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`; +else case e in #( + e) pac_cv_cuda_version="unknown"; ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cv_cuda_version" >&5 +printf "%s\n" "$pac_cv_cuda_version" >&6; } + fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + + PSB_CUDA_VERSION="$pac_cv_cuda_version"; + if (( ${PSB_CUDA_VERSION} > 12080 )); then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Unsupported CUDA version ${PSB_CUDA_VERSION} disabling CUDA" >&5 +printf "%s\n" "$as_me: Unsupported CUDA version ${PSB_CUDA_VERSION} disabling CUDA" >&6;} + pac_cv_cuda="no"; + PSB_HAVE_CUDA="no"; + CUDA_CC=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; + CUDEFINES=""; + CUDA_DEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; + FCUDEFINES=""; + CCUDEFINES=""; + CXXCUDEFINES=""; + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: CUDA version ${PSB_CUDA_VERSION}" >&5 +printf "%s\n" "$as_me: CUDA version ${PSB_CUDA_VERSION}" >&6;} + PSB_HAVE_CUDA="yes"; + PSB_CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); + CUDA_DEFINES="-DPSB_HAVE_CUDA -DPSB_CUDA_SHORT_VERSION=${PSB_CUDA_SHORT_VERSION} -DPSB_CUDA_VERSION=${PSB_CUDA_VERSION}"; + CHAVECUDA="#define PSB_HAVE_CUDA" + CSHORTVCUDA="#define PSB_CUDA_SHORT_VERSION ${PSB_CUDA_SHORT_VERSION}" + CVERSIONCUDA="#define PSB_CUDA_VERSION ${PSB_CUDA_VERSION}" + SPGPU_LIBS="-lspgpu"; + CUDAD=cudad; + CUDALD=cudald; + LCUDA="-lpsb_cuda"; + EXTRALDLIBS="-lstdc++"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: 
At this point GPUTARGET is $CUDAD $CUDALD" >&5 +printf "%s\n" "$as_me: At this point GPUTARGET is $CUDAD $CUDALD" >&6;} + + + +# Check whether --with-cudacc was given. +if test ${with_cudacc+y} +then : + withval=$with_cudacc; pac_cv_cudacc=$withval +else case e in #( + e) pac_cv_cudacc='' ;; +esac +fi + + + if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="50,60,70,75,80,86"; + CUDA_CC="$pac_cv_cudacc"; + fi + if (( $pac_cv_cuda_version >= 11070 )) + then + CUDEFINES="--dopt=on"; + fi + for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` + do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; + done + if test "x$pac_cv_cuda_version" != "xunknown"; then + FCUDEFINES=" ${CUDA_DEFINES}" + fi + fi + fi +fi +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5 +printf %s "checking whether we want openacc ... " >&6; } +# Check whether --enable-openacc was given. +if test ${enable_openacc+y} +then : + enableval=$enable_openacc; +pac_cv_openacc="$enableval"; + + +fi + +if test x"$pac_cv_openacc" == x"yes" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5 +printf "%s\n" "yes." >&6; } +# AC_LANG_PUSH([Fortran]) +# AC_OPENACC() +# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS"; +# AC_LANG_POP() +# AC_LANG_PUSH([C]) +# AC_OPENACC() +# pac_cv_openacc_ccopt="$OPENACC_CFLAGS"; +# AC_LANG_POP() +# AC_LANG_PUSH([C++]) +# AC_OPENACC() +# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS"; +# AC_LANG_POP() +else + pac_cv_openacc="no"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5 +printf "%s\n" "no." >&6; } +fi + + +if test x"$pac_cv_openacc" == x"yes" ; then + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)... " >&6; } + +# Check whether --with-extraopenacc was given. 
+if test ${with_extraopenacc+y} +then : + withval=$with_extraopenacc; +EXTRAOPENACC="${withval} ${EXTRAOPENACC}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRAOPENACC = ${EXTRAOPENACC}" >&5 +printf "%s\n" "EXTRAOPENACC = ${EXTRAOPENACC}" >&6; } + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; +esac +fi + + + if test -e penmp || test -e mp; then + as_fn_error $? "AX_OPENACC clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5 +fi +# Check whether --enable-openacc was given. +if test ${enable_openacc+y} +then : + enableval=$enable_openacc; +fi + + OPENACC_CFLAGS= + if test "$enable_openacc" != no; then + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support Openacc" >&5 +printf %s "checking for $CC option to support Openacc... " >&6; } +if test ${ax_cv_prog_c_openacc+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ax_cv_prog_c_openacc='not found' + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $ac_option" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (0); return 0;} + +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (0); return 0;} + +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_cv_prog_c_openacc=$ac_option +else case e in #( + e) ax_cv_prog_c_openacc='unsupported' ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ac_save_CFLAGS + + if test "$ax_cv_prog_c_openacc" != 'not found'; then + break + fi + done + if test "$ax_cv_prog_c_openacc" = 'not found'; then + ac_cv_prog_c_openacc='unsupported' + elif test "$ax_cv_prog_c_openacc" = ''; then + ac_cv_prog_c_openacc='none needed' + fi + rm -f penmp mp ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_c_openacc" >&5 +printf "%s\n" "$ax_cv_prog_c_openacc" >&6; } + if test "$ax_cv_prog_c_openacc" != 'unsupported' && \ + test "$ax_cv_prog_c_openacc" != 'none needed'; then + OPENACC_CFLAGS="$ax_cv_prog_c_openacc" + fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + fi + + CCOPENACC="$ax_cv_prog_c_openacc"; + # Check whether --enable-openacc was given. +if test ${enable_openacc+y} +then : + enableval=$enable_openacc; +fi + + OPENACC_CFLAGS= + if test "$enable_openacc" != no; then + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to support Openacc" >&5 +printf %s "checking for $CXX option to support Openacc... 
" >&6; } +if test ${ax_cv_prog_cxx_openacc+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ax_cv_prog_cxx_openacc='not found' + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_CXXFLAGS=$CXXFLAGS + CXXFLAGS="$CXXFLAGS $ac_option" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (acc_get_device_type()); return 0;} + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (acc_get_device_type()); return 0;} + +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + ax_cv_prog_cxx_openacc=$ac_option +else case e in #( + e) ax_cv_prog_cxx_openacc='unsupported' ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CXXFLAGS=$ac_save_CXXFLAGS + + if test "$ax_cv_prog_cxx_openacc" != 'not found'; then + break + fi + done + if test "$ax_cv_prog_cxx_openacc" = 'not found'; then + ac_cv_prog_cxx_openacc='unsupported' + elif test "$ax_cv_prog_cxx_openacc" = ''; then + ac_cv_prog_cxx_openacc='none needed' + fi + rm -f penmp mp ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_cxx_openacc" >&5 +printf "%s\n" "$ax_cv_prog_cxx_openacc" >&6; } + if test "$ax_cv_prog_cxx_openacc" != 'unsupported' && \ + test "$ax_cv_prog_cxx_openacc" != 'none needed'; then + OPENACC_CXXFLAGS="$ax_cv_prog_cxx_openacc" + fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + fi + + CXXOPENACC="$ax_cv_prog_cxx_openacc"; + # Check whether --enable-openacc was 
given. +if test ${enable_openacc+y} +then : + enableval=$enable_openacc; +fi + + OPENACC_CFLAGS= + if test "$enable_openacc" != no; then + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $FC option to support Openacc" >&5 +printf %s "checking for $FC option to support Openacc... " >&6; } +if test ${ax_cv_prog_fc_openacc+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ax_cv_prog_fc_openacc='not found' + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_FCFLAGS=$FCFLAGS + FCFLAGS="$FCFLAGS $ac_option" + cat > conftest.$ac_ext <<_ACEOF + program main + use openacc + implicit none + integer tid, np + tid = 42 + call acc_init(0) + end -# Check whether --with-amdincdir was given. -if test ${with_amdincdir+y} +_ACEOF +if ac_fn_fc_try_compile "$LINENO" then : - withval=$with_amdincdir; psblas_cv_amdincdir=$withval -else $as_nop - psblas_cv_amdincdir='' -fi + cat > conftest.$ac_ext <<_ACEOF + program main + use openacc + implicit none + integer tid, np + tid = 42 + call acc_init(0) + end -# Check whether --with-amdlibdir was given. 
-if test ${with_amdlibdir+y} +_ACEOF +if ac_fn_fc_try_link "$LINENO" then : - withval=$with_amdlibdir; psblas_cv_amdlibdir=$withval -else $as_nop - psblas_cv_amdlibdir='' + ax_cv_prog_fc_openacc=$ac_option +else case e in #( + e) ax_cv_prog_fc_openacc='unsupported' ;; +esac fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + FCFLAGS=$ac_save_FCFLAGS - -ac_ext=c + if test "$ax_cv_prog_fc_openacc" != 'unsupported'; then + break + fi + done + if test "$ax_cv_prog_fc_openacc" = 'not found'; then + ac_cv_prog_fc_openacc='unsupported' + elif test "$ax_cv_prog_fc_openacc" = ''; then + ac_cv_prog_fc_openacc='none needed' + fi + rm -f penmp mp ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_fc_openacc" >&5 +printf "%s\n" "$ax_cv_prog_fc_openacc" >&6; } + if test "$ax_cv_prog_fc_openacc" != 'unsupported' && \ + test "$ax_cv_prog_fc_openacc" != 'none needed'; then + OPENACC_FCFLAGS="$ax_cv_prog_fc_openacc" + fi + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu -SAVE_LIBS="$LIBS" -SAVE_CPPFLAGS="$CPPFLAGS" -if test "x$psblas_cv_amddir" != "x"; then - AMD_LIBDIR="-L$psblas_cv_amddir" - LIBS="-L$psblas_cv_amddir $LIBS" - AMD_INCLUDES="-I$psblas_cv_amddir" - CPPFLAGS="$AMD_INCLUDES $CPPFLAGS" -fi -if test "x$psblas_cv_amdincdir" != "x"; then - AMD_INCLUDES="-I$psblas_cv_amdincdir" - CPPFLAGS="$AMD_INCLUDES $CPPFLAGS" -fi -if test "x$psblas_cv_amdlibdir" != "x"; then - LIBS="-L$psblas_cv_amdlibdir $LIBS" - AMD_LIBDIR="-L$psblas_cv_amdlibdir" -fi + fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: amd dir $psblas_cv_amddir" >&5 -printf "%s\n" "$as_me: amd dir $psblas_cv_amddir" >&6;} -ac_fn_c_check_header_compile "$LINENO" "amd.h" "ac_cv_header_amd_h" 
"$ac_includes_default" -if test "x$ac_cv_header_amd_h" = xyes -then : - pac_amd_header_ok=yes -else $as_nop - pac_amd_header_ok=no; AMD_INCLUDES="" -fi + FCOPENACC="$ax_cv_prog_fc_openacc"; -if test "x$pac_amd_header_ok" == "xno" ; then - unset ac_cv_header_amd_h - AMD_INCLUDES="-I$psblas_cv_amddir/include -I$psblas_cv_amddir/Include " - CPPFLAGS="$AMD_INCLUDES $SAVE_CPPFLAGS" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for amd_h in $AMD_INCLUDES" >&5 -printf %s "checking for amd_h in $AMD_INCLUDES... " >&6; } - ac_fn_c_check_header_compile "$LINENO" "amd.h" "ac_cv_header_amd_h" "$ac_includes_default" -if test "x$ac_cv_header_amd_h" = xyes +# Check whether --with-ccopenacc was given. +if test ${with_ccopenacc+y} then : - pac_amd_header_ok=yes -else $as_nop - pac_amd_header_ok=no; AMD_INCLUDES="" -fi + withval=$with_ccopenacc; +CCOPENACC="${withval} ${CCOPENACC}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPENACC = ${CCOPENACC}" >&5 +printf "%s\n" "CCOPENACC = ${CCOPENACC}" >&6; } -fi -if test "x$pac_amd_header_ok" == "xno" ; then - unset ac_cv_header_amd_h - AMD_INCLUDES="-I$psblas_cv_amddir/UFconfig -I$psblas_cv_amddir/AMD/Include -I$psblas_cv_amddir/AMD/Include" - CPPFLAGS="$AMD_INCLUDES $SAVE_CPPFLAGS" - ac_fn_c_check_header_compile "$LINENO" "amd.h" "ac_cv_header_amd_h" "$ac_includes_default" -if test "x$ac_cv_header_amd_h" = xyes -then : - pac_amd_header_ok=yes -else $as_nop - pac_amd_header_ok=no; AMD_INCLUDES="" +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; +esac fi -fi -if test "x$pac_amd_header_ok" == "xyes" ; then - psblas_cv_amd_includes="$AMD_INCLUDES" - if test "x$AMD_LIBDIR" == "x" ; then - 
AMD_LIBS="$psblas_cv_amd" - else - AMD_LIBS="$psblas_cv_amd $AMD_LIBDIR" - fi - LIBS="$AMD_LIBS -lm $LIBS"; - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for amd_order in $AMD_LIBS" >&5 -printf %s "checking for amd_order in $AMD_LIBS... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CXXOPENACC flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional CXXOPENACC flags should be added (should be invoked only once)... " >&6; } -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char amd_order (); -int -main (void) -{ -return amd_order (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" +# Check whether --with-cxxopenacc was given. +if test ${with_cxxopenacc+y} then : - psblas_cv_have_amd=yes;pac_amd_lib_ok=yes; -else $as_nop - psblas_cv_have_amd=no;pac_amd_lib_ok=no; AMD_LIBS="" + withval=$with_cxxopenacc; +CXXOPENACC="${withval} ${CXXOPENACC}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPENACC = ${CXXOPENACC}" >&5 +printf "%s\n" "CXXOPENACC = ${CXXOPENACC}" >&6; } + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; +esac fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_amd_lib_ok" >&5 -printf "%s\n" "$pac_amd_lib_ok" >&6; } - if test "x$pac_amd_lib_ok" == "xno" ; then - AMD_LIBDIR="-L$psblas_cv_amddir/Lib -L$psblas_cv_amddir/lib" - AMD_LIBS="$psblas_cv_amd $AMD_LIBDIR" - LIBS="$AMD_LIBS -lm $SAVE_LIBS" - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for amd_order in $AMD_LIBS" >&5 -printf %s "checking for amd_order in $AMD_LIBS... 
" >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char amd_order (); -int -main (void) -{ -return amd_order (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - psblas_cv_have_amd=yes;pac_amd_lib_ok=yes; -else $as_nop - psblas_cv_have_amd=no;pac_amd_lib_ok=no; AMD_LIBS="" -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_amd_lib_ok" >&5 -printf "%s\n" "$pac_amd_lib_ok" >&6; } - fi - if test "x$pac_amd_lib_ok" == "xno" ; then - AMD_LIBDIR="-L$psblas_cv_amddir/AMD/Lib -L$psblas_cv_amddir/AMD/Lib" - AMD_LIBS="$psblas_cv_amd $AMD_LIBDIR" - LIBS="$AMD_LIBS -lm $SAVE_LIBS" - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for amd_order in $AMD_LIBS" >&5 -printf %s "checking for amd_order in $AMD_LIBS... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char amd_order (); -int -main (void) -{ -return amd_order (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional FCOPENACC flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional FCOPENACC flags should be added (should be invoked only once)... " >&6; } + +# Check whether --with-fcopenacc was given. 
+if test ${with_fcopenacc+y} then : - psblas_cv_have_amd=yes;pac_amd_lib_ok=yes; -else $as_nop - psblas_cv_have_amd=no;pac_amd_lib_ok=no; AMD_LIBS="" + withval=$with_fcopenacc; +FCOPENACC="${withval} ${FCOPENACC}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FCOPENACC = ${FCOPENACC}" >&5 +printf "%s\n" "FCOPENACC = ${FCOPENACC}" >&6; } + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; +esac fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_amd_lib_ok" >&5 -printf "%s\n" "$pac_amd_lib_ok" >&6; } - fi + + + CCOPENACC="$CCOPENACC $EXTRAOPENACC"; + CXXOPENACC="$CXXOPENACC $EXTRAOPENACC"; + FCOPENACC="$FCOPENACC $EXTRAOPENACC"; + OACCD=oaccd; + OACCLD=oaccld; + + #FCOPT="$FCOPT $FCOPENACC"; + #CCOPT="$CCOPT $CCOPENACC" + #CXXOPT="$CXXOPT $CXXOPENACC" + #FLINK="$FLINK $FCOPENACC"; fi -LIBS="$SAVE_LIBS"; -CPPFLAGS="$SAVE_CPPFLAGS"; -if test "x$psblas_cv_have_amd" == "xyes" ; then - FDEFINES="$psblas_cv_define_prepend-DHAVE_AMD $FDEFINES" - CDEFINES="-DHAVE_AMD_ $psblas_cv_amd_includes $CDEFINES" +############################################################################### +LIBRSB_DIR="$pac_cv_librsb_dir"; +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBRSB install dir" >&5 +printf %s "checking for LIBRSB install dir... " >&6; } +case $LIBRSB_DIR in + /*) ;; + *) esac +pac_cv_status_file="$LIBRSB_DIR/librsb.a" +if test ! 
-f "$pac_cv_status_file" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + #AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.]) +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIBRSB_DIR" >&5 +printf "%s\n" "$LIBRSB_DIR" >&6; } + RSBTARGETLIB=rsbd; + RSBTARGETOBJ=rsbobj; + CHAVELIBRSB="#define PSB_HAVE_LIBRSB" fi + + ############################################################################### # Library target directory and archive files. ############################################################################### @@ -10617,7 +11580,7 @@ fi LIBDIR=lib BASELIBNAME=libpsb_base.a PRECLIBNAME=libpsb_prec.a -METHDLIBNAME=libpsb_krylov.a +METHDLIBNAME=libpsb_linsolve.a UTILLIBNAME=libpsb_util.a ############################################################################### @@ -10661,12 +11624,15 @@ UTILLIBNAME=libpsb_util.a + + + PSBLASRULES=' -PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) +PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(FLIBS) $(LIBS) CXXDEFINES=$(PSBCXXDEFINES) CDEFINES=$(PSBCDEFINES) FDEFINES=$(PSBFDEFINES) @@ -10692,11 +11658,54 @@ FDEFINES=$(PSBFDEFINES) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ############################################################################### # the following files will be created by Automake ac_config_files="$ac_config_files Make.inc" +ac_config_files="$ac_config_files base/modules/psb_config.h" + ac_config_files="$ac_config_files util/psb_metis_int.h" cat >confcache <<\_ACEOF @@ -10709,8 +11718,8 @@ cat >confcache <<\_ACEOF # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. 
# -# `ac_cv_env_foo' variables (set or unset) will be overridden when -# loading this file, other *unset* `ac_cv_foo' will be assigned the +# 'ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* 'ac_cv_foo' will be assigned the # following values. _ACEOF @@ -10740,14 +11749,14 @@ printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) - # `set' does not quote correctly, so add quotes: double-quote + # 'set' does not quote correctly, so add quotes: double-quote # substitution turns \\\\ into \\, and sed turns \\ into \. sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) - # `set' quotes correctly as required by POSIX, so do not add quotes. + # 'set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | @@ -10811,9 +11820,7 @@ s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g t quote b any :quote -s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g -s/\[/\\&/g -s/\]/\\&/g +s/[][ `~#$^&*(){}\\|;'\''"<>?]/\\&/g s/\$/$$/g H :any @@ -10864,6 +11871,18 @@ if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCXX\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 fi +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; +esac +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi + if test -n "$EXEEXT"; then am__EXEEXT_TRUE= am__EXEEXT_FALSE='#' @@ -10901,7 +11920,6 @@ cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh -as_nop=: if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 then : emulate sh @@ -10910,12 +11928,13 @@ then : # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else $as_nop - case `(set -o) 2>/dev/null` in #( +else case e in #( + e) case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; +esac ;; esac fi @@ -10987,7 +12006,7 @@ IFS=$as_save_IFS ;; esac -# We did not find ourselves, most probably we were run as `sh COMMAND' +# We did not find ourselves, most probably we were run as 'sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 @@ -11016,7 +12035,6 @@ as_fn_error () } # as_fn_error - # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -11056,11 +12074,12 @@ then : { eval $1+=\$2 }' -else $as_nop - as_fn_append () +else case e in #( + e) as_fn_append () { eval $1=\$$1\$2 - } + } ;; +esac fi # as_fn_append # as_fn_arith ARG... @@ -11074,11 +12093,12 @@ then : { as_val=$(( $* )) }' -else $as_nop - as_fn_arith () +else case e in #( + e) as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` - } + } ;; +esac fi # as_fn_arith @@ -11161,9 +12181,9 @@ if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: - # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. 
- # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. - # In both cases, we have to default to `cp -pR'. + # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. + # In both cases, we have to default to 'cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then @@ -11244,10 +12264,12 @@ as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. -as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" +as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" +as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated # Sed expression to map a string onto a valid variable name. -as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" +as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" +as_tr_sh="eval sed '$as_sed_sh'" # deprecated exec 6>&1 @@ -11262,8 +12284,8 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by PSBLAS $as_me 3.7.0, which was -generated by GNU Autoconf 2.71. Invocation command line was +This file was extended by PSBLAS $as_me 3.9.0, which was +generated by GNU Autoconf 2.72. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -11291,7 +12313,7 @@ _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ -\`$as_me' instantiates files and other configuration actions +'$as_me' instantiates files and other configuration actions from templates according to the current configuration. Unless the files and actions are specified as TAGs, all are instantiated by default. 
@@ -11321,11 +12343,11 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -PSBLAS config.status 3.7.0 -configured by $0, generated by GNU Autoconf 2.71, +PSBLAS config.status 3.9.0 +configured by $0, generated by GNU Autoconf 2.72, with options \\"\$ac_cs_config\\" -Copyright (C) 2021 Free Software Foundation, Inc. +Copyright (C) 2023 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." @@ -11385,8 +12407,8 @@ do ac_cs_silent=: ;; # This is an error. - -*) as_fn_error $? "unrecognized option: \`$1' -Try \`$0 --help' for more information." ;; + -*) as_fn_error $? "unrecognized option: '$1' +Try '$0 --help' for more information." ;; *) as_fn_append ac_config_targets " $1" ac_need_defaults=false ;; @@ -11441,9 +12463,10 @@ do case $ac_config_target in "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; "Make.inc") CONFIG_FILES="$CONFIG_FILES Make.inc" ;; + "base/modules/psb_config.h") CONFIG_FILES="$CONFIG_FILES base/modules/psb_config.h" ;; "util/psb_metis_int.h") CONFIG_FILES="$CONFIG_FILES util/psb_metis_int.h" ;; - *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + *) as_fn_error $? "invalid argument: '$ac_config_target'" "$LINENO" 5;; esac done @@ -11462,7 +12485,7 @@ fi # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: -# after its creation but before its name has been assigned to `$tmp'. +# after its creation but before its name has been assigned to '$tmp'. $debug || { tmp= ac_tmp= @@ -11486,7 +12509,7 @@ ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. 
-# This happens for instance with `./config.status config.h'. +# This happens for instance with './config.status config.h'. if test -n "$CONFIG_FILES"; then @@ -11652,7 +12675,7 @@ do esac case $ac_mode$ac_tag in :[FHL]*:*);; - :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; + :L* | :C*:*) as_fn_error $? "invalid tag '$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac @@ -11674,19 +12697,19 @@ do -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, - # because $ac_f cannot contain `:'. + # because $ac_f cannot contain ':'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || - as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + as_fn_error 1 "cannot find input file: '$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done - # Let's still pretend it is `configure' which instantiates (i.e., don't + # Let's still pretend it is 'configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` @@ -11819,7 +12842,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 esac _ACEOF -# Neutralize VPATH when `$srcdir' = `.'. +# Neutralize VPATH when '$srcdir' = '.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? 
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 @@ -11850,9 +12873,9 @@ test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable 'datarootdir' which seems to be undefined. Please make sure it is defined" >&5 -printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable 'datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" @@ -11950,15 +12973,15 @@ printf "%s\n" X/"$am_mf" | (exit $ac_status); } || am_rc=$? done if test $am_rc -ne 0; then - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} as_fn_error $? "Something went wrong bootstrapping makefile fragments for automatic dependency tracking. If GNU make was not used, consider re-running the configure script with MAKE=\"gmake\" (or whatever is necessary). You can also try re-running configure with the '--disable-dependency-tracking' option to at least be able to build the package (albeit without support for automatic dependency tracking). 
-See \`config.log' for more details" "$LINENO" 5; } +See 'config.log' for more details" "$LINENO" 5; } fi { am_dirpart=; unset am_dirpart;} { am_filepart=; unset am_filepart;} @@ -12022,6 +13045,14 @@ fi FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${PSB_HAVE_CUDA} + CUDA_CC : ${pac_cv_cudacc} + + OPENACC : ${pac_cv_openacc} + FCOPENACC : ${FCOPENACC} + OACCD : ${OACCD} + OACCLD : ${OACCLD} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} @@ -12052,6 +13083,14 @@ printf "%s\n" "$as_me: FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${PSB_HAVE_CUDA} + CUDA_CC : ${pac_cv_cudacc} + + OPENACC : ${pac_cv_openacc} + FCOPENACC : ${FCOPENACC} + OACCD : ${OACCD} + OACCLD : ${OACCLD} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} @@ -12068,6 +13107,16 @@ printf "%s\n" "$as_me: If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise type ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}. " >&6;} +if test x"${pac_cv_openacc}" == x"yes" ; then + if test x"${FCOPENACC}" == x ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: + WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag. + You may want to rerun configure with --with-fcopenacc= " >&5 +printf "%s\n" "$as_me: + WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag. + You may want to rerun configure with --with-fcopenacc= " >&6;} + fi +fi ############################################################################### diff --git a/configure.ac b/configure.ac old mode 100755 new mode 100644 index d8a02a50..2850d275 --- a/configure.ac +++ b/configure.ac @@ -36,11 +36,11 @@ dnl NOTE : There is no cross compilation support. ############################################################################### # NOTE: the literal for version (the second argument to AC_INIT should be a literal!) 
-AC_INIT([PSBLAS],3.7.0, [https://github.com/sfilippone/psblas3/issues]) +AC_INIT([PSBLAS],3.9.0, [https://github.com/sfilippone/psblas3/issues]) # VERSION is the file containing the PSBLAS version code # FIXME -psblas_cv_version="3.7.0" +psblas_cv_version="3.9.0" # A sample source file AC_CONFIG_SRCDIR([base/modules/psb_base_mod.f90]) @@ -56,7 +56,7 @@ AC_MSG_NOTICE([ documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -100,17 +100,17 @@ dnl We set our own FC flags, ignore those from AC_PROG_FC but not those from the dnl environment variable. Same for C dnl save_FCFLAGS="$FCFLAGS"; -AC_PROG_FC([ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran]) +AC_PROG_FC([ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang ifx ifort ifc nagfor gfortran]) FCFLAGS="$save_FCFLAGS"; save_CFLAGS="$CFLAGS"; -AC_PROG_CC([xlc pgcc icc gcc cc ]) +AC_PROG_CC([xlc pgcc clang icx icc gcc cc ]) if test "x$ac_cv_prog_cc_stdc" == "xno" ; then AC_MSG_ERROR([Problem : Need a C99 compiler ! ]) else C99OPT="$ac_cv_prog_cc_stdc"; fi CFLAGS="$save_CFLAGS"; -AC_PROG_CXX([CC xlc++ icpc g++]) +AC_PROG_CXX([CC xlc++ clang++ icpx icpc g++]) dnl AC_PROG_F90 doesn't exist, at the time of writing this ! dnl AC_PROG_F90 @@ -136,30 +136,32 @@ PAC_ARG_SERIAL_MPI #Note : we miss the name of the Intel C compiler if test x"$pac_cv_serial_mpi" == x"yes" ; then - FAKEMPI="fakempi.o"; + FAKEMPI="psb_fakempi.o"; MPIFC="$FC"; MPICC="$CC"; - MPICXX="$CXX"; + MPICXX="$CXX"; + CSERIALMPI="#define PSB_SERIAL_MPI" else AC_LANG([C]) if test "X$MPICC" = "X" ; then # This is our MPICC compiler preference: it will override ACX_MPI's first try. 
- AC_CHECK_PROGS([MPICC],[mpxlc mpiicc mpcc mpicc cc]) + AC_CHECK_PROGS([MPICC],[mpxlc mpiicx mpiicc mpcc mpicc cc]) +else + AC_MSG_NOTICE([test with $MPICC]) fi ACX_MPI([], [AC_MSG_ERROR([[Cannot find any suitable MPI implementation for C]])]) -AC_LANG([Fortran]) AC_LANG([C++]) if test "X$MPICXX" = "X" ; then # This is our MPICC compiler preference: it will override ACX_MPI's first try. - AC_CHECK_PROGS([MPICXX],[mpxlc++ mpiicpc mpicxx]) + AC_CHECK_PROGS([MPICXX],[mpxlc++ mpiicpx mpiicpc mpicxx]) fi ACX_MPI([], [AC_MSG_ERROR([[Cannot find any suitable MPI implementation for C++]])]) AC_LANG([Fortran]) if test "X$MPIFC" = "X" ; then # This is our MPIFC compiler preference: it will override ACX_MPI's first try. - AC_CHECK_PROGS([MPIFC],[mpxlf2003_r mpxlf2003 mpxlf95_r mpxlf90 mpiifort mpf95 mpf90 mpifort mpif95 mpif90 ftn ]) + AC_CHECK_PROGS([MPIFC],[mpxlf2003_r mpxlf2003 mpxlf95_r mpxlf90 mpiifx mpiifort mpf95 mpf90 mpifort mpif95 mpif90 ftn ]) fi ACX_MPI([], [AC_MSG_ERROR([[Cannot find any suitable MPI implementation for Fortran]])]) @@ -193,9 +195,11 @@ dnl NOTE : no spaces before the comma, and no brackets before the second argumen PAC_ARG_WITH_FLAGS(ccopt,CCOPT) PAC_ARG_WITH_FLAGS(cxxopt,CXXOPT) PAC_ARG_WITH_FLAGS(fcopt,FCOPT) +PAC_ARG_WITH_FLAGS(extra-opt,EXTRA_OPT) PAC_ARG_WITH_LIBS PAC_ARG_WITH_FLAGS(clibs,CLIBS) PAC_ARG_WITH_FLAGS(flibs,FLIBS) +PAC_ARG_WITH_FLAGS(extra-nvcc,EXTRA_NVCC) dnl candidates for removal: PAC_ARG_WITH_FLAGS(library-path,LIBRARYPATH) @@ -204,7 +208,7 @@ PAC_ARG_WITH_FLAGS(module-path,MODULE_PATH) # we just gave the user the chance to append values to these variables -############################################################################### + dnl Library oriented Autotools facilities (we don't care about this for now) @@ -233,12 +237,12 @@ PAC_CHECK_HAVE_CRAYFTN( if test x"$psblas_cv_fc" == "x" ; then if eval "$MPIFC -qversion 2>&1 | grep XL 2>/dev/null" ; then - psblas_cv_fc="xlf" + psblas_cv_fc="xlf"; # Some configurations of 
the XLF want "-WF," prepended to -D.. flags. # TODO : discover the exact conditions when the usage of -WF is needed. psblas_cv_define_prepend="-WF," if eval "$MPIFC -qversion 2>&1 | grep -e\"Version: 10\.\" 2>/dev/null"; then - FDEFINES="$psblas_cv_define_prepend-DXLF_10 $FDEFINES" + FDEFINES="$psblas_cv_define_prepend-DXLF_10 $FDEFINES"; sed -e's/([0-9]*).*/$1/p' fi # Note : there could be problems with old xlf compiler versions ( <10.1 ) @@ -247,20 +251,50 @@ if test x"$psblas_cv_fc" == "x" ; then elif eval "$MPIFC -V 2>&1 | grep Sun 2>/dev/null" ; then # Sun compiler detection - psblas_cv_fc="sun" - elif eval "$MPIFC -V 2>&1 | grep Portland 2>/dev/null" ; then - # Portland group compiler detection - - psblas_cv_fc="pg" + psblas_cv_fc="sun"; + elif eval "$MPIFC --version 2>&1 | grep flang-new 2>&1 1>/dev/null" ; then + # LLVM compiler + psblas_cv_fc="flang-new"; + psblas_flang_version=`flang-new --version |grep flang| sed -e 's/^ *flang.* version *//gi'`; + psblas_flang_shv=`flang-new --version |grep flang| sed -e 's/^ *flang.* version *//gi' | sed -e's/\./ /g' | awk '{print $1}'`; + psblas_cv_define_prepend=""; + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" + psblas_shvs=`echo $psblas_flang_shv|sed -e's/[^0-9]*//g'`; + if test x"$psblas_shvs" != x""; then + AC_MSG_NOTICE([[Running with LLVM $psblas_flang_version ($psblas_flang_shv). ]]) + if (( $psblas_flang_shv < 20 )) ; then + AC_MSG_ERROR([[The minimum supported LLVM version is version 20, bailing out. ]]) + else + psblas_cv_fc="flang"; + fi + else + AC_MSG_ERROR([[Unrecognized LLVM version. The minimum supported LLVM version is version 20, bailing out. 
]]) + fi + elif eval "$MPIFC --version 2>&1 | grep flang 2>&1 1>/dev/null" ; then + # LLVM compiler + psblas_flang_version=`flang --version |grep flang| sed -e 's/^ *flang.* version *//gi'`; + psblas_flang_shv=`flang --version |grep flang| sed -e 's/^ *flang.* version *//gi' | sed -e's/\./ /g' | awk '{print $1}'`; + psblas_cv_fc="flang"; + psblas_cv_define_prepend=""; + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" + psblas_shvs=`echo $psblas_flang_shv|sed -e's/[^0-9]*//g'`; + if test x"$psblas_shvs" != x""; then + AC_MSG_NOTICE([[Running with LLVM $psblas_flang_version ($psblas_flang_shv). ]]) + if (( $psblas_flang_shv < 20 )) ; then + AC_MSG_ERROR([[The minimum supported LLVM version is version 20, bailing out. ]]) + fi + else + AC_MSG_ERROR([[Unrecognized LLVM version. The minimum supported LLVM version is version 20, bailing out. ]]) + fi elif eval "$MPIFC -V 2>&1 | grep Intel.*Fortran.*Compiler 2>/dev/null" ; then # Intel compiler identification - psblas_cv_fc="ifc" + psblas_cv_fc="ifc"; elif eval "$MPIFC -v 2>&1 | grep NAG 2>/dev/null" ; then - psblas_cv_fc="nag" - FC="$MPIFC" + psblas_cv_fc="nag"; + FC="$MPIFC"; else - psblas_cv_fc="" + psblas_cv_fc=""; # unsupported MPI Fortran compiler AC_MSG_NOTICE([[Unknown Fortran compiler, proceeding with fingers crossed !]]) fi @@ -272,18 +306,9 @@ PAC_HAVE_MODERN_GFORTRAN( ) fi - ############################################################################### # Linking, symbol mangling, and misc tests ############################################################################### - -# Note : This is functional to Make.inc rules and structure (see below). 
-AC_LANG([C]) -AC_CHECK_SIZEOF(void *) -# Define for platforms with 64 bit (void * ) pointers -if test X"$ac_cv_sizeof_void_p" == X"8" ; then - CDEFINES="-DPtr64Bits $CDEFINES" -fi AC_LANG([Fortran]) __AC_FC_NAME_MANGLING if test "X$psblas_cv_fc" == X"pg" ; then @@ -297,45 +322,45 @@ dnl AC_MSG_NOTICE([Fortran name mangling: $ac_cv_fc_mangling]) [pac_fc_sec_under=${pac_fc_sec_under# }] [pac_fc_under=${pac_fc_under%%,*}] [pac_fc_under=${pac_fc_under# }] -AC_MSG_CHECKING([defines for C/Fortran name interfaces]) -if test "x$pac_fc_case" == "xlower case"; then - if test "x$pac_fc_under" == "xunderscore"; then - if test "x$pac_fc_sec_under" == "xno extra underscore"; then - pac_f_c_names="-DLowerUnderscore" - elif test "x$pac_fc_sec_under" == "xextra underscore"; then - pac_f_c_names="-DLowerDoubleUnderscore" - else - pac_f_c_names="-DUNKNOWN" -dnl AC_MSG_NOTICE([Fortran name mangling extra underscore unknown case]) - fi - elif test "x$pac_fc_under" == "xno underscore"; then - pac_f_c_names="-DLowerCase" - else - pac_f_c_names="-DUNKNOWN" -dnl AC_MSG_NOTICE([Fortran name mangling underscore unknown case]) - fi -elif test "x$pac_fc_case" == "xupper case"; then - if test "x$pac_fc_under" == "xunderscore"; then - if test "x$pac_fc_sec_under" == "xno extra underscore"; then - pac_f_c_names="-DUpperUnderscore" - elif test "x$pac_fc_sec_under" == "xextra underscore"; then - pac_f_c_names="-DUpperDoubleUnderscore" - else - pac_f_c_names="-DUNKNOWN" -dnl AC_MSG_NOTICE([Fortran name mangling extra underscore unknown case]) - fi - elif test "x$pac_fc_under" == "xno underscore"; then - pac_f_c_names="-DUpperCase" - else - pac_f_c_names="-DUNKNOWN" -dnl AC_MSG_NOTICE([Fortran name mangling underscore unknown case]) - fi -dnl AC_MSG_NOTICE([Fortran name mangling UPPERCASE not handled]) -else - pac_f_c_names="-DUNKNOWN" -dnl AC_MSG_NOTICE([Fortran name mangling unknown case]) -fi -CDEFINES="$pac_f_c_names $CDEFINES" +dnl AC_MSG_CHECKING([defines for C/Fortran name interfaces]) +dnl 
if test "x$pac_fc_case" == "xlower case"; then +dnl if test "x$pac_fc_under" == "xunderscore"; then +dnl if test "x$pac_fc_sec_under" == "xno extra underscore"; then +dnl pac_f_c_names="-DLowerUnderscore" +dnl elif test "x$pac_fc_sec_under" == "xextra underscore"; then +dnl pac_f_c_names="-DLowerDoubleUnderscore" +dnl else +dnl pac_f_c_names="-DUNKNOWN" +dnl dnl AC_MSG_NOTICE([Fortran name mangling extra underscore unknown case]) +dnl fi +dnl elif test "x$pac_fc_under" == "xno underscore"; then +dnl pac_f_c_names="-DLowerCase" +dnl else +dnl pac_f_c_names="-DUNKNOWN" +dnl dnl AC_MSG_NOTICE([Fortran name mangling underscore unknown case]) +dnl fi +dnl elif test "x$pac_fc_case" == "xupper case"; then +dnl if test "x$pac_fc_under" == "xunderscore"; then +dnl if test "x$pac_fc_sec_under" == "xno extra underscore"; then +dnl pac_f_c_names="-DUpperUnderscore" +dnl elif test "x$pac_fc_sec_under" == "xextra underscore"; then +dnl pac_f_c_names="-DUpperDoubleUnderscore" +dnl else +dnl pac_f_c_names="-DUNKNOWN" +dnl dnl AC_MSG_NOTICE([Fortran name mangling extra underscore unknown case]) +dnl fi +dnl elif test "x$pac_fc_under" == "xno underscore"; then +dnl pac_f_c_names="-DUpperCase" +dnl else +dnl pac_f_c_names="-DUNKNOWN" +dnl dnl AC_MSG_NOTICE([Fortran name mangling underscore unknown case]) +dnl fi +dnl dnl AC_MSG_NOTICE([Fortran name mangling UPPERCASE not handled]) +dnl else +dnl pac_f_c_names="-DUNKNOWN" +dnl dnl AC_MSG_NOTICE([Fortran name mangling unknown case]) +dnl fi +dnl CDEFINES="$pac_f_c_names $CDEFINES" AC_MSG_RESULT([ $pac_f_c_names ]) @@ -416,7 +441,7 @@ if test "X$FCOPT" == "X" ; then if test "X$psblas_cv_fc" == "Xgcc" ; then # note that no space should be placed around the equality symbol in assignations # Note : 'native' is valid _only_ on GCC/x86 (32/64 bits) - FCOPT="-g -O3 -frecursive $FCOPT" + FCOPT="-g -O3 $FCOPT" elif test "X$psblas_cv_fc" == X"xlf" ; then # XL compiler : consider using -qarch=auto FCOPT="-O3 -qarch=auto -qlanglvl=extended 
-qxlf2003=polymorphic:autorealloc $FCOPT" @@ -427,6 +452,9 @@ if test "X$FCOPT" == "X" ; then elif test "X$psblas_cv_fc" == X"pg" ; then # other compilers .. FCOPT="-fast $FCOPT" + elif test "X$psblas_cv_fc" == X"flang" ; then + # other compilers .. + FCOPT="-O3" # NOTE : PG & Sun use -fast instead -O3 elif test "X$psblas_cv_fc" == X"sun" ; then # other compilers .. @@ -444,13 +472,13 @@ fi if test "X$psblas_cv_fc" == X"nag" ; then # Add needed options FCOPT="$FCOPT -dcfuns -f2003 -wmismatch=mpi_scatterv,mpi_alltoallv,mpi_gatherv,mpi_allgatherv" - EXTRA_OPT="-mismatch_all" + EXTRA_OPT="$EXTRA_OPT -mismatch_all" fi if test "X$psblas_cv_fc" == "Xgcc" ; then - FCOPT="-frecursive $FCOPT" - PAC_HAVE_GFORTRAN_10( - [FCOPT="-fallow-argument-mismatch $FCOPT"], - []) + FCOPT="-frecursive $FCOPT" +dnl PAC_HAVE_GFORTRAN_10( +dnl [FCOPT="-fallow-argument-mismatch $FCOPT"], +dnl []) fi @@ -478,7 +506,7 @@ fi ############################################################################## BASEMODNAME=psb_base_mod PRECMODNAME=psb_prec_mod -METHDMODNAME=psb_krylov_mod +METHDMODNAME=psb_linsolve_mod UTILMODNAME=psb_util_mod if test "X$psblas_cv_fc" == X"cray" @@ -488,7 +516,7 @@ then FIFLAG="-I" BASEMODNAME=PSB_BASE_MOD PRECMODNAME=PSB_PREC_MOD - METHDMODNAME=PSB_KRYLOV_MOD + METHDMODNAME=PSB_LINSOLVE_MOD UTILMODNAME=PSB_UTIL_MOD else @@ -512,47 +540,53 @@ fi ############################################################################### # Custom test : do we have a module or include for MPI Fortran interface? 
if test x"$pac_cv_serial_mpi" == x"yes" ; then - FDEFINES="$psblas_cv_define_prepend-DSERIAL_MPI $psblas_cv_define_prepend-DMPI_MOD $FDEFINES"; + FDEFINES="$psblas_cv_define_prepend-DPSB_SERIAL_MPI $psblas_cv_define_prepend-DPSB_MPI_MOD $FDEFINES"; + dnl CDEFINES="-DPSB_SERIAL_MPI $CDEFINES" else PAC_FORTRAN_CHECK_HAVE_MPI_MOD_F08() if test x"$pac_cv_mpi_f08" == x"yes" ; then -dnl FDEFINES="$psblas_cv_define_prepend-DMPI_MOD_F08 $FDEFINES"; - FDEFINES="$psblas_cv_define_prepend-DMPI_MOD $FDEFINES"; +dnl FDEFINES="$psblas_cv_define_prepend-DPSB_MPI_MOD_F08 $FDEFINES"; + FDEFINES="$psblas_cv_define_prepend-DPSB_MPI_MOD $FDEFINES"; else PAC_FORTRAN_CHECK_HAVE_MPI_MOD( - [FDEFINES="$psblas_cv_define_prepend-DMPI_MOD $FDEFINES"], - [FDEFINES="$psblas_cv_define_prepend-DMPI_H $FDEFINES"]) + [FDEFINES="$psblas_cv_define_prepend-DPSB_MPI_MOD $FDEFINES"], + [FDEFINES="$psblas_cv_define_prepend-DPSB_MPI_H $FDEFINES"]) fi fi PAC_ARG_WITH_IPK PAC_ARG_WITH_LPK -# Defaults for IPK/LPK +# Defaults for PSB_IPK/PSB_LPK if test x"$pac_cv_ipk_size" == x"" ; then pac_cv_ipk_size=4 fi if test x"$pac_cv_lpk_size" == x"" ; then pac_cv_lpk_size=8 fi +PSB_IPKDEF="#define PSB_IPK$pac_cv_ipk_size" +PSB_LPKDEF="#define PSB_LPK$pac_cv_lpk_size" # Enforce sensible combination if (( $pac_cv_lpk_size < $pac_cv_ipk_size )); then - AC_MSG_NOTICE([[Invalid combination of size specs IPK ${pac_cv_ipk_size} LPK ${pac_cv_lpk_size}. ]]); + AC_MSG_NOTICE([[Invalid combination of size specs PSB_IPK ${pac_cv_ipk_size} PSB_LPK ${pac_cv_lpk_size}. 
]]); AC_MSG_NOTICE([[Forcing equal values]]) pac_cv_lpk_size=$pac_cv_ipk_size; fi -FDEFINES="$psblas_cv_define_prepend-DIPK${pac_cv_ipk_size} $FDEFINES"; -FDEFINES="$psblas_cv_define_prepend-DLPK${pac_cv_lpk_size} $FDEFINES"; -CDEFINES="-DIPK${pac_cv_ipk_size} -DLPK${pac_cv_lpk_size} $CDEFINES" +FDEFINES="$psblas_cv_define_prepend-DPSB_IPK${pac_cv_ipk_size} $FDEFINES"; +FDEFINES="$psblas_cv_define_prepend-DPSB_LPK${pac_cv_lpk_size} $FDEFINES"; +dnl CDEFINES="-DPSB_IPK${pac_cv_ipk_size} -DPSB_LPK${pac_cv_lpk_size} $CDEFINES" FLINK="$MPIFC" +CLINK="$MPICC" PAC_ARG_OPENMP() if test x"$pac_cv_openmp" == x"yes" ; then - FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES"; - CDEFINES="-DOPENMP $CDEFINES"; + FDEFINES="$psblas_cv_define_prepend-DPSB_OPENMP $FDEFINES"; + CDEFINES="$CDEFINES"; + CHAVE_OPENMP="#define PSB_OPENMP" FCOPT="$FCOPT $pac_cv_openmp_fcopt"; CCOPT="$CCOPT $pac_cv_openmp_ccopt"; CXXOPT="$CXXOPT $pac_cv_openmp_cxxopt"; FLINK="$FLINK $pac_cv_openmp_fcopt"; + CLINK="$CLINK $pac_cv_openmp_fcopt"; fi # # Tests for support of various Fortran features; some of them are critical, @@ -639,11 +673,11 @@ PAC_FORTRAN_TEST_FINAL( PAC_FORTRAN_TEST_GENERICS( [], - [FDEFINES="$psblas_cv_define_prepend-DHAVE_BUGGY_GENERICS $FDEFINES"] + [FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_BUGGY_GENERICS $FDEFINES"] ) PAC_FORTRAN_TEST_FLUSH( - [FDEFINES="$psblas_cv_define_prepend-DHAVE_FLUSH_STMT $FDEFINES"], + [FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_FLUSH_STMT $FDEFINES"], ) @@ -680,7 +714,7 @@ if test "X$RANLIB" == "X" ; then fi # This should be portable -AR="${AR} -cur" +AR="${AR} -cDr" ############################################################################### @@ -696,7 +730,7 @@ AR="${AR} -cur" PAC_BLAS([], [AC_MSG_ERROR([[Cannot find BLAS library, specify a path using --with-blas=DIR/LIB (for example --with-blas=/usr/path/lib/libcxml.a)]])]) PAC_LAPACK( -[FDEFINES="$psblas_cv_define_prepend-DHAVE_LAPACK $FDEFINES"], 
+[FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_LAPACK $FDEFINES"], ) AC_LANG([C]) @@ -729,65 +763,214 @@ PAC_MAKE_IS_GNUMAKE # Note : also umfdi_local_search, ... #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) -AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then -want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""]) -if test x"$want_rsb_libs" != x ; then - if test x"$want_rsb_libs" = xyes ; then - want_rsb_libs="`librsb-config --static --ldflags`" - else - if test -d "$want_rsb_libs" ; then - want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" - else - true; - # we assume want_rsb_libs are linkage parameters - fi - fi - FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" -fi -RSB_LIBS="$want_rsb_libs" -LIBS="$RSB_LIBS ${LIBS}" +dnl AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then +dnl want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""]) +dnl if test x"$want_rsb_libs" != x ; then +dnl if test x"$want_rsb_libs" = xyes ; then +dnl want_rsb_libs="`librsb-config --static --ldflags`" +dnl else +dnl if test -d "$want_rsb_libs" ; then +dnl want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" +dnl else +dnl true; +dnl # we assume 
want_rsb_libs are linkage parameters +dnl fi +dnl fi +dnl FDEFINES="$FDEFINES $psblas_cv_define_prepend-DPSB_HAVE_LIBRSB" +dnl fi +dnl RSB_LIBS="$want_rsb_libs" +dnl LIBS="$RSB_LIBS ${LIBS}" dnl AC_CHECK_HEADERS([rsb.h], [ LIBS="${LIBS} $want_rsb_libs"], []) PAC_CHECK_METIS -AC_MSG_CHECKING([Compatibility between metis and LPK]) +AC_MSG_CHECKING([Compatibility between metis and PSB_LPK]) if test "x$pac_cv_lpk_size" == "x4" ; then if test "x$pac_cv_metis_idx" == "x64" ; then - dnl mismatch between metis size and PSBLAS LPK + dnl mismatch between metis size and PSBLAS PSB_LPK psblas_cv_have_metis="no"; dnl fi fi if test "x$pac_cv_lpk_size" == "x8" ; then if test "x$pac_cv_metis_idx" == "x32" ; then - dnl mismatch between metis size and PSBLAS LPK + dnl mismatch between metis size and PSBLAS PSB_LPK psblas_cv_have_metis="no"; fi fi AC_MSG_RESULT([$psblas_cv_have_metis]) if test "x$pac_cv_metis_idx" == "xunknown" ; then - dnl mismatch between metis size and PSBLAS LPK + dnl mismatch between metis size and PSBLAS PSB_LPK AC_MSG_NOTICE([Unknown METIS bitsize.]) - $psblas_cv_have_metis = "no"; + psblas_cv_have_metis="no"; fi if test "x$pac_cv_metis_real" == "xunknown" ; then - dnl mismatch between metis size and PSBLAS LPK + dnl mismatch between metis size and PSBLAS PSB_LPK AC_MSG_NOTICE([Unknown METIS REAL bitsize.]) - $psblas_cv_have_metis = "no"; + psblas_cv_have_metis="no"; fi if test "x$psblas_cv_have_metis" == "xyes" ; then - FDEFINES="$psblas_cv_define_prepend-DHAVE_METIS $psblas_cv_define_prepend-DMETIS_$pac_cv_metis_idx $psblas_cv_define_prepend-DMETIS_REAL_$pac_cv_metis_real $FDEFINES" - CDEFINES="-DHAVE_METIS_ $psblas_cv_metis_includes $CDEFINES -DMETIS_$pac_cv_metis_idx -DMETIS_REAL_$pac_cv_metis_real" + FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_METIS $psblas_cv_define_prepend-DPSB_METIS_$pac_cv_metis_idx $psblas_cv_define_prepend-DPSB_METIS_REAL_$pac_cv_metis_real $FDEFINES" + CDEFINES="$psblas_cv_metis_includes $CDEFINES" METISINCFILE=$psblas_cv_metisincfile + CHAVEMETIS="#define PSB_HAVE_METIS" +
CINTMETIS="#define PSB_METIS_$pac_cv_metis_idx" + CREALMETIS="#define PSB_METIS_REAL_$pac_cv_metis_real" fi PAC_CHECK_AMD if test "x$psblas_cv_have_amd" == "xyes" ; then - FDEFINES="$psblas_cv_define_prepend-DHAVE_AMD $FDEFINES" - CDEFINES="-DHAVE_AMD_ $psblas_cv_amd_includes $CDEFINES" + FDEFINES="$psblas_cv_define_prepend-DPSB_HAVE_AMD $FDEFINES" + CDEFINES="$psblas_cv_amd_includes $CDEFINES" + CHAVEAMD="#define PSB_HAVE_AMD" +fi + + +PAC_ARG_CUDA() +if test "x$pac_cv_cuda" == "xyes"; then +if test "x$pac_cv_ipk_size" != "x4"; then + AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU]) + pac_cv_cuda="no"; + PSB_HAVE_CUDA="no"; + CUDA_CC=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; + CUDEFINES=""; + CUDA_DEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; + FCUDEFINES=""; + CCUDEFINES=""; + CXXCUDEFINES=""; +else + + PAC_CHECK_CUDA() + + if test "x$pac_cv_have_cuda" == "xyes"; then + + PAC_CHECK_CUDA_VERSION() + dnl PAC_CHECK_SPGPU() + PSB_CUDA_VERSION="$pac_cv_cuda_version"; + if test "${PSB_CUDA_VERSION}" -gt 12080 2>/dev/null; then + AC_MSG_NOTICE([Unsupported CUDA version ${PSB_CUDA_VERSION} disabling CUDA]) + pac_cv_cuda="no"; + PSB_HAVE_CUDA="no"; + CUDA_CC=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; + CUDEFINES=""; + CUDA_DEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; + FCUDEFINES=""; + CCUDEFINES=""; + CXXCUDEFINES=""; + else + AC_MSG_NOTICE([CUDA version ${PSB_CUDA_VERSION}]) + PSB_HAVE_CUDA="yes"; + PSB_CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); + CUDA_DEFINES="-DPSB_HAVE_CUDA -DPSB_CUDA_SHORT_VERSION=${PSB_CUDA_SHORT_VERSION} -DPSB_CUDA_VERSION=${PSB_CUDA_VERSION}"; + CHAVECUDA="#define PSB_HAVE_CUDA" + CSHORTVCUDA="#define PSB_CUDA_SHORT_VERSION ${PSB_CUDA_SHORT_VERSION}" + CVERSIONCUDA="#define PSB_CUDA_VERSION ${PSB_CUDA_VERSION}" + SPGPU_LIBS="-lspgpu"; + CUDAD=cudad; + CUDALD=cudald; + LCUDA="-lpsb_cuda"; + EXTRALDLIBS="-lstdc++"; + AC_MSG_NOTICE([At this point GPUTARGET is $CUDAD $CUDALD]) +
PAC_ARG_WITH_CUDACC() + if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="50,60,70,75,80,86"; + CUDA_CC="$pac_cv_cudacc"; + fi + if test "$pac_cv_cuda_version" -ge 11070 2>/dev/null + then + CUDEFINES="--dopt=on"; + fi + for cc in `echo $pac_cv_cudacc|sed 's/,/ /g'` + do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; + done + if test "x$pac_cv_cuda_version" != "xunknown"; then + dnl CUDEFINES="$CUDEFINES ${CUDA_DEFINES}" + FCUDEFINES=" ${CUDA_DEFINES}" + dnl CCUDEFINES=" ${CUDA_DEFINES}" + dnl CXXCUDEFINES=" ${CUDA_DEFINES}" + fi + fi + fi +fi fi +PAC_ARG_OPENACC() +dnl AC_ARG_ENABLE([openacc], +dnl [AS_HELP_STRING([--disable-openacc], [do not use Openacc])]) +if test x"$pac_cv_openacc" == x"yes" ; then + PAC_ARG_WITH_FLAGS(extraopenacc,EXTRAOPENACC) + dnl if test false; then + AX_C_OPENACC() + CCOPENACC="$ax_cv_prog_c_openacc"; + AX_CXX_OPENACC() + CXXOPENACC="$ax_cv_prog_cxx_openacc"; + AX_FC_OPENACC() + FCOPENACC="$ax_cv_prog_fc_openacc"; + dnl AX_OPENACC() + dnl + dnl CXXOPENACC="$ax_cv_prog_cxx_openacc"; + dnl FCOPENACC="$ax_cv_prog_fc_openacc"; + dnl else +dnl AC_MSG_NOTICE([OpenACC 1 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC]) + PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC) + PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC) + PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC) +dnl AC_MSG_NOTICE([OpenACC 2 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC]) +dnl CCOPENACC="$ax_cv_prog_c_openacc"; +dnl CXXOPENACC="$ax_cv_prog_cxx_openacc"; +dnl FCOPENACC="$ax_cv_prog_fc_openacc"; +dnl fi + CCOPENACC="$CCOPENACC $EXTRAOPENACC"; + CXXOPENACC="$CXXOPENACC $EXTRAOPENACC"; + FCOPENACC="$FCOPENACC $EXTRAOPENACC"; +dnl AC_MSG_NOTICE([OpenACC 3 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC]) + OACCD=oaccd; + OACCLD=oaccld; + + #FCOPT="$FCOPT $FCOPENACC"; + #CCOPT="$CCOPT $CCOPENACC" + #CXXOPT="$CXXOPT $CXXOPENACC" + #FLINK="$FLINK $FCOPENACC"; +fi + +############################################################################### +dnl PAC_ARG_WITH_LIBRSB()
+LIBRSB_DIR="$pac_cv_librsb_dir"; +AC_MSG_CHECKING([for LIBRSB install dir]) +case $LIBRSB_DIR in + /*) ;; + *) dnl AC_MSG_ERROR([The LIBRSB installation dir must be an absolute pathname + dnl specified with --with-librsb=/path/to/librsb]) +esac +dnl if test ! -d "$LIBRSB_DIR" ; then +dnl AC_MSG_ERROR([Could not find LIBRSB build dir $LIBRSB_DIR!]) +dnl fi +pac_cv_status_file="$LIBRSB_DIR/librsb.a" +if test ! -f "$pac_cv_status_file" ; then + AC_MSG_RESULT([no]) + #AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.]) +else + AC_MSG_RESULT([$LIBRSB_DIR]) + RSBTARGETLIB=rsbd; + RSBTARGETOBJ=rsbobj; + CHAVELIBRSB="#define PSB_HAVE_LIBRSB" +fi + + + ############################################################################### @@ -797,7 +980,7 @@ fi LIBDIR=lib BASELIBNAME=libpsb_base.a PRECLIBNAME=libpsb_prec.a -METHDLIBNAME=libpsb_krylov.a +METHDLIBNAME=libpsb_linsolve.a UTILLIBNAME=libpsb_util.a ############################################################################### @@ -817,12 +1000,15 @@ AC_SUBST(FCOPT) AC_SUBST(CCOPT) AC_SUBST(CXXOPT) AC_SUBST(EXTRA_OPT) +AC_SUBST(EXTRA_NVCC) AC_SUBST(FAKEMPI) AC_SUBST(FIFLAG) AC_SUBST(FMFLAG) AC_SUBST(MODEXT) AC_SUBST(FLINK) +AC_SUBST(CLINK) AC_SUBST(LIBS) +AC_SUBST(FLIBS) AC_SUBST(AR) AC_SUBST(RANLIB) AC_SUBST(MPIFC) @@ -846,10 +1032,10 @@ AC_SUBST(CINCLUDES) AC_SUBST(FINCLUDES) PSBLASRULES=' -PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) -CDEFINES=$(PSBCDEFINES) -FDEFINES=$(PSBFDEFINES) +PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(FLIBS) $(LIBS) +CXXDEFINES=$(PSBCXXDEFINES) +CDEFINES=$(PSBCDEFINES) +FDEFINES=$(PSBFDEFINES) # These should be portable rules, arent they? 
@@ -863,19 +1049,65 @@ FDEFINES=$(PSBFDEFINES) $(CXX) $(CXXOPT) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@' +AC_SUBST(PSB_IPKDEF) +AC_SUBST(PSB_LPKDEF) +AC_SUBST(CSERIALMPI) +AC_SUBST(CHAVE_OPENMP) +AC_SUBST(CHAVEMETIS) +AC_SUBST(CINTMETIS) +AC_SUBST(CREALMETIS) +AC_SUBST(CHAVEAMD) +AC_SUBST(CHAVECUDA) +AC_SUBST(CSHORTVCUDA) +AC_SUBST(CVERSIONCUDA) AC_SUBST(PSBLASRULES) AC_SUBST(LIBDIR) AC_SUBST(RSB_LIBS) +AC_SUBST(CHAVELIBRSB) AC_SUBST(BASELIBNAME) AC_SUBST(PRECLIBNAME) AC_SUBST(METHDLIBNAME) AC_SUBST(UTILLIBNAME) AC_SUBST(METISINCFILE) +AC_SUBST(OACCD) +AC_SUBST(OACCLD) +AC_SUBST(FCOPENACC) +AC_SUBST(CCOPENACC) +AC_SUBST(CXXOPENACC) + +AC_SUBST(SPGPU_FLAGS) +AC_SUBST(SPGPU_LIBS) +dnl AC_SUBST(SPGPU_DIR) +dnl AC_SUBST(SPGPU_INCLUDES) +dnl AC_SUBST(SPGPU_INCDIR) +AC_SUBST(EXTRALDLIBS) +AC_SUBST(CUDA_DIR) +AC_SUBST(CUDA_DEFINES) +AC_SUBST(FCUDEFINES) +AC_SUBST(CCUDEFINES) +AC_SUBST(CXXCUDEFINES) +AC_SUBST(CUDA_INCLUDES) +AC_SUBST(CUDA_LIBS) +AC_SUBST(PSB_CUDA_VERSION) +AC_SUBST(PSB_CUDA_SHORT_VERSION) +AC_SUBST(CUDA_NVCC) +AC_SUBST(CUDEFINES) +AC_SUBST(CUDAD) +AC_SUBST(CUDALD) +AC_SUBST(LCUDA) +AC_SUBST(LIBRSB_LIBS) +AC_SUBST(LIBRSB_INCLUDES) +AC_SUBST(LIBRSB_INCDIR) +AC_SUBST(LIBRSB_DIR) +AC_SUBST(LIBRSB_DEFINES) +AC_SUBST(LRSB) ############################################################################### # the following files will be created by Automake AC_CONFIG_FILES([Make.inc]) +AC_CONFIG_FILES([base/modules/psb_config.h]) +dnl AC_CONFIG_FILES([base/modules/psb_cxxconfig.h]) AC_CONFIG_FILES([util/psb_metis_int.h]) AC_OUTPUT() #AC_OUTPUT(Make.inc Makefile) @@ -896,6 +1128,14 @@ AC_MSG_NOTICE([ FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${PSB_HAVE_CUDA} + CUDA_CC : ${pac_cv_cudacc} + + OPENACC : ${pac_cv_openacc} + FCOPENACC : ${FCOPENACC} + OACCD : ${OACCD} + OACCLD : ${OACCLD} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} @@ -915,6 +1155,13 @@ dnl Note : we should use LDLIBS sooner or later! 
dnl To install the program and its documentation, run 'make install' if you are root, dnl or run 'su -c "make install"' if you are not root. ]) +if test x"${pac_cv_openacc}" == x"yes" ; then + if test x"${FCOPENACC}" == x ; then + AC_MSG_NOTICE([ + WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag. + You may want to rerun configure with --with-fcopenacc= ]) + fi +fi ############################################################################### diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt new file mode 100644 index 00000000..04aa4825 --- /dev/null +++ b/cuda/CMakeLists.txt @@ -0,0 +1,558 @@ +set(PSB_cuda_source_files + psb_base_vectordev_mod.F90 + psb_cuda_mod.F90 + c_cusparse_mod.F90 + psb_d_cuda_csrg_mat_mod.F90 + #psb_i_cuda_dnsg_mat_mod.F90 + psb_z_cuda_diag_mat_mod.F90 + psb_i_vectordev_mod.F90 + psb_z_cuda_csrg_mat_mod.F90 + cusparse_mod.F90 + #impl/psb_z_cuda_hdiag_csmv.F90 + impl/psb_c_cuda_hybg_csmm.F90 + impl/psb_c_cuda_csrg_vect_mv.F90 + # impl/psb_c_cuda_elg_trim.f90 + impl/psb_s_cuda_mv_elg_from_fmt.F90 + impl/psb_c_cuda_csrg_inner_vect_sv.F90 + impl/psb_c_cuda_cp_hlg_from_coo.F90 + impl/psb_c_cuda_diag_to_gpu.F90 + impl/psb_c_cuda_elg_csput.F90 + impl/psb_z_cuda_cp_csrg_from_coo.F90 + impl/psb_s_cuda_cp_elg_from_fmt.F90 + impl/psb_d_cuda_elg_csmm.F90 + impl/psb_c_cuda_diag_vect_mv.F90 + impl/psb_s_cuda_hdiag_mold.F90 + impl/psb_c_cuda_mv_hlg_from_coo.F90 + impl/psb_c_cuda_hdiag_mold.F90 + impl/psb_c_cuda_hybg_scal.F90 + impl/psb_s_cuda_elg_scal.F90 + impl/psb_c_cuda_mv_csrg_from_fmt.F90 + impl/psb_z_cuda_mv_hlg_from_fmt.F90 + #impl/psb_d_cuda_elg_trim.f90 + impl/psb_c_cuda_hybg_inner_vect_sv.F90 + impl/psb_c_cuda_cp_elg_from_fmt.F90 + impl/psb_s_cuda_hybg_vect_mv.F90 + impl/psb_s_cuda_cp_elg_from_coo.F90 + impl/psb_c_cuda_hlg_vect_mv.F90 + impl/psb_d_cuda_mv_hybg_from_coo.F90 + impl/psb_d_cuda_elg_asb.f90 + impl/psb_d_cuda_csrg_scals.F90 + impl/psb_s_cuda_csrg_vect_mv.F90 + impl/psb_d_cuda_elg_csmv.F90 + 
impl/psb_c_cuda_hlg_scal.F90 + impl/psb_s_cuda_hlg_csmm.F90 + impl/psb_d_cuda_csrg_csmm.F90 + impl/psb_z_cuda_cp_hybg_from_fmt.F90 + impl/psb_z_cuda_elg_to_gpu.F90 + impl/psb_s_cuda_elg_mold.F90 + impl/psb_s_cuda_elg_allocate_mnnz.F90 + impl/psb_d_cuda_diag_vect_mv.F90 + impl/psb_z_cuda_hybg_csmm.F90 + impl/psb_s_cuda_hybg_allocate_mnnz.F90 + impl/psb_d_cuda_hlg_scal.F90 + impl/psb_c_cuda_csrg_csmv.F90 + impl/psb_d_cuda_cp_hybg_from_fmt.F90 + impl/psb_d_cuda_csrg_from_gpu.F90 + impl/psb_z_cuda_csrg_vect_mv.F90 + impl/psb_s_cuda_elg_inner_vect_sv.F90 + impl/psb_s_cuda_mv_csrg_from_fmt.F90 + impl/psb_s_cuda_mv_hybg_from_coo.F90 + impl/psb_d_cuda_dnsg_mat_impl.F90 + impl/psb_z_cuda_elg_scal.F90 + impl/psb_c_cuda_dnsg_mat_impl.F90 + impl/psb_c_cuda_csrg_scal.F90 + impl/psb_z_cuda_diag_csmv.F90 + impl/psb_c_cuda_mv_elg_from_coo.F90 + impl/psb_s_cuda_elg_asb.f90 + impl/psb_s_cuda_cp_hlg_from_coo.F90 + impl/psb_s_cuda_hybg_csmv.F90 + #impl/psb_z_cuda_elg_trim.f90 + impl/psb_z_cuda_csrg_inner_vect_sv.F90 + impl/psb_s_cuda_hybg_to_gpu.F90 + impl/psb_s_cuda_hybg_mold.F90 + impl/psb_z_cuda_elg_inner_vect_sv.F90 + impl/psb_c_cuda_csrg_scals.F90 + impl/psb_d_cuda_hlg_reallocate_nz.F90 + impl/psb_z_cuda_hybg_allocate_mnnz.F90 + impl/psb_s_cuda_csrg_csmm.F90 + impl/psb_s_cuda_hlg_scals.F90 + impl/psb_z_cuda_hybg_csmv.F90 + impl/psb_c_cuda_cp_hdiag_from_coo.F90 + impl/psb_s_cuda_diag_csmv.F90 + impl/psb_z_cuda_hlg_scals.F90 + impl/psb_z_cuda_hlg_scal.F90 + impl/psb_c_cuda_csrg_to_gpu.F90 + impl/psb_d_cuda_hybg_scal.F90 + impl/psb_c_cuda_mv_elg_from_fmt.F90 + impl/psb_d_cuda_cp_csrg_from_fmt.F90 + impl/psb_d_cuda_cp_elg_from_fmt.F90 + impl/psb_z_cuda_mv_diag_from_coo.F90 + impl/psb_c_cuda_elg_asb.f90 + impl/psb_d_cuda_csrg_vect_mv.F90 + impl/psb_d_cuda_hybg_reallocate_nz.F90 + #impl/psb_z_cuda_hdiag_vect_mv.F90 + impl/psb_z_cuda_hybg_to_gpu.F90 + impl/psb_d_cuda_mv_hdiag_from_coo.F90 + impl/psb_c_cuda_cp_elg_from_coo.F90 + #impl/psb_c_cuda_hdiag_to_gpu.F90 + 
impl/psb_c_cuda_hlg_inner_vect_sv.F90 + impl/psb_z_cuda_diag_to_gpu.F90 + impl/psb_c_cuda_elg_to_gpu.F90 + impl/psb_c_cuda_hlg_scals.F90 + impl/psb_d_cuda_csrg_inner_vect_sv.F90 + impl/psb_c_cuda_cp_csrg_from_fmt.F90 + impl/psb_z_cuda_diag_vect_mv.F90 + impl/psb_z_cuda_cp_elg_from_coo.F90 + impl/psb_z_cuda_mv_csrg_from_fmt.F90 + impl/psb_d_cuda_mv_diag_from_coo.F90 + impl/psb_d_cuda_hybg_allocate_mnnz.F90 + impl/psb_c_cuda_hybg_vect_mv.F90 + impl/psb_s_cuda_mv_hlg_from_fmt.F90 + impl/psb_c_cuda_mv_diag_from_coo.F90 + impl/psb_s_cuda_hlg_mold.F90 + impl/psb_d_cuda_cp_elg_from_coo.F90 + impl/psb_d_cuda_elg_scals.F90 + impl/psb_d_cuda_csrg_scal.F90 + impl/psb_d_cuda_hlg_csmv.F90 + impl/psb_d_cuda_hdiag_mold.F90 + impl/psb_z_cuda_hdiag_mold.F90 + impl/psb_c_cuda_csrg_csmm.F90 + impl/psb_s_cuda_hlg_csmv.F90 + impl/psb_d_cuda_elg_csput.F90 + impl/psb_d_cuda_hdiag_csmv.F90 + impl/psb_z_cuda_hybg_inner_vect_sv.F90 + impl/psb_s_cuda_csrg_scal.F90 + impl/psb_c_cuda_elg_scals.F90 + impl/psb_d_cuda_csrg_mold.F90 + impl/psb_z_cuda_csrg_mold.F90 + impl/psb_s_cuda_cp_csrg_from_coo.F90 + impl/psb_c_cuda_hybg_scals.F90 + impl/psb_s_cuda_hdiag_vect_mv.F90 + impl/psb_s_cuda_hlg_vect_mv.F90 + impl/psb_z_cuda_cp_hdiag_from_coo.F90 + impl/psb_c_cuda_elg_inner_vect_sv.F90 + impl/psb_c_cuda_elg_allocate_mnnz.F90 + impl/psb_s_cuda_csrg_mold.F90 + impl/psb_c_cuda_mv_hybg_from_coo.F90 + impl/psb_d_cuda_mv_hybg_from_fmt.F90 + impl/psb_c_cuda_hybg_csmv.F90 + impl/psb_z_cuda_hybg_vect_mv.F90 + impl/psb_c_cuda_cp_csrg_from_coo.F90 + impl/psb_z_cuda_elg_mold.F90 + impl/psb_z_cuda_cp_hybg_from_coo.F90 + impl/psb_z_cuda_cp_elg_from_fmt.F90 + impl/psb_s_cuda_hybg_csmm.F90 + impl/psb_z_cuda_elg_asb.f90 + impl/psb_z_cuda_mv_csrg_from_coo.F90 + impl/psb_d_cuda_hlg_from_gpu.F90 + impl/psb_c_cuda_hlg_csmv.F90 + impl/psb_z_cuda_dnsg_mat_impl.F90 + impl/psb_c_cuda_cp_hybg_from_coo.F90 + impl/psb_z_cuda_cp_hlg_from_fmt.F90 + impl/psb_s_cuda_elg_csmm.F90 + impl/psb_c_cuda_csrg_from_gpu.F90 + 
impl/psb_d_cuda_diag_to_gpu.F90 + impl/psb_d_cuda_mv_elg_from_coo.F90 + impl/psb_d_cuda_elg_from_gpu.F90 + impl/psb_z_cuda_elg_allocate_mnnz.F90 + impl/psb_d_cuda_cp_hdiag_from_coo.F90 + impl/psb_z_cuda_hlg_csmv.F90 + impl/psb_d_cuda_elg_mold.F90 + impl/psb_z_cuda_mv_hdiag_from_coo.F90 + impl/psb_s_cuda_hlg_inner_vect_sv.F90 + impl/psb_c_cuda_cp_hlg_from_fmt.F90 + impl/psb_s_cuda_mv_hybg_from_fmt.F90 + impl/psb_c_cuda_mv_hlg_from_fmt.F90 + impl/psb_z_cuda_mv_hlg_from_coo.F90 + impl/psb_z_cuda_elg_scals.F90 + impl/psb_d_cuda_hybg_vect_mv.F90 + impl/psb_z_cuda_hlg_reallocate_nz.F90 + impl/psb_z_cuda_csrg_csmm.F90 + impl/psb_z_cuda_csrg_to_gpu.F90 + impl/psb_s_cuda_elg_from_gpu.F90 + impl/psb_d_cuda_hybg_csmm.F90 + impl/psb_s_cuda_mv_csrg_from_coo.F90 + impl/psb_d_cuda_hybg_csmv.F90 + impl/psb_s_cuda_csrg_inner_vect_sv.F90 + impl/psb_d_cuda_hlg_vect_mv.F90 + impl/psb_z_cuda_csrg_allocate_mnnz.F90 + impl/psb_s_cuda_hlg_reallocate_nz.F90 + impl/psb_s_cuda_elg_reallocate_nz.F90 + impl/psb_d_cuda_hlg_allocate_mnnz.F90 + impl/psb_d_cuda_elg_scal.F90 + impl/psb_z_cuda_hlg_allocate_mnnz.F90 + impl/psb_z_cuda_hlg_to_gpu.F90 + impl/psb_z_cuda_elg_csmm.F90 + impl/psb_d_cuda_hlg_inner_vect_sv.F90 + impl/psb_z_cuda_diag_mold.F90 + impl/psb_s_cuda_diag_mold.F90 + impl/psb_c_cuda_hybg_to_gpu.F90 + impl/psb_c_cuda_cp_diag_from_coo.F90 + impl/psb_d_cuda_cp_diag_from_coo.F90 + impl/psb_s_cuda_cp_hybg_from_fmt.F90 + impl/psb_s_cuda_elg_csmv.F90 + impl/psb_d_cuda_hlg_to_gpu.F90 + impl/psb_s_cuda_mv_diag_from_coo.F90 + impl/psb_c_cuda_hybg_mold.F90 + impl/psb_s_cuda_hlg_from_gpu.F90 + #impl/psb_c_cuda_hdiag_csmv.F90 + impl/psb_z_cuda_cp_hlg_from_coo.F90 + impl/psb_c_cuda_hlg_csmm.F90 + impl/psb_d_cuda_hlg_csmm.F90 + impl/psb_z_cuda_hlg_vect_mv.F90 + impl/psb_z_cuda_cp_csrg_from_fmt.F90 + impl/psb_s_cuda_csrg_scals.F90 + impl/psb_c_cuda_hlg_reallocate_nz.F90 + impl/psb_c_cuda_mv_hdiag_from_coo.F90 + impl/psb_s_cuda_elg_csput.F90 + impl/psb_d_cuda_csrg_to_gpu.F90 + 
impl/psb_d_cuda_mv_elg_from_fmt.F90 + impl/psb_s_cuda_cp_csrg_from_fmt.F90 + impl/psb_d_cuda_hybg_inner_vect_sv.F90 + impl/psb_d_cuda_hlg_mold.F90 + impl/psb_s_cuda_csrg_reallocate_nz.F90 + impl/psb_z_cuda_csrg_csmv.F90 + impl/psb_s_cuda_mv_hlg_from_coo.F90 + impl/psb_s_cuda_hdiag_csmv.F90 + impl/psb_c_cuda_hlg_mold.F90 + impl/psb_z_cuda_hlg_from_gpu.F90 + impl/psb_s_cuda_cp_diag_from_coo.F90 + impl/psb_d_cuda_hlg_scals.F90 + impl/psb_c_cuda_mv_csrg_from_coo.F90 + impl/psb_s_cuda_cp_hybg_from_coo.F90 + impl/psb_s_cuda_hybg_scals.F90 + impl/psb_d_cuda_cp_hybg_from_coo.F90 + impl/psb_c_cuda_csrg_mold.F90 + impl/psb_d_cuda_elg_vect_mv.F90 + impl/psb_d_cuda_elg_allocate_mnnz.F90 + impl/psb_s_cuda_csrg_to_gpu.F90 + impl/psb_d_cuda_hybg_to_gpu.F90 + impl/psb_z_cuda_cp_diag_from_coo.F90 + impl/psb_d_cuda_mv_hlg_from_coo.F90 + impl/psb_c_cuda_diag_mold.F90 + impl/psb_d_cuda_mv_csrg_from_coo.F90 + impl/psb_d_cuda_hybg_scals.F90 + impl/psb_c_cuda_elg_mold.F90 + impl/psb_z_cuda_hlg_mold.F90 + impl/psb_c_cuda_elg_scal.F90 + impl/psb_d_cuda_csrg_reallocate_nz.F90 + impl/psb_z_cuda_elg_csput.F90 + impl/psb_d_cuda_hdiag_to_gpu.F90 + impl/psb_d_cuda_cp_hlg_from_fmt.F90 + impl/psb_z_cuda_mv_hybg_from_coo.F90 + impl/psb_z_cuda_hlg_csmm.F90 + impl/psb_z_cuda_mv_elg_from_fmt.F90 + impl/psb_d_cuda_diag_mold.F90 + impl/psb_c_cuda_elg_csmv.F90 + impl/psb_d_cuda_mv_csrg_from_fmt.F90 + impl/psb_c_cuda_hlg_from_gpu.F90 + impl/psb_c_cuda_hlg_allocate_mnnz.F90 + impl/psb_z_cuda_csrg_reallocate_nz.F90 + impl/psb_c_cuda_hlg_to_gpu.F90 + impl/psb_c_cuda_csrg_allocate_mnnz.F90 + #impl/psb_s_cuda_elg_trim.f90 + impl/psb_c_cuda_elg_reallocate_nz.F90 + impl/psb_d_cuda_csrg_csmv.F90 + impl/psb_z_cuda_csrg_scals.F90 + impl/psb_s_cuda_elg_vect_mv.F90 + impl/psb_z_cuda_csrg_from_gpu.F90 + impl/psb_c_cuda_elg_from_gpu.F90 + impl/psb_c_cuda_diag_csmv.F90 + impl/psb_z_cuda_hybg_reallocate_nz.F90 + impl/psb_z_cuda_hlg_inner_vect_sv.F90 + impl/psb_s_cuda_elg_scals.F90 + impl/psb_s_cuda_hybg_inner_vect_sv.F90 
+ impl/psb_s_cuda_dnsg_mat_impl.F90 + impl/psb_d_cuda_csrg_allocate_mnnz.F90 + impl/psb_z_cuda_elg_vect_mv.F90 + impl/psb_d_cuda_elg_reallocate_nz.F90 + impl/psb_c_cuda_hybg_reallocate_nz.F90 + impl/psb_d_cuda_mv_hlg_from_fmt.F90 + impl/psb_d_cuda_cp_hlg_from_coo.F90 + impl/psb_s_cuda_hlg_scal.F90 + impl/psb_s_cuda_hlg_to_gpu.F90 + impl/psb_d_cuda_hybg_mold.F90 + impl/psb_s_cuda_hybg_scal.F90 +# impl/psb_c_cuda_hdiag_vect_mv.F90 + impl/psb_z_cuda_elg_from_gpu.F90 + impl/psb_c_cuda_mv_hybg_from_fmt.F90 + impl/psb_s_cuda_elg_to_gpu.F90 + impl/psb_s_cuda_mv_hdiag_from_coo.F90 + impl/psb_d_cuda_cp_csrg_from_coo.F90 + impl/psb_d_cuda_diag_csmv.F90 + impl/psb_z_cuda_mv_elg_from_coo.F90 + impl/psb_c_cuda_cp_hybg_from_fmt.F90 + impl/psb_s_cuda_csrg_allocate_mnnz.F90 + impl/psb_c_cuda_elg_csmm.F90 + impl/psb_s_cuda_mv_elg_from_coo.F90 + impl/psb_z_cuda_elg_reallocate_nz.F90 + impl/psb_s_cuda_hybg_reallocate_nz.F90 + impl/psb_s_cuda_diag_vect_mv.F90 + impl/psb_s_cuda_diag_to_gpu.F90 + impl/psb_z_cuda_hybg_mold.F90 + impl/psb_s_cuda_csrg_csmv.F90 + impl/psb_z_cuda_hybg_scals.F90 + impl/psb_s_cuda_csrg_from_gpu.F90 + impl/psb_z_cuda_elg_csmv.F90 + impl/psb_s_cuda_hdiag_to_gpu.F90 + impl/psb_s_cuda_hlg_allocate_mnnz.F90 +# impl/psb_z_cuda_hdiag_to_gpu.F90 + impl/psb_s_cuda_cp_hdiag_from_coo.F90 + impl/psb_z_cuda_csrg_scal.F90 + impl/psb_c_cuda_elg_vect_mv.F90 + impl/psb_c_cuda_hybg_allocate_mnnz.F90 + impl/psb_s_cuda_cp_hlg_from_fmt.F90 + impl/psb_d_cuda_elg_inner_vect_sv.F90 + impl/psb_c_cuda_csrg_reallocate_nz.F90 + impl/psb_d_cuda_elg_to_gpu.F90 + impl/psb_z_cuda_mv_hybg_from_fmt.F90 + impl/psb_d_cuda_hdiag_vect_mv.F90 + impl/psb_z_cuda_hybg_scal.F90 + psb_d_cuda_elg_mat_mod.F90 + psb_d_cuda_hlg_mat_mod.F90 + hdiagdev_mod.F90 + #psb_s_cuda_hybg_mat_mod.F90 + #psb_d_cuda_hybg_mat_mod.F90 + dnsdev_mod.F90 + psb_c_cuda_hlg_mat_mod.F90 + psb_i_cuda_vect_mod.F90 + z_cusparse_mod.F90 + psb_s_cuda_hdiag_mat_mod.F90 + psb_d_cuda_hdiag_mat_mod.F90 + psb_z_cuda_hdiag_mat_mod.F90 + 
psb_z_cuda_vect_mod.F90 + #psb_z_cuda_hybg_mat_mod.F90 + psb_c_cuda_elg_mat_mod.F90 + psb_c_cuda_dnsg_mat_mod.F90 + elldev_mod.F90 + psb_c_cuda_diag_mat_mod.F90 + psb_z_cuda_elg_mat_mod.F90 + #psb_i_cuda_diag_mat_mod.F90 + #psb_i_cuda_hdiag_mat_mod.F90 + psb_c_cuda_vect_mod.F90 + psb_d_cuda_dnsg_mat_mod.F90 + psb_s_cuda_dnsg_mat_mod.F90 + psb_s_cuda_vect_mod.F90 + hlldev_mod.F90 + psb_z_cuda_dnsg_mat_mod.F90 + #psb_i_cuda_csrg_mat_mod.F90 + psb_z_cuda_hlg_mat_mod.F90 + psb_c_vectordev_mod.F90 + psb_s_cuda_diag_mat_mod.F90 + psb_s_cuda_elg_mat_mod.F90 + psb_s_cuda_hlg_mat_mod.F90 + core_mod.f90 + psb_d_cuda_diag_mat_mod.F90 + #psb_i_cuda_hybg_mat_mod.F90 + psb_z_vectordev_mod.F90 + #psb_i_cuda_hlg_mat_mod.F90 + psb_d_cuda_vect_mod.F90 + psb_cuda_env_mod.F90 + psb_c_cuda_hdiag_mat_mod.F90 + psb_s_vectordev_mod.F90 + base_cusparse_mod.F90 + #psb_i_cuda_elg_mat_mod.F90 + psb_vectordev_mod.f90 + #psb_c_cuda_hybg_mat_mod.F90 + s_cusparse_mod.F90 + d_cusparse_mod.F90 + diagdev_mod.F90 + psb_d_vectordev_mod.F90 + psb_c_cuda_csrg_mat_mod.F90 + psb_s_cuda_csrg_mat_mod.F90 +) + + +list(APPEND PSB_cuda_source_C_files + zvectordev.c + ivectordev.c + hdiagdev.c + scusparse.c + ccusparse.c + vectordev.c + diagdev.c + dnsdev.c + hlldev.c + svectordev.c + dvectordev.c + fcusparse.c + spgpu/hell.c + spgpu/ell.c + spgpu/dia.c + spgpu/core.c + zcusparse.c + cuda_util.c + elldev.c + cvectordev.c + dcusparse.c + + ) + +list(APPEND PSB_cuda_header_C_files + diagdev.h + fcusparse_fct.h + zcusparse.h + ccusparse.h + cintrf.h + vectordev.h + dnsdev.h + zvectordev.h + ivectordev.h + elldev.h + fcusparse.h + cuda_util.h + cvectordev.h + dcusparse.h + fcusparse_dat.h + hlldev.h + spgpu/kernels/cudadebug.h + spgpu/kernels/cudalang.h + spgpu/debug.h + spgpu/ell.h + spgpu/coo_conv.h + spgpu/ell_conv.h + spgpu/hdia_conv.h + spgpu/hdia.h + spgpu/core.h + spgpu/vector.h + spgpu/dia.h + spgpu/hell.h + spgpu/dia_conv.h + spgpu/hell_conv.h + svectordev.h + hdiagdev.h + dvectordev.h + scusparse.h +) + 
+list(APPEND PSB_cuda_header_cu_files + CUDA/psi_cuda_CopyCooToElg.cuh + CUDA/psi_cuda_common.cuh + CUDA/psi_cuda_CopyCooToHlg.cuh + spgpu/kernels/mathbase.cuh + spgpu/kernels/dia_spmv_base.cuh + spgpu/kernels/asum_base.cuh + spgpu/kernels/hdia_spmv_base.cuh + spgpu/kernels/hdia_spmv_base_template.cuh + spgpu/kernels/scal_base.cuh + spgpu/kernels/gath_base.cuh + spgpu/kernels/ell_spmv_base_template.cuh + spgpu/kernels/ell_spmv_base.cuh + spgpu/kernels/setscal_base.cuh + spgpu/kernels/amax_base.cuh + spgpu/kernels/abs_base.cuh + spgpu/kernels/hell_spmv_base.cuh + spgpu/kernels/dia_spmv_base_template.cuh + spgpu/kernels/axy_base.cuh + spgpu/kernels/ell_csput_base.cuh + spgpu/kernels/scat_base.cuh + spgpu/kernels/ell_spmv_base_nors.cuh + spgpu/kernels/hell_spmv_base_template.cuh +) + +list(APPEND PSB_cuda_source_cu_files + CUDA/psi_cuda_c_CopyCooToElg.cu + CUDA/psi_cuda_d_CopyCooToElg.cu + CUDA/psi_cuda_c_CopyCooToHlg.cu + CUDA/psi_cuda_s_CopyCooToHlg.cu + CUDA/psi_cuda_s_CopyCooToElg.cu + CUDA/psi_cuda_d_CopyCooToHlg.cu + CUDA/psi_cuda_z_CopyCooToHlg.cu + CUDA/psi_cuda_z_CopyCooToElg.cu + spgpu/kernels/ddot.cu + spgpu/kernels/zscat.cu + spgpu/kernels/ell_cspmv.cu + spgpu/kernels/ssetscal.cu + spgpu/kernels/sasum.cu + spgpu/kernels/dupd_xyz.cu + spgpu/kernels/caxy.cu + spgpu/kernels/cscal.cu + spgpu/kernels/sscal.cu + spgpu/kernels/hell_sspmv.cu + spgpu/kernels/dscat.cu + spgpu/kernels/hell_cspmv.cu + spgpu/kernels/cxyzw.cu + spgpu/kernels/casum.cu + spgpu/kernels/zabs.cu + spgpu/kernels/ell_zspmv.cu + spgpu/kernels/ell_zcsput.cu + spgpu/kernels/hdia_cspmv.cu + spgpu/kernels/zxyzw.cu + spgpu/kernels/dnrm2.cu + spgpu/kernels/cabs.cu + spgpu/kernels/cnrm2.cu + spgpu/kernels/csetscal.cu + spgpu/kernels/cscat.cu + spgpu/kernels/zdot.cu + spgpu/kernels/ell_ccsput.cu + spgpu/kernels/zaxpby.cu + spgpu/kernels/zsetscal.cu + spgpu/kernels/sdot.cu + spgpu/kernels/zgath.cu + spgpu/kernels/saxy.cu + spgpu/kernels/dscal.cu + spgpu/kernels/cgath.cu + spgpu/kernels/hdia_dspmv.cu + 
spgpu/kernels/camax.cu + spgpu/kernels/dabs.cu + spgpu/kernels/dasum.cu + spgpu/kernels/sgath.cu + spgpu/kernels/znrm2.cu + spgpu/kernels/igath.cu + spgpu/kernels/hell_zspmv.cu + spgpu/kernels/hell_dspmv.cu + spgpu/kernels/damax.cu + spgpu/kernels/dsetscal.cu + spgpu/kernels/dxyzw.cu + spgpu/kernels/zaxy.cu + spgpu/kernels/dgath.cu + spgpu/kernels/iscat.cu + spgpu/kernels/supd_xyz.cu + spgpu/kernels/sxyzw.cu + spgpu/kernels/ell_dcsput.cu + spgpu/kernels/dia_cspmv.cu + spgpu/kernels/ell_scsput.cu + spgpu/kernels/caxpby.cu + spgpu/kernels/zupd_xyz.cu + spgpu/kernels/isetscal.cu + spgpu/kernels/hdia_sspmv.cu + spgpu/kernels/daxy.cu + spgpu/kernels/zscal.cu + spgpu/kernels/ell_sspmv.cu + spgpu/kernels/sabs.cu + spgpu/kernels/hdia_zspmv.cu + spgpu/kernels/saxpby.cu + spgpu/kernels/zamax.cu + spgpu/kernels/dia_sspmv.cu + spgpu/kernels/zasum.cu + spgpu/kernels/dia_zspmv.cu + spgpu/kernels/ell_dspmv.cu + spgpu/kernels/snrm2.cu + spgpu/kernels/sscat.cu + spgpu/kernels/cdot.cu + spgpu/kernels/samax.cu + spgpu/kernels/cupd_xyz.cu + spgpu/kernels/dia_dspmv.cu + spgpu/kernels/daxpby.cu +) + + +foreach(file IN LISTS PSB_cuda_source_files) + list(APPEND cuda_source_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() + +foreach(file IN LISTS PSB_cuda_source_C_files) + list(APPEND cuda_source_C_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() + +foreach(file IN LISTS PSB_cuda_header_C_files) + list(APPEND cuda_header_C_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() + +foreach(file IN LISTS PSB_cuda_source_cu_files) + list(APPEND cuda_source_cu_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() + +foreach(file IN LISTS PSB_cuda_header_cu_files) + list(APPEND cuda_header_cu_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() diff --git a/cuda/CUDA/Makefile b/cuda/CUDA/Makefile new file mode 100644 index 00000000..010d591d --- /dev/null +++ b/cuda/CUDA/Makefile @@ -0,0 +1,37 @@ +TOPDIR=../.. 
+include $(TOPDIR)/Make.inc +# +# Libraries used +# +PSBLIBDIR=$(PSBLASDIR)/lib/ +PSBINCDIR=$(PSBLASDIR)/include +LIBDIR=$(TOPDIR)/lib +INCDIR=$(TOPDIR)/include +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. -I$(INCDIR) +LIBNAME=libpsb_gpu.a + + +CUDAOBJS=psi_cuda_c_CopyCooToElg.o psi_cuda_c_CopyCooToHlg.o \ +psi_cuda_d_CopyCooToElg.o psi_cuda_d_CopyCooToHlg.o \ +psi_cuda_s_CopyCooToElg.o psi_cuda_s_CopyCooToHlg.o \ +psi_cuda_z_CopyCooToElg.o psi_cuda_z_CopyCooToHlg.o + + + +objs: $(CUDAOBJS) + +lib: objs + $(AR) ../$(LIBNAME) $(CUDAOBJS) + +$(CUDAOBJS): psi_cuda_common.cuh psi_cuda_CopyCooToElg.cuh psi_cuda_CopyCooToHlg.cuh + +clean: + /bin/rm -f $(CUDAOBJS) diff --git a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh new file mode 100644 index 00000000..5b723be1 --- /dev/null +++ b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh @@ -0,0 +1,103 @@ +#include +#include + +#include "psi_cuda_common.cuh" + + +#undef GEN_PSI_FUNC_NAME +#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToElg) + +#define THREAD_BLOCK 256 + +#ifdef __cplusplus +extern "C" { +#endif + + + void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, + int baseIdx, int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM); + + +#ifdef __cplusplus +} +#endif + + + + + +__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza, + int baseIdx, int hacksz, int ldv, int nzm, + int *rS, int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ + int ir, k, ipnt, rsz,jc; + int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + int i=ii+ki; + int idval=0; + + if (ki >= nrws) return; + 
if (i >= nr) return; + + ipnt=devIdisp[i]; + rsz=rS[i]; + ir = i; + for (k=0; kcurrentStream >>>(i,nrws, nr, nza, baseIdx, + hacksz, ldv, nzm, + rS,devIdisp,devJa,devVal, + idiag, rP,cM); + +} + + + + +void +GEN_PSI_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ int i, nrws; + //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + //fprintf(stderr,"Loop on j: %d\n",j); + for (i=0; i +#include + +#include "psi_cuda_common.cuh" + + +#undef GEN_PSI_FUNC_NAME +#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToHlg) + +#define THREAD_BLOCK 256 + +#ifdef __cplusplus +extern "C" { +#endif + +void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, + int noffs, int isz, int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM); + + + +#ifdef __cplusplus +} +#endif + + +__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ + int ir, k, ipnt, rsz,jc; + int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + int i=ii+ki; + + if (ki >= nrws) return; + + + if (icurrentStream >>>(i,nrws,nr, nza, baseIdx, hacksz, noffs, isz, + rS,hackOffs,devIdisp,devJa,devVal,idiag,rP,cM); + +} + + +void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ int i, nrws; + //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; + int 
maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + //fprintf(stderr,"Loop on j: %d\n",j); + for (i=0; i +#include + +#include "cintrf.h" +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL c +#include "psi_cuda_CopyCooToElg.cuh" + diff --git a/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu new file mode 100644 index 00000000..e36728b1 --- /dev/null +++ b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL c +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_common.cuh b/cuda/CUDA/psi_cuda_common.cuh new file mode 100644 index 00000000..12d81f03 --- /dev/null +++ b/cuda/CUDA/psi_cuda_common.cuh @@ -0,0 +1,16 @@ +#pragma once + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) +#define MIN(A,B) ( (A)<(B) ? (A) : (B) ) +#define SQUARE(x) ((x)*(x)) +#define GET_ADDR(a,ix,iy,nc) a[(nc)*(ix)+(iy)] +#define GET_VAL(a,ix,iy,nc) (GET_ADDR(a,ix,iy,nc)) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } + +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +#endif diff --git a/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu new file mode 100644 index 00000000..233bae06 --- /dev/null +++ b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE double +#define TYPE_SYMBOL d +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu new file mode 100644 index 00000000..94e076ae --- /dev/null +++ b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu @@ 
-0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE double +#define TYPE_SYMBOL d +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu new file mode 100644 index 00000000..e083708c --- /dev/null +++ b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE float +#define TYPE_SYMBOL s +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu new file mode 100644 index 00000000..90ad5fdf --- /dev/null +++ b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE float +#define TYPE_SYMBOL s +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu new file mode 100644 index 00000000..b5ec817d --- /dev/null +++ b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL z +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu new file mode 100644 index 00000000..24d39ec4 --- /dev/null +++ b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL z +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/License-spgpu.md b/cuda/License-spgpu.md new file mode 100644 index 00000000..7f4b8ff4 --- /dev/null +++ b/cuda/License-spgpu.md @@ -0,0 +1,21 @@ +(c) Copyright 2011-2021 Davide Barbieri, Salvatore Filippone + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/cuda/Makefile b/cuda/Makefile new file mode 100755 index 00000000..b47e58ce --- /dev/null +++ b/cuda/Makefile @@ -0,0 +1,153 @@ +include ../Make.inc +# +# Libraries used +# +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +PSBLAS_LIB= -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). 
+CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I$(INCDIR) +LIBNAME=libpsb_cuda.a + + +FOBJS=cusparse_mod.o base_cusparse_mod.o \ + s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o \ + psb_vectordev_mod.o core_mod.o \ + psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\ + psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \ + elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \ + psb_i_cuda_vect_mod.o \ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_cuda_env_mod.o psb_cuda_mod.o \ + psb_d_cuda_diag_mat_mod.o\ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o \ + dnsdev_mod.o + +COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\ + svectordev.o dvectordev.o cvectordev.o zvectordev.o cuda_util.o \ + fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o + +OBJS=$(COBJS) $(FOBJS) + +lib: objs ilib cudalib spgpulib + $(AR) $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +$(COBJS): spgpuinc + +objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs + /bin/cp -p *$(.mod) $(MODDIR) + /bin/cp -p *.h $(INCDIR) + +spgpuinc: + $(MAKE) -C spgpu includes +spgpuobjs: + $(MAKE) -C spgpu objs +spgpulib: + $(MAKE) -C spgpu lib + + +hdiagdev_mod.o diagdev_mod.o dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o +psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o 
psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o +psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o +cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o +s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o +psb_d_cuda_hlg_mat_mod.o: hlldev_mod.o psb_d_cuda_vect_mod.o psb_cuda_env_mod.o +psb_d_cuda_elg_mat_mod.o: elldev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_diag_mat_mod.o: diagdev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_cuda_vect_mod.o +psb_s_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_s_cuda_vect_mod.o +psb_d_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_d_cuda_vect_mod.o +psb_c_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_c_cuda_vect_mod.o +psb_z_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_z_cuda_vect_mod.o +psb_s_cuda_hlg_mat_mod.o: hlldev_mod.o psb_s_cuda_vect_mod.o psb_cuda_env_mod.o +psb_s_cuda_elg_mat_mod.o: elldev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_diag_mat_mod.o: diagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_csrg_mat_mod.o psb_s_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_cuda_csrg_mat_mod.o psb_d_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_hlg_mat_mod.o: hlldev_mod.o psb_z_cuda_vect_mod.o psb_cuda_env_mod.o +psb_z_cuda_elg_mat_mod.o: elldev_mod.o psb_z_cuda_vect_mod.o +psb_c_cuda_hlg_mat_mod.o: hlldev_mod.o psb_c_cuda_vect_mod.o psb_cuda_env_mod.o +psb_c_cuda_elg_mat_mod.o: elldev_mod.o psb_c_cuda_vect_mod.o +psb_c_cuda_csrg_mat_mod.o psb_c_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_csrg_mat_mod.o psb_z_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o +psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o 
+vectordev.o: cuda_util.o vectordev.h +elldev.o: elldev.c +dnsdev.o: dnsdev.c +fcusparse.h elldev.c: elldev.h vectordev.h +fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse.h +fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse_fct.h +svectordev.o: svectordev.h vectordev.h +dvectordev.o: dvectordev.h vectordev.h +cvectordev.o: cvectordev.h vectordev.h +zvectordev.o: zvectordev.h vectordev.h +psb_cuda_env_mod.o: base_cusparse_mod.o +psb_cuda_mod.o: psb_cuda_env_mod.o psb_i_cuda_vect_mod.o\ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_d_cuda_diag_mat_mod.o \ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o + +iobjs: $(FOBJS) + $(MAKE) -C impl objs +cudaobjs: $(FOBJS) + $(MAKE) -C CUDA objs + +ilib: objs + $(MAKE) -C impl lib LIBNAME=$(LIBNAME) +cudalib: objs ilib + $(MAKE) -C CUDA lib LIBNAME=$(LIBNAME) + +clean: cclean iclean cudaclean spgpuclean + /bin/rm -f $(FOBJS) *$(.mod) *.a + +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean +cudaclean: + $(MAKE) -C CUDA clean +spgpuclean: + $(MAKE) -C spgpu clean + +veryclean: clean + +.c.o: + $(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@ +.f90.o: + $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@ +.F90.o: + $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@ +.cpp.o: + $(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c 
$< -o $@ diff --git a/cuda/base_cusparse_mod.F90 b/cuda/base_cusparse_mod.F90 new file mode 100644 index 00000000..94a8255f --- /dev/null +++ b/cuda/base_cusparse_mod.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module base_cusparse_mod + use iso_c_binding + ! Interface to CUSPARSE. 
+ + enum, bind(c) + enumerator cusparse_status_success + enumerator cusparse_status_not_initialized + enumerator cusparse_status_alloc_failed + enumerator cusparse_status_invalid_value + enumerator cusparse_status_arch_mismatch + enumerator cusparse_status_mapping_error + enumerator cusparse_status_execution_failed + enumerator cusparse_status_internal_error + enumerator cusparse_status_matrix_type_not_supported + end enum + + enum, bind(c) + enumerator cusparse_matrix_type_general + enumerator cusparse_matrix_type_symmetric + enumerator cusparse_matrix_type_hermitian + enumerator cusparse_matrix_type_triangular + end enum + + enum, bind(c) + enumerator cusparse_fill_mode_lower + enumerator cusparse_fill_mode_upper + end enum + + enum, bind(c) + enumerator cusparse_diag_type_non_unit + enumerator cusparse_diag_type_unit + end enum + + enum, bind(c) + enumerator cusparse_index_base_zero + enumerator cusparse_index_base_one + end enum + + enum, bind(c) + enumerator cusparse_operation_non_transpose + enumerator cusparse_operation_transpose + enumerator cusparse_operation_conjugate_transpose + end enum + + enum, bind(c) + enumerator cusparse_direction_row + enumerator cusparse_direction_column + end enum + + interface + function FcusparseCreate() & + & bind(c,name="FcusparseCreate") result(res) + use iso_c_binding + integer(c_int) :: res + end function FcusparseCreate + end interface + + interface + function FcusparseDestroy() & + & bind(c,name="FcusparseDestroy") result(res) + use iso_c_binding + integer(c_int) :: res + end function FcusparseDestroy + end interface + +contains + + function initFcusparse() result(res) + implicit none + integer(c_int) :: res + + res = FcusparseCreate() + end function initFcusparse + + function closeFcusparse() result(res) + implicit none + integer(c_int) :: res + res = FcusparseDestroy() + end function closeFcusparse + +end module base_cusparse_mod diff --git a/cuda/c_cusparse_mod.F90 b/cuda/c_cusparse_mod.F90 new file mode 100644 index 
00000000..3329f733 --- /dev/null +++ b/cuda/c_cusparse_mod.F90 @@ -0,0 +1,312 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +module c_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: c_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type c_Cmat + +#if PSB_CUDA_SHORT_VERSION <= 10 + type, bind(c) :: c_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type c_Hmat +#endif + + interface CSRGDeviceFree + function c_CSRGDeviceFree(Mat) & + & bind(c,name="c_CSRGDeviceFree") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function c_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function c_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function c_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function c_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatIndexBase + end interface + +#if PSB_CUDA_SHORT_VERSION <= 10 + interface CSRGDeviceCsrsmAnalysis + function c_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + 
integer(c_int) :: res + end function c_CSRGDeviceCsrsmAnalysis + end interface +#else + interface CSRGIsNullSvBuffer + function c_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="c_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGIsNullSvBuffer + end interface +#endif + + interface CSRGDeviceAlloc + function c_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="c_CSRGDeviceAlloc") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function c_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function c_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="c_CSRGDeviceGetParms") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function c_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function c_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spsvCSRGDevice") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function c_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spmvCSRGDevice") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function c_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_CSRGHost2Device") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function 
c_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function c_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_CSRGDevice2Host") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_CSRGDevice2Host + end interface + +#if PSB_CUDA_SHORT_VERSION <=10 + interface HYBGDeviceAlloc + function c_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="c_HYBGDeviceAlloc") result(res) + use iso_c_binding + import c_hmat + type(c_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function c_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function c_HYBGDeviceFree(Mat) & + & bind(c,name="c_HYBGDeviceFree") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int) :: res + end function c_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function c_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function c_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function c_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function c_HYBGDeviceSetMatIndexBase(Mat,type) & 
+ & bind(c,name="c_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function c_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="c_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int) :: res + end function c_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function c_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spsvHYBGDevice") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function c_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spmvHYBGDevice") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function c_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_HYBGHost2Device") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_HYBGHost2Device + end interface +#endif + +end module c_cusparse_mod diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c new file mode 100644 index 00000000..6b5c8ea6 --- /dev/null +++ b/cuda/ccusparse.c @@ -0,0 +1,42 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* 
modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + + +#include +#include + +#include +#include +#include "fcusparse.h" + +#include "ccusparse.h" +#include "fcusparse_dat.h" +#include "fcusparse_fct.h" diff --git a/cuda/ccusparse.h b/cuda/ccusparse.h new file mode 100644 index 00000000..8643211a --- /dev/null +++ b/cuda/ccusparse.h @@ -0,0 +1,100 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef CCUSPARSE_ +#define CCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" + +/* Single precision complex */ +#define TYPE float complex +#define CUSPARSE_BASE_TYPE CUDA_C_32F +#define T_CSRGDeviceMat c_CSRGDeviceMat +#define T_Cmat c_Cmat +#define T_spmvCSRGDevice c_spmvCSRGDevice +#define T_spsvCSRGDevice c_spsvCSRGDevice +#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc +#define T_CSRGDeviceFree c_CSRGDeviceFree +#define T_CSRGHost2Device c_CSRGHost2Device +#define T_CSRGDevice2Host c_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms + +#if PSB_CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseCcsrmv +#define cusparseTcsrsv_solve cusparseCcsrsv_solve +#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis +#define T_HYBGDeviceMat c_HYBGDeviceMat +#define T_Hmat c_Hmat +#define T_HYBGDeviceFree c_HYBGDeviceFree +#define T_spmvHYBGDevice c_spmvHYBGDevice +#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase +#define
T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice c_spsvHYBGDevice +#define T_HYBGHost2Device c_HYBGHost2Device +#define cusparseThybmv cusparseChybmv +#define cusparseThybsv_solve cusparseChybsv_solve +#define cusparseThybsv_analysis cusparseChybsv_analysis +#define cusparseTcsr2hyb cusparseCcsr2hyb + +#elif PSB_CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr + +#endif + +#include "fcusparse.h" + +#endif diff --git a/cuda/cintrf.h b/cuda/cintrf.h new file mode 100644 index 00000000..c9b70ce3 --- /dev/null +++ b/cuda/cintrf.h @@ -0,0 +1,45 @@ + /* Parallel Sparse BLAS SPGPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. 
The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _CINTRF_H_ +#define _CINTRF_H_ + +#include +#include +#include "psb_config.h" +#include "psb_types.h" +#include "core.h" + +#define ELL_PITCH_ALIGN_S 32 +#define ELL_PITCH_ALIGN_D 16 + +#endif diff --git a/cuda/core_mod.f90 b/cuda/core_mod.f90 new file mode 100644 index 00000000..d30f8a99 --- /dev/null +++ b/cuda/core_mod.f90 @@ -0,0 +1,53 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! 
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +module core_mod + use iso_c_binding + + integer(c_int), parameter :: spgpu_type_int = 0 + integer(c_int), parameter :: spgpu_type_float = 1 + integer(c_int), parameter :: spgpu_type_double = 2 + integer(c_int), parameter :: spgpu_type_complex_float = 3 + integer(c_int), parameter :: spgpu_type_complex_double = 4 + integer(c_int), parameter :: spgpu_success = 0 + integer(c_int), parameter :: spgpu_unsupported = 1 + integer(c_int), parameter :: spgpu_unspecified = 2 + integer(c_int), parameter :: spgpu_outofmem = 3 + + interface + subroutine psb_cudaSync() & + & bind(c,name='cudaSync') + use iso_c_binding + end subroutine psb_cudaSync + end interface + +end module core_mod diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c new file mode 100644 index 00000000..8ae2e663 --- /dev/null +++ b/cuda/cuda_util.c @@ -0,0 +1,821 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include "cuda_util.h" + + +static int hasUVA=-1; +static struct cudaDeviceProp *prop=NULL; +static spgpuHandle_t psb_cuda_handle = NULL; +static cublasHandle_t psb_cublas_handle = NULL; +#if defined(TRACK_CUDA_MALLOC) +static int64_t total_cuda_mem = 0; +#endif + +int allocRemoteBuffer(void** buffer, size_t count) +{ + cudaError_t err = cudaMalloc(buffer, (size_t) count); +#if defined(TRACK_CUDA_MALLOC) + total_cuda_mem += count; + fprintf(stderr,"Tracking CUDA allocRemoteBuffer for %ld bytes total %ld address %p\n", + count, total_cuda_mem, *buffer); +#endif + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA allocRemoteBuffer for %ld bytes Error: %s \n", + count, cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int hostRegisterMapped(void *pointer, size_t size) +{ + cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA hostRegisterMapped Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int getDevicePointer(void **d_p, void * h_p) +{ + cudaError_t err = cudaHostGetDevicePointer(d_p,h_p,0); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + 
else + { + fprintf(stderr,"CUDA getDevicePointer Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int registerMappedMemory(void *buffer, void **dp, size_t size) +{ + //cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped); + if (err == cudaSuccess) err = cudaHostGetDevicePointer(dp,buffer,0); + + if (err == cudaSuccess) + { + err = cudaHostGetDevicePointer(dp,buffer,0); + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + } + else + { + fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int allocMappedMemory(void **buffer, void **dp, size_t size) +{ + cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA allocMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int unregisterMappedMemory(void *buffer) +{ + //cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + cudaError_t err = cudaHostUnregister(buffer); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA unregisterMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int writeRemoteBuffer(void* hostSrc, void* buffer, size_t count) +{ + cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice); + + 
if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error writeRemoteBuffer: %s %p %p %ld\n", + cudaGetErrorString(err),buffer, hostSrc, count); + return SPGPU_UNSPECIFIED; + } +} + +int readRemoteBuffer(void* hostDest, void* buffer, size_t count) +{ + + + cudaError_t err1; + cudaError_t err; +#if 0 + { + err1 =cudaGetLastError(); + fprintf(stderr,"CUDA Error prior to readRemoteBuffer: %s %d\n", + cudaGetErrorString(err1),err1); + } + +#endif + err = cudaMemcpy(hostDest, buffer, count, cudaMemcpyDeviceToHost); + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error readRemoteBuffer: %s %p %p %ld %d\n", + cudaGetErrorString(err),hostDest,buffer,count,err); + return SPGPU_UNSPECIFIED; + } +} + +int freeRemoteBuffer(void* buffer) +{ + cudaError_t err = cudaFree(buffer); + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error freeRemoteBuffer: %s %p\n", cudaGetErrorString(err),buffer); + return SPGPU_UNSPECIFIED; + } +} + +int gpuInit(int dev) +{ + + int count,err; + + if ((err=cudaSetDeviceFlags(cudaDeviceMapHost))!=cudaSuccess) + fprintf(stderr,"Error On SetDeviceFlags: %d '%s'\n",err,cudaGetErrorString(err)); + if ((prop=(struct cudaDeviceProp *) malloc(sizeof(struct cudaDeviceProp)))==NULL) { + fprintf(stderr,"CUDA Error gpuInit3: not malloced prop\n"); + return SPGPU_UNSPECIFIED; + } + err = setDevice(dev); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + if (!psb_cublas_handle) + psb_cudaCreateCublasHandle(); + hasUVA=getDeviceHasUVA(); + FcusparseCreate(); + return err; + +} + +void gpuClose() +{ + cudaStream_t st1, st2; + if (! psb_cuda_handle) + st1=spgpuGetStream(psb_cuda_handle); + if (! 
psb_cublas_handle) + cublasGetStream(psb_cublas_handle,&st2); + FcusparseDestroy(); + psb_cudaDestroyHandle(); + if (st1 != st2) + psb_cudaDestroyCublasHandle(); + free(prop); + prop=NULL; + hasUVA=-1; +} + + +int setDevice(int dev) +{ + int count,err,idev; + + err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error setDevice: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + + if ((0<=dev)&&(devunifiedAddressing; + return(count); +} + +int getGPUMultiProcessors() +{ int count=0; + if (prop!=NULL) + count = prop->multiProcessorCount; + return(count); +} + + +int getGPUMemoryBusWidth() +{ int count=0; +#if CUDART_VERSION >= 5000 + if (prop!=NULL) + count = prop->memoryBusWidth; +#endif + return(count); +} +int getGPUMemoryClockRate() +{ int count=0; +#if CUDART_VERSION >= 5000 + if (prop!=NULL) + count = prop->memoryClockRate; +#endif + return(count); +} +int getGPUWarpSize() +{ int count=0; + if (prop!=NULL) + count = prop->warpSize; + return(count); +} +int getGPUMaxThreadsPerBlock() +{ int count=0; + if (prop!=NULL) + count = prop->maxThreadsPerBlock; + return(count); +} +int getGPUMaxThreadsPerMP() +{ int count=0; + if (prop!=NULL) + count = prop->maxThreadsPerMultiProcessor; + return(count); +} +int getGPUMaxRegistersPerBlock() +{ int count=0; + if (prop!=NULL) + count = prop->regsPerBlock; + return(count); +} + +void cpyGPUNameString(char *cstring) +{ + *cstring='\0'; + if (prop!=NULL) + strcpy(cstring,prop->name); + +} + +int DeviceHasUVA() +{ + return(hasUVA == 1); +} + + +int getDeviceCount() +{ int count; + cudaError_t err; + err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error getDeviceCount: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + return(count); +} + +void cudaSync() +{ + cudaError_t err; + err = cudaDeviceSynchronize(); +#if 0 + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error cudaSync: %s\n", 
cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } +#else + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err)); + } + return ; +#endif +} + +void cudaReset() +{ + cudaError_t err; + err = cudaDeviceReset(); +#if 0 + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } +#else + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); + } + return ; +#endif +} + + +spgpuHandle_t psb_cudaGetHandle() +{ + return psb_cuda_handle; +} + +void psb_cudaCreateHandle() +{ + if (!psb_cuda_handle) + spgpuCreate(&psb_cuda_handle, getDevice()); + +} + +void psb_cudaDestroyHandle() +{ + if (!psb_cuda_handle) + spgpuDestroy(psb_cuda_handle); + psb_cuda_handle = NULL; +} + +cudaStream_t psb_cudaGetStream() +{ + return spgpuGetStream(psb_cuda_handle); +} + +void psb_cudaSetStream(cudaStream_t stream) +{ + spgpuSetStream(psb_cuda_handle, stream); + return ; +} + + + +cublasHandle_t psb_cudaGetCublasHandle() +{ + if (!psb_cublas_handle) + psb_cudaCreateCublasHandle(); + return psb_cublas_handle; +} +void psb_cudaCreateCublasHandle() +{ if (!psb_cublas_handle) + cublasCreate(&psb_cublas_handle); +} +void psb_cudaDestroyCublasHandle() +{ + if (!psb_cublas_handle) + cublasDestroy(psb_cublas_handle); + psb_cublas_handle=NULL; +} + +/* Simple memory tools */ + +int allocateInt(void **d_int, int n) +{ + return allocRemoteBuffer((void **)(d_int), n*sizeof(int)); +} + +int writeInt(void *d_int, int* h_int, int n) +{ + int i,j; + int *di; + i = writeRemoteBuffer((void*)h_int, (void*)d_int, n*sizeof(int)); + return i; +} + +int readInt(void* d_int, int* h_int, int n) +{ int i; + i = readRemoteBuffer((void *) h_int, (void *) d_int, n*sizeof(int)); + //cudaSync(); + return(i); +} + +int writeIntFirst(int first, void *d_int, int* h_int, int n, int IndexBase) +{ + int i,j; + int *di=(int *) d_int; + di = &(di[first-IndexBase]); + i = 
writeRemoteBuffer((void*)h_int, (void*)di, n*sizeof(int)); + return i; +} + +int readIntFirst(int first,void* d_int, int* h_int, int n, int IndexBase) +{ int i; + int *di=(int *) d_int; + di = &(di[first-IndexBase]); + i = readRemoteBuffer((void *) h_int, (void *) di, n*sizeof(int)); + //cudaSync(); + return(i); +} + +int allocateMultiInt(void **d_int, int m, int n) +{ + return allocRemoteBuffer((void **)(d_int), ((size_t) m)*n*sizeof(int)); /* widen first: m*n as int could overflow */ +} + +int writeMultiInt(void *d_int, int* h_int, int m, int n) +{ + int i,j; + int *di; + i = writeRemoteBuffer((void*)h_int, (void*)d_int, ((size_t) m)*n*sizeof(int)); /* byte count computed in size_t */ + return i; +} + +int readMultiInt(void* d_int, int* h_int, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_int, (void *) d_int, ((size_t) m)*n*sizeof(int)); /* byte count computed in size_t */ + //cudaSync(); + return(i); +} + +void freeInt(void *d_int) +{ + //printf("Before freeInt\n"); + freeRemoteBuffer(d_int); +} + + + + +int allocateFloat(void **d_float, int n) +{ + return allocRemoteBuffer((void **)(d_float), n*sizeof(float)); +} + +int writeFloat(void *d_float, float* h_float, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_float, (void*)d_float, n*sizeof(float)); + + return i; +} + +int readFloat(void* d_float, float* h_float, int n) +{ int i; + i = readRemoteBuffer((void *) h_float, (void *) d_float, n*sizeof(float)); + + return(i); +} + +int writeFloatFirst(int df, void *d_float, float* h_float, int n, int IndexBase) +{ + int i; + + float *dv=(float *) d_float; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_float, (void*)dv, n*sizeof(float)); + + return i; +} + +int readFloatFirst(int df, void* d_float, float* h_float, int n, int IndexBase) +{ int i; + float *dv=(float *) d_float; + dv = &dv[df-IndexBase]; + //fprintf(stderr,"readFloatFirst: %d %p %p %p %d \n",df,d_float,dv,h_float,n); + i = readRemoteBuffer((void *) h_float, (void *) dv, n*sizeof(float)); + + return(i); +} + + +int allocateMultiFloat(void **d_float, int m, int n) +{ + return allocRemoteBuffer((void **)(d_float), 
m*n*sizeof(float)); +} + +int writeMultiFloat(void *d_float, float* h_float, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_float, (void*)d_float, m*n*sizeof(float)); + return i; +} + +int readMultiFloat(void* d_float, float* h_float, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_float, (void *) d_float, m*n*sizeof(float)); + //cudaSync(); + return(i); +} + +void freeFloat(void *d_float) +{ + freeRemoteBuffer(d_float); +} + + + +int allocateDouble(void **d_double, int n) +{ + return allocRemoteBuffer((void **)(d_double), n*sizeof(double)); +} + +int writeDouble(void *d_double, double* h_double, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_double, (void*)d_double, n*sizeof(double)); + + return i; +} + +int readDouble(void* d_double, double* h_double, int n) +{ int i; + i = readRemoteBuffer((void *) h_double, (void *) d_double, n*sizeof(double)); + + return(i); +} + +int writeDoubleFirst(int df, void *d_double, double* h_double, int n, int IndexBase) +{ + int i; + + double *dv=(double *) d_double; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_double, (void*)dv, n*sizeof(double)); + + return i; +} + +int readDoubleFirst(int df, void* d_double, double* h_double, int n, int IndexBase) +{ int i; + double *dv=(double *) d_double; + dv = &dv[df-IndexBase]; + //fprintf(stderr,"readDoubleFirst: %d %p %p %p %d \n",df,d_double,dv,h_double,n); + i = readRemoteBuffer((void *) h_double, (void *) dv, n*sizeof(double)); + + return(i); +} + +int allocateMultiDouble(void **d_double, int m, int n) +{ + return allocRemoteBuffer((void **)(d_double), m*n*sizeof(double)); +} + +int writeMultiDouble(void *d_double, double* h_double, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_double, (void*)d_double, m*n*sizeof(double)); + return i; +} + +int readMultiDouble(void* d_double, double* h_double, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_double, (void *) d_double, m*n*sizeof(double)); + //cudaSync(); + return(i); 
+} + +void freeDouble(void *d_double) +{ + freeRemoteBuffer(d_double); +} + + + +int allocateFloatComplex(void **d_FloatComplex, int n) +{ + return allocRemoteBuffer((void **)(d_FloatComplex), n*sizeof(cuFloatComplex)); +} + +int writeFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, n*sizeof(cuFloatComplex)); + + return i; +} + +int readFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int n) +{ int i; + i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, n*sizeof(cuFloatComplex)); + + return(i); +} + +int allocateMultiFloatComplex(void **d_FloatComplex, int m, int n) +{ + return allocRemoteBuffer((void **)(d_FloatComplex), ((size_t) m)*n*sizeof(cuFloatComplex)); /* widen first: m*n as int could overflow */ +} + +int writeMultiFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, ((size_t) m)*n*sizeof(cuFloatComplex)); /* byte count computed in size_t */ + return i; +} + +int readMultiFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, ((size_t) m)*n*sizeof(cuFloatComplex)); /* byte count computed in size_t */ + //cudaSync(); + return(i); +} + +int writeFloatComplexFirst(int df, void *d_floatComplex, + cuFloatComplex* h_floatComplex, int n, int IndexBase) +{ + int i; + + cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_floatComplex, (void*)dv, n*sizeof(cuFloatComplex)); + + return i; +} + +int readFloatComplexFirst(int df, void* d_floatComplex, cuFloatComplex* h_floatComplex, + int n, int IndexBase) +{ int i; + cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex; + dv = &dv[df-IndexBase]; + i = readRemoteBuffer((void *) h_floatComplex, (void *) dv, n*sizeof(cuFloatComplex)); + + return(i); +} + +void freeFloatComplex(void *d_FloatComplex) +{ + freeRemoteBuffer(d_FloatComplex); +} + + + + 
+int allocateDoubleComplex(void **d_DoubleComplex, int n) +{ + return allocRemoteBuffer((void **)(d_DoubleComplex), n*sizeof(cuDoubleComplex)); +} + +int writeDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, n*sizeof(cuDoubleComplex)); + + return i; +} + +int readDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n) +{ int i; + i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, n*sizeof(cuDoubleComplex)); + + return(i); +} + +int writeDoubleComplexFirst(int df, void *d_doubleComplex, + cuDoubleComplex* h_doubleComplex, int n, int IndexBase) +{ + int i; + + cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_doubleComplex, (void*)dv, n*sizeof(cuDoubleComplex)); + + return i; +} + +int readDoubleComplexFirst(int df, void* d_doubleComplex, cuDoubleComplex* h_doubleComplex, + int n, int IndexBase) +{ int i; + cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex; + dv = &dv[df-IndexBase]; + i = readRemoteBuffer((void *) h_doubleComplex, (void *) dv, n*sizeof(cuDoubleComplex)); + + return(i); +} + +int allocateMultiDoubleComplex(void **d_DoubleComplex, int m, int n) +{ + return allocRemoteBuffer((void **)(d_DoubleComplex), ((size_t) m)*n*sizeof(cuDoubleComplex)); /* widen first: m*n as int could overflow */ +} + +int writeMultiDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, ((size_t) m)*n*sizeof(cuDoubleComplex)); /* byte count computed in size_t */ + return i; +} + +int readMultiDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, ((size_t) m)*n*sizeof(cuDoubleComplex)); /* byte count computed in size_t */ + //cudaSync(); + return(i); +} + +void freeDoubleComplex(void *d_DoubleComplex) +{ + freeRemoteBuffer(d_DoubleComplex); +} + + + +double etime() +{ + 
struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + + diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h new file mode 100644 index 00000000..609e6f43 --- /dev/null +++ b/cuda/cuda_util.h @@ -0,0 +1,139 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _CUDA_UTIL_H_ +#define _CUDA_UTIL_H_ + +#include +#include +#include +#include +#include + +#include "cuda_runtime.h" +#include "core.h" +#include "cuComplex.h" +#include "fcusparse.h" +#include "cublas_v2.h" + +int allocRemoteBuffer(void** buffer, size_t count); +int allocMappedMemory(void **buffer, void **dp, size_t size); +int registerMappedMemory(void *buffer, void **dp, size_t size); +int unregisterMappedMemory(void *buffer); +int writeRemoteBuffer(void* hostSrc, void* buffer, size_t count); +int readRemoteBuffer(void* hostDest, void* buffer, size_t count); +int freeRemoteBuffer(void* buffer); +int gpuInit(int dev); +int getDeviceCount(); +int getDevice(); +int getDeviceHasUVA(); +int setDevice(int dev); +int getGPUMultiProcessors(); +int getGPUMemoryBusWidth(); +int getGPUMemoryClockRate(); +int getGPUWarpSize(); +int getGPUMaxThreadsPerBlock(); +int getGPUMaxThreadsPerMP(); +int getGPUMaxRegistersPerBlock(); +void cpyGPUNameString(char *cstring); + + +void cudaSync(); +void cudaReset(); +void gpuClose(); + + +spgpuHandle_t psb_cudaGetHandle(); +void psb_cudaCreateHandle(); +void psb_cudaDestroyHandle(); +cudaStream_t psb_cudaGetStream(); +void psb_cudaSetStream(cudaStream_t stream); + +cublasHandle_t psb_cudaGetCublasHandle(); +void psb_cudaCreateCublasHandle(); +void psb_cudaDestroyCublasHandle(); + + +int allocateInt(void **, int); +int allocateMultiInt(void **, int, int); +int 
writeInt(void *, int *, int); +int writeMultiInt(void *, int* , int , int ); +int readInt(void *, int *, int); +int readMultiInt(void*, int*, int, int ); +int writeIntFirst(int,void *, int *, int,int); +int readIntFirst(int,void *, int *, int,int); +void freeInt(void *); + +int allocateFloat(void **, int); +int allocateMultiFloat(void **, int, int); +int writeFloat(void *, float *, int); +int writeMultiFloat(void *, float* , int , int ); +int readFloat(void *, float*, int); +int readMultiFloat(void*, float*, int, int ); +int writeFloatFirst(int, void *, float*, int, int); +int readFloatFirst(int, void *, float*, int, int); +void freeFloat(void *); + +int allocateDouble(void **, int); +int allocateMultiDouble(void **, int, int); +int writeDouble(void *, double*, int); +int writeMultiDouble(void *, double* , int , int ); +int readDouble(void *, double*, int); +int readMultiDouble(void*, double*, int, int ); +int writeDoubleFirst(int, void *, double*, int, int); +int readDoubleFirst(int, void *, double*, int, int); +void freeDouble(void *); + +int allocateFloatComplex(void **, int); +int allocateMultiFloatComplex(void **, int, int); +int writeFloatComplex(void *, cuFloatComplex*, int); +int writeMultiFloatComplex(void *, cuFloatComplex* , int , int ); +int readFloatComplex(void *, cuFloatComplex*, int); +int readMultiFloatComplex(void*, cuFloatComplex*, int, int ); +int writeFloatComplexFirst(int, void *, cuFloatComplex*, int, int); +int readFloatComplexFirst(int, void *, cuFloatComplex*, int, int); +void freeFloatComplex(void *); + +int allocateDoubleComplex(void **, int); +int allocateMultiDoubleComplex(void **, int, int); +int writeDoubleComplex(void *, cuDoubleComplex*, int); +int writeMultiDoubleComplex(void *, cuDoubleComplex* , int , int ); +int readDoubleComplex(void *, cuDoubleComplex*, int); +int readMultiDoubleComplex(void*, cuDoubleComplex*, int, int ); +int writeDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int); +int readDoubleComplexFirst(int, 
void *, cuDoubleComplex*, int, int); +void freeDoubleComplex(void *); + +double etime(); + + +#endif diff --git a/cuda/cusparse_mod.F90 b/cuda/cusparse_mod.F90 new file mode 100644 index 00000000..4ae16cff --- /dev/null +++ b/cuda/cusparse_mod.F90 @@ -0,0 +1,38 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ +module cusparse_mod + use base_cusparse_mod + use s_cusparse_mod + use d_cusparse_mod + use c_cusparse_mod + use z_cusparse_mod +end module cusparse_mod diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c new file mode 100644 index 00000000..0eaacbdb --- /dev/null +++ b/cuda/cvectordev.c @@ -0,0 +1,363 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +//#include "utils.h" +//#include "common.h" +#include "cvectordev.h" + + +int registerMappedFloatComplex(void *buff, void **d_p, int n, cuFloatComplex dummy) +{ + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(cuFloatComplex)); +} + +int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(cuFloatComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatComplex",i); /* report this routine, not FallocMultiVecDevice */ + } + return(i); +} + +int writeMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceFloatComplex(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatComplexR2",i); + } + return(i); +} + +int readMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(cuFloatComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatComplex",i); /* report the complex variant's own name */ + } + return(i); +} + +int 
readMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld)
+{ int i;
+  i = readMultiVecDeviceFloatComplex(deviceVec, hostVec);
+  if (i != 0) {
+    fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatComplexR2",i);
+  }
+  return(i);
+}
+/* Calls spgpuCsetscal(handle, first, last, indexBase, val, X->v_) on devMultiVecX; always returns 0. */
+int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
+                                      int indexBase, void* devMultiVecX)
+{ int i=0;
+  int pitch = 0;
+  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+
+  spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_);
+
+  return(i);
+}
+/* Calls spgpuCscat on X->v_ with n values from devMultiVecVal and indices from devMultiVecIrl; SPGPU_UNSUPPORTED if n exceeds either size_, else 0. */
+int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiVecVal,
+                                    int dupl, int indexBase, void* devMultiVecX)
+{ int j=0, i=0,nmin=0,nmax=0;
+  int pitch = 0;
+  cuFloatComplex beta;
+  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
+  struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl;
+  struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  pitch = devVecIrl->pitch_;
+  if ((n > devVecIrl->size_) || (n>devVecVal->size_ ))
+    return SPGPU_UNSUPPORTED;
+
+  //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_);
+
+  if (dupl == INS_OVERWRITE)            /* beta is (1,0) only for INS_ADD; every other dupl value gives (0,0) */
+    beta = make_cuFloatComplex(0.0, 0.0);
+  else if (dupl == INS_ADD)
+    beta = make_cuFloatComplex(1.0, 0.0);
+  else
+    beta = make_cuFloatComplex(0.0, 0.0);
+
+  spgpuCscat(handle, (cuFloatComplex *) devVecX->v_, n, (cuFloatComplex*)devVecVal->v_,
+             (int*)devVecIrl->v_, indexBase, beta);
+
+  return(i);
+}
+
+/* Wrapper: takes the index list as a MultiVectDevice (deviceIdx) and forwards its v_ buffer to igathMultiVecDeviceFloatComplex. */
+int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
+                                          int first, void* deviceIdx, int hfirst,
+                                          void* host_values, int indexBase)
+{
+  int i, *idx;
+  struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
+
+  i= igathMultiVecDeviceFloatComplex(deviceVec, vectorId, n,
+                                     first, (void*) devIdx->v_, hfirst, host_values, indexBase);
+  return(i);
+}
+/* Calls spgpuCgath with host_values advanced by hfirst-indexBase, indexes advanced by first-indexBase, and v_ + vectorId*pitch_ of deviceVec; returns 0. */
+int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
+                                    int first, void* indexes, int hfirst,
+                                    void* host_values, int indexBase)
+{
+  int i, *idx =(int *) indexes;
+  cuFloatComplex *hv = (cuFloatComplex *) host_values;
+  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+
+  i=0;
+  hv = &(hv[hfirst-indexBase]);
+  idx = &(idx[first-indexBase]);
+  spgpuCgath(handle,hv, n, idx,indexBase,
+             (cuFloatComplex *) devVec->v_+vectorId*devVec->pitch_);
+  return(i);
+}
+/* Wrapper: takes the index list as a MultiVectDevice (deviceIdx) and forwards its v_ buffer to iscatMultiVecDeviceFloatComplex. */
+int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
+                                          int first, void *deviceIdx,
+                                          int hfirst, void* host_values,
+                                          int indexBase, cuFloatComplex beta)
+{
+  int i, *idx;
+  struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx;
+  i= iscatMultiVecDeviceFloatComplex(deviceVec, vectorId, n, first,
+                                     (void*) devIdx->v_, hfirst,host_values,
+                                     indexBase, beta);
+  return(i);
+}
+/* Calls spgpuCscat on deviceVec->v_ with the advanced host_values/indexes pointers; returns SPGPU_SUCCESS. NOTE(review): vectorId is unused here, unlike the gather routine -- confirm. */
+int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
+                                    int first, void *indexes,
+                                    int hfirst, void* host_values,
+                                    int indexBase, cuFloatComplex beta)
+{ int i=0;
+  cuFloatComplex *hv = (cuFloatComplex *) host_values;
+  int *idx=(int *) indexes;
+  struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+
+  idx = &(idx[first-indexBase]);
+  hv = &(hv[hfirst-indexBase]);
+  spgpuCscat(handle, (cuFloatComplex *) devVec->v_, n, hv, idx, indexBase, beta);
+  return SPGPU_SUCCESS;
+
+}
+
+/* Calls spgpuCmnrm2(handle, y_res, n, A->v_, A->count_, A->pitch_); always returns 0. */
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
+{ int i=0;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
+
+  spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
+              devVecA->count_, devVecA->pitch_);
+  return(i);
+}
+/* Calls spgpuCmamax(handle, y_res, n, A->v_, A->count_, A->pitch_); always returns 0. */
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
+{ int i=0;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
+
+  spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
+              devVecA->count_, devVecA->pitch_);
+  return(i);
+}
+/* Calls spgpuCmasum(handle, y_res, n, A->v_, A->count_, A->pitch_); always returns 0. */
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
+{ int i=0;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
+
+  spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_,
+              devVecA->count_, devVecA->pitch_);
+
+  return(i);
+}
+/* Calls spgpuCscal with A->v_ as both pointer arguments and alpha; the length passed is A->pitch_. Always returns 0. */
+int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA)
+{ int i=0;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
+  // Note: inner kernel can handle aliased input/output
+  spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_,
+             alpha, (cuFloatComplex *)devVecA->v_);
+  return(i);
+}
+/* Calls spgpuCmdot(handle, y_res, n, A->v_, B->v_, A->count_, B->pitch_) -- note count_ comes from A and pitch_ from B. Always returns 0. */
+int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n,
+                                  void* devMultiVecA, void* devMultiVecB)
+{int i=0;
+  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
+  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+
+  spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_,
+             (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_);
+  return(i);
+}
+/* For each j, calls spgpuCaxpby(Y+pitch*j, n, beta, Y+pitch*j, alpha, X+pitch*j) with pitch = Y->pitch_; SPGPU_UNSUPPORTED if n exceeds either size_, else 0. */
+int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVecX,
+                                    cuFloatComplex beta, void* devMultiVecY)
+{ int j=0, i=0;
+  int pitch = 0;
+  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
+  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  pitch = devVecY->pitch_;
+  if ((n > devVecY->size_) || (n>devVecX->size_ ))
+    return SPGPU_UNSUPPORTED;
+
+  for(j=0;j<devVecY->count_;j++) /* NOTE(review): bound was garbled to "jcount_" in this copy; devVecY assumed -- confirm */
+    spgpuCaxpby(handle,(cuFloatComplex*)devVecY->v_+pitch*j, n, beta,
+                (cuFloatComplex*)devVecY->v_+pitch*j, alpha,
+                (cuFloatComplex*) devVecX->v_+pitch*j);
+  return(i);
+}
+/* Calls spgpuCupd_xyz(handle, n, alpha, beta, gamma, delta, X->v_, Y->v_, Z->v_); SPGPU_UNSUPPORTED if n exceeds the size_ of X, Y or Z, else 0. */
+int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta,
+                                      cuFloatComplex gamma, cuFloatComplex delta,
+                                      void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
+{ int j=0, i=0;
+  int pitch = 0;
+  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
+  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
+  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  pitch = devVecY->pitch_;
+  if ((n > devVecY->size_) || (n>devVecX->size_ ) || (n>devVecZ->size_ )) /* Z is checked too, as axybz does */
+    return SPGPU_UNSUPPORTED;
+
+  spgpuCupd_xyz(handle,n, alpha,beta,gamma,delta,
+                (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,(cuFloatComplex *) devVecZ->v_);
+  return(i);
+}
+/* Calls spgpuCxyzw(handle, n, a..f, X->v_, Y->v_, Z->v_, W->v_); SPGPU_UNSUPPORTED if n exceeds the size_ of X, Y, Z or W, else 0. */
+int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b,
+                                   cuFloatComplex c, cuFloatComplex d,
+                                   cuFloatComplex e, cuFloatComplex f,
+                                   void* devMultiVecX, void* devMultiVecY,
+                                   void* devMultiVecZ, void* devMultiVecW)
+{ int j=0, i=0;
+  int pitch = 0;
+  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
+  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
+  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
+  struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  pitch = devVecY->pitch_;
+  if ((n > devVecY->size_) || (n>devVecX->size_ ) || (n>devVecZ->size_ ) || (n>devVecW->size_ )) /* all four operands checked */
+    return SPGPU_UNSUPPORTED;
+
+  spgpuCxyzw(handle,n, a,b,c,d,e,f,
+             (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,
+             (cuFloatComplex *) devVecZ->v_,(cuFloatComplex *) devVecW->v_);
+  return(i);
+}
+/* Calls spgpuCmaxy(handle, B->v_, n, alpha, A->v_, B->v_, A->count_, A->pitch_); SPGPU_UNSUPPORTED if n exceeds either size_, else 0. */
+int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
+                                  void *deviceVecA, void *deviceVecB)
+{ int i = 0;
+  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
+  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  if ((n > devVecA->size_) || (n>devVecB->size_ ))
+    return SPGPU_UNSUPPORTED;
+
+  spgpuCmaxy(handle, (cuFloatComplex*)devVecB->v_, n, alpha,
+             (cuFloatComplex*)devVecA->v_,
+             (cuFloatComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_);
+
+  return(i);
+}
+/* Calls spgpuCmaxypbz(handle, Z->v_, n, beta, Z->v_, alpha, A->v_, B->v_, B->count_, B->pitch_); SPGPU_UNSUPPORTED if n exceeds any size_, else 0. */
+int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
+                                    void *deviceVecB, cuFloatComplex beta,
+                                    void *deviceVecZ)
+{ int i=0;
+  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
+  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
+  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+
+  if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ ))
+    return SPGPU_UNSUPPORTED;
+  spgpuCmaxypbz(handle, (cuFloatComplex*)devVecZ->v_, n, beta,
+                (cuFloatComplex*)devVecZ->v_,
+                alpha, (cuFloatComplex*) devVecA->v_, (cuFloatComplex*) devVecB->v_,
+                devVecB->count_, devVecB->pitch_);
+  return(i);
+}
+
+/* Two-operand form: calls spgpuCabs(handle, B->v_, n, alpha, A->v_); SPGPU_UNSUPPORTED if n exceeds either size_, else 0. */
+int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA,
+                                   void *deviceVecB)
+{ int i=0;
+  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
+  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB;
+
+  spgpuHandle_t handle=psb_cudaGetHandle();
+
+  if ((n > devVecA->size_) || (n>devVecB->size_ ))
+    return SPGPU_UNSUPPORTED;
+
+  spgpuCabs(handle, (cuFloatComplex*)devVecB->v_, n,
+            alpha, (cuFloatComplex*)devVecA->v_);
+
+  return(i);
+}
+/* One-operand form: calls spgpuCabs(handle, A->v_, n, alpha, A->v_), i.e. A is both pointer arguments; SPGPU_UNSUPPORTED if n > A->size_, else 0. */
+int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA)
+{ int i = 0;
+  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+  if (n > devVecA->size_)
+    return SPGPU_UNSUPPORTED;
+
+  spgpuCabs(handle, (cuFloatComplex*)devVecA->v_, n,
+            alpha, (cuFloatComplex*)devVecA->v_);
+
+  return(i);
+}
+
+
diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h
new file mode 100644
index 00000000..423da33e
--- /dev/null
+++ b/cuda/cvectordev.h
@@ -0,0 +1,87 @@
+ /* Parallel Sparse BLAS GPU plugin */
+ /* (C) Copyright 2013 */
+
+ /* Salvatore Filippone */
+ /* Alessandro Fanfarillo */
+
+ /* Redistribution and use in source and binary forms, with or without */
+ /* modification, are permitted provided that the following conditions */
+ /* are met: */
+ /* 1. Redistributions of source code must retain the above copyright */
+ /* notice, this list of conditions and the following disclaimer. */
+ /* 2. Redistributions in binary form must reproduce the above copyright */
+ /* notice, this list of conditions, and the following disclaimer in the */
+ /* documentation and/or other materials provided with the distribution. */
+ /* 3. The name of the PSBLAS group or the names of its contributors may */
+ /* not be used to endorse or promote products derived from this */
+ /* software without specific written permission. */
+
+ /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
+ /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
+ /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+ /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
+ /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
+ /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
+ /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
+ /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+ /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
+ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+ /* POSSIBILITY OF SUCH DAMAGE.
*/
+
+
+/* Prototypes of the ...MultiVecDeviceFloatComplex routines; all scalar/complex arguments use cuFloatComplex from cuComplex.h. */
+#pragma once
+//#include "utils.h"
+#include <complex.h> /* NOTE(review): header name was missing in this copy (bare "#include"); <complex.h> assumed -- confirm */
+#include "cuComplex.h"
+#include "vectordev.h"
+#include "cuda_runtime.h"
+#include "core.h"
+#include "vector.h"
+/* Registration and write/read routines taking a device multivector handle and a host buffer; the R2 variants add an ld argument. */
+int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
+int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
+int writeMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld);
+int readMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
+int readMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld);
+/* setscal: takes a scalar val, an index range first..last and its indexBase. */
+int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last,
+                                      int indexBase, void* devVecX);
+/* geins: takes n, an index vector (devVecIrl), a value vector (devVecVal) and the dupl insertion mode. */
+int geinsMultiVecDeviceFloatComplex(int n, void* devVecIrl, void* devVecVal,
+                                    int dupl, int indexBase, void* devVecX);
+/* igath/iscat: the *VecIdx variants take the index list as a device vector handle (deviceIdx) rather than a raw pointer (indexes). */
+int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n,
+                                          int first, void* deviceIdx, int hfirst,
+                                          void* host_values, int indexBase);
+int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
+                                    int first, void* indexes, int hfirst, void* host_values,
+                                    int indexBase);
+int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, int first,
+                                          void *deviceIdx, int hfirst, void* host_values,
+                                          int indexBase, cuFloatComplex beta);
+int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int first, void *indexes,
+                                    int hfirst, void* host_values, int indexBase, cuFloatComplex beta);
+/* scal, nrm2/amax/asum (results written through a float* y_res) and dot (result through a cuFloatComplex* y_res). */
+int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
+int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
+/* axpby, upd_xyz, xyzw, axy, axybz and abs routines; each takes the operation length n first. */
+int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
+int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta,
+                                      cuFloatComplex gamma, cuFloatComplex delta,
+                                      void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
+int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b,
+                                   cuFloatComplex c, cuFloatComplex d,
+                                   cuFloatComplex e, cuFloatComplex f,
+                                   void* devMultiVecX, void* devMultiVecY,
+                                   void* devMultiVecZ, void* devMultiVecW);
+int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB);
+int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
+                                    void *deviceVecB, cuFloatComplex beta, void *deviceVecZ);
+int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA);
+int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha,
+                                   void *deviceVecA, void *deviceVecB);
+
+
diff --git a/cuda/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90
new file mode 100644
index 00000000..55751475
--- /dev/null
+++ b/cuda/d_cusparse_mod.F90
@@ -0,0 +1,313 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+! Generic interfaces to C routines bound by name as "d_..."; scalar coefficients and values are real(c_double).
+module d_cusparse_mod
+  use base_cusparse_mod
+  ! d_Cmat: C-interoperable handle wrapping a single c_ptr, default-initialised to c_null_ptr.
+  type, bind(c) :: d_Cmat
+    type(c_ptr) :: Mat = c_null_ptr
+  end type d_Cmat
+  ! d_Hmat: same layout as d_Cmat; declared only when PSB_CUDA_SHORT_VERSION <= 10.
+#if PSB_CUDA_SHORT_VERSION <= 10
+  type, bind(c) :: d_Hmat
+    type(c_ptr) :: Mat = c_null_ptr
+  end type d_Hmat
+#endif
+  ! CSRGDeviceFree: binds C "d_CSRGDeviceFree"; takes a d_Cmat and returns integer(c_int).
+  interface CSRGDeviceFree
+    function d_CSRGDeviceFree(Mat) &
+         & bind(c,name="d_CSRGDeviceFree") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int) :: res
+    end function d_CSRGDeviceFree
+  end interface
+  ! CSRGDeviceSetMatType: binds C "d_CSRGDeviceSetMatType"; the type argument is passed by value.
+  interface CSRGDeviceSetMatType
+    function d_CSRGDeviceSetMatType(Mat,type) &
+         & bind(c,name="d_CSRGDeviceSetMatType") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int),value :: type
+      integer(c_int) :: res
+    end function d_CSRGDeviceSetMatType
+  end interface
+  ! CSRGDeviceSetMatFillMode: binds C "d_CSRGDeviceSetMatFillMode"; the type argument is passed by value.
+  interface CSRGDeviceSetMatFillMode
+    function d_CSRGDeviceSetMatFillMode(Mat,type) &
+         & bind(c,name="d_CSRGDeviceSetMatFillMode") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int),value :: type
+      integer(c_int) :: res
+    end function d_CSRGDeviceSetMatFillMode
+  end interface
+  ! CSRGDeviceSetMatDiagType: binds C "d_CSRGDeviceSetMatDiagType"; the type argument is passed by value.
+  interface CSRGDeviceSetMatDiagType
+    function d_CSRGDeviceSetMatDiagType(Mat,type) &
+         & bind(c,name="d_CSRGDeviceSetMatDiagType") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int),value :: type
+      integer(c_int) :: res
+    end function d_CSRGDeviceSetMatDiagType
+  end interface
+  ! CSRGDeviceSetMatIndexBase: binds C "d_CSRGDeviceSetMatIndexBase"; the type argument is passed by value.
+  interface CSRGDeviceSetMatIndexBase
+    function d_CSRGDeviceSetMatIndexBase(Mat,type) &
+         & bind(c,name="d_CSRGDeviceSetMatIndexBase") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int),value :: type
+      integer(c_int) :: res
+    end function d_CSRGDeviceSetMatIndexBase
+  end interface
+  ! Version-dependent: CSRGDeviceCsrsmAnalysis when PSB_CUDA_SHORT_VERSION <= 10, otherwise CSRGIsNullSvBuffer.
+#if PSB_CUDA_SHORT_VERSION <= 10
+  interface CSRGDeviceCsrsmAnalysis
+    function d_CSRGDeviceCsrsmAnalysis(Mat) &
+         & bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int) :: res
+    end function d_CSRGDeviceCsrsmAnalysis
+  end interface
+#else
+  interface CSRGIsNullSvBuffer
+    function d_CSRGIsNullSvBuffer(Mat) &
+         & bind(c,name="d_CSRGIsNullSvBuffer") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int) :: res
+    end function d_CSRGIsNullSvBuffer
+  end interface
+#endif
+  ! CSRGDeviceAlloc: binds C "d_CSRGDeviceAlloc"; nr, nc and nz are passed by value.
+  interface CSRGDeviceAlloc
+    function d_CSRGDeviceAlloc(Mat,nr,nc,nz) &
+         & bind(c,name="d_CSRGDeviceAlloc") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int), value :: nr, nc, nz
+      integer(c_int) :: res
+    end function d_CSRGDeviceAlloc
+  end interface
+  ! CSRGDeviceGetParms: binds C "d_CSRGDeviceGetParms"; nr, nc and nz carry no value attribute (passed by reference).
+  interface CSRGDeviceGetParms
+    function d_CSRGDeviceGetParms(Mat,nr,nc,nz) &
+         & bind(c,name="d_CSRGDeviceGetParms") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int) :: nr, nc, nz
+      integer(c_int) :: res
+    end function d_CSRGDeviceGetParms
+  end interface
+  ! spsvCSRGDevice: binds C "d_spsvCSRGDevice"; x and y are c_ptr values, alpha and beta are real(c_double) values.
+  interface spsvCSRGDevice
+    function d_spsvCSRGDevice(Mat,alpha,x,beta,y) &
+         & bind(c,name="d_spsvCSRGDevice") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      type(c_ptr), value :: x
+      type(c_ptr), value :: y
+      real(c_double), value :: alpha,beta
+      integer(c_int) :: res
+    end function d_spsvCSRGDevice
+  end interface
+  ! spmvCSRGDevice: binds C "d_spmvCSRGDevice"; same argument list as spsvCSRGDevice.
+  interface spmvCSRGDevice
+    function d_spmvCSRGDevice(Mat,alpha,x,beta,y) &
+         & bind(c,name="d_spmvCSRGDevice") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      type(c_ptr), value :: x
+      type(c_ptr), value :: y
+      real(c_double), value :: alpha,beta
+      integer(c_int) :: res
+    end function d_spmvCSRGDevice
+  end interface
+  ! CSRGHost2Device: binds C "d_CSRGHost2Device"; m, n, nz by value, irp/ja/val as assumed-size arrays.
+  interface CSRGHost2Device
+    function d_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) &
+         & bind(c,name="d_CSRGHost2Device") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int), value :: m,n,nz
+      integer(c_int) :: irp(*), ja(*)
+      real(c_double) :: val(*)
+      integer(c_int) :: res
+    end function d_CSRGHost2Device
+  end interface
+  ! CSRGDevice2Host: binds C "d_CSRGDevice2Host"; same argument list as CSRGHost2Device.
+  interface CSRGDevice2Host
+    function d_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) &
+         & bind(c,name="d_CSRGDevice2Host") result(res)
+      use iso_c_binding
+      import d_Cmat
+      type(d_Cmat) :: Mat
+      integer(c_int), value :: m,n,nz
+      integer(c_int) :: irp(*), ja(*)
+      real(c_double) :: val(*)
+      integer(c_int) :: res
+    end function d_CSRGDevice2Host
+  end interface
+  ! The HYBG interfaces after this #if use the d_Hmat handle and are compiled only when PSB_CUDA_SHORT_VERSION <= 10.
+#if PSB_CUDA_SHORT_VERSION <= 10
+  interface HYBGDeviceAlloc
+    function d_HYBGDeviceAlloc(Mat,nr,nc,nz) &
+         & bind(c,name="d_HYBGDeviceAlloc") result(res)
+      use iso_c_binding
+      import d_hmat
+      type(d_Hmat) :: Mat
+      integer(c_int), value :: nr, nc, nz
+      integer(c_int) :: res
+    end function d_HYBGDeviceAlloc
+  end interface
+  ! HYBGDeviceFree: binds C "d_HYBGDeviceFree"; takes a d_Hmat and returns integer(c_int).
+  interface HYBGDeviceFree
+    function d_HYBGDeviceFree(Mat) &
+         & bind(c,name="d_HYBGDeviceFree") result(res)
+      use iso_c_binding
+      import d_Hmat
+      type(d_Hmat) :: Mat
+      integer(c_int) :: res
+    end function d_HYBGDeviceFree
+  end interface
+  ! HYBGDeviceSetMatType: binds C "d_HYBGDeviceSetMatType"; the type argument is passed by value.
+  interface HYBGDeviceSetMatType
+    function d_HYBGDeviceSetMatType(Mat,type) &
+         & bind(c,name="d_HYBGDeviceSetMatType") result(res)
+      use iso_c_binding
+      import d_Hmat
+      type(d_Hmat) :: Mat
+      integer(c_int),value :: type
+      integer(c_int) :: res
+    end function d_HYBGDeviceSetMatType
+  end interface
+  ! HYBGDeviceSetMatFillMode: binds C "d_HYBGDeviceSetMatFillMode"; the type argument is passed by value.
+  interface HYBGDeviceSetMatFillMode
+    function d_HYBGDeviceSetMatFillMode(Mat,type) &
+         & bind(c,name="d_HYBGDeviceSetMatFillMode") result(res)
+      use iso_c_binding
+      import d_Hmat
+      type(d_Hmat) :: Mat
+      integer(c_int),value :: type
+      integer(c_int) :: res
+    end function d_HYBGDeviceSetMatFillMode
+  end interface
+  ! HYBGDeviceSetMatDiagType: binds C "d_HYBGDeviceSetMatDiagType"; the type argument is passed by value.
+  interface HYBGDeviceSetMatDiagType
+    function d_HYBGDeviceSetMatDiagType(Mat,type) &
+         & bind(c,name="d_HYBGDeviceSetMatDiagType") result(res)
+      use iso_c_binding
+      import d_Hmat
+      type(d_Hmat) :: Mat
+      integer(c_int),value :: type
+      integer(c_int) :: res
+    end function d_HYBGDeviceSetMatDiagType
+  end interface
+  ! HYBGDeviceSetMatIndexBase: binds C "d_HYBGDeviceSetMatIndexBase"; the type argument is passed by value.
+  interface HYBGDeviceSetMatIndexBase
+    function d_HYBGDeviceSetMatIndexBase(Mat,type) &
+         & bind(c,name="d_HYBGDeviceSetMatIndexBase") result(res)
+      use iso_c_binding
+      import d_Hmat
+      type(d_Hmat) :: Mat
+      integer(c_int),value :: type
+      integer(c_int) :: res
+    end function d_HYBGDeviceSetMatIndexBase
+  end interface
+  ! HYBGDeviceHybsmAnalysis: binds C "d_HYBGDeviceHybsmAnalysis"; takes a d_Hmat and returns integer(c_int).
+  interface HYBGDeviceHybsmAnalysis
+    function d_HYBGDeviceHybsmAnalysis(Mat) &
+         & bind(c,name="d_HYBGDeviceHybsmAnalysis") result(res)
+      use iso_c_binding
+      import d_Hmat
+      type(d_Hmat) :: Mat
+      integer(c_int) :: res
+    end function d_HYBGDeviceHybsmAnalysis
+  end interface
+  ! spsvHYBGDevice: binds C "d_spsvHYBGDevice"; x and y are c_ptr values, alpha and beta are real(c_double) values.
+  interface spsvHYBGDevice
+    function d_spsvHYBGDevice(Mat,alpha,x,beta,y) &
+         & bind(c,name="d_spsvHYBGDevice") result(res)
+      use iso_c_binding
+      import d_Hmat
+      type(d_Hmat) :: Mat
+      type(c_ptr), value :: x
+      type(c_ptr), value :: y
+      real(c_double), value :: alpha,beta
+      integer(c_int) :: res
+    end function d_spsvHYBGDevice
+  end interface
+  ! spmvHYBGDevice: binds C "d_spmvHYBGDevice"; same argument list as spsvHYBGDevice.
+  interface spmvHYBGDevice
+    function d_spmvHYBGDevice(Mat,alpha,x,beta,y) &
+         & bind(c,name="d_spmvHYBGDevice") result(res)
+      use iso_c_binding
+      import d_Hmat
+      type(d_Hmat) :: Mat
+      type(c_ptr), value :: x
+      type(c_ptr), value :: y
+      real(c_double), value :: alpha,beta
+      integer(c_int) :: res
+    end function d_spmvHYBGDevice
+  end interface
+
+  interface HYBGHost2Device
+    function d_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) &
+         &
bind(c,name="d_HYBGHost2Device") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_HYBGHost2Device + end interface + +#endif + +end module d_cusparse_mod diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c new file mode 100644 index 00000000..9af4ce38 --- /dev/null +++ b/cuda/dcusparse.c @@ -0,0 +1,42 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
+ /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
+ /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
+ /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
+ /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+ /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
+ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+ /* POSSIBILITY OF SUCH DAMAGE. */
+
+
+
+#include <stdlib.h>  /* NOTE(review): the four <...> names in this file were lost in extraction and are restored by inference - confirm */
+#include <stdio.h>
+
+#include <cuda_runtime.h>
+#include <cusparse_v2.h>
+#include "fcusparse.h"
+
+#include "dcusparse.h"
+#include "fcusparse_dat.h"
+#include "fcusparse_fct.h"
diff --git a/cuda/dcusparse.h b/cuda/dcusparse.h
new file mode 100644
index 00000000..4b00173b
--- /dev/null
+++ b/cuda/dcusparse.h
@@ -0,0 +1,101 @@
+ /* Parallel Sparse BLAS GPU plugin */
+ /* (C) Copyright 2013 */
+
+ /* Salvatore Filippone */
+ /* Alessandro Fanfarillo */
+
+ /* Redistribution and use in source and binary forms, with or without */
+ /* modification, are permitted provided that the following conditions */
+ /* are met: */
+ /* 1. Redistributions of source code must retain the above copyright */
+ /* notice, this list of conditions and the following disclaimer. */
+ /* 2. Redistributions in binary form must reproduce the above copyright */
+ /* notice, this list of conditions, and the following disclaimer in the */
+ /* documentation and/or other materials provided with the distribution. */
+ /* 3. The name of the PSBLAS group or the names of its contributors may */
+ /* not be used to endorse or promote products derived from this */
+ /* software without specific written permission. */
+
+ /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
+ /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
+ /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+ /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
+ /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
+ /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
+ /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
+ /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+ /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
+ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+ /* POSSIBILITY OF SUCH DAMAGE. */
+
+#ifndef DCUSPARSE_
+#define DCUSPARSE_
+
+
+#include <stdlib.h>  /* NOTE(review): the four <...> names in this header were lost in extraction and are restored by inference - confirm */
+#include <stdio.h>
+
+#include <cuda_runtime.h>
+#include <cusparse_v2.h>
+#include "cintrf.h"
+
+/* Double precision real: map every generic T_ / cusparseT name onto its d_ / cusparseD counterpart */
+#define TYPE double
+#define CUSPARSE_BASE_TYPE CUDA_R_64F
+#define T_CSRGDeviceMat d_CSRGDeviceMat
+#define T_Cmat d_Cmat
+#define T_spmvCSRGDevice d_spmvCSRGDevice
+#define T_spsvCSRGDevice d_spsvCSRGDevice
+#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc
+#define T_CSRGDeviceFree d_CSRGDeviceFree
+#define T_CSRGHost2Device d_CSRGHost2Device
+#define T_CSRGDevice2Host d_CSRGDevice2Host
+#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode
+#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType
+#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms
+
+#if PSB_CUDA_SHORT_VERSION <= 10
+#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType
+#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase
+#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis
+#define cusparseTcsrmv cusparseDcsrmv
+#define cusparseTcsrsv_solve cusparseDcsrsv_solve
+#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis
+#define T_HYBGDeviceMat d_HYBGDeviceMat
+#define T_Hmat d_Hmat
+#define T_HYBGDeviceFree d_HYBGDeviceFree
+#define T_spmvHYBGDevice d_spmvHYBGDevice
+#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc
+#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType
+#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase
+#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType
+#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode
+#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis
+#define T_spsvHYBGDevice d_spsvHYBGDevice
+#define T_HYBGHost2Device d_HYBGHost2Device
+#define cusparseThybmv cusparseDhybmv
+#define cusparseThybsv_solve cusparseDhybsv_solve
+#define cusparseThybsv_analysis cusparseDhybsv_analysis
+#define cusparseTcsr2hyb cusparseDcsr2hyb
+
+#elif PSB_CUDA_VERSION < 11030
+
+#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType
+#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase
+#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis
+#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize
+#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis
+#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve
+#else
+
+#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer
+#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr
+#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr
+#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr
+
+#endif
+
+#include "fcusparse.h"
+
+#endif
+
diff --git a/cuda/diagdev.c b/cuda/diagdev.c
new file mode 100644
index 00000000..0cf78a41
--- /dev/null
+++ b/cuda/diagdev.c
@@ -0,0 +1,261 @@
+ /* Parallel Sparse BLAS GPU plugin */
+ /* (C) Copyright 2013 */
+
+ /* Salvatore Filippone */
+ /* Alessandro Fanfarillo */
+
+ /* Redistribution and use in source and binary forms, with or without */
+ /* modification, are permitted provided that the following conditions */
+ /* are met: */
+ /* 1. Redistributions of source code must retain the above copyright */
+ /* notice, this list of conditions and the following disclaimer. */
+ /* 2.
Redistributions in binary form must reproduce the above copyright */
+ /* notice, this list of conditions, and the following disclaimer in the */
+ /* documentation and/or other materials provided with the distribution. */
+ /* 3. The name of the PSBLAS group or the names of its contributors may */
+ /* not be used to endorse or promote products derived from this */
+ /* software without specific written permission. */
+
+ /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
+ /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
+ /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+ /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
+ /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
+ /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
+ /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
+ /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+ /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
+ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+ /* POSSIBILITY OF SUCH DAMAGE. */
+
+
+#include "diagdev.h"
+#include <stdio.h>   /* NOTE(review): the four <...> names were lost in extraction and are restored from usage - confirm */
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+/* Host-side support for matrices stored by diagonals on the device: */
+/* allocation, host/device transfers and matrix-vector products.     */
+
+/*
+ * Pack the dimensions of a matrix stored by diagonals, together with its
+ * element type, into the parameter block consumed by allocDiagDevice().
+ */
+DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, unsigned int diags, unsigned int elementType)
+{
+  DiagDeviceParams params;
+
+  params.elementType = elementType;
+  /* rows*diags is the number of entries of the values array */
+  params.rows = rows;
+  params.columns = columns;
+  params.diags = diags;
+
+  return params;
+}
+
+/*
+ * Allocate the host-side DiagDevice descriptor and its two device buffers:
+ * `off` (one int per diagonal) and `cM` (rows*diags entries whose size is
+ * selected by params->elementType).
+ * *remoteMatrix always receives the descriptor pointer (NULL when the host
+ * allocation fails), so it may be handed to freeDiagDevice() on any outcome.
+ * Returns SPGPU_SUCCESS; SPGPU_UNSUPPORTED for an unknown element type or a
+ * failed host allocation; otherwise the status of allocRemoteBuffer().
+ */
+int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params)
+{
+  struct DiagDevice *tmp = (struct DiagDevice *)malloc(sizeof(struct DiagDevice));
+  int ret=SPGPU_SUCCESS;
+  size_t elSize;
+
+  *remoteMatrix = (void *)tmp;
+  if (tmp == NULL)
+    return SPGPU_UNSUPPORTED; // host allocation failed
+
+  tmp->rows = params->rows;
+  tmp->cols = params->columns;
+  tmp->diags = params->diags;
+  /* No wild pointers are left behind if we bail out below. */
+  tmp->off = NULL;
+  tmp->cM = NULL;
+
+  if (params->elementType == SPGPU_TYPE_INT)
+    elSize = sizeof(int);
+  else if (params->elementType == SPGPU_TYPE_FLOAT)
+    elSize = sizeof(float);
+  else if (params->elementType == SPGPU_TYPE_DOUBLE)
+    elSize = sizeof(double);
+  else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
+    elSize = sizeof(cuFloatComplex);
+  else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
+    elSize = sizeof(cuDoubleComplex);
+  else
+    return SPGPU_UNSUPPORTED; // Unsupported params
+
+  ret=allocRemoteBuffer((void **)&(tmp->off), tmp->diags*sizeof(int));
+  if (ret == SPGPU_SUCCESS)
+    ret=allocRemoteBuffer((void **)&(tmp->cM), (size_t)tmp->rows*tmp->diags*elSize);
+  return ret;
+}
+
+/* Release both device buffers and the host descriptor; NULL is a no-op. */
+void freeDiagDevice(void* remoteMatrix)
+{
+  struct DiagDevice *devMat = (struct DiagDevice *) remoteMatrix;
+  if (devMat != NULL) {
+    freeRemoteBuffer(devMat->off);
+    freeRemoteBuffer(devMat->cM);
+    free(remoteMatrix);
+  }
+}
+
+/*
+ * Entry point bound from Fortran: build the parameter block and allocate
+ * the matrix in one call. A failure is reported on stderr and returned.
+ */
+int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,unsigned int diags,unsigned int elementType)
+{ int i;
+  DiagDeviceParams p;
+
+  p = getDiagDeviceParams(rows, columns, diags,elementType);
+  i = allocDiagDevice(deviceMat, &p);
+  if (i != 0) {
+    fprintf(stderr,"From routine : %s : %d \n","FallocDiagDevice",i);
+  }
+  return(i);
+}
+
+/*
+ * Copy the host values `a` (rows*diags doubles) and the diagonal offsets
+ * `off` (diags ints) into the device buffers of deviceMat. `n` is part of
+ * the signature used by the Fortran interface and is not read here.
+ * Returns SPGPU_SUCCESS, or SPGPU_UNSUPPORTED if either transfer fails.
+ */
+int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n)
+{ int i;
+  struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
+
+  i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(double));
+  /* Only attempt the second copy if the first one worked, so that its */
+  /* status cannot overwrite (and hide) an earlier failure.            */
+  if (i == 0)
+    i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int));
+
+  if(i==0)
+    return SPGPU_SUCCESS;
+  else
+    return SPGPU_UNSUPPORTED;
+}
+
+/*
+ * Copy the values (rows*diags doubles) and the offsets (diags ints) of
+ * deviceMat back into the host arrays `a` and `off`.
+ * Returns SPGPU_SUCCESS, or SPGPU_UNSUPPORTED if either transfer fails.
+ */
+int readDiagDeviceDouble(void* deviceMat, double* a, int* off)
+{ int i;
+  struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
+  i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double));
+  if (i == 0)
+    i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int));
+  /* Same status convention as writeDiagDeviceDouble(): 0 means success. */
+  if (i != 0)
+    return SPGPU_UNSUPPORTED;
+
+  return SPGPU_SUCCESS;
+}
+
+/*
+ * Matrix-vector product on the device, delegated to spgpuDdiaspmv().
+ * deviceX and deviceY are MultiVectDevice handles; y->v_ is passed both as
+ * the result and as the vector paired with beta (presumably computing
+ * y = alpha*A*x + beta*y - confirm against the spgpu documentation).
+ * NOTE(review): devMat->rows is passed twice, presumably as the row count
+ * and as the pitch of cM - confirm. Always returns SPGPU_SUCCESS.
+ */
+int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
+                         double beta, void* deviceY)
+{
+  struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
+  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
+  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+
+#ifdef VERBOSE
+  /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
+  /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
+  /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
+#endif
+  spgpuDdiaspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta);
+
+  return SPGPU_SUCCESS;
+}
+
+/*
+ * Copy the host values `a` (rows*diags floats) and the diagonal offsets
+ * `off` (diags ints) into the device buffers of deviceMat. `n` is part of
+ * the signature used by the Fortran interface and is not read here.
+ * Returns SPGPU_SUCCESS, or SPGPU_UNSUPPORTED if either transfer fails.
+ */
+int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n)
+{ int i;
+  struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
+
+  i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(float));
+  /* Only attempt the second copy if the first one worked, so that its */
+  /* status cannot overwrite (and hide) an earlier failure.            */
+  if (i == 0)
+    i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int));
+
+  if(i==0)
+    return SPGPU_SUCCESS;
+  else
+    return SPGPU_UNSUPPORTED;
+}
+
+/*
+ * Copy the values (rows*diags floats) and the offsets (diags ints) of
+ * deviceMat back into the host arrays `a` and `off`.
+ * Returns SPGPU_SUCCESS, or SPGPU_UNSUPPORTED if either transfer fails.
+ */
+int readDiagDeviceFloat(void* deviceMat, float* a, int* off)
+{ int i;
+  struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
+  i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float));
+  if (i == 0)
+    i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int));
+  /* Same status convention as writeDiagDeviceFloat(): 0 means success. */
+  if (i != 0)
+    return SPGPU_UNSUPPORTED;
+
+  return SPGPU_SUCCESS;
+}
+
+/*
+ * Matrix-vector product on the device, delegated to spgpuSdiaspmv().
+ * deviceX and deviceY are MultiVectDevice handles; y->v_ is passed both as
+ * the result and as the vector paired with beta (presumably computing
+ * y = alpha*A*x + beta*y - confirm against the spgpu documentation).
+ * NOTE(review): devMat->rows is passed twice, presumably as the row count
+ * and as the pitch of cM - confirm. Always returns SPGPU_SUCCESS.
+ */
+int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
+                        float beta, void* deviceY)
+{
+  struct DiagDevice *devMat = (struct DiagDevice *) deviceMat;
+  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
+  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
+  spgpuHandle_t handle=psb_cudaGetHandle();
+
+#ifdef VERBOSE
+  /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
+  /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
+  /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
+#endif
+  spgpuSdiaspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta);
+
+  return SPGPU_SUCCESS;
+}
+
diff --git a/cuda/diagdev.h b/cuda/diagdev.h
new file mode 100644
index 00000000..3a062fb7
--- /dev/null
+++ b/cuda/diagdev.h
@@ -0,0 +1,91 @@
+ /* Parallel Sparse BLAS GPU plugin */
+ /* (C) Copyright 2013 */
+
+ /* Salvatore Filippone */
+ /* Alessandro Fanfarillo */
+
+ /* Redistribution and use in
source and binary forms, with or without */
+ /* modification, are permitted provided that the following conditions */
+ /* are met: */
+ /* 1. Redistributions of source code must retain the above copyright */
+ /* notice, this list of conditions and the following disclaimer. */
+ /* 2. Redistributions in binary form must reproduce the above copyright */
+ /* notice, this list of conditions, and the following disclaimer in the */
+ /* documentation and/or other materials provided with the distribution. */
+ /* 3. The name of the PSBLAS group or the names of its contributors may */
+ /* not be used to endorse or promote products derived from this */
+ /* software without specific written permission. */
+
+ /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
+ /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
+ /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+ /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
+ /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
+ /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
+ /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
+ /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+ /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
+ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+ /* POSSIBILITY OF SUCH DAMAGE. */
+
+
+#ifndef _DIAGDEV_H_
+#define _DIAGDEV_H_
+
+#include "cintrf.h"
+#include "vectordev.h"
+#include "dia.h"
+
+struct DiagDevice
+{
+  // Device buffer of matrix values: rows*diags entries of the element type chosen at allocation
+  void *cM;
+
+  // Device buffer of diagonal offsets: one int per stored diagonal (diags entries)
+  int *off;
+
+  int rows;
+
+  int cols;
+
+  int diags;
+
+};
+
+typedef struct DiagDeviceParams
+{
+
+  unsigned int elementType;
+
+  // Number of rows.
+  // Together with diags it sizes the values buffer (rows*diags entries)
+  unsigned int rows;
+
+  // Number of columns.
+  // Stored in the descriptor and passed to the spmv kernels; not used for allocation
+  unsigned int columns;
+
+  // Number of stored diagonals (also the length of the offsets buffer)
+  unsigned int diags;
+
+} DiagDeviceParams;
+DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns,
+                                     unsigned int diags, unsigned int elementType);
+int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,
+                     unsigned int diags, unsigned int elementType);
+int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params);
+void freeDiagDevice(void* remoteMatrix);
+
+int readDiagDeviceDouble(void* deviceMat, double* a, int* off);
+int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n);
+int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX,
+                         double beta, void* deviceY);
+
+int readDiagDeviceFloat(void* deviceMat, float* a, int* off);
+int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n);
+int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX,
+                        float beta, void* deviceY);
+
+
+
+#endif
diff --git a/cuda/diagdev_mod.F90 b/cuda/diagdev_mod.F90
new file mode 100644
index 00000000..70d58d4e
--- /dev/null
+++ b/cuda/diagdev_mod.F90
@@ -0,0 +1,224 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+!
software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+module diagdev_mod  ! Fortran interfaces to the C routines of cuda/diagdev.c
+  use iso_c_binding
+  use core_mod
+
+  type, bind(c) :: diagdev_parms  ! counterpart of the C struct DiagDeviceParams (four int-sized slots)
+    integer(c_int) :: element_type
+    integer(c_int) :: rows
+    integer(c_int) :: columns
+    integer(c_int) :: firstIndex  ! NOTE(review): the C struct names this fourth slot diags
+  end type diagdev_parms
+
+  interface
+    function FgetDiagDeviceParams(rows, columns, elementType, firstIndex) &
+         & result(res) bind(c,name='getDiagDeviceParams')
+      use iso_c_binding
+      import :: diagdev_parms
+      type(diagdev_parms) :: res
+      integer(c_int), value :: rows,columns,elementType,firstIndex  ! C order is rows, columns, diags, elementType
+    end function FgetDiagDeviceParams
+  end interface
+
+  ! NOTE(review): the C routine takes (deviceMat, rows, columns, diags, elementType); dummy names below differ
+  interface
+    function FallocDiagDevice(deviceMat,rows,columns,&
+         & elementType,firstIndex) &
+         & result(res) bind(c,name='FallocDiagDevice')
+      use iso_c_binding
+      integer(c_int) :: res
+      integer(c_int), value :: rows,columns,elementType,firstIndex
+      type(c_ptr) :: deviceMat  ! by reference: the C side writes the new handle through it
+    end function FallocDiagDevice
+  end interface
+
+  interface writeDiagDevice
+
+    function writeDiagDeviceFloat(deviceMat,a,off,n) &
+         & result(res) bind(c,name='writeDiagDeviceFloat')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      integer(c_int), value :: n
+      real(c_float) :: a(n,*)
+      integer(c_int) :: off(*)!,irn(*)
+    end function writeDiagDeviceFloat
+
+    function writeDiagDeviceDouble(deviceMat,a,off,n) &
+         & result(res) bind(c,name='writeDiagDeviceDouble')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      integer(c_int),value :: n
+      real(c_double) :: a(n,*)
+      integer(c_int) :: off(*)
+    end function writeDiagDeviceDouble
+
+    function writeDiagDeviceFloatComplex(deviceMat,a,off,n) &  ! NOTE(review): not defined in cuda/diagdev.c - confirm it exists elsewhere
+         & result(res) bind(c,name='writeDiagDeviceFloatComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      integer(c_int), value :: n
+      complex(c_float_complex) :: a(n,*)
+      integer(c_int) :: off(*)!,irn(*)
+    end function writeDiagDeviceFloatComplex
+
+    function writeDiagDeviceDoubleComplex(deviceMat,a,off,n) &  ! NOTE(review): not defined in cuda/diagdev.c - confirm it exists elsewhere
+         & result(res) bind(c,name='writeDiagDeviceDoubleComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      integer(c_int), value :: n
+      complex(c_double_complex) :: a(n,*)
+      integer(c_int) :: off(*)!,irn(*)
+    end function writeDiagDeviceDoubleComplex
+
+  end interface
+
+  interface readDiagDevice  ! NOTE(review): the C read routines in cuda/diagdev.c take only (deviceMat, a, off)
+    function readDiagDeviceFloat(deviceMat,a,off,n) &
+         & result(res) bind(c,name='readDiagDeviceFloat')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      integer(c_int), value :: n  ! declared explicitly, as in the three sibling specifics
+      real(c_float) :: a(n,*)
+      integer(c_int) :: off(*)!,irn(*)
+    end function readDiagDeviceFloat
+
+    function readDiagDeviceDouble(deviceMat,a,off,n) &
+         & result(res) bind(c,name='readDiagDeviceDouble')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      integer(c_int),value :: n
+      real(c_double) :: a(n,*)
+      integer(c_int) :: off(*)
+    end function readDiagDeviceDouble
+
+    function readDiagDeviceFloatComplex(deviceMat,a,off,n) &  ! NOTE(review): not defined in cuda/diagdev.c - confirm it exists elsewhere
+         & result(res) bind(c,name='readDiagDeviceFloatComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      integer(c_int), value :: n
+      complex(c_float_complex) :: a(n,*)
+      integer(c_int) :: off(*)!,irn(*)
+    end function readDiagDeviceFloatComplex
+
+    function readDiagDeviceDoubleComplex(deviceMat,a,off,n) &  ! NOTE(review): not defined in cuda/diagdev.c - confirm it exists elsewhere
+         & result(res) bind(c,name='readDiagDeviceDoubleComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      integer(c_int), value :: n
+      complex(c_double_complex) :: a(n,*)
+      integer(c_int) :: off(*)!,irn(*)
+    end function readDiagDeviceDoubleComplex
+
+  end interface
+
+  interface
+    subroutine freeDiagDevice(deviceMat) &
+         & bind(c,name='freeDiagDevice')
+      use iso_c_binding
+      type(c_ptr), value :: deviceMat
+    end subroutine freeDiagDevice
+  end interface
+
+  interface
+    subroutine resetDiagTimer() bind(c,name='resetDiagTimer')  ! NOTE(review): not defined in cuda/diagdev.c - confirm
+      use iso_c_binding
+    end subroutine resetDiagTimer
+  end interface
+  interface
+    function getDiagTimer() &  ! NOTE(review): not defined in cuda/diagdev.c - confirm
+         & bind(c,name='getDiagTimer') result(res)
+      use iso_c_binding
+      real(c_double) :: res
+    end function getDiagTimer
+  end interface
+
+  interface
+    function getDiagDevicePitch(deviceMat) &  ! NOTE(review): not defined in cuda/diagdev.c - confirm
+         & bind(c,name='getDiagDevicePitch') result(res)
+      use iso_c_binding
+      type(c_ptr), value :: deviceMat
+      integer(c_int) :: res
+    end function getDiagDevicePitch
+  end interface
+
+  interface
+    function getDiagDeviceMaxRowSize(deviceMat) &  ! NOTE(review): not defined in cuda/diagdev.c - confirm
+         & bind(c,name='getDiagDeviceMaxRowSize') result(res)
+      use iso_c_binding
+      type(c_ptr), value :: deviceMat
+      integer(c_int) :: res
+    end function getDiagDeviceMaxRowSize
+  end interface
+
+
+  interface spmvDiagDevice
+    function spmvDiagDeviceFloat(deviceMat,alpha,x,beta,y) &
+         & result(res) bind(c,name='spmvDiagDeviceFloat')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat, x, y
+      real(c_float),value :: alpha, beta
+    end function spmvDiagDeviceFloat
+    function spmvDiagDeviceDouble(deviceMat,alpha,x,beta,y) &
+         & result(res) bind(c,name='spmvDiagDeviceDouble')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat, x, y
+      real(c_double),value :: alpha, beta
+    end function spmvDiagDeviceDouble
+    function spmvDiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) &  ! NOTE(review): not defined in cuda/diagdev.c - confirm
+         & result(res) bind(c,name='spmvDiagDeviceFloatComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat, x, y
+      complex(c_float_complex),value :: alpha, beta
+    end function spmvDiagDeviceFloatComplex
+    function spmvDiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &  ! NOTE(review): not defined in cuda/diagdev.c - confirm
+         & result(res) bind(c,name='spmvDiagDeviceDoubleComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat, x, y
+      complex(c_double_complex),value :: alpha, beta
+    end function spmvDiagDeviceDoubleComplex
+  end interface spmvDiagDevice
+
+end module diagdev_mod
diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c
new file mode 100644
index 00000000..3cf57976
--- /dev/null
+++ b/cuda/dnsdev.c
@@ -0,0 +1,321 @@
+ /* Parallel Sparse BLAS GPU plugin */
+ /* (C) Copyright 2013 */
+
+ /* Salvatore Filippone */
+ /* Alessandro Fanfarillo */
+
+ /* Redistribution and use in source and binary forms, with or without */
+ /* modification, are permitted provided that the following conditions */
+ /* are met: */
+ /* 1. Redistributions of source code must retain the above copyright */
+ /* notice, this list of conditions and the following disclaimer. */
+ /* 2. Redistributions in binary form must reproduce the above copyright */
+ /* notice, this list of conditions, and the following disclaimer in the */
+ /* documentation and/or other materials provided with the distribution. */
+ /* 3. The name of the PSBLAS group or the names of its contributors may */
+ /* not be used to endorse or promote products derived from this */
+ /* software without specific written permission. */
+
+ /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
+ /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
+ /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+ /* PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#include "dnsdev.h" + +#define PASS_RS 0 + +#define IMIN(a,b) ((a)<(b) ? (a) : (b)) + +DnsDeviceParams getDnsDeviceParams(unsigned int rows, unsigned int columns, + unsigned int elementType, unsigned int firstIndex) +{ + DnsDeviceParams params; + + if (elementType == SPGPU_TYPE_DOUBLE) + { + params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D; + } + else + { + params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S; + } + //For complex? 
+ params.elementType = elementType; + params.rows = rows; + params.columns = columns; + params.firstIndex = firstIndex; + + return params; + +} +// Allocate the host-side DnsDevice descriptor plus its device buffer; for an unsupported elementType the descriptor is released and *remoteMatrix is reset to NULL. +int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params) +{ + struct DnsDevice *tmp = (struct DnsDevice *)malloc(sizeof(struct DnsDevice)); + *remoteMatrix = (void *)tmp; + tmp->rows = params->rows; + tmp->columns = params->columns; + tmp->cMPitch = params->pitch; + tmp->pitch= tmp->cMPitch; + tmp->allocsize = (int)tmp->columns * tmp->pitch; + tmp->baseIndex = params->firstIndex; + //fprintf(stderr,"allocDnsDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize); + if (params->elementType == SPGPU_TYPE_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + else if (params->elementType == SPGPU_TYPE_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + else + { free(tmp); *remoteMatrix = NULL; return SPGPU_UNSUPPORTED; } // Unsupported params: do not leak tmp or hand back a descriptor whose cM was never set + //fprintf(stderr,"From allocDnsDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return SPGPU_SUCCESS; +} + +void freeDnsDevice(void* remoteMatrix) +{ + struct DnsDevice *devMat = (struct DnsDevice *) remoteMatrix; + //fprintf(stderr,"freeDnsDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocDnsDevice(void** deviceMat, unsigned int rows, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ int i; + DnsDeviceParams p; + + p = getDnsDeviceParams(rows, columns, elementType, firstIndex); + i = allocDnsDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocDnsDevice",i); +
} + return(i); +} + + +int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, + void *deviceMat, void* deviceX, float *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; + + cublasHandle_t handle=psb_cudaGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasSgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasSgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + +int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, + void *deviceMat, void* deviceX, double *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; + + cublasHandle_t handle=psb_cudaGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? 
CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasDgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasDgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + +int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *alpha, + void *deviceMat, void* deviceX, float complex *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; + + cublasHandle_t handle=psb_cudaGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasCgemv(handle, trans, m,k, + (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1, + (const cuComplex *) beta, y->v_,1); + } else { + status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k, + (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + (const cuComplex *) beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + +int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex *alpha, + void *deviceMat, void* deviceX, double complex *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; + + cublasHandle_t handle=psb_cudaGetCublasHandle(); + 
cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasZgemv(handle, trans, m,k, + (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1, + (const cuDoubleComplex *) beta, y->v_,1); + } else { + status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k, + (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + (const cuDoubleComplex *) beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + + +int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloat",i); + } + return (i == 0) ? SPGPU_SUCCESS : SPGPU_UNSUPPORTED; // propagate a failed host-to-device copy instead of reporting success +} + +int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDouble",i); + } + return (i == 0) ? SPGPU_SUCCESS : SPGPU_UNSUPPORTED; // propagate a failed host-to-device copy instead of reporting success +} + + +int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloatComplex",i); + } + return (i == 0) ? SPGPU_SUCCESS : SPGPU_UNSUPPORTED; // propagate a failed host-to-device copy instead of reporting success +} + +int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) 
deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDoubleComplex",i); + } + return (i == 0) ? SPGPU_SUCCESS : SPGPU_UNSUPPORTED; // propagate a failed host-to-device copy instead of reporting success +} + + +int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(float), (void *)devMat->cM, pitch, (void*) val,lda); // cublasGetMatrix takes the device source first, then the host destination + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloat",i); + } + return (i == 0) ? SPGPU_SUCCESS : SPGPU_UNSUPPORTED; // propagate a failed device-to-host copy instead of reporting success +} + +int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(double), (void *)devMat->cM, pitch, (void*) val,lda); // cublasGetMatrix takes the device source first, then the host destination + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDouble",i); + } + return (i == 0) ? SPGPU_SUCCESS : SPGPU_UNSUPPORTED; // propagate a failed device-to-host copy instead of reporting success +} + + +int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(cuFloatComplex), (void *)devMat->cM, pitch, (void*) val,lda); // cublasGetMatrix takes the device source first, then the host destination + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloatComplex",i); + } + return (i == 0) ? SPGPU_SUCCESS : SPGPU_UNSUPPORTED; // propagate a failed device-to-host copy instead of reporting success +} + +int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(cuDoubleComplex), (void *)devMat->cM, pitch, (void*) val,lda); // cublasGetMatrix takes the device source first, then the host destination + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDoubleComplex",i); + } + return (i == 0) ? SPGPU_SUCCESS : SPGPU_UNSUPPORTED; // propagate a failed device-to-host copy instead of reporting success +} + + +int getDnsDevicePitch(void* deviceMat) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + i = devMat->pitch; + return(i); +} + diff
--git a/cuda/dnsdev.h b/cuda/dnsdev.h new file mode 100644 index 00000000..aa536105 --- /dev/null +++ b/cuda/dnsdev.h @@ -0,0 +1,118 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + + +#ifndef _DNSDEV_H_ +#define _DNSDEV_H_ + +#include "cintrf.h" +#include "vectordev.h" +#include "cuComplex.h" +#include "cublas_v2.h" + + +struct DnsDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + + //matrix size (uncompressed) + int rows; + int columns; + + int pitch; //old + + int cMPitch; + + //allocation size (in elements) + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; +}; + +typedef struct DnsDeviceParams +{ + // The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + // First index (e.g 0 or 1) + unsigned int firstIndex; +} DnsDeviceParams; + +int FallocDnsDevice(void** deviceMat, unsigned int rows, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex); +int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params); +void freeDnsDevice(void* remoteMatrix); + +int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc); +int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc); +int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc); +int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc); + +int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc); +int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc); +int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc); +int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc); + +int spmvDnsDeviceFloat(char transa, int m, int n, int k, + float *alpha, void *deviceMat, void* deviceX, + float *beta, void* deviceY); +int spmvDnsDeviceDouble(char transa, int m, int n, int k, + double 
*alpha, void *deviceMat, void* deviceX, + double *beta, void* deviceY); +int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, + float complex *alpha, void *deviceMat, void* deviceX, + float complex *beta, void* deviceY); +int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, + double complex *alpha, void *deviceMat, void* deviceX, + double complex *beta, void* deviceY); + +int getDnsDevicePitch(void* deviceMat); + +// sparse Dns matrix-vector product +//int spmvDnsDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); +//int spmvDnsDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); + +#endif diff --git a/cuda/dnsdev_mod.F90 b/cuda/dnsdev_mod.F90 new file mode 100644 index 00000000..fd257e0e --- /dev/null +++ b/cuda/dnsdev_mod.F90 @@ -0,0 +1,270 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module dnsdev_mod ! Fortran interfaces to the C routines named in each bind(c) clause below + use iso_c_binding + use core_mod + + type, bind(c) :: dnsdev_parms ! result type of FgetDnsDeviceParams, which is bound to C getDnsDeviceParams + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: maxRowSize ! NOTE(review): C struct DnsDeviceParams in dnsdev.h has no such member - confirm the layouts agree + integer(c_int) :: avgRowSize ! NOTE(review): C struct DnsDeviceParams in dnsdev.h has no such member - confirm the layouts agree + integer(c_int) :: firstIndex + end type dnsdev_parms + + interface + function FgetDnsDeviceParams(rows, columns, elementType, firstIndex) & + & result(res) bind(c,name='getDnsDeviceParams') + use iso_c_binding + import :: dnsdev_parms + type(dnsdev_parms) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + end function FgetDnsDeviceParams + end interface + + + interface + function FallocDnsDevice(deviceMat,rows,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocDnsDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocDnsDevice + end interface + + + interface writeDnsDevice + + function writeDnsDeviceFloat(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_float) :: val(lda,*) + end function writeDnsDeviceFloat + + + function writeDnsDeviceDouble(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc +
real(c_double) :: val(lda,*) + end function writeDnsDeviceDouble + + + function writeDnsDeviceFloatComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceFloatComplex') + use iso_c_binding ! binding label must spell the C symbol writeDnsDeviceFloatComplex exactly + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_float_complex) :: val(lda,*) + end function writeDnsDeviceFloatComplex + + + function writeDnsDeviceDoubleComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_double_complex) :: val(lda,*) + end function writeDnsDeviceDoubleComplex + + end interface + + interface readDnsDevice + + function readDnsDeviceFloat(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_float) :: val(lda,*) + end function readDnsDeviceFloat + + + function readDnsDeviceDouble(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_double) :: val(lda,*) + end function readDnsDeviceDouble + + + function readDnsDeviceFloatComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceFloatComplex') + use iso_c_binding ! binding label must spell the C symbol readDnsDeviceFloatComplex exactly + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_float_complex) :: val(lda,*) + end function readDnsDeviceFloatComplex + + + function readDnsDeviceDoubleComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_double_complex) :: val(lda,*) + end function readDnsDeviceDoubleComplex + + end interface + + interface +
subroutine freeDnsDevice(deviceMat) & + & bind(c,name='freeDnsDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeDnsDevice + end interface + + interface + subroutine resetDnsTimer() bind(c,name='resetDnsTimer') + use iso_c_binding + end subroutine resetDnsTimer + end interface + interface + function getDnsTimer() & + & bind(c,name='getDnsTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getDnsTimer + end interface + + + interface + function getDnsDevicePitch(deviceMat) & + & bind(c,name='getDnsDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDnsDevicePitch + end interface + +!!$ interface csputDnsDeviceFloat +!!$ function dev_csputDnsDeviceFloat(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloat') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceFloat +!!$ end interface +!!$ +!!$ interface csputDnsDeviceDouble +!!$ function dev_csputDnsDeviceDouble(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceDouble') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceDouble +!!$ end interface +!!$ +!!$ interface csputDnsDeviceFloatComplex +!!$ function dev_csputDnsDeviceFloatComplex(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceFloatComplex +!!$ end interface +!!$ +!!$ interface csputDnsDeviceDoubleComplex +!!$ function dev_csputDnsDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) 
bind(c,name='dev_csputDnsDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceDoubleComplex +!!$ end interface + + interface spmvDnsDevice + function spmvDnsDeviceFloat(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceFloat') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float) :: alpha, beta + end function spmvDnsDeviceFloat + + function spmvDnsDeviceDouble(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceDouble') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double) :: alpha, beta + end function spmvDnsDeviceDouble + + function spmvDnsDeviceFloatComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceFloatComplex') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex) :: alpha, beta + end function spmvDnsDeviceFloatComplex + + function spmvDnsDeviceDoubleComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceDoubleComplex') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex) :: alpha, beta + end function spmvDnsDeviceDoubleComplex + + end interface + +end module dnsdev_mod diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c new file mode 100644 index 00000000..10bbc326 --- /dev/null +++ b/cuda/dvectordev.c @@ -0,0 +1,339 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* 
Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + + +#include <stdlib.h> +#include <stdio.h> +//#include "utils.h" +//#include "common.h" +#include "dvectordev.h" + + +int registerMappedDouble(void *buff, void **d_p, int n, double dummy) +{ + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(double)); +} + +int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(double)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDouble",i); // report this routine's own name, as the sibling wrappers do + } + return(i); +} + +int writeMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceDouble(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleR2",i); + } + return(i); +} + +int readMultiVecDeviceDouble(void* deviceVec, double* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(double)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDouble",i); + } + return(i); +} + +int readMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld) +{ int i; + i = readMultiVecDeviceDouble(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleR2",i); + } + return(i); +} + +int setscalMultiVecDeviceDouble(double val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_); + + return(i); +} + + +int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* 
devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + double beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0.0; + else if (dupl == INS_ADD) + beta = 1.0; + else + beta = 0.0; + + spgpuDscat(handle, (double *) devVecX->v_, n, (double*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceDouble(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + double *hv = (double *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuDgath(handle,hv, n, idx,indexBase, (double *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, double beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct 
MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceDouble(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, double beta) +{ int i=0; + double *hv = (double *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuDscat(handle, (double *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + +int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, + alpha, (double *)devVecA->v_); + return(i); +} + +int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + + return(i); 
+} + +int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_, + devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, + double beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;j<devVecY->count_;j++) // one spgpuDaxpby call per stored vector, each offset by pitch*j + spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta, + (double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j); + return(i); +} + +int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDupd_xyz(handle,n, alpha,beta,gamma,delta, + (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_); + return(i); +} + +int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct 
MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDxyzw(handle,n, a,b,c,d,e,f, + (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_,(double*) devVecW->v_); + return(i); +} + +int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDmaxy(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_, + (double*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, + void *deviceVecB, double beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuDmaxypbz(handle, (double*)devVecZ->v_, n, beta, (double*)devVecZ->v_, + alpha, (double*) devVecA->v_, (double*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + +int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + 
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDabs(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_cudaGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuDabs(handle, (double*)devVecA->v_, n, alpha, (double*)devVecA->v_); + + return(i); +} + diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h new file mode 100644 index 00000000..0d2d2ab3 --- /dev/null +++ b/cuda/dvectordev.h @@ -0,0 +1,81 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" +#include "vector.h" + +int registerMappedDouble(void *, void **, int, double); +int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); +int writeMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld); +int readMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); +int readMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld); + +int setscalMultiVecDeviceDouble(double val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceDouble(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, double beta); +int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, double beta); + +int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA); 
+int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int amaxMultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); +int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); +int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, + void *deviceVecB, double beta, void *deviceVecZ); +int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA); +int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, void *deviceVecB); + diff --git a/cuda/elldev.c b/cuda/elldev.c new file mode 100644 index 00000000..cf49aadd --- /dev/null +++ b/cuda/elldev.c @@ -0,0 +1,750 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. 
The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#include "elldev.h" + +#define PASS_RS 0 + +EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ + EllDeviceParams params; + + if (elementType == SPGPU_TYPE_DOUBLE) + { + params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D; + } + else + { + params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S; + } + //For complex? 
+ params.elementType = elementType; + + params.rows = rows; + params.maxRowSize = maxRowSize; + params.avgRowSize = (nnzeros+rows-1)/rows; + params.columns = columns; + params.firstIndex = firstIndex; + + //params.pitch = computeEllAllocPitch(rows); + + return params; + +} +//new +int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params) +{ + struct EllDevice *tmp = (struct EllDevice *)malloc(sizeof(struct EllDevice)); + *remoteMatrix = (void *)tmp; + tmp->rows = params->rows; + tmp->cMPitch = computeEllAllocPitch(tmp->rows); + tmp->rPPitch = tmp->cMPitch; + tmp->pitch= tmp->cMPitch; + tmp->maxRowSize = params->maxRowSize; + tmp->avgRowSize = params->avgRowSize; + tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch; + //tmp->allocsize = (int)params->maxRowSize * tmp->cMPitch; + allocRemoteBuffer((void **)&(tmp->rS), ((size_t) tmp->rows)*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->diag), ((size_t) tmp->rows)*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->rP), ((size_t) tmp->allocsize)*sizeof(int)); + tmp->columns = params->columns; + tmp->baseIndex = params->firstIndex; + tmp->dataType = params->elementType; + //fprintf(stderr,"allocEllDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize); + if (params->elementType == SPGPU_TYPE_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(float)); + else if (params->elementType == SPGPU_TYPE_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(double)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(cuFloatComplex)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(cuDoubleComplex)); + else + return SPGPU_UNSUPPORTED; // Unsupported params + //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // 
tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return SPGPU_SUCCESS; +} + +//new +void zeroEllDevice(void *remoteMatrix) +{ + struct EllDevice *tmp = (struct EllDevice *) remoteMatrix; + + if (tmp->dataType == SPGPU_TYPE_FLOAT) + cudaMemset((void *)tmp->cM, 0, + ((size_t) tmp->allocsize)*sizeof(float)); + else if (tmp->dataType == SPGPU_TYPE_DOUBLE) + cudaMemset((void *)tmp->cM, 0, + ((size_t) tmp->allocsize)*sizeof(double)); + else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT) + cudaMemset((void *)tmp->cM, 0, + ((size_t) tmp->allocsize)*sizeof(cuFloatComplex)); + else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE) + cudaMemset((void *)tmp->cM, 0, + ((size_t) tmp->allocsize)*sizeof(cuDoubleComplex)); + else + return ; // Unsupported params + //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return; +} + + +void freeEllDevice(void* remoteMatrix) +{ + struct EllDevice *devMat = (struct EllDevice *) remoteMatrix; + //fprintf(stderr,"freeEllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->rS); + freeRemoteBuffer(devMat->rP); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ int i; + EllDeviceParams p; + + p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex); + i = allocEllDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +} + +void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS, + int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + + for (i=0; icount_ == x->count_, "ERROR: x and y don't share the same 
number of vectors"); + __assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)"); + __assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)"); +#endif + /*spgpuSellspmv (handle, (float*) y->v_, (float*)y->v_, alpha, + (float*) devMat->cM, devMat->rP, devMat->cMPitch, + devMat->rPPitch, devMat->rS, devMat->rows, + (float*)x->v_, beta, devMat->baseIndex);*/ + sspmdmm_gpu ( (float *)y->v_,y->count_, y->pitch_, (float *)y->v_, alpha, (float *)devMat->cM, devMat->rP, devMat->rS, + devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, + (float *)x->v_, beta, devMat->baseIndex); + return(i); +} + + +void +dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, int* rP, + int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + double *x, double beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + for (i=0; iv_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/ + /* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */ + /* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */ + /* devMat->pitch, y->count_, y->pitch_); */ + dspmdmm_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_, + alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, + devMat->maxRowSize, devMat->rows, devMat->pitch, + (double *)x->v_, beta, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +void +cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y, + cuFloatComplex alpha, cuFloatComplex* cM, + int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + cuFloatComplex *x, cuFloatComplex beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + for (i=0; iv_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, + 
devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, + (cuFloatComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +void +zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* cM, + int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + for (i=0; iv_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, + devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + //i = writeEllDevice(deviceMat, (void *) val, ja, irn); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + if 
(i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + } + return SPGPU_SUCCESS; +} + +int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return 
SPGPU_SUCCESS; +} + +int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + /*i = readEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void 
*)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int getEllDevicePitch(void* deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = devMat->pitch; //old + //i = getPitchEllDevice(deviceMat); + return(i); +} + +int getEllDeviceMaxRowSize(void* deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = devMat->maxRowSize; + return(i); +} + + + + +// New copying interface + +int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float *val, void *deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + float *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_cudaGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), ((size_t) (nr+1))*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) (nza))*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) (nza))*sizeof(float)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + 
if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz, + ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, + (float *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + } + return SPGPU_SUCCESS; +} + + + +int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double *val, void *deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + double *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_cudaGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(double)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + + if (i==0) 
psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz, + ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, + (double *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + } + return SPGPU_SUCCESS; +} + + +int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float complex *val, void *deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + float complex *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_cudaGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz, + ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, + (float complex *)devMat->cM); + // Ex updateFromHost function + //i 
= writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +} + + + +int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double complex *val, void *deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + double complex *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_cudaGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) (devMat->rows+1))*sizeof(int)); + + if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz, + ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, + (double complex *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if 
(i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +} + + +int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + float alpha=1.0; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuSellcsput(handle,alpha,(float *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1); + + return SPGPU_SUCCESS; +} + +int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + double alpha=1.0; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuDellcsput(handle,alpha,(double *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1); + + return SPGPU_SUCCESS; +} + + +int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val) +{ int i; + struct EllDevice *devMat = (struct 
EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0); + spgpuHandle_t handle=psb_cudaGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuCellcsput(handle,alpha,(cuFloatComplex *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1); + + return SPGPU_SUCCESS; +} + +int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0); + spgpuHandle_t handle=psb_cudaGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuZellcsput(handle,alpha,(cuDoubleComplex *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1); + + return SPGPU_SUCCESS; +} + + diff --git a/cuda/elldev.h b/cuda/elldev.h new file mode 100644 index 00000000..4e69bb3a --- /dev/null +++ b/cuda/elldev.h @@ -0,0 +1,179 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. 
Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _ELLDEV_H_ +#define _ELLDEV_H_ + +#include "cintrf.h" +#include "vectordev.h" +#include "cuComplex.h" +#include "ell.h" + +struct EllDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // row pointers (same size of cM) + int *rP; + int *diag; + // row size + int *rS; + + //matrix size (uncompressed) + int rows; + int columns; + + int pitch; //old + + int cMPitch; + + int rPPitch; + + int maxRowSize; + int avgRowSize; + + //allocation size (in elements) + int allocsize; + + /*(i.e. 
0 for C, 1 for Fortran)*/ + int baseIndex; + /* real/complex, single/double */ + int dataType; + +}; + +typedef struct EllDeviceParams +{ + // The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + // Largest row size + unsigned int maxRowSize; + unsigned int avgRowSize; + + // First index (e.g 0 or 1) + unsigned int firstIndex; +} EllDeviceParams; + +int computeEllAllocPitch(int rowsCount); +int FallocEllDevice(void** deviceMat, unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex); +int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params); +void freeEllDevice(void* remoteMatrix); + +int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag); + +int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag); + +int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); +int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); 
+int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, + float complex beta, void* deviceY); +int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, + double complex beta, void* deviceY); + + + +int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float *val, void *deviceMat); + +int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double *val, void *deviceMat); + +int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float complex *val, void *deviceMat); + +int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double complex *val, void *deviceMat); + + +void psi_cuda_s_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, float *devVal, + int *idiag, int *rP, float *cM); + +void psi_cuda_d_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, double *devVal, + int *idiag, int *rP, double *cM); + +void psi_cuda_c_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, float complex *devVal, + int *idiag, int *rP, float complex *cM); + +void psi_cuda_z_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, double complex *devVal, + int *idiag, int *rP, double complex *cM); + + +int dev_csputEllDeviceFloat(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceDouble(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, + 
void *ia, void *ja, void *val); +int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val); + +void zeroEllDevice(void* deviceMat); + +int getEllDevicePitch(void* deviceMat); + +// sparse Ell matrix-vector product +//int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); +//int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); + +#endif diff --git a/cuda/elldev_mod.F90 b/cuda/elldev_mod.F90 new file mode 100644 index 00000000..40cf8e49 --- /dev/null +++ b/cuda/elldev_mod.F90 @@ -0,0 +1,321 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module elldev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: elldev_parms + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: maxRowSize + integer(c_int) :: avgRowSize + integer(c_int) :: firstIndex + end type elldev_parms + + interface + function FgetEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex) & + & result(res) bind(c,name='getEllDeviceParams') + use iso_c_binding + import :: elldev_parms + type(elldev_parms) :: res + integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex + end function FgetEllDeviceParams + end interface + + + interface + function FallocEllDevice(deviceMat,rows,maxRowSize,nnzeros,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocEllDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocEllDevice + end interface + + + interface writeEllDevice + + function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_float) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceFloat + + function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_double) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function 
writeEllDeviceDouble + + function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_float_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceFloatComplex + + function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_double_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceDoubleComplex + + end interface + + interface readEllDevice + + function readEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_float) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceFloat + + function readEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_double) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceDouble + + function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_float_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceFloatComplex + + function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceDoubleComplex') + use 
iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_double_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceDoubleComplex + + end interface + + interface + subroutine freeEllDevice(deviceMat) & + & bind(c,name='freeEllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeEllDevice + end interface + + interface + subroutine zeroEllDevice(deviceMat) & + & bind(c,name='zeroEllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine zeroEllDevice + end interface + + interface + subroutine resetEllTimer() bind(c,name='resetEllTimer') + use iso_c_binding + end subroutine resetEllTimer + end interface + interface + function getEllTimer() & + & bind(c,name='getEllTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getEllTimer + end interface + + + interface + function getEllDevicePitch(deviceMat) & + & bind(c,name='getEllDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getEllDevicePitch + end interface + + interface + function getEllDeviceMaxRowSize(deviceMat) & + & bind(c,name='getEllDeviceMaxRowSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getEllDeviceMaxRowSize + end interface + + + interface psi_CopyCooToElg + function psiCopyCooToElgFloat(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgFloat + function psiCopyCooToElgDouble(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgDouble') + 
use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgDouble + function psiCopyCooToElgFloatComplex(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgFloatComplex + function psiCopyCooToElgDoubleComplex(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgDoubleComplex + end interface + + interface csputEllDeviceFloat + function dev_csputEllDeviceFloat(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceFloat + end interface + + interface csputEllDeviceDouble + function dev_csputEllDeviceDouble(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceDouble + end interface + + interface csputEllDeviceFloatComplex + function dev_csputEllDeviceFloatComplex(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + 
type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceFloatComplex + end interface + + interface csputEllDeviceDoubleComplex + function dev_csputEllDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceDoubleComplex + end interface + + interface spmvEllDevice + function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvEllDeviceFloat + function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvEllDeviceDouble + function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvEllDeviceFloatComplex + function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvEllDeviceDoubleComplex + end interface + +end module elldev_mod diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c new file mode 100644 index 00000000..094348ce --- /dev/null +++ b/cuda/fcusparse.c @@ -0,0 +1,75 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and 
use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/
+
+
+
+#include
+#include
+
+#include
+#include "fcusparse.h"
+
+static cusparseHandle_t *cusparse_handle=NULL;
+
+
+void setHandle(cusparseHandle_t);
+
+int FcusparseCreate()
+{
+  int ret=CUSPARSE_STATUS_SUCCESS;
+  cusparseHandle_t *handle;
+  if (cusparse_handle == NULL) {
+    if ((handle = (cusparseHandle_t *)malloc(sizeof(cusparseHandle_t)))==NULL)
+      return((int) CUSPARSE_STATUS_ALLOC_FAILED);
+    ret = (int)cusparseCreate(handle);
+    if (ret == CUSPARSE_STATUS_SUCCESS) cusparse_handle = handle;
+    else free(handle); /* creation failed: release the holder instead of leaking it */
+  }
+  /* ret is SUCCESS when a handle already existed or was just created, else the cusparseCreate/alloc status */
+  return (ret);
+}
+
+int FcusparseDestroy()
+{
+  int val=CUSPARSE_STATUS_SUCCESS; /* status returned when there is no handle to destroy */
+  if (cusparse_handle!=NULL){
+    val = (int) cusparseDestroy(*cusparse_handle);
+    free(cusparse_handle);
+  }
+  cusparse_handle=NULL;
+  return(val);
+}
+cusparseHandle_t *getHandle()
+{
+  if (cusparse_handle == NULL)
+    FcusparseCreate();
+  return(cusparse_handle);
+}
+
diff --git a/cuda/fcusparse.h b/cuda/fcusparse.h
new file mode 100644
index 00000000..73417591
--- /dev/null
+++ b/cuda/fcusparse.h
@@ -0,0 +1,72 @@
+  /* Parallel Sparse BLAS GPU plugin */
+  /* (C) Copyright 2013 */
+
+  /* Salvatore Filippone */
+  /* Alessandro Fanfarillo */
+
+  /* Redistribution and use in source and binary forms, with or without */
+  /* modification, are permitted provided that the following conditions */
+  /* are met: */
+  /* 1. Redistributions of source code must retain the above copyright */
+  /* notice, this list of conditions and the following disclaimer. */
+  /* 2. Redistributions in binary form must reproduce the above copyright */
+  /* notice, this list of conditions, and the following disclaimer in the */
+  /* documentation and/or other materials provided with the distribution. */
+  /* 3. The name of the PSBLAS group or the names of its contributors may */
+  /* not be used to endorse or promote products derived from this */
+  /* software without specific written permission.
*/ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef FCUSPARSE_ +#define FCUSPARSE_ + +#include +#if PSB_CUDA_SHORT_VERSION <= 10 +#include +#else +#include +#endif +#include "cintrf.h" +#include "vectordev.h" + +int FcusparseCreate(); +int FcusparseDestroy(); +cusparseHandle_t *getHandle(); + +#define CHECK_CUDA(func) \ +{ \ + cudaError_t status = (func); \ + if (status != cudaSuccess) { \ + printf("CUDA API failed at line %d with error: %s (%d)\n", \ + __LINE__, cudaGetErrorString(status), status); \ + return EXIT_FAILURE; \ + } \ +} + +#define CHECK_CUSPARSE(func) \ +{ \ + cusparseStatus_t status = (func); \ + if (status != CUSPARSE_STATUS_SUCCESS) { \ + printf("CUSPARSE API failed at line %d with error: %s (%d)\n", \ + __LINE__, cusparseGetErrorString(status), status); \ + return EXIT_FAILURE; \ + } \ +} + + +#endif + + diff --git a/cuda/fcusparse_dat.h b/cuda/fcusparse_dat.h new file mode 100644 index 00000000..09de4363 --- /dev/null +++ b/cuda/fcusparse_dat.h @@ -0,0 +1,136 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, 
with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ +#ifndef FCUSPARSE_DAT_ +#define FCUSPARSE_DAT_ + + +typedef struct T_CSRGDeviceMat +{ +#if PSB_CUDA_SHORT_VERSION <= 10 + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; +#elif PSB_CUDA_VERSION < 11030 + cusparseMatDescr_t descr; + csrsv2Info_t triang; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#else + cusparseSpMatDescr_t *spmvDescr; + cusparseSpSVDescr_t *spsvDescr; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#endif + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_CSRGDeviceMat; + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Cmat +{ + T_CSRGDeviceMat *mat; +} T_Cmat; + +#if PSB_CUDA_SHORT_VERSION <= 10 +typedef struct T_HYBGDeviceMat +{ + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; + cusparseHybMat_t hybA; + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_HYBGDeviceMat; + + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Hmat +{ + T_HYBGDeviceMat *mat; +} T_Hmat; +#endif + +int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz); +int T_CSRGDeviceFree(T_Cmat *Mat); + + +int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); + +int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz); + +#if PSB_CUDA_SHORT_VERSION <= 10 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat); +#elif PSB_CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int 
T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +#else + +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat); +#endif + + + +#if PSB_CUDA_SHORT_VERSION <= 10 + + +int T_HYBGDeviceFree(T_Hmat *Matrix); +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz); +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type); +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix); +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +#endif + +#endif diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h new file mode 100644 index 00000000..e8ce7934 --- /dev/null +++ b/cuda/fcusparse_fct.h @@ -0,0 +1,740 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. 
The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + +int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_CSRGDeviceMat *cMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); + TYPE ealpha=alpha, ebeta=beta; +#if PSB_CUDA_SHORT_VERSION <= 10 + /* getAddrMultiVecDevice(deviceX, &vX); */ + /* getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + CHECK_CUSPARSE(cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY)); + +#elif PSB_CUDA_VERSION < 11030 + size_t bfsz; + vX=x->v_; + vY=y->v_; +#if 1 + CHECK_CUSPARSE(cusparseCsrmvEx_bufferSize(*my_handle,CUSPARSE_ALG_MERGE_PATH, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz, + (const void *) &ealpha,CUSPARSE_BASE_TYPE, + cMat->descr, + (const void *) cMat->val, + CUSPARSE_BASE_TYPE, + (const int *) cMat->irp, + (const int *) cMat->ja, + (const void *) vX, CUSPARSE_BASE_TYPE, + (const void *) &ebeta, CUSPARSE_BASE_TYPE, + (void *) vY, CUSPARSE_BASE_TYPE, + CUSPARSE_BASE_TYPE, &bfsz)); +#else + bfsz=cMat->nz; +#endif + + if (bfsz > cMat->mvbsize) { + if (cMat->mvbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->mvbuffer)); + cMat->mvbuffer = NULL; + } + //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->mvbuffer), (size_t) bfsz); + cMat->mvbsize = bfsz; + } + CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle, + CUSPARSE_ALG_MERGE_PATH, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz, + (const void *) &ealpha,CUSPARSE_BASE_TYPE, + cMat->descr, + (const void *) cMat->val, CUSPARSE_BASE_TYPE, + (const int *) cMat->irp, (const int *) cMat->ja, + (const void *) vX, CUSPARSE_BASE_TYPE, + (const void *) &ebeta, CUSPARSE_BASE_TYPE, + 
(void *) vY, CUSPARSE_BASE_TYPE, + CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); + +#else + cusparseDnVecDescr_t vecX, vecY; + size_t bfsz; + + if (T_CSRGIsNullMvDescr(cMat)) { + cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *)); + } + T_CSRGCreateSpMVDescr(cMat); + vX=x->v_; + vY=y->v_; + CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, + CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, + &bfsz)); + if (bfsz > cMat->mvbsize) { + if (cMat->mvbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->mvbuffer)); + cMat->mvbuffer = NULL; + } + //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->mvbuffer), (size_t) bfsz); + + cMat->mvbsize = bfsz; + } + CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, + CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, + cMat->mvbuffer)); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); +#endif + return(0); +} + +int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_CSRGDeviceMat *cMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); +#if PSB_CUDA_SHORT_VERSION <= 10 + vX=x->v_; + vY=y->v_; + + return cusparseTcsrsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, cMat->triang, + (const TYPE *) vX, (TYPE *) vY); +#elif PSB_CUDA_VERSION < 11030 + vX=x->v_; + vY=y->v_; + 
CHECK_CUSPARSE(cusparseTcsrsv2_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, + (const TYPE *) &alpha, + cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, + (const TYPE *) vX, (TYPE *) vY, + CUSPARSE_SOLVE_POLICY_USE_LEVEL, + (void *) cMat->svbuffer)); +#else + cusparseDnVecDescr_t vecX, vecY; + size_t bfsz; + vX=x->v_; + vY=y->v_; + CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + if (T_CSRGIsNullMvDescr(cMat)) { + cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *)); + } + T_CSRGCreateSpMVDescr(cMat); + // fprintf(stderr,"Entry to SpSVDevice: %d %p\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr); + if (T_CSRGIsNullSvDescr(cMat)) { + cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); + cMat->svbsize=0; + CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) ); + //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize); + CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,*(cMat->spmvDescr),vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + &bfsz)); + if (bfsz > cMat->svbsize) { + if (cMat->svbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->svbuffer)); + cMat->svbuffer = NULL; + } + //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->svbuffer), (size_t) bfsz); + + cMat->svbsize=bfsz; + CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + *(cMat->spmvDescr), + vecX, vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + cMat->svbuffer)); + } + if (T_CSRGIsNullSvBuffer(cMat)) { + fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n"); + } + } + 
CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,*(cMat->spmvDescr),vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr))); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); +#endif + return(0); +} + +#if PSB_CUDA_VERSION >= 11030 +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) +{ + int64_t tr,tc,tz; + tr = cMat->m; + tc = cMat->n; + tz = cMat->nz; + CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr, + tr,tc,tz, + (void *) cMat->irp, + (void *) cMat->ja, + (void *) cMat->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ONE, + CUSPARSE_BASE_TYPE) ); +} +#endif +int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) +{ + T_CSRGDeviceMat *cMat; + int nr1=nr, nz1=nz, rc; + cusparseHandle_t *my_handle=getHandle(); + int bfsz; + + if ((nr<0)||(nc<0)||(nz<0)) + return((int) CUSPARSE_STATUS_INVALID_VALUE); + if ((cMat=(T_CSRGDeviceMat *) malloc(sizeof(T_CSRGDeviceMat)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + cMat->m = nr; + cMat->n = nc; + cMat->nz = nz; + if (nr1 == 0) nr1 = 1; + if (nz1 == 0) nz1 = 1; + if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), + (((size_t) nr1+1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), + (((size_t) nz1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), + (((size_t) nz1)*sizeof(TYPE)))) != 0) + return(rc); +#if PSB_CUDA_SHORT_VERSION <= 10 + if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) + return(rc); + if ((rc= cusparseCreateSolveAnalysisInfo(&(cMat->triang))) !=0) + return(rc); +#elif PSB_CUDA_VERSION < 11030 + if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) + return(rc); + CHECK_CUSPARSE(cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL)); + 
CHECK_CUSPARSE(cusparseSetMatDiagType(cMat->descr,CUSPARSE_DIAG_TYPE_NON_UNIT)); + CHECK_CUSPARSE(cusparseSetMatIndexBase(cMat->descr,CUSPARSE_INDEX_BASE_ONE)); + CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&(cMat->triang))); + if (cMat->nz > 0) { + CHECK_CUSPARSE(cusparseTcsrsv2_bufferSize(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, &bfsz)); + } else { + bfsz = 0; + } + + /* if (cMat->svbuffer != NULL) { */ + /* fprintf(stderr,"Calling cudaFree\n"); */ + /* CHECK_CUDA(cudaFree(cMat->svbuffer)); */ + /* cMat->svbuffer = NULL; */ + /* } */ + if (bfsz > 0) { + //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->svbuffer), (size_t) bfsz); + + } else { + cMat->svbuffer=NULL; + } + cMat->svbsize=bfsz; + + cMat->mvbuffer=NULL; + cMat->mvbsize = 0; + + +#else + + cMat->spmvDescr=NULL; + cMat->spsvDescr=NULL; + cMat->mvbuffer=NULL; + cMat->svbuffer=NULL; + cMat->mvbsize=0; + cMat->svbsize=0; +#endif + Matrix->mat = cMat; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDeviceFree(T_Cmat *Matrix) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + freeRemoteBuffer(cMat->irp); + freeRemoteBuffer(cMat->ja); + freeRemoteBuffer(cMat->val); +#if PSB_CUDA_SHORT_VERSION <= 10 + cusparseDestroyMatDescr(cMat->descr); + cusparseDestroySolveAnalysisInfo(cMat->triang); +#elif PSB_CUDA_VERSION < 11030 + cusparseDestroyMatDescr(cMat->descr); + cusparseDestroyCsrsv2Info(cMat->triang); +#else + if (!T_CSRGIsNullMvDescr(cMat)) { + // already destroyed spmvDescr, just free the pointer + free(cMat->spmvDescr); + cMat->spmvDescr=NULL; + } + if (cMat->mvbuffer!=NULL) + CHECK_CUDA( cudaFree(cMat->mvbuffer)); + cMat->mvbuffer=NULL; + cMat->mvbsize=0; + if (!T_CSRGIsNullSvDescr(cMat)) { + CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); + free(cMat->spsvDescr); + cMat->spsvDescr=NULL; + } + if (cMat->svbuffer!=NULL) + CHECK_CUDA( 
cudaFree(cMat->svbuffer)); + cMat->svbuffer=NULL; + cMat->svbsize=0; +#endif + free(cMat); + Matrix->mat = NULL; + } + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDeviceGetParms(T_Cmat *Matrix,int *nr, int *nc, int *nz) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + *nr = cMat->m ; + *nc = cMat->n ; + *nz = cMat->nz ; + return(CUSPARSE_STATUS_SUCCESS); + } else { + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + } +} + +#if PSB_CUDA_SHORT_VERSION <= 10 + +int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(cMat->descr,type)); +} + +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Matrix) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + int rc, buffersize; + cusparseHandle_t *my_handle=getHandle(); + cusparseSolveAnalysisInfo_t info; + + rc= (int) cusparseTcsrsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang); + if (rc !=0) { + fprintf(stderr,"From csrsv_analysis: %d\n",rc); + } + return(rc); +} + +#elif PSB_CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat 
*cMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(cMat->descr,type)); +} + +#else + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseFillMode_t mode=type; + + CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)), + CUSPARSE_SPMAT_FILL_MODE, + (const void*) &mode, + sizeof(cusparseFillMode_t))); + return(0); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseDiagType_t cutype=type; + CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)), + CUSPARSE_SPMAT_DIAG_TYPE, + (const void*) &cutype, + sizeof(cusparseDiagType_t))); + return(0); +} + +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spmvDescr == NULL); +} + +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat) +{ + return(cMat->svbuffer == NULL); +} +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spsvDescr == NULL); +} + +#endif + +int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseHandle_t *my_handle=getHandle(); + + if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp, + ((size_t) m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja, + ((size_t) nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val, + ((size_t) nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); +#if (PSB_CUDA_SHORT_VERSION > 10 ) && (PSB_CUDA_VERSION < 11030) + if (cusparseGetMatType(cMat->descr)== CUSPARSE_MATRIX_TYPE_TRIANGULAR) { + // Why do we need to set TYPE_GENERAL??? cuSPARSE can be misterious sometimes. 
+ cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); + CHECK_CUSPARSE(cusparseTcsrsv2_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL, + cMat->svbuffer)); + } +#else + //cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL); +#endif + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; + T_CSRGDeviceMat *cMat = Matrix->mat; + + if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, + ((size_t) m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, + ((size_t) nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, + ((size_t) nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); + + return(CUSPARSE_STATUS_SUCCESS); +} + +#if PSB_CUDA_SHORT_VERSION <= 10 +int T_HYBGDeviceFree(T_Hmat *Matrix) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + if (hMat != NULL) { + cusparseDestroyMatDescr(hMat->descr); + cusparseDestroySolveAnalysisInfo(hMat->triang); + cusparseDestroyHybMat(hMat->hybA); + free(hMat); + } + Matrix->mat = NULL; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_HYBGDeviceMat *hMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n,rc; + cusparseMatrixType_t type; + cusparseHandle_t *my_handle=getHandle(); + + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Spmv MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Spmv DiagType: 
%d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Spmv FillMode: %d\n",rc); */ + /* Dirty trick: apparently hybmv does not accept a triangular + matrix even though it should not make a difference. So + we claim it's general anyway */ + type = cusparseGetMatType(hMat->descr); + rc = cusparseSetMatType(hMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); + if (rc == 0) + rc = (int) cusparseThybmv(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + (const TYPE *) &alpha, hMat->descr, hMat->hybA, + (const TYPE *) vX, (const TYPE *) &beta, + (TYPE *) vY); + if (rc == 0) + rc = cusparseSetMatType(hMat->descr,type); + return(rc); +} + +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz) +{ + T_HYBGDeviceMat *hMat; + int nr1=nr, nz1=nz, rc; + if ((nr<0)||(nc<0)||(nz<0)) + return((int) CUSPARSE_STATUS_INVALID_VALUE); + if ((hMat=(T_HYBGDeviceMat *) malloc(sizeof(T_HYBGDeviceMat)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + hMat->m = nr; + hMat->n = nc; + hMat->nz = nz; + + if ((rc= cusparseCreateMatDescr(&(hMat->descr))) !=0) + return(rc); + if ((rc= cusparseCreateSolveAnalysisInfo(&(hMat->triang))) !=0) + return(rc); + if((rc = cusparseCreateHybMat(&(hMat->hybA))) != 0) + return(rc); + Matrix->mat = hMat; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatType(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(hMat->descr,type)); +} + +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE 
alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + //beta?? + T_HYBGDeviceMat *hMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + return cusparseThybsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + (const TYPE *) &alpha, hMat->descr, + hMat->hybA, hMat->triang, + (const TYPE *) vX, (TYPE *) vY); +} + +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + cusparseSolveAnalysisInfo_t info; + int rc; + cusparseHandle_t *my_handle=getHandle(); + + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Analysis MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Analysis DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Analysis FillMode: %d\n",rc); */ + rc = (int) cusparseThybsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + hMat->descr, hMat->hybA, hMat->triang); + + if (rc !=0) { + fprintf(stderr,"From csrsv_analysis: %d\n",rc); + } + return(rc); +} + +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; double t1,t2; + int nr1=m, nz1=nz; + T_HYBGDeviceMat *hMat= Matrix->mat; + cusparseHandle_t *my_handle=getHandle(); + + if (nr1 == 0) nr1 = 1; + if (nz1 == 0) nz1 = 1; + if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), + (((size_t) nr1+1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), + (((size_t) nz1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), + (((size_t) nz1)*sizeof(TYPE)))) != 0) + return(rc); + + if ((rc=writeRemoteBuffer((void *) irp, (void *) hMat->irp, + ((size_t) 
m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=writeRemoteBuffer((void *) ja,(void *) hMat->ja, + ((size_t) nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=writeRemoteBuffer((void *) val, (void *) hMat->val, + ((size_t) nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Conversion MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Conversion DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Conversion FillMode: %d\n",rc); */ + //t1=etime(); + rc = (int) cusparseTcsr2hyb(*my_handle, m, n, + hMat->descr, + (const TYPE *)hMat->val, + (const int *)hMat->irp, (const int *)hMat->ja, + hMat->hybA,0, + CUSPARSE_HYB_PARTITION_AUTO); + + freeRemoteBuffer(hMat->irp); hMat->irp = NULL; + freeRemoteBuffer(hMat->ja); hMat->ja = NULL; + freeRemoteBuffer(hMat->val); hMat->val = NULL; + + //cudaSync(); + //t2 = etime(); + //fprintf(stderr,"Inner call to cusparseTcsr2hyb: %lf\n",(t2-t1)); + if (rc != 0) { + fprintf(stderr,"From csr2hyb: %d\n",rc); + } + return(rc); +} +#endif + diff --git a/cuda/hdiagdev.c b/cuda/hdiagdev.c new file mode 100644 index 00000000..813e4fab --- /dev/null +++ b/cuda/hdiagdev.c @@ -0,0 +1,394 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. 
*/ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + +#include "hdiagdev.h" +#include +#include +#include +#include +#define DEBUG 0 +void freeHdiagDevice(void* remoteMatrix) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->hackOffsets); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + + +HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType) +{ + HdiagDeviceParams params; + + params.elementType = elementType; + //numero di elementi di val + params.rows = rows; + params.columns = columns; + params.allocationHeight = allocationHeight; + params.hackSize = hackSize; + params.hackCount = hackCount; + + return params; + +} + +int allocHdiagDevice(void **remoteMatrix, HdiagDeviceParams* params) +{ + struct HdiagDevice *tmp = (struct HdiagDevice *)malloc(sizeof(struct HdiagDevice)); + int ret=SPGPU_SUCCESS; + int *tmpOff = NULL; + + *remoteMatrix = (void *) tmp; +#if DEBUG + fprintf(stderr,"From alloc: %p\n",*remoteMatrix); +#endif + + tmp->rows = params->rows; + + tmp->hackSize = params->hackSize; + + tmp->cols = params->columns; + + tmp->allocationHeight = params->allocationHeight; + + tmp->hackCount = params->hackCount; + + + +#if DEBUG + fprintf(stderr,"hackcount %d allocationHeight %d\n",tmp->hackCount,tmp->allocationHeight); +#endif + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hackOffsets), + ((size_t) tmp->hackCount+1)*sizeof(int)); + + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets), + ((size_t) tmp->allocationHeight)*sizeof(int)); + + /* tmp->baseIndex = params->firstIndex; */ + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(int)); + } + else if (params->elementType == 
SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->hackSize)*tmp->allocationHeight*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType) +{ int i=0; + HdiagDeviceParams p; + + p = getHdiagDeviceParams(rows, cols, allocationHeight, + hackSize, hackCount,elementType); + + i = allocHdiagDevice(deviceMat, &p); +#if DEBUG + fprintf(stderr," Falloc %p \n",*deviceMat); +#endif + + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); + +} + +int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets) +{ int i=0,fo,fa,j,k,p; + char buf_a[255], buf_o[255],tmp[255]; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + i=SPGPU_SUCCESS; + + +#if DEBUG + fprintf(stderr," Write %p \n",devMat); + + fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n", + devMat->allocationHeight,devMat->hackCount); + fprintf(stderr,"HackOffsets: "); + for (j=0; jhackCount+1; j++) + fprintf(stderr," %d",hackOffsets[j]); + fprintf(stderr,"\n"); + fprintf(stderr,"diaOffsets: 
"); + for (j=0; jallocationHeight; j++) + fprintf(stderr," %d",hdiaOffsets[j]); + fprintf(stderr,"\n"); +#if 1 + fprintf(stderr,"values: \n"); + p=0; + for (j=0; jhackCount; j++){ + fprintf(stderr,"Hack no: %d\n",j+1); + for (k=0; khackSize*(devMat->allocationHeight/devMat->hackCount); k++){ + fprintf(stderr," %d %lf\n",p+1,val[p]); p++; + } + } + fprintf(stderr,"\n"); +#endif +#endif + + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, + ((size_t) devMat->hackCount+1)*sizeof(int)); + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, + ((size_t) devMat->allocationHeight)*sizeof(int)); + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocationHeight)*devMat->hackSize*sizeof(double)); + if (i!=0) + fprintf(stderr,"Error in writeHdiagDeviceDouble %d\n",i); + +#if DEBUG + fprintf(stderr," EndWrite %p \n",devMat); +#endif + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + + + +long long int sizeofHdiagDeviceDouble(void* deviceMat) +{ int i=0,fo,fa; + int *hoff=NULL,*hackoff=NULL; + long long int memsize=0; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + + memsize += (devMat->hackCount+1)*sizeof(int); + memsize += devMat->allocationHeight*sizeof(int); + memsize += devMat->allocationHeight*devMat->hackSize*sizeof(double); + return(memsize); +} + + + +int readHdiagDeviceDouble(void* deviceMat, double* a, int* off) +{ int i; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); */ + /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ + + + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + 
double beta, void* deviceY) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif +#if DEBUG + fprintf(stderr," First %p \n",devMat); + fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize, + devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM); +#endif + spgpuDhdiaspmv (handle, (double*)y->v_, (double *)y->v_, alpha, + (double *)devMat->cM,devMat->hdiaOffsets, + devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols, + x->v_, beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +} + +int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets) +{ int i=0,fo,fa,j,k,p; + char buf_a[255], buf_o[255],tmp[255]; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + i=SPGPU_SUCCESS; + + +#if DEBUG + fprintf(stderr," Write %p \n",devMat); + + fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n", + devMat->allocationHeight,devMat->hackCount); + fprintf(stderr,"HackOffsets: "); + for (j=0; jhackCount+1; j++) + fprintf(stderr," %d",hackOffsets[j]); + fprintf(stderr,"\n"); + fprintf(stderr,"diaOffsets: "); + for (j=0; jallocationHeight; j++) + fprintf(stderr," %d",hdiaOffsets[j]); + fprintf(stderr,"\n"); +#if 1 + fprintf(stderr,"values: \n"); + p=0; + for (j=0; jhackCount; j++){ + fprintf(stderr,"Hack no: %d\n",j+1); + for (k=0; khackSize*(devMat->allocationHeight/devMat->hackCount); k++){ + fprintf(stderr," %d %lf\n",p+1,val[p]); p++; + } + } + 
fprintf(stderr,"\n"); +#endif +#endif + + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, + ((size_t) devMat->hackCount+1)*sizeof(int)); + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, + ((size_t) devMat->allocationHeight)*sizeof(int)); + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocationHeight)*devMat->hackSize*sizeof(float)); + if (i!=0) + fprintf(stderr,"Error in writeHdiagDeviceFloat %d\n",i); + +#if DEBUG + fprintf(stderr," EndWrite %p \n",devMat); +#endif + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + + + +long long int sizeofHdiagDeviceFloat(void* deviceMat) +{ int i=0,fo,fa; + int *hoff=NULL,*hackoff=NULL; + long long int memsize=0; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + + memsize += (devMat->hackCount+1)*sizeof(int); + memsize += devMat->allocationHeight*sizeof(int); + memsize += devMat->allocationHeight*devMat->hackSize*sizeof(float); + + return(memsize); +} + + + +int readHdiagDeviceFloat(void* deviceMat, float* a, int* off) +{ int i; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); */ + /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ + + + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the 
same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif +#if DEBUG + fprintf(stderr," First %p \n",devMat); + fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize, + devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM); +#endif + spgpuShdiaspmv (handle, (float*)y->v_, (float *)y->v_, alpha, + (float *)devMat->cM,devMat->hdiaOffsets, + devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols, + x->v_, beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +} + diff --git a/cuda/hdiagdev.h b/cuda/hdiagdev.h new file mode 100644 index 00000000..c02fcc69 --- /dev/null +++ b/cuda/hdiagdev.h @@ -0,0 +1,107 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _HDIAGDEV_H_ +#define _HDIAGDEV_H_ + +#include "cintrf.h" +#include "vectordev.h" +#include "hdia.h" + +struct HdiagDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // offset (same size of cM) + int *hdiaOffsets; + + int *hackOffsets; + + int hackCount; + + int rows; + + int cols; + + + int hackSize; + + int allocationHeight; + +}; + +typedef struct HdiagDeviceParams +{ + + unsigned int elementType; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + //unsigned int hackOffsLength; + + // Number of columns. 
+ // Used for error-checking + unsigned int columns; + + unsigned int hackSize; + unsigned int hackCount; + unsigned int allocationHeight; + + +} HdiagDeviceParams; + + + +HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType); + +int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType); + +int allocHdiagDevice(void ** remoteMatrix, HdiagDeviceParams* params); + + +void freeHdiagDevice(void* remoteMatrix); + +int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets); +int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); + +int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets); +int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); + + +#endif diff --git a/cuda/hdiagdev_mod.F90 b/cuda/hdiagdev_mod.F90 new file mode 100644 index 00000000..9a3530e7 --- /dev/null +++ b/cuda/hdiagdev_mod.F90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! 
software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module hdiagdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: hdiagdev_parms + integer(c_int) :: element_type + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: hackSize + integer(c_int) :: hackCount + integer(c_int) :: allocationHeight + end type hdiagdev_parms + + ! interface computeHdiaHacksCount + ! function computeHdiaHacksCountDouble(allocationHeight,hackOffsets,hackSize, & + ! & diaValues,diaValuesPitch,diags,rows)& + ! & result(res) bind(c,name='computeHdiaHackOffsetsDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! integer(c_int), value :: rows,diags,diaValuesPitch,hackSize,elementType + ! real(c_double) :: diaValues(rows,:) + ! integer(c_int) :: hackOffsets,allocationHeight + ! end function computeHdiaHacksCountDouble + ! 
end interface computeHdiaHacksCount + + interface + function FgetHdiagDeviceParams(rows, columns, allocationHeight,hackSize, & + & hackCount, elementType) & + & result(res) bind(c,name='getHdiagDeviceParams') + use iso_c_binding + import :: hdiagdev_parms + type(hdiagdev_parms) :: res + integer(c_int), value :: rows,columns,allocationHeight,& + & elementType,hackSize,hackCount + end function FgetHdiagDeviceParams + end interface + + + interface + function FallocHdiagDevice(deviceMat,rows,columns,allocationHeight,& + & hackSize,hackCount,elementType) & + & result(res) bind(c,name='FallocHdiagDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,allocationHeight,hackSize,& + & hackCount,elementType + type(c_ptr) :: deviceMat + end function FallocHdiagDevice + end interface + + + interface + function sizeofHdiagDeviceDouble(deviceMat) & + & result(res) bind(c,name='sizeofHdiagDeviceDouble') + use iso_c_binding + integer(c_long_long) :: res + type(c_ptr), value :: deviceMat + end function sizeofHdiagDeviceDouble + end interface + + interface writeHdiagDevice + + function writeHdiagDeviceFloat(deviceMat,val,hdiaOffsets, hackOffsets) & + & result(res) bind(c,name='writeHdiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: hdiaOffsets(*), hackOffsets(*) + end function writeHdiagDeviceFloat + + function writeHdiagDeviceDouble(deviceMat,val,hdiaOffsets, hackOffsets) & + & result(res) bind(c,name='writeHdiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: hdiaOffsets(*), hackOffsets(*) + end function writeHdiagDeviceDouble + + end interface writeHdiagDevice + +!!$ interface readHdiagDevice +!!$ +!!$ function readHdiagDeviceFloat(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceFloat') +!!$ use iso_c_binding +!!$ integer(c_int) :: res 
+!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ real(c_float) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceFloat +!!$ +!!$ function readHdiagDeviceDouble(deviceMat,a,off,n) & +!!$ & result(res) bind(c,name='readHdiagDeviceDouble') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int),value :: n +!!$ real(c_double) :: a(n,*) +!!$ integer(c_int) :: off(*) +!!$ end function readHdiagDeviceDouble +!!$ +!!$ function readHdiagDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ complex(c_float_complex) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceFloatComplex +!!$ +!!$ function readHdiagDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ complex(c_double_complex) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceDoubleComplex +!!$ +!!$ end interface readHdiagDevice +!!$ + interface + subroutine freeHdiagDevice(deviceMat) & + & bind(c,name='freeHdiagDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeHdiagDevice + end interface + + + interface spmvHdiagDevice + function spmvHdiagDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHdiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvHdiagDeviceFloat + function spmvHdiagDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHdiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + 
type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvHdiagDeviceDouble +!!$ function spmvHdiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) & +!!$ & result(res) bind(c,name='spmvHdiagDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat, x, y +!!$ complex(c_float_complex),value :: alpha, beta +!!$ end function spmvHdiagDeviceFloatComplex +!!$ function spmvHdiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & +!!$ & result(res) bind(c,name='spmvHdiagDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat, x, y +!!$ complex(c_double_complex),value :: alpha, beta +!!$ end function spmvHdiagDeviceDoubleComplex + end interface spmvHdiagDevice + +end module hdiagdev_mod diff --git a/cuda/hlldev.c b/cuda/hlldev.c new file mode 100644 index 00000000..186831d3 --- /dev/null +++ b/cuda/hlldev.c @@ -0,0 +1,618 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. 
*/ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "hlldev.h" +//new +HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, unsigned int elementType, unsigned int firstIndex) +{ + HllDeviceParams params; + + params.elementType = elementType; + params.hackSize = hksize; + //numero di elementi di val + params.allocsize = allocsize; + params.rows = rows; + params.nzt = nzeros; + params.avgNzr = (nzeros+rows-1)/rows; + params.firstIndex = firstIndex; + return params; + +} + +int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros, + int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr) +{ + + + if (mat!=NULL) { + *hackOffsLength = mat->hackOffsLength ; + *hksize = mat->hackSize ; + *nzeros = mat->nzt ; + *allocsize = mat->allocsize ; + *rows = mat->rows ; + *avgnzr = mat->avgNzr ; + *firstIndex = mat->baseIndex ; + return SPGPU_SUCCESS; + } else { + return SPGPU_UNSUPPORTED; + } +} +//new +int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params) +{ + HllDevice *tmp = (HllDevice *)malloc(sizeof(HllDevice)); + int ret=SPGPU_SUCCESS; + size_t tt; + int ti; + 
*remoteMatrix = (void *)tmp; + + tmp->hackSize = params->hackSize; + + tmp->allocsize = params->allocsize; + + tmp->rows = params->rows; + tmp->avgNzr = params->avgNzr; + tmp->nzt = params->nzt; + tmp->baseIndex = params->firstIndex; + //fprintf(stderr,"Allocating HLG with %d avgNzr\n",params->avgNzr); + tmp->hackOffsLength = (int)(tmp->rows+tmp->hackSize-1)/tmp->hackSize; + + //printf("hackOffsLength %d\n",tmp->hackOffsLength); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->rP), + ((size_t) tmp->allocsize)*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->rS), + ((size_t) tmp->rows)*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->diag), + ((size_t) tmp->rows)*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hackOffs), + (((size_t) tmp->hackOffsLength+1)*sizeof(int))); + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) { + /* tt = ((size_t) tmp->allocsize)*sizeof(double); + ti = ((size_t) tmp->allocsize)*sizeof(double); + fprintf(stderr,"%ld %d %d\n",tt, ti, tmp->allocsize);*/ + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(double)); + } + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), + ((size_t) tmp->allocsize)*sizeof(cuDoubleComplex)); + } + else + 
return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +void freeHllDevice(void* remoteMatrix) +{ + HllDevice *devMat = (HllDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->rS); + freeRemoteBuffer(devMat->diag); + freeRemoteBuffer(devMat->rP); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, + unsigned int elementType, unsigned int firstIndex) +{ int i; + HllDeviceParams p; + + p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex); + i = allocHllDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocHllDevice",i); + } + return(i); +} + + +int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuShellspmv (handle, (float *)y->v_, (float *)y->v_, alpha, (float *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +//new +int spmvHllDeviceDouble(void *deviceMat, 
double alpha, void* deviceX, + double beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex); + //cudaSync(); + return SPGPU_SUCCESS; +} + +int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, + float complex beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + + cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha)); + cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta)); +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double 
*)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuChellspmv (handle, (cuFloatComplex *)y->v_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, + double complex beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + + cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha)); + cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta)); +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + + spgpuZhellspmv (handle, (cuDoubleComplex *)y->v_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(float)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) 
devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + //i = writeEllDevice(deviceMat, (void *) val, ja, irn); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(double)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void 
*)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + /*i = readEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + 
return SPGPU_SUCCESS; +} + +int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + ((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, + ((size_t) devMat->allocsize)*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, + 
((size_t) devMat->allocsize)*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, + ((size_t) devMat->rows)*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +// New copy routines. + +int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float *val, void *deviceMat) +{ int i,j; + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + float *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(float)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_cudaGetHandle(); + psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM); + + 
freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i); + } + return SPGPU_SUCCESS; +} + +int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double *val, void *deviceMat) +{ int i,j; + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + double *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(double)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(double)); + //fprintf(stderr,"WriteRemoteBuffer val %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer ja %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer irn %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i); + //cudaSync(); + //fprintf(stderr," hacksz: %d \n",hacksz); + handle = psb_cudaGetHandle(); + psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) 
devMat->diag, (int *) devMat->rP, + (double *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i); + } + return SPGPU_SUCCESS; +} + +int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float complex *val, void *deviceMat) +{ int i,j; + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + float complex *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuFloatComplex)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_cudaGetHandle(); + psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, + (float complex *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ 
+ if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +} + +int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double complex *val, void *deviceMat) +{ int i,j; + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + double complex *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), ((size_t) nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), ((size_t) nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), ((size_t) nza)*sizeof(cuDoubleComplex)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, + ((size_t) nza)*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, + ((size_t) nza)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, + ((size_t) devMat->rows)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, + ((size_t) devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, + ((size_t) devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_cudaGetHandle(); + psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, + (double complex *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +} diff --git a/cuda/hlldev.h b/cuda/hlldev.h new file mode 100644 index 
00000000..3b47f5ea --- /dev/null +++ b/cuda/hlldev.h @@ -0,0 +1,156 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + +#ifndef _HLLDEV_H_ +#define _HLLDEV_H_ + +#include "cintrf.h" +#include "hell.h" +#include "vectordev.h" + +typedef struct hlldevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // row pointers (same size of cM) + int *rP; + + // row size and diagonal position + int *rS; + int *diag; + + int *hackOffs; + + int rows; + int avgNzr; + int hackOffsLength; + int nzt; + int hackSize; //must be multiple of 32 + + //matrix size (uncompressed) + //int rows; + //int columns; + + //allocation size + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; +} HllDevice; + +typedef struct hlldeviceparams +{ + + unsigned int elementType; + + unsigned int hackSize; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + unsigned int avgNzr; + unsigned int nzt; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + // unsigned int columns; + + unsigned int allocsize; + + // First index (e.g 0 or 1) + unsigned int firstIndex; + +} HllDeviceParams; + + +HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, + unsigned int elementType, unsigned int firstIndex); +int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros, + int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr); +int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, unsigned int elementType, unsigned int firstIndex); +int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params); +void freeHllDevice(void* remoteMatrix); +int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int 
writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceFloatComplex(void* deviceMat, float complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); + + +int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float *val, void *deviceMat); +int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double *val, void *deviceMat); +int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, + int noffs, int isz, int *irn, + int *hoffs, int *idisp, int *ja, + float complex *val, void *deviceMat); +int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, + int noffs, int isz, int *irn, + int *hoffs, int *idisp, int *ja, + double complex *val, void *deviceMat); + +int psi_cuda_s_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, float *val, + int *idiag, int *rP, float *cM); +int psi_cuda_d_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, double *val, + int *idiag, int *rP, double *cM); +int psi_cuda_c_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, float complex *val, + int *idiag, int *rP, float complex *cM); +int psi_cuda_z_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int 
noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, double complex *val, + int *idiag, int *rP, double complex *cM); + + +#endif diff --git a/cuda/hlldev_mod.F90 b/cuda/hlldev_mod.F90 new file mode 100644 index 00000000..90b8e13c --- /dev/null +++ b/cuda/hlldev_mod.F90 @@ -0,0 +1,268 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+! Fortran bindings for the C routines declared in cuda/hlldev.h, which
+! manage HLL (hacked ELLPACK) matrices held in remote (GPU) buffers.
+module hlldev_mod
+  use iso_c_binding
+  use core_mod
+
+  ! Interoperable mirror of the C struct HllDeviceParams. The components must
+  ! match the C members one for one and in the same order: nzt was missing,
+  ! which made the by-value result of bldHllDeviceParams larger than the
+  ! Fortran type receiving it.
+  type, bind(c) :: hlldev_parms
+    integer(c_int) :: element_type
+    integer(c_int) :: hackSize
+    integer(c_int) :: rows
+    integer(c_int) :: avgNzr
+    integer(c_int) :: nzt
+    integer(c_int) :: allocsize
+    integer(c_int) :: firstIndex
+  end type hlldev_parms
+
+  ! Build the allocation parameters from scalar values.
+  interface
+    function bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex) &
+         & result(res) bind(c,name='bldHllDeviceParams')
+      use iso_c_binding
+      import :: hlldev_parms
+      type(hlldev_parms) :: res
+      integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex
+    end function bldHllDeviceParams
+  end interface
+
+  ! Query the sizes stored in an existing device matrix descriptor; the
+  ! integer arguments are passed by reference and filled by the C routine.
+  interface
+    function getHllDeviceParams(deviceMat,hksize, rows, nzeros, allocsize,&
+         & hackOffsLength, firstIndex,avgnzr) &
+         & result(res) bind(c,name='getHllDeviceParams')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      integer(c_int) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr
+    end function getHllDeviceParams
+  end interface
+
+
+  ! Allocate a device matrix; deviceMat is passed by reference (void** in C)
+  ! and receives the new handle.
+  interface
+    function FallocHllDevice(deviceMat,hksize,rows, nzeros,allocsize, &
+         & elementType,firstIndex) &
+         & result(res) bind(c,name='FallocHllDevice')
+      use iso_c_binding
+      integer(c_int) :: res
+      integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex
+      type(c_ptr) :: deviceMat
+    end function FallocHllDevice
+  end interface
+
+
+  ! Host -> device copy of the matrix arrays, one specific per data type.
+  interface writeHllDevice
+
+    function writeHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) &
+         & result(res) bind(c,name='writeHllDeviceFloat')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      real(c_float) :: val(*)
+      integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
+    end function writeHllDeviceFloat
+
+    function writeHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) &
+         & result(res) bind(c,name='writeHllDeviceDouble')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      real(c_double) :: val(*)
+      integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
+    end function writeHllDeviceDouble
+
+    function writeHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
+         & result(res) bind(c,name='writeHllDeviceFloatComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      complex(c_float_complex) :: val(*)
+      integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
+    end function writeHllDeviceFloatComplex
+
+    function writeHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
+         & result(res) bind(c,name='writeHllDeviceDoubleComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      complex(c_double_complex) :: val(*)
+      integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
+    end function writeHllDeviceDoubleComplex
+
+  end interface
+
+  ! Device -> host copy of the matrix arrays, one specific per data type.
+  interface readHllDevice
+
+    function readHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) &
+         & result(res) bind(c,name='readHllDeviceFloat')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      real(c_float) :: val(*)
+      integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
+    end function readHllDeviceFloat
+
+    function readHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) &
+         & result(res) bind(c,name='readHllDeviceDouble')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      real(c_double) :: val(*)
+      integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
+    end function readHllDeviceDouble
+
+    function readHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
+         & result(res) bind(c,name='readHllDeviceFloatComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      complex(c_float_complex) :: val(*)
+      integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
+    end function readHllDeviceFloatComplex
+
+    function readHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) &
+         & result(res) bind(c,name='readHllDeviceDoubleComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat
+      complex(c_double_complex) :: val(*)
+      integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*)
+    end function readHllDeviceDoubleComplex
+
+  end interface
+
+  ! Release a device matrix previously obtained from FallocHllDevice.
+  interface
+    subroutine freeHllDevice(deviceMat) &
+         & bind(c,name='freeHllDevice')
+      use iso_c_binding
+      type(c_ptr), value :: deviceMat
+    end subroutine freeHllDevice
+  end interface
+
+
+  ! COO -> HLG conversion performed on the device, one specific per data type.
+  interface psi_CopyCooToHlg
+    function psiCopyCooToHlgFloat(nr, nc, nza, hacksz, noffs, isz, irn, &
+         & hoffs, idisp, ja, val, deviceMat) &
+         & result(res) bind(c,name='psiCopyCooToHlgFloat')
+      use iso_c_binding
+      integer(c_int) :: res
+      integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
+      type(c_ptr), value :: deviceMat
+      real(c_float) :: val(*)
+      integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
+    end function psiCopyCooToHlgFloat
+    function psiCopyCooToHlgDouble(nr, nc, nza, hacksz, noffs, isz, irn, &
+         & hoffs, idisp, ja, val, deviceMat) &
+         & result(res) bind(c,name='psiCopyCooToHlgDouble')
+      use iso_c_binding
+      integer(c_int) :: res
+      integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
+      type(c_ptr), value :: deviceMat
+      real(c_double) :: val(*)
+      integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
+    end function psiCopyCooToHlgDouble
+    function psiCopyCooToHlgFloatComplex(nr, nc, nza, hacksz, noffs, isz, irn, &
+         & hoffs, idisp, ja, val, deviceMat) &
+         & result(res) bind(c,name='psiCopyCooToHlgFloatComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
+      type(c_ptr), value :: deviceMat
+      complex(c_float_complex) :: val(*)
+      integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
+    end function psiCopyCooToHlgFloatComplex
+    function psiCopyCooToHlgDoubleComplex(nr, nc, nza, hacksz, noffs, isz, irn, &
+         & hoffs, idisp, ja, val, deviceMat) &
+         & result(res) bind(c,name='psiCopyCooToHlgDoubleComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz
+      type(c_ptr), value :: deviceMat
+      complex(c_double_complex) :: val(*)
+      integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*)
+    end function psiCopyCooToHlgDoubleComplex
+  end interface
+
+
+  ! Sparse matrix-vector product on the device; x and y are handles to
+  ! device multivectors and the scalars are passed by value.
+  interface spmvHllDevice
+
+    function spmvHllDeviceFloat(deviceMat,alpha,x,beta,y) &
+         & result(res) bind(c,name='spmvHllDeviceFloat')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat, x, y
+      real(c_float),value :: alpha, beta
+    end function spmvHllDeviceFloat
+
+    function spmvHllDeviceDouble(deviceMat,alpha,x,beta,y) &
+         & result(res) bind(c,name='spmvHllDeviceDouble')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat, x, y
+      real(c_double),value :: alpha, beta
+    end function spmvHllDeviceDouble
+
+    function spmvHllDeviceFloatComplex(deviceMat,alpha,x,beta,y) &
+         & result(res) bind(c,name='spmvHllDeviceFloatComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat, x, y
+      complex(c_float_complex),value :: alpha, beta
+    end function spmvHllDeviceFloatComplex
+
+    function spmvHllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) &
+         & result(res) bind(c,name='spmvHllDeviceDoubleComplex')
+      use iso_c_binding
+      integer(c_int) :: res
+      type(c_ptr), value :: deviceMat, x, y
+      complex(c_double_complex),value :: alpha, beta
+    end function spmvHllDeviceDoubleComplex
+
+  end interface
+
+end module hlldev_mod
diff --git a/cuda/impl/Makefile b/cuda/impl/Makefile
new file mode 100755
index 00000000..2d9a774d
--- /dev/null
+++ b/cuda/impl/Makefile
@@ -0,0 +1,306 @@
+include
../../Make.inc
+LIBDIR=../../lib
+INCDIR=../../include
+MODDIR=../../modules
+PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
+#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
+LDLIBS=$(PSBLDLIBS)
+#
+# Compilers and such
+#
+#CCOPT= -g
+FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
+CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR)
+LIBNAME=libpsb_gpu.a
+CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
+CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
+FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES)
+
+#
+# Objects archived into ../$(LIBNAME).
+# Each object is listed exactly once.
+#
+OBJS= \
+psb_d_cuda_cp_csrg_from_coo.o \
+psb_d_cuda_cp_csrg_from_fmt.o \
+psb_d_cuda_cp_elg_from_coo.o \
+psb_d_cuda_cp_elg_from_fmt.o \
+psb_s_cuda_cp_csrg_from_coo.o \
+psb_s_cuda_cp_csrg_from_fmt.o \
+psb_s_cuda_csrg_allocate_mnnz.o \
+psb_s_cuda_csrg_csmm.o \
+psb_s_cuda_csrg_csmv.o \
+psb_s_cuda_csrg_mold.o \
+psb_s_cuda_csrg_reallocate_nz.o \
+psb_s_cuda_csrg_scal.o \
+psb_s_cuda_csrg_scals.o \
+psb_s_cuda_csrg_from_gpu.o \
+psb_s_cuda_csrg_to_gpu.o \
+psb_s_cuda_csrg_vect_mv.o \
+psb_s_cuda_csrg_inner_vect_sv.o \
+psb_d_cuda_csrg_allocate_mnnz.o \
+psb_d_cuda_csrg_csmm.o \
+psb_d_cuda_csrg_csmv.o \
+psb_d_cuda_csrg_mold.o \
+psb_d_cuda_csrg_reallocate_nz.o \
+psb_d_cuda_csrg_scal.o \
+psb_d_cuda_csrg_scals.o \
+psb_d_cuda_csrg_from_gpu.o \
+psb_d_cuda_csrg_to_gpu.o \
+psb_d_cuda_csrg_vect_mv.o \
+psb_d_cuda_csrg_inner_vect_sv.o \
+psb_d_cuda_elg_allocate_mnnz.o \
+psb_d_cuda_elg_asb.o \
+psb_d_cuda_elg_csmm.o \
+psb_d_cuda_elg_csmv.o \
+psb_d_cuda_elg_csput.o \
+psb_d_cuda_elg_from_gpu.o \
+psb_d_cuda_elg_inner_vect_sv.o \
+psb_d_cuda_elg_mold.o \
+psb_d_cuda_elg_reallocate_nz.o \
+psb_d_cuda_elg_scal.o \
+psb_d_cuda_elg_scals.o \
+psb_d_cuda_elg_to_gpu.o \
+psb_d_cuda_elg_vect_mv.o \
+psb_d_cuda_mv_csrg_from_coo.o \
+psb_d_cuda_mv_csrg_from_fmt.o \
+psb_d_cuda_mv_elg_from_coo.o \
+psb_d_cuda_mv_elg_from_fmt.o \
+psb_s_cuda_mv_csrg_from_coo.o \
+psb_s_cuda_mv_csrg_from_fmt.o \
+psb_s_cuda_cp_elg_from_coo.o \
+psb_s_cuda_cp_elg_from_fmt.o \
+psb_s_cuda_elg_allocate_mnnz.o \
+psb_s_cuda_elg_asb.o \
+psb_s_cuda_elg_csmm.o \
+psb_s_cuda_elg_csmv.o \
+psb_s_cuda_elg_csput.o \
+psb_s_cuda_elg_inner_vect_sv.o \
+psb_s_cuda_elg_mold.o \
+psb_s_cuda_elg_reallocate_nz.o \
+psb_s_cuda_elg_scal.o \
+psb_s_cuda_elg_scals.o \
+psb_s_cuda_elg_to_gpu.o \
+psb_s_cuda_elg_from_gpu.o \
+psb_s_cuda_elg_vect_mv.o \
+psb_s_cuda_mv_elg_from_coo.o \
+psb_s_cuda_mv_elg_from_fmt.o \
+psb_s_cuda_cp_hlg_from_fmt.o \
+psb_s_cuda_cp_hlg_from_coo.o \
+psb_d_cuda_cp_hlg_from_fmt.o \
+psb_d_cuda_cp_hlg_from_coo.o \
+psb_d_cuda_hlg_allocate_mnnz.o \
+psb_d_cuda_hlg_csmm.o \
+psb_d_cuda_hlg_csmv.o \
+psb_d_cuda_hlg_inner_vect_sv.o \
+psb_d_cuda_hlg_mold.o \
+psb_d_cuda_hlg_reallocate_nz.o \
+psb_d_cuda_hlg_scal.o \
+psb_d_cuda_hlg_scals.o \
+psb_d_cuda_hlg_from_gpu.o \
+psb_d_cuda_hlg_to_gpu.o \
+psb_d_cuda_hlg_vect_mv.o \
+psb_s_cuda_hlg_allocate_mnnz.o \
+psb_s_cuda_hlg_csmm.o \
+psb_s_cuda_hlg_csmv.o \
+psb_s_cuda_hlg_inner_vect_sv.o \
+psb_s_cuda_hlg_mold.o \
+psb_s_cuda_hlg_reallocate_nz.o \
+psb_s_cuda_hlg_scal.o \
+psb_s_cuda_hlg_scals.o \
+psb_s_cuda_hlg_from_gpu.o \
+psb_s_cuda_hlg_to_gpu.o \
+psb_s_cuda_hlg_vect_mv.o \
+psb_s_cuda_mv_hlg_from_coo.o \
+psb_s_cuda_mv_hlg_from_fmt.o \
+psb_d_cuda_mv_hlg_from_coo.o \
+psb_d_cuda_mv_hlg_from_fmt.o \
+psb_s_cuda_hybg_allocate_mnnz.o \
+psb_s_cuda_hybg_csmm.o \
+psb_s_cuda_hybg_csmv.o \
+psb_s_cuda_hybg_reallocate_nz.o \
+psb_s_cuda_hybg_scal.o \
+psb_s_cuda_hybg_scals.o \
+psb_s_cuda_hybg_to_gpu.o \
+psb_s_cuda_hybg_vect_mv.o \
+psb_s_cuda_hybg_inner_vect_sv.o \
+psb_s_cuda_cp_hybg_from_coo.o \
+psb_s_cuda_cp_hybg_from_fmt.o \
+psb_s_cuda_mv_hybg_from_fmt.o \
+psb_s_cuda_mv_hybg_from_coo.o \
+psb_s_cuda_hybg_mold.o \
+psb_d_cuda_hybg_allocate_mnnz.o \
+psb_d_cuda_hybg_csmm.o \
+psb_d_cuda_hybg_csmv.o \
+psb_d_cuda_hybg_reallocate_nz.o \
+psb_d_cuda_hybg_scal.o \
+psb_d_cuda_hybg_scals.o \
+psb_d_cuda_hybg_to_gpu.o \
+psb_d_cuda_hybg_vect_mv.o \
+psb_d_cuda_hybg_inner_vect_sv.o \
+psb_d_cuda_cp_hybg_from_coo.o \
+psb_d_cuda_cp_hybg_from_fmt.o \
+psb_d_cuda_mv_hybg_from_fmt.o \
+psb_d_cuda_mv_hybg_from_coo.o \
+psb_d_cuda_hybg_mold.o \
+psb_z_cuda_cp_csrg_from_coo.o \
+psb_z_cuda_cp_csrg_from_fmt.o \
+psb_z_cuda_cp_elg_from_coo.o \
+psb_z_cuda_cp_elg_from_fmt.o \
+psb_c_cuda_cp_csrg_from_coo.o \
+psb_c_cuda_cp_csrg_from_fmt.o \
+psb_c_cuda_csrg_allocate_mnnz.o \
+psb_c_cuda_csrg_csmm.o \
+psb_c_cuda_csrg_csmv.o \
+psb_c_cuda_csrg_mold.o \
+psb_c_cuda_csrg_reallocate_nz.o \
+psb_c_cuda_csrg_scal.o \
+psb_c_cuda_csrg_scals.o \
+psb_c_cuda_csrg_from_gpu.o \
+psb_c_cuda_csrg_to_gpu.o \
+psb_c_cuda_csrg_vect_mv.o \
+psb_c_cuda_csrg_inner_vect_sv.o \
+psb_z_cuda_csrg_allocate_mnnz.o \
+psb_z_cuda_csrg_csmm.o \
+psb_z_cuda_csrg_csmv.o \
+psb_z_cuda_csrg_mold.o \
+psb_z_cuda_csrg_reallocate_nz.o \
+psb_z_cuda_csrg_scal.o \
+psb_z_cuda_csrg_scals.o \
+psb_z_cuda_csrg_from_gpu.o \
+psb_z_cuda_csrg_to_gpu.o \
+psb_z_cuda_csrg_vect_mv.o \
+psb_z_cuda_csrg_inner_vect_sv.o \
+psb_z_cuda_elg_allocate_mnnz.o \
+psb_z_cuda_elg_asb.o \
+psb_z_cuda_elg_csmm.o \
+psb_z_cuda_elg_csmv.o \
+psb_z_cuda_elg_csput.o \
+psb_z_cuda_elg_inner_vect_sv.o \
+psb_z_cuda_elg_mold.o \
+psb_z_cuda_elg_reallocate_nz.o \
+psb_z_cuda_elg_scal.o \
+psb_z_cuda_elg_scals.o \
+psb_z_cuda_elg_to_gpu.o \
+psb_z_cuda_elg_from_gpu.o \
+psb_z_cuda_elg_vect_mv.o \
+psb_z_cuda_mv_csrg_from_coo.o \
+psb_z_cuda_mv_csrg_from_fmt.o \
+psb_z_cuda_mv_elg_from_coo.o \
+psb_z_cuda_mv_elg_from_fmt.o \
+psb_c_cuda_mv_csrg_from_coo.o \
+psb_c_cuda_mv_csrg_from_fmt.o \
+psb_c_cuda_cp_elg_from_coo.o \
+psb_c_cuda_cp_elg_from_fmt.o \
+psb_c_cuda_elg_allocate_mnnz.o \
+psb_c_cuda_elg_asb.o \
+psb_c_cuda_elg_csmm.o \
+psb_c_cuda_elg_csmv.o \
+psb_c_cuda_elg_csput.o \
+psb_c_cuda_elg_inner_vect_sv.o \
+psb_c_cuda_elg_mold.o \
+psb_c_cuda_elg_reallocate_nz.o \
+psb_c_cuda_elg_scal.o \
+psb_c_cuda_elg_scals.o \
+psb_c_cuda_elg_to_gpu.o \
+psb_c_cuda_elg_from_gpu.o \
+psb_c_cuda_elg_vect_mv.o \
+psb_c_cuda_mv_elg_from_coo.o \
+psb_c_cuda_mv_elg_from_fmt.o \
+psb_c_cuda_cp_hlg_from_fmt.o \
+psb_c_cuda_cp_hlg_from_coo.o \
+psb_z_cuda_cp_hlg_from_fmt.o \
+psb_z_cuda_cp_hlg_from_coo.o \
+psb_z_cuda_hlg_allocate_mnnz.o \
+psb_z_cuda_hlg_csmm.o \
+psb_z_cuda_hlg_csmv.o \
+psb_z_cuda_hlg_inner_vect_sv.o \
+psb_z_cuda_hlg_mold.o \
+psb_z_cuda_hlg_reallocate_nz.o \
+psb_z_cuda_hlg_scal.o \
+psb_z_cuda_hlg_scals.o \
+psb_z_cuda_hlg_from_gpu.o \
+psb_z_cuda_hlg_to_gpu.o \
+psb_z_cuda_hlg_vect_mv.o \
+psb_c_cuda_hlg_allocate_mnnz.o \
+psb_c_cuda_hlg_csmm.o \
+psb_c_cuda_hlg_csmv.o \
+psb_c_cuda_hlg_inner_vect_sv.o \
+psb_c_cuda_hlg_mold.o \
+psb_c_cuda_hlg_reallocate_nz.o \
+psb_c_cuda_hlg_scal.o \
+psb_c_cuda_hlg_scals.o \
+psb_c_cuda_hlg_from_gpu.o \
+psb_c_cuda_hlg_to_gpu.o \
+psb_c_cuda_hlg_vect_mv.o \
+psb_c_cuda_mv_hlg_from_coo.o \
+psb_c_cuda_mv_hlg_from_fmt.o \
+psb_z_cuda_mv_hlg_from_coo.o \
+psb_z_cuda_mv_hlg_from_fmt.o \
+psb_c_cuda_hybg_allocate_mnnz.o \
+psb_c_cuda_hybg_csmm.o \
+psb_c_cuda_hybg_csmv.o \
+psb_c_cuda_hybg_reallocate_nz.o \
+psb_c_cuda_hybg_scal.o \
+psb_c_cuda_hybg_scals.o \
+psb_c_cuda_hybg_to_gpu.o \
+psb_c_cuda_hybg_vect_mv.o \
+psb_c_cuda_hybg_inner_vect_sv.o \
+psb_c_cuda_cp_hybg_from_coo.o \
+psb_c_cuda_cp_hybg_from_fmt.o \
+psb_c_cuda_mv_hybg_from_fmt.o \
+psb_c_cuda_mv_hybg_from_coo.o \
+psb_c_cuda_hybg_mold.o \
+psb_z_cuda_hybg_allocate_mnnz.o \
+psb_z_cuda_hybg_csmm.o \
+psb_z_cuda_hybg_csmv.o \
+psb_z_cuda_hybg_reallocate_nz.o \
+psb_z_cuda_hybg_scal.o \
+psb_z_cuda_hybg_scals.o \
+psb_z_cuda_hybg_to_gpu.o \
+psb_z_cuda_hybg_vect_mv.o \
+psb_z_cuda_hybg_inner_vect_sv.o \
+psb_z_cuda_cp_hybg_from_coo.o \
+psb_z_cuda_cp_hybg_from_fmt.o \
+psb_z_cuda_mv_hybg_from_fmt.o \
+psb_z_cuda_mv_hybg_from_coo.o \
+psb_z_cuda_hybg_mold.o \
+psb_d_cuda_cp_diag_from_coo.o \
+psb_d_cuda_mv_diag_from_coo.o \
+psb_d_cuda_diag_to_gpu.o \
+psb_d_cuda_diag_csmv.o \
+psb_d_cuda_diag_mold.o \
+psb_d_cuda_diag_vect_mv.o \
+psb_d_cuda_cp_hdiag_from_coo.o \
+psb_d_cuda_mv_hdiag_from_coo.o \
+psb_d_cuda_hdiag_to_gpu.o \
+psb_d_cuda_hdiag_csmv.o \
+psb_d_cuda_hdiag_mold.o \
+psb_d_cuda_hdiag_vect_mv.o \
+psb_s_cuda_cp_hdiag_from_coo.o \
+psb_s_cuda_mv_hdiag_from_coo.o \
+psb_s_cuda_hdiag_to_gpu.o \
+psb_s_cuda_hdiag_csmv.o \
+psb_s_cuda_hdiag_mold.o \
+psb_s_cuda_hdiag_vect_mv.o \
+psb_s_cuda_dnsg_mat_impl.o \
+psb_d_cuda_dnsg_mat_impl.o \
+psb_c_cuda_dnsg_mat_impl.o \
+psb_z_cuda_dnsg_mat_impl.o
+
+# None of these targets produces a file with its own name, so mark
+# them phony: a stray file called objs, lib or clean must not
+# short-circuit the rule.
+.PHONY: objs lib clean
+
+objs: $(OBJS)
+lib: objs
+	$(AR) ../$(LIBNAME) $(OBJS)
+
+clean:
+	/bin/rm -f $(OBJS)
+
+# Suffix rules. The *CUDEFINES, *OPT and CXXINCLUDES variables are not
+# set in this file; presumably they come from ../../Make.inc.
+.c.o:
+	$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
+.f90.o:
+	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
+.F90.o:
+	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
+.cpp.o:
+	$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90
new file mode 100644
index 00000000..aa6a3ba3
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90
@@ -0,0 +1,56 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!
software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! Copy a COO matrix into a CSRG matrix: run the parent CSR cp_from_coo,
+! then call to_gpu. Any nonzero status from either step is reported to
+! the caller as psb_err_alloc_dealloc_.
+!
+subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo
+  implicit none
+
+  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)          :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+
+  ! Host-side conversion first; upload only if it succeeded.
+  call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
+  if (info == 0) call a%to_gpu(info)
+
+  ! Collapse every failure into the single error code callers expect.
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+end subroutine psb_c_cuda_cp_csrg_from_coo
diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90
new file mode 100644
index 00000000..e9d42139
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90
@@ -0,0 +1,55 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1.
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+!
+! Copy a matrix in any base format into a CSRG matrix. COO input is
+! delegated to cp_from_coo; every other format goes through the parent
+! CSR cp_from_fmt followed by to_gpu.
+!
+subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt
+  implicit none
+
+  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_c_base_sparse_mat), intent(in)         :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_c_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+  class default
+    ! Upload only when the host-side conversion succeeded.
+    call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
+    if (info == 0) call a%to_gpu(info)
+  end select
+
+end subroutine psb_c_cuda_cp_csrg_from_fmt
diff --git a/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90
new file mode 100644
index 00000000..e70a044e
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90
@@ -0,0 +1,58 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Copy a COO matrix into a DIAG matrix: run the parent DIA cp_from_coo,
+! then call to_gpu.
+!
+!   a     - destination matrix (inout)
+!   b     - source COO matrix (in)
+!   info  - psb_success_ on success, psb_err_alloc_dealloc_ if either
+!           the host conversion or the upload reports a nonzero status
+!
+subroutine psb_c_cuda_cp_diag_from_coo(a,b,info)
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo
+  implicit none
+
+  class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)          :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+  call a%psb_c_dia_sparse_mat%cp_from_coo(b,info)
+
+  ! Do not upload (and do not overwrite info) if the host-side
+  ! conversion already failed.
+  if (info == 0) call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_c_cuda_cp_diag_from_coo
diff --git a/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90
new file mode 100644
index 00000000..c6105e88
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90
@@ -0,0 +1,161 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! Copy a COO matrix into an ELG matrix.
+!
+! Row counts and row displacements are computed on the host by
+! psi_c_count_ell_from_coo; the device matrix is then (re)allocated and
+! filled through psi_CopyCooToElg. If b is not ordered by rows, a
+! row-sorted copy is built first, because the displacement array indexes
+! the entries row by row.
+!
+!   a     - destination matrix (inout)
+!   b     - source COO matrix (in)
+!   info  - psb_success_ on success, psb_err_alloc_dealloc_ if any
+!           step reports a nonzero status
+!
+subroutine psb_c_cuda_cp_elg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo
+  use psi_ext_util_mod
+  use psb_cuda_env_mod
+  implicit none
+
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)         :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  !locals
+  integer(psb_ipk_)              :: nza, nr, nc, nzm, ldv, hacksize
+  type(psb_c_coo_sparse_mat)     :: tmp
+  integer(psb_ipk_), allocatable :: idisp(:)
+
+  info     = psb_success_
+  hacksize = max(1,psb_cuda_WarpSize())
+  if (b%is_dev()) call b%sync()
+
+  if (b%is_by_rows()) then
+
+    call psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize)
+    ! a%irn and idisp are not usable if the count failed.
+    if (info /= psb_success_) goto 9999
+
+    if (c_associated(a%deviceMat)) then
+      call freeEllDevice(a%deviceMat)
+    endif
+
+    nr   = b%get_nrows()
+    nc   = b%get_ncols()
+    nza  = b%get_nzeros()
+    ! Single-precision complex data: use the matching spgpu type tag.
+    info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_complex_float,1)
+
+    if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
+         & a%irn,idisp,b%ja,b%val, a%deviceMat)
+    call a%set_dev()
+  else
+    ! Build a row-sorted copy; tmp%fix guarantees tmp%is_by_rows().
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call tmp%fix(info)
+    if (info == psb_success_) &
+         & call psi_c_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize)
+    if (info /= psb_success_) goto 9999
+
+    if (c_associated(a%deviceMat)) then
+      call freeEllDevice(a%deviceMat)
+    endif
+
+    ! Sizes are read from tmp: fix may have changed the entry count.
+    nr   = tmp%get_nrows()
+    nc   = tmp%get_ncols()
+    nza  = tmp%get_nzeros()
+    info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_complex_float,1)
+
+    if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
+         & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat)
+
+    call tmp%free()
+    call a%set_dev()
+  end if
+
+  if (info /= psb_success_) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+
+  !
+  ! Count the entries of each row of b into a%irn and build the
+  ! displacement array idisp (idisp(1)=0, idisp(i+1)=entries in rows 1..i).
+  !
+  !   ldv      - number of rows rounded up to a multiple of hacksize
+  !   nzm      - largest number of entries found in any row
+  !   info     - status of the two psb_realloc calls
+  !   hacksize - optional rounding unit; values <= 1 mean no rounding
+  !
+  subroutine psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize)
+
+    use psb_base_mod
+    use psi_ext_util_mod
+    implicit none
+
+    class(psb_c_ell_sparse_mat), intent(inout)  :: a
+    class(psb_c_coo_sparse_mat), intent(in)     :: b
+    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
+    integer(psb_ipk_), intent(out)              :: info, nzm, ldv
+    integer(psb_ipk_), intent(in), optional     :: hacksize
+
+    !locals
+    integer(psb_ipk_) :: nza, nr, i, ir, hsz_
+
+    info = psb_success_
+
+    nr   = b%get_nrows()
+    nza  = b%get_nzeros()
+
+    hsz_ = 1
+    if (present(hacksize)) then
+      if (hacksize> 1) hsz_ = hacksize
+    end if
+    ! Make ldv a multiple of hacksize
+    ldv = ((nr+hsz_-1)/hsz_)*hsz_
+
+    ! If it is sorted then we can lessen memory impact
+    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+
+    ! First compute the number of nonzeros in each row.
+    call psb_realloc(nr,a%irn,info)
+    if (info == psb_success_) call psb_realloc(nr+1,idisp,info)
+    if (info /= psb_success_) return
+
+    a%irn = 0
+    do i=1, nza
+      ir = b%ia(i)
+      a%irn(ir) = a%irn(ir) + 1
+    end do
+
+    ! Running sum of the row counts gives the displacements.
+    nzm = 0
+    a%nzt = 0
+    idisp(1) = 0
+    do i=1,nr
+      nzm = max(nzm,a%irn(i))
+      a%nzt = a%nzt + a%irn(i)
+      idisp(i+1) = a%nzt
+    end do
+
+  end subroutine psi_c_count_ell_from_coo
+
+end subroutine psb_c_cuda_cp_elg_from_coo
diff --git a/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90
new file mode 100644
index 00000000..f7e5351e
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90
@@ -0,0 +1,89 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Copy a matrix in any base format into an ELG matrix.
+!
+!   COO input   - delegated to cp_from_coo
+!   ELL input   - host arrays are copied, resized to the pitch and
+!                 maximum row size returned by FgetEllDeviceParams,
+!                 then uploaded with to_gpu
+!   other input - converted to a temporary COO and moved in with
+!                 mv_from_coo
+!
+!   info - status of the last step executed; a step is skipped once an
+!          earlier one has reported a nonzero status
+!
+subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt
+  implicit none
+
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_c_base_sparse_mat), intent(in)        :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  !locals
+  type(psb_c_coo_sparse_mat) :: tmp
+  integer(psb_ipk_)          :: nza, nc, ld, nzm, m
+  type(elldev_parms)         :: gpu_parms
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_c_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+  class is (psb_c_ell_sparse_mat)
+    nzm = psb_size(b%ja,2)
+    m   = b%get_nrows()
+    nc  = b%get_ncols()
+    nza = b%get_nzeros()
+    ! Single-precision complex data: use the matching spgpu type tag.
+    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_complex_float,1)
+    ld  = gpu_parms%pitch
+    nzm = gpu_parms%maxRowSize
+    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
+    if (info == 0) call psb_safe_cpy( b%irn,   a%irn , info)
+    if (info == 0) call psb_safe_cpy( b%ja ,   a%ja  , info)
+    if (info == 0) call psb_safe_cpy( b%val,   a%val , info)
+    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
+    if (info == 0) then
+      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+    end if
+    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+    if (info == 0) then
+      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+    end if
+    a%nzt = nza
+    ! Skip the upload (and keep the error code) if a host copy failed.
+    if (info == 0) call a%to_gpu(info)
+
+  class default
+
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_c_cuda_cp_elg_from_fmt
diff --git a/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90
new file mode 100644
index 00000000..9be741c9
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90
@@ -0,0 +1,63 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! Copy a COO matrix into an HDIAG matrix: set the hack size from
+! psb_cuda_WarpSize(), run the parent HDIA cp_from_coo, then call to_gpu.
+!
+!   a     - destination matrix (inout)
+!   b     - source COO matrix (in)
+!   info  - psb_success_ on success, psb_err_alloc_dealloc_ if either
+!           the host conversion or the upload reports a nonzero status
+!
+subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info)
+
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo
+  use psb_cuda_env_mod
+  implicit none
+
+  class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)           :: b
+  integer(psb_ipk_), intent(out)                    :: info
+
+  info = psb_success_
+
+  a%hacksize = psb_cuda_WarpSize()
+
+  call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info)
+
+  ! Do not upload (and do not overwrite info) if the host-side
+  ! conversion already failed.
+  if (info == 0) call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_c_cuda_cp_hdiag_from_coo
diff --git a/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90
new file mode 100644
index 00000000..8b0d9f2a
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90
@@ -0,0 +1,190 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Copy a COO matrix into an HLG matrix.
+!
+! Row counts, hack offsets and row displacements are computed on the
+! host by psi_compute_hckoff_from_coo; the device matrix is then
+! (re)allocated and filled through psi_CopyCooToHlg. If b is not ordered
+! by rows, a row-sorted copy is built first.
+!
+!   a     - destination matrix (inout)
+!   b     - source COO matrix (in)
+!   info  - psb_success_ on success; the raw status of
+!           psi_compute_hckoff_from_coo if that helper fails;
+!           psb_err_alloc_dealloc_ for any other failure
+!
+subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_cuda_env_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo
+  implicit none
+
+  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)         :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  !locals
+  type(psb_c_coo_sparse_mat)     :: tmp
+  integer(psb_ipk_)              :: hksz, nza, nr, nc, isz, noffs
+  integer(psb_ipk_), allocatable :: idisp(:)
+  logical, parameter             :: debug=.false.
+
+  info = psb_success_
+  hksz = max(1,psb_cuda_WarpSize())
+
+  if (b%is_by_rows()) then
+
+    nr   = b%get_nrows()
+    nc   = b%get_ncols()
+    nza  = b%get_nzeros()
+    if (debug) write(0,*) 'Copying through GPU',nza
+    call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
+    if (info /=0) then
+      write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
+      return
+    end if
+    if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
+
+    if (c_associated(a%deviceMat)) then
+      call freeHllDevice(a%deviceMat)
+    endif
+    ! Single-precision complex data: use the matching spgpu type tag.
+    info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_complex_float,1)
+    if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
+         & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat)
+    call a%set_dev()
+  else
+    ! This is to guarantee tmp%is_by_rows()
+    call b%cp_to_coo(tmp,info)
+    if (info == 0) call tmp%fix(info)
+    ! Without a valid sorted copy nothing below can work.
+    if (info /= 0) goto 9999
+
+    nr   = tmp%get_nrows()
+    nc   = tmp%get_ncols()
+    nza  = tmp%get_nzeros()
+    if (debug) write(0,*) 'Copying through GPU'
+    call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info)
+    if (info /=0) then
+      write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
+      return
+    end if
+    if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
+
+    if (c_associated(a%deviceMat)) then
+      call freeHllDevice(a%deviceMat)
+    endif
+    info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_complex_float,1)
+    if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
+         & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat)
+
+    call tmp%free()
+    call a%set_dev()
+  end if
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+  !
+  ! Count the entries of each row of b into a%irn, then build
+  !   a%hkoffs - start offset of every hack (group of hksz rows); each
+  !              hack takes (longest row in the hack)*hksz slots
+  !   idisp    - row displacements: idisp(1)=0, idisp(i+1)=entries in
+  !              rows 1..i
+  !   noffs    - number of hacks, (nr+hksz-1)/hksz
+  !   isz      - a%hkoffs(noffs+1), the allocation size of a%ja / a%val
+  !   info     - status of the psb_realloc calls
+  !
+  subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
+    use psb_base_mod
+    use psi_ext_util_mod
+    implicit none
+    class(psb_c_hll_sparse_mat), intent(inout)  :: a
+    class(psb_c_coo_sparse_mat), intent(in)     :: b
+    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
+    integer(psb_ipk_), intent(in)               :: hksz
+    integer(psb_ipk_), intent(out)              :: info, noffs, isz
+
+    !locals
+    integer(psb_ipk_)  :: nza, nr, nc, i, j, k, ir, mxrwl
+    logical, parameter :: debug=.false.
+
+    info = 0
+    nr   = b%get_nrows()
+    nc   = b%get_ncols()
+    nza  = b%get_nzeros()
+
+    ! If it is sorted then we can lessen memory impact
+    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+    if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza
+    ! First compute the number of nonzeros in each row.
+    call psb_realloc(nr,a%irn,info)
+    if (info == 0) call psb_realloc(nr+1,idisp,info)
+    if (info /= 0) return
+    a%irn = 0
+    if (debug) then
+      ! Debug build: report row indices outside 1..nr instead of
+      ! indexing a%irn with them.
+      do i=1, nza
+        if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then
+          a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
+        else
+          write(0,*) 'Out of bouds IA ',i,b%ia(i),nr
+        end if
+      end do
+    else
+      do i=1, nza
+        a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
+      end do
+    end if
+    a%nzt = nza
+
+
+    ! Second. Figure out the block offsets.
+    call a%set_hksz(hksz)
+    noffs = (nr+hksz-1)/hksz
+    call psb_realloc(noffs+1,a%hkoffs,info)
+    if (debug) write(0,*) ' noffsets ',noffs,info
+    if (info /= 0) return
+    a%hkoffs(1) = 0
+    j=1
+    idisp(1) = 0
+    do i=1,nr,hksz
+      ! ir = rows in this hack (the last hack may be shorter)
+      ir    = min(hksz,nr-i+1)
+      mxrwl = a%irn(i)
+      idisp(i+1) = idisp(i) + a%irn(i)
+      do k=1,ir-1
+        idisp(i+k+1) = idisp(i+k) + a%irn(i+k)
+        mxrwl = max(mxrwl,a%irn(i+k))
+      end do
+      a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
+      j = j + 1
+    end do
+
+    !
+    ! At this point a%hkoffs(noffs+1) contains the allocation
+    ! size a%ja a%val.
+    !
+    isz = a%hkoffs(noffs+1)
+!!$    write(*,*) 'End of psi_comput_hckoff ',info
+  end subroutine psi_compute_hckoff_from_coo
+
+end subroutine psb_c_cuda_cp_hlg_from_coo
diff --git a/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90
new file mode 100644
index 00000000..96a5c5e8
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90
@@ -0,0 +1,62 @@
+!
Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
!
! psb_c_cuda_cp_hlg_from_fmt
!
!   Copy B, whatever its dynamic type, into the CUDA HLG matrix A.
!
!   Arguments:
!     a    - inout, destination matrix.
!     b    - in, source matrix.
!     info - out, 0 on success, psb_err_alloc_dealloc_ on any failure.
!
subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info)

  use psb_base_mod
  use hlldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt
  implicit none

  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(in)        :: b
  integer(psb_ipk_), intent(out)                  :: info

  info = psb_success_

  select type (src => b)
  type is (psb_c_coo_sparse_mat)
    ! COO input goes through the dedicated cp_from_coo binding.
    call a%cp_from_coo(src,info)
  class default
    ! Anything else: let the parent HLL type do the conversion and,
    ! if that worked, call to_gpu on the result.
    call a%psb_c_hll_sparse_mat%cp_from_fmt(src,info)
    if (info == 0) call a%to_gpu(info)
  end select

  ! Whatever went wrong is reported with a single generic code.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_c_cuda_cp_hlg_from_fmt
#if PSB_CUDA_SHORT_VERSION <= 10

!
! psb_c_cuda_cp_hybg_from_coo
!
!   Copy the COO matrix B into the CUDA HYBG matrix A: the parent CSR
!   type performs the copy, then to_gpu is called.  Only compiled when
!   PSB_CUDA_SHORT_VERSION <= 10.
!
!   Arguments:
!     a    - inout, destination matrix.
!     b    - in, source COO matrix.
!     info - out, 0 on success, psb_err_alloc_dealloc_ on any failure.
!
subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo
  implicit none

  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)          :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
  ! to_gpu is only attempted when the copy succeeded.
  if (info == 0) call a%to_gpu(info)

  ! Both steps share the same generic failure code.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_c_cuda_cp_hybg_from_coo
#endif
#if PSB_CUDA_SHORT_VERSION <= 10

!
! psb_c_cuda_cp_hybg_from_fmt
!
!   Copy B, whatever its dynamic type, into the CUDA HYBG matrix A.
!   Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
!
!   Arguments:
!     a    - inout, destination matrix.
!     b    - inout, source matrix.
!     info - out, 0 on success; otherwise the code returned by the
!            step that failed (it is passed back unchanged).
!
subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt
  implicit none

  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(inout)      :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  select type (src => b)
  type is (psb_c_coo_sparse_mat)
    ! COO input goes through the dedicated cp_from_coo binding.
    call a%cp_from_coo(src,info)
  class default
    ! Anything else: convert with the parent CSR type, and call
    ! to_gpu only if the conversion succeeded.
    call a%psb_c_csr_sparse_mat%cp_from_fmt(src,info)
    if (info == 0) call a%to_gpu(info)
  end select

end subroutine psb_c_cuda_cp_hybg_from_fmt
#endif
!
! psb_c_cuda_csrg_allocate_mnnz
!
!   Allocate the CSRG matrix A with M rows and N columns: the parent
!   CSR type allocates first, then initFcusparse() is called and, if
!   it returns 0, to_gpu.
!
!   Arguments:
!     m, n - in, matrix dimensions.
!     a    - inout, matrix to be allocated.
!     nz   - in, optional; forwarded as is to the parent allocate and
!            to to_gpu (as nzrm).
!
!   On failure psb_error_handler is invoked with the saved error
!   action; on success the error action is restored.
!
subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)                    :: m,n
  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional          :: nz

  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_

  call a%psb_c_csr_sparse_mat%allocate(m,n,nz)

  info = initFcusparse()
  if (info == 0) call a%to_gpu(info,nzrm=nz)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

end subroutine psb_c_cuda_csrg_allocate_mnnz
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_csrg_csmv + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_csrg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_csrg_from_gpu + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (PSB_CUDA_SHORT_VERSION <= 10) || (PSB_CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() + +end subroutine psb_c_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..7e5bb614 --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
!
! psb_c_cuda_csrg_inner_vect_sv
!
!   Solve kernel of the CSRG matrix A on encapsulated vectors.
!
!   Two paths are followed:
!     - trans is 'T' or 'C', or beta is non-zero: x and y are synced
!       and the parent CSR inner_spsm is used; y is then flagged with
!       set_host().
!     - otherwise: if both x and y are psb_c_vect_cuda,
!       spsvCSRGDevice is called on their device vectors and y is
!       flagged with set_dev(); for any other vector type plain arrays
!       are extracted with get_vect(), the parent CSR inner_spsm is
!       used and y is rebuilt with bld().
!
!   Arguments:
!     alpha, beta - in, scalars passed on to the kernels.
!     a           - in, the matrix; it must satisfy is_asb().
!     x, y        - inout, input and output vectors.
!     info        - out, psb_success_ on success; an error is pushed on
!                   the error stack otherwise.
!     trans       - in, optional, 'N' (default), 'T' or 'C'; letter case
!                   is ignored.
!
subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv
  use psb_c_cuda_vect_mod
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)                 :: alpha, beta
  class(psb_c_base_vect_type), intent(inout)    :: x, y
  integer(psb_ipk_), intent(out)                :: info
  character, optional, intent(in)               :: trans

  complex(psb_spk_), allocatable :: rx(:), ry(:)
  logical            :: tra
  character          :: trans_
  integer(psb_ipk_)  :: err_act
  ! Long enough for the full routine name: with len=20 the 25-character
  ! literal was silently truncated in every error report.
  character(len=32)  :: name='c_cuda_csrg_inner_vect_sv'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)
  info = psb_success_


  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  if (tra.or.(beta/=dzero)) then
    ! Transposed solve or non-zero beta: use the parent CSR code.
    call x%sync()
    call y%sync()
    call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    ! Here tra is false and beta is zero.
    select type (xx => x)
    type is (psb_c_vect_cuda)
      select type(yy => y)
      type is (psb_c_vect_cuda)
        if (xx%is_host()) call xx%sync()
        ! beta is zero in this branch, so y is not synced before the
        ! call.
        info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
      class default
        ! y is not a CUDA vector: work on plain arrays.
        rx = xx%get_vect()
        ry = y%get_vect()
        call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
        call y%bld(ry)
      end select
    class default
      ! x is not a CUDA vector: work on plain arrays.
      rx = x%get_vect()
      ry = y%get_vect()
      call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
      call y%bld(ry)
    end select
  end if
  if (info /= psb_success_) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name, a_err='csrg_vect_sv')
    goto 9999
  end if


  call psb_erractionrestore(err_act)
  return


9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_cuda_csrg_inner_vect_sv
!
! psb_c_cuda_csrg_mold
!
!   Make B an allocated object of dynamic type
!   psb_c_cuda_csrg_sparse_mat.  If B is already allocated it is
!   released first with free() and deallocate.
!
!   Arguments:
!     a    - in, the object this method is invoked on; it is not
!            referenced in the body.
!     b    - inout, allocatable, receives the new object.
!     info - out, 0 on success; psb_err_alloc_dealloc_ (also pushed on
!            the error stack) if deallocation or allocation fails.
!
subroutine psb_c_cuda_csrg_mold(a,b,info)

  use psb_base_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_mold
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(in)            :: a
  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='csrg_mold'

  call psb_get_erraction(err_act)

  if (.not.allocated(b)) then
    info = 0
  else
    ! Release whatever B currently holds before re-typing it.
    call b%free()
    deallocate(b,stat=info)
  end if
  if (info == 0) allocate(psb_c_cuda_csrg_sparse_mat :: b, stat=info)

  if (info /= psb_success_) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    call psb_error_handler(err_act)
  end if

end subroutine psb_c_cuda_csrg_mold
!
! psb_c_cuda_csrg_reallocate_nz
!
!   Reallocate the CSRG matrix A for NZ entries: the parent CSR type
!   reallocates, then to_gpu is called with nzrm=nz.
!
!   Arguments:
!     nz - in, requested size.
!     a  - inout, the matrix.
!
!   On failure of to_gpu psb_error_handler is invoked with the saved
!   error action; on success the error action is restored.
!
subroutine psb_c_cuda_csrg_reallocate_nz(nz,a)

  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz
  implicit none
  integer(psb_ipk_), intent(in)                    :: nz
  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a

  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='c_cuda_csrg_reallocate_nz'

  call psb_erractionsave(err_act)
  info = psb_success_

  !
  ! What should this really do???
  !
  call a%psb_c_csr_sparse_mat%reallocate(nz)

  call a%to_gpu(info,nzrm=nz)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

end subroutine psb_c_cuda_csrg_reallocate_nz
!
! psb_c_cuda_csrg_scal
!
!   Scale the CSRG matrix A by the vector D: the scaling is done by
!   the parent CSR type, then to_gpu is called.
!
!   Arguments:
!     d    - in, scaling vector.
!     a    - inout, the matrix.
!     info - out, 0 on success, otherwise the code of the failing step.
!     side - in, optional, forwarded unchanged to the parent scal.
!
subroutine psb_c_cuda_csrg_scal(d,a,info,side)

  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)                    :: d(:)
  integer(psb_ipk_), intent(out)                   :: info
  character, intent(in), optional                  :: side

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'

  info = psb_success_
  call psb_erractionsave(err_act)

  ! When a%is_dev() holds, sync() is called before the parent scal.
  if (a%is_dev()) call a%sync()

  call a%psb_c_csr_sparse_mat%scal(d,info,side=side)
  ! to_gpu only runs after a successful scaling.
  if (info == 0) call a%to_gpu(info)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

end subroutine psb_c_cuda_csrg_scal
!
! psb_c_cuda_csrg_scals
!
!   Scale the CSRG matrix A by the scalar D: the scaling is done by
!   the parent CSR type, then to_gpu is called.
!
!   Arguments:
!     d    - in, scaling factor.
!     a    - inout, the matrix.
!     info - out, 0 on success, otherwise the code of the failing step.
!
subroutine psb_c_cuda_csrg_scals(d,a,info)

  use psb_base_mod
  use cusparse_mod
  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals
  implicit none
  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)                    :: d
  integer(psb_ipk_), intent(out)                   :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'

  info = psb_success_
  call psb_erractionsave(err_act)

  ! When a%is_dev() holds, sync() is called before the parent scal.
  if (a%is_dev()) call a%sync()

  call a%psb_c_csr_sparse_mat%scal(d,info)
  ! to_gpu only runs after a successful scaling.
  if (info == 0) call a%to_gpu(info)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

end subroutine psb_c_cuda_csrg_scals
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) ! upload the host CSR arrays of a into a%deviceMat + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_spk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return ! nothing to upload, info stays 0 + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) ! drop the previous device matrix first + end if +#if (PSB_CUDA_SHORT_VERSION <= 10 ) + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. 
a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if + if ((info == 0) .and. 
a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif PSB_CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i-1 ! zero-based column of the diagonal entry + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! 
write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i-1 ! zero-based column of the diagonal entry + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#elif 0 + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! 
+ nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! 
write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. 
a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if + +end subroutine psb_c_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 new file mode 100644 index 00000000..c58e7ec0 --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! 
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) ! mv on vector objects; device path calls spmvCSRGDevice + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' ! default when trans is absent + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then ! 'T' or 'C': delegate to the parent CSR spmm + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then ! y is only synced when beta is nonzero + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default ! 
y is not psb_c_vect_cuda: go through plain arrays + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default ! x is not psb_c_vect_cuda: go through plain arrays + rx = x%get_vect() + ry = 
y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) ! common error exit + + return +end subroutine psb_c_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_c_cuda_diag_csmv.F90 b/cuda/impl/psb_c_cuda_diag_csmv.F90 new file mode 100644 index 00000000..c0940903 --- /dev/null +++ b/cuda/impl/psb_c_cuda_diag_csmv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_cuda_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_diag_mold + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_diag_mold diff --git a/cuda/impl/psb_c_cuda_diag_to_gpu.F90 b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 new file mode 100644 index 00000000..88bbd8b5 --- /dev/null +++ b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) ! allocate a%deviceMat and write a%data, a%offset to it + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu + use iso_c_binding + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d + type(diagdev_parms) :: gpu_parms + + info = 0 + + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return ! nothing to upload; info stays 0 + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_float) ! NOTE(review): result not read below + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) ! release the previous device matrix + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_float) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 + +end subroutine psb_c_cuda_diag_to_gpu diff --git a/cuda/impl/psb_c_cuda_diag_vect_mv.F90 b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 new file mode 100644 index 00000000..fba22bc5 --- /dev/null +++ b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 @@ -0,0 +1,116 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! 
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) ! mv on vector objects; device path calls spmvDiagDevice + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' ! default when trans is absent + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then ! 'T' or 'C': delegate to the parent DIA spmm + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then ! complex zero, matching the type of beta + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then ! complex zero, matching the type of beta + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default ! 
y is not psb_c_vect_cuda: go through plain arrays + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default ! x is not psb_c_vect_cuda: go through plain arrays + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + 
call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) ! common error exit + + return + +end subroutine psb_c_cuda_diag_vect_mv diff --git a/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 new file mode 100644 index 00000000..65a42640 --- /dev/null +++ b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 @@ -0,0 +1,416 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! 
POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) ! mv on vector objects; device path calls spmvDnsDevice + use psb_base_mod + use psb_c_cuda_vect_mod + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv + implicit none + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + complex(psb_spk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='c_cuda_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' ! default when trans is absent + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then ! m,k are rows,cols of a; swapped otherwise + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= czero) then ! y is only synced when beta is nonzero + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default ! 
y is not psb_c_vect_cuda: go through plain arrays + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default ! x is not psb_c_vect_cuda: go through plain arrays + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end 
select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) ! common error exit + + return + +end subroutine psb_c_cuda_dnsg_vect_mv + + +subroutine psb_c_cuda_dnsg_mold(a,b,info) ! allocate b with dynamic type psb_c_cuda_dnsg_sparse_mat + use psb_base_mod + use psb_c_cuda_vect_mod + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold + implicit none + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() ! release any previous content of b first + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface 
+!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, 
psb_ipk_
+!!$    integer(psb_ipk_), intent(in) :: m,n
+!!$    class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$    integer(psb_ipk_), intent(in), optional :: nz
+!!$  end subroutine psb_c_cuda_dnsg_allocate_mnnz
+!!$ end interface
+
+subroutine psb_c_cuda_dnsg_to_gpu(a,info)   ! allocate a%deviceMat and write a%val into it
+  use psb_base_mod
+  use psb_c_cuda_vect_mod
+  use dnsdev_mod
+  use psb_c_vectordev_mod
+  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu
+  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)             :: info   ! 0 on success, otherwise the failing call's code
+  Integer(Psb_ipk_)  :: err_act, pitch, lda
+  logical, parameter :: debug=.false.
+  character(len=20)  :: name='c_cuda_dnsg_to_gpu'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2)
+  info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),&
+       & spgpu_type_complex_float,1)
+  if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2))   ! only after a successful allocation
+  if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_dnsg_to_gpu
+
+
+
+subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info)   ! copy COO matrix b into a, then push a to the device
+  use psb_base_mod
+  use psb_c_cuda_vect_mod
+  use dnsdev_mod
+  use psb_c_vectordev_mod
+  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo
+  implicit none
+
+  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+  Integer(Psb_ipk_)  :: err_act
+  character(len=24)  :: name='c_cuda_dnsg_cp_from_coo'   ! len widened: the 23-char literal was cut at 20
+  integer(psb_ipk_)  :: debug_level, debug_unit
+  logical, parameter :: debug=.false.
+  type(psb_c_coo_sparse_mat)  :: tmp
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  call a%psb_c_dns_sparse_mat%cp_from_coo(b,info)   ! parent (host dense) conversion first
+  if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info
+  if (info == 0) call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_cp_dnsg_from_coo
+
+subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info)   ! copy from any format: COO directly, others via a COO temporary
+  use psb_base_mod
+  use psb_c_cuda_vect_mod
+  use dnsdev_mod
+  use psb_c_vectordev_mod
+  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt
+  implicit none
+
+  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_c_base_sparse_mat), intent(in)   :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  type(psb_c_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_) :: err_act
+  character(len=24)  :: name='c_cuda_dnsg_cp_from_fmt'   ! len widened: the 23-char literal was cut at 20
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_c_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+!!$  class is (psb_c_ell_sparse_mat)
+!!$    nzm = psb_size(b%ja,2)
+!!$    m   = b%get_nrows()
+!!$    nc  = b%get_ncols()
+!!$    nza = b%get_nzeros()
+!!$    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+!!$    ld  = gpu_parms%pitch
+!!$    nzm = gpu_parms%maxRowSize
+!!$    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+!!$    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
+!!$    if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
+!!$    if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
+!!$    if (info == 0) call psb_safe_cpy( b%val, a%val , info)
+!!$    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
+!!$    if (info == 0) then
+!!$      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+!!$    end if
+!!$    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+!!$    if (info == 0) then
+!!$      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+!!$    end if
+!!$    a%nzt = nza
+!!$    call a%to_gpu(info)
+
+  class default
+
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)   ! tmp is a private copy, so it can be moved
+  end select
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_cp_dnsg_from_fmt
+
+
+
+subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info)   ! move COO matrix b into a: copy, then free b
+  use psb_base_mod
+  use psb_c_cuda_vect_mod
+  use dnsdev_mod
+  use psb_c_vectordev_mod
+  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo
+  implicit none
+
+  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  Integer(Psb_ipk_)  :: err_act
+  logical, parameter :: debug=.false.
+  character(len=24)  :: name='c_cuda_dnsg_mv_from_coo'   ! len widened: the 23-char literal was cut at 20
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+  if (info /= psb_success_) goto 9999   ! was a bare return, which skipped the error-action restore/handler
+  if (b%is_dev()) call b%sync()
+  call a%cp_from_coo(b,info)
+  if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info
+  call b%free()   ! b is released whether or not the copy succeeded
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_mv_dnsg_from_coo
+
+subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info)   ! move from any format: COO directly, others via a COO temporary
+  use psb_base_mod
+  use psb_c_cuda_vect_mod
+  use dnsdev_mod
+  use psb_c_vectordev_mod
+  use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt
+  implicit none
+  class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_c_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+
+  type(psb_c_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_) :: err_act
+  character(len=24)  :: name='c_cuda_dnsg_mv_from_fmt'   ! was the copy-pasted 'cp' name, and cut at 20 chars
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_c_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+!!$  class is (psb_c_ell_sparse_mat)
+!!$    nzm = psb_size(b%ja,2)
+!!$    m   = b%get_nrows()
+!!$    nc  = b%get_ncols()
+!!$    nza = b%get_nzeros()
+!!$    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+!!$    ld  = gpu_parms%pitch
+!!$    nzm = gpu_parms%maxRowSize
+!!$    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+!!$    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
+!!$    if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
+!!$    if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
+!!$    if (info == 0) call psb_safe_cpy( b%val, a%val , info)
+!!$    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
+!!$    if (info == 0) then
+!!$      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+!!$    end if
+!!$    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+!!$    if (info == 0) then
+!!$      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+!!$    end if
+!!$    a%nzt = nza
+!!$    call a%to_gpu(info)
+
+  class default
+
+    call b%mv_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+
+end subroutine psb_c_cuda_mv_dnsg_from_fmt
diff --git a/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90
new file mode 100644
index 00000000..7f50d547
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90
@@ -0,0 +1,99 @@
+!  Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3.
The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz)
+  ! Size the host ELL arrays of a for an m x n matrix (nz = optional total nonzeros), put a in build state, then call to_gpu.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  Integer(Psb_ipk_) :: err_act, info, nz_,ld
+  character(len=20)  :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+  type(elldev_parms) :: gpu_parms
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + m -1 )/max(m,ione)   ! entries per row, rounded up; max() guards m == 0
+  else
+    nz_ = (max(7*m,7*n,ione)+m-1)/max(m,ione)   ! default guess of about 7 entries per row; max() guards m == 0
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+
+  gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_float,1)
+  ld  = gpu_parms%pitch        ! leading dimension used for ja/val below
+  nz_ = gpu_parms%maxRowSize   ! row width as returned by the device parameters
+
+  if (info == psb_success_) call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info)
+  if (info == psb_success_) then
+    a%irn   = 0
+    a%idiag = 0
+    a%nzt   = 0
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+    call a%set_bld()
+    call a%set_triangle(.false.)
+    call a%set_unit(.false.)
+    call a%set_dupl(psb_dupl_def_)
+  end if
+
+  if (info == psb_success_) call a%to_gpu(info,nzrm=nz_)   ! guarded so an earlier realloc failure code is not overwritten
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_elg_allocate_mnnz
diff --git a/cuda/impl/psb_c_cuda_elg_asb.f90 b/cuda/impl/psb_c_cuda_elg_asb.f90
new file mode 100644
index 00000000..16d70736
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_elg_asb.f90
@@ -0,0 +1,64 @@
+!  Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!
notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_elg_asb(a)
+  ! Mark matrix a as assembled, calling a%sync() first when a%is_host() is true.
+  use psb_base_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_asb
+  implicit none
+
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+
+  integer(psb_ipk_) :: err_act, info
+  character(len=20)  :: name='elg_asb'
+  logical :: clear_
+  logical, parameter :: debug=.false.
+  real(psb_dpk_), allocatable :: valt(:,:)      ! unused in this routine
+  integer(psb_ipk_), allocatable :: jat(:,:)    ! unused in this routine
+  integer(psb_ipk_) :: nr, nc                   ! unused in this routine
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! Only call sync() if we are on host
+  if (a%is_host()) then
+    call a%sync()
+  end if
+  call a%set_asb()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)   ! no goto in this routine targets this label
+
+  return
+
+end subroutine psb_c_cuda_elg_asb
diff --git a/cuda/impl/psb_c_cuda_elg_csmm.F90 b/cuda/impl/psb_c_cuda_elg_csmm.F90
new file mode 100644
index 00000000..f7ae9892
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_elg_csmm.F90
@@ -0,0 +1,124 @@
+!  Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in)          :: alpha, beta, x(:,:)
+  complex(psb_spk_), intent(inout)       :: y(:,:)
+  integer(psb_ipk_), intent(out)            :: info
+  character, optional, intent(in) :: trans
+
+  character :: trans_
+  integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
+  complex(psb_spk_), allocatable  :: acc(:)
+  type(c_ptr)   :: gpX, gpY
+  logical   :: tra
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='c_cuda_elg_csmm'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'   ! default when trans is absent
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+
+  if (tra) then
+    m = a%get_ncols()   ! transposed: rows and columns swap roles
+    n = a%get_nrows()
+  else
+    n = a%get_ncols()
+    m = a%get_nrows()
+  end if
+
+  if (size(x,1) psb_c_cuda_elg_csmv
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in)          :: alpha, beta, x(:)
+  complex(psb_spk_), intent(inout)       :: y(:)
+  integer(psb_ipk_), intent(out)            :: info
+  character, optional, intent(in) :: trans
+
+  character :: trans_
+  integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc
+  complex(psb_spk_) :: acc
+  type(c_ptr)   :: gpX, gpY
+  logical   :: tra
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='d_elg_csmv'   ! NOTE(review): 'd_' prefix in a psb_c_ routine looks copy-pasted - confirm
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'   ! default when trans is absent
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+
+  if (tra) then
+    m = a%get_ncols()   ! transposed: rows and columns swap roles
+    n = a%get_nrows()
+  else
+    n = a%get_ncols()
+    m = a%get_nrows()
+  end if
+
+  if (size(x,1) psb_c_cuda_elg_csput_a
+  implicit none
+
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in)      :: val(:)
+  integer(psb_ipk_), intent(in)             :: nz, ia(:), ja(:), imin,imax,jmin,jmax
+  integer(psb_ipk_), intent(out)            :: info
+
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='c_cuda_elg_csput_a'
+  logical, parameter :: debug=.false.
+  integer(psb_ipk_)  :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit
+  real(psb_dpk_)     :: t1,t2,t3
+  type(c_ptr)        :: devIdxUpd
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+!!$  write(0,*) 'In ELG_csput_a'
+  if (nz <= 0) then   ! NOTE(review): also rejects nz == 0, so the 'if (nz == 0) return' below is never reached
+    info = psb_err_iarg_neg_
+    int_err(1)=1
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (size(ia) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=2
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (size(ja) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=3
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (size(val) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=4
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (nz == 0) return
+
+
+  if (a%is_bld()) then
+    ! Build phase should only ever be in COO
+    info = psb_err_invalid_mat_state_
+
+  else  if (a%is_upd()) then
+!!$    write(*,*) 'elg_csput_a '
+    if (a%is_dev()) call a%sync()
+    call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,&
+         & imin,imax,jmin,jmax,info)
+    if (info /= psb_success_) then
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    call a%set_host()   ! the parent-type csput changed the host arrays, so flag them as current
+  else
+    ! State is wrong.
+    info = psb_err_invalid_mat_state_
+  end if
+  if (info /= psb_success_) then
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_elg_csput_a
+
+
+
+subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+  ! Insert nz entries held in vector objects; uses the device routine when ia, ja and val are all CUDA vectors.
+  use psb_base_mod
+  use iso_c_binding
+  use elldev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v
+  use psb_c_cuda_vect_mod
+  implicit none
+
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_c_base_vect_type), intent(inout) :: val
+  class(psb_i_base_vect_type), intent(inout) :: ia, ja
+  integer(psb_ipk_), intent(in)              :: nz, imin,imax,jmin,jmax
+  integer(psb_ipk_), intent(out)             :: info
+
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='c_cuda_elg_csput_v'
+  logical, parameter :: debug=.false.
+  integer(psb_ipk_)  :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw
+  logical            :: gpu_invoked
+  real(psb_dpk_)     :: t1,t2,t3
+  type(c_ptr)        :: devIdxUpd
+  integer(psb_ipk_), allocatable :: idxs(:)
+  logical, parameter :: debug_idxs=.false., debug_vals=.false.
+
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+!  write(0,*) 'In ELG_csput_v'
+  if (nz <= 0) then   ! NOTE(review): also rejects nz == 0, so the 'if (nz == 0) return' below is never reached
+    info = psb_err_iarg_neg_
+    int_err(1)=1
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (ia%get_nrows() < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=2
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (ja%get_nrows() < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=3
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (val%get_nrows() < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=4
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (nz == 0) return
+
+
+  if (a%is_bld()) then
+    ! Build phase should only ever be in COO
+    info = psb_err_invalid_mat_state_
+
+  else  if (a%is_upd()) then
+
+    t1=psb_wtime()
+    gpu_invoked = .false.
+    select type (ia)   ! the device path needs all three vectors to be CUDA vector types
+    class is (psb_i_vect_cuda)
+      select type (ja)
+      class is (psb_i_vect_cuda)
+        select type (val)
+        class is (psb_c_vect_cuda)
+          if (a%is_host())   call a%sync()
+          if (val%is_host()) call val%sync()
+          if (ia%is_host())  call ia%sync()
+          if (ja%is_host())  call ja%sync()
+          info = csputEllDeviceFloatComplex(a%deviceMat,nz,&
+               & ia%deviceVect,ja%deviceVect,val%deviceVect)
+          call a%set_dev()
+          gpu_invoked=.true.
+        end select
+      end select
+    end select
+    if (.not.gpu_invoked) then   ! fallback: parent-type csput on the host arrays
+!!$      write(0,*)'Not gpu_invoked '
+      if (a%is_dev()) call a%sync()
+      call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,&
+           & imin,imax,jmin,jmax,info)
+      call a%set_host()
+    end if
+
+    if (info /= 0) then
+      info = psb_err_internal_error_   ! any nonzero code from either path is reported as an internal error
+    end if
+
+
+  else
+    ! State is wrong.
+    info = psb_err_invalid_mat_state_
+  end if
+  if (info /= psb_success_) then
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+
+end subroutine psb_c_cuda_elg_csput_v
diff --git a/cuda/impl/psb_c_cuda_elg_from_gpu.F90 b/cuda/impl/psb_c_cuda_elg_from_gpu.F90
new file mode 100644
index 00000000..34c6e4a6
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_elg_from_gpu.F90
@@ -0,0 +1,67 @@
+!  Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_elg_from_gpu(a,info)
+  ! Read the device ELL matrix back into the host arrays of a, resizing them to the device pitch/row width if needed.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)             :: info   ! 0 on success, otherwise the failing call's code
+
+  integer(psb_ipk_)  :: m, nzm, n, pitch,maxrowsize
+
+  info = 0
+
+  if (.not.(c_associated(a%deviceMat))) then   ! no device copy exists: free a and return with info = 0
+    call a%free()
+    return
+  end if
+
+  m   = a%get_nrows()
+  nzm = psb_size(a%val,2)
+  n   = a%get_ncols()
+
+  pitch      = getEllDevicePitch(a%deviceMat)
+  maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
+
+  if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
+    call psb_realloc(pitch,maxrowsize,a%val,info)
+    if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
+    if (info == 0) call psb_realloc(pitch,a%irn,info)   ! NOTE(review): a%idiag is not resized here - confirm it is always large enough
+  end if
+  if (info == 0)  info = &
+       & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag)
+  if (info == 0) call a%set_sync()   ! only flag the copies as in sync when the read actually succeeded
+
+end subroutine psb_c_cuda_elg_from_gpu
diff --git a/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90
new file mode 100644
index 00000000..148d72d2
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90
@@ -0,0 +1,84 @@
+!  Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3.
The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+  ! Vector-object solve entry point: delegates to the parent ELL inner_spsm after syncing a, x and y.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv
+  use psb_c_cuda_vect_mod
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in)       :: alpha, beta
+  class(psb_c_base_vect_type), intent(inout) :: x, y
+  integer(psb_ipk_), intent(out)             :: info
+  character, optional, intent(in)  :: trans
+
+  integer(psb_ipk_)  :: err_act
+  character(len=24)  :: name='c_cuda_elg_inner_vect_sv'   ! len widened: the 24-char literal was cut at 20
+  logical, parameter :: debug=.false.
+  complex(psb_spk_), allocatable :: rx(:), ry(:)
+
+  call psb_get_erraction(err_act)
+  ! The array-based branch below is disabled (if (.false.)), so
+  ! x and y are synced and the parent ELL inner_spsm does the work;
+  ! y is then flagged through set_host().
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  if (.false.) then
+    rx = x%get_vect()
+    ry = y%get_vect()
+    call a%inner_spsm(alpha,rx,beta,ry,info,trans)
+    call y%bld(ry)
+  else
+    call x%sync()
+    call y%sync()
+    call a%psb_c_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  end if
+
+  if (info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name, a_err='inner_cssm')
+    goto 9999
+  end if
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_elg_inner_vect_sv
diff --git a/cuda/impl/psb_c_cuda_elg_mold.F90 b/cuda/impl/psb_c_cuda_elg_mold.F90
new file mode 100644
index 00000000..bb94bf07
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_elg_mold.F90
@@ -0,0 +1,63 @@
+!  Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_c_cuda_elg_mold(a,b,info)
+  ! Make b a fresh psb_c_cuda_elg_sparse_mat, freeing and deallocating any object b already held.
+  use psb_base_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_mold
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(in)                  :: a   ! not referenced in the body
+  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                    :: info   ! psb_err_alloc_dealloc_ if (de)allocation fails
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='elg_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_c_cuda_elg_sparse_mat :: b, stat=info)   ! skipped when the deallocate failed
+
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info, name)
+    goto 9999
+  end if
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_elg_mold
diff --git a/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90
new file mode 100644
index 00000000..6a1f8763
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90
@@ -0,0 +1,72 @@
+!  Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_elg_reallocate_nz(nz,a)
+  ! Resize the ja/val columns of a so that about nz entries fit over its rows, then call to_gpu.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: m, nzrm,ld
+  Integer(Psb_ipk_) :: err_act, info
+  character(len=24)  :: name='c_cuda_elg_reallocate_nz'   ! len widened: the 24-char literal was cut at 20
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  !
+  ! What should this really do???
+  !
+  if (a%is_dev()) call a%sync()
+  m    = a%get_nrows()
+  nzrm = (max(nz,ione)+m-1)/max(m,ione)   ! entries per row, rounded up; max() guards a zero-row matrix
+  ld   = size(a%ja,1)                     ! the leading dimension is left unchanged
+  call psb_realloc(ld,nzrm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    goto 9999
+  end if
+
+  call a%to_gpu(info,nzrm=nzrm)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_elg_reallocate_nz
diff --git a/cuda/impl/psb_c_cuda_elg_scal.F90 b/cuda/impl/psb_c_cuda_elg_scal.F90
new file mode 100644
index 00000000..65f84768
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_elg_scal.F90
@@ -0,0 +1,71 @@
+!  Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_elg_scal(d,a,info,side)
+  ! Scale a by the vector d through the parent ELL scal, then push the result to the device.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in)      :: d(:)
+  integer(psb_ipk_), intent(out)            :: info
+  character, intent(in), optional :: side   ! passed through unchanged to the parent scal
+
+
+  Integer(Psb_ipk_)  :: err_act,mnm, i, j, m
+  character(len=20)  :: name='scal'
+  logical, parameter :: debug=.false.
+
+  info  = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (a%is_dev()) call a%sync()
+
+  if (a%is_unit()) then
+    call a%make_nonunit()
+  end if
+
+  call a%psb_c_ell_sparse_mat%scal(d,info,side)
+  if (info /= psb_success_) goto 9999
+
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_elg_scal
diff --git a/cuda/impl/psb_c_cuda_elg_scals.F90 b/cuda/impl/psb_c_cuda_elg_scals.F90
new file mode 100644
index 00000000..966f2e91
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_elg_scals.F90
@@ -0,0 +1,66 @@
+!  Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +
+!
+! psb_c_cuda_elg_scals: multiply every entry of a%val by the scalar D,
+! then call a%to_gpu.
+!
+!   d    - scalar factor
+!   a    - matrix, updated in place
+!   info - psb_success_ on success; otherwise the code returned by a%to_gpu
+!
+subroutine psb_c_cuda_elg_scals(d,a,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in)  :: d
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='scal'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  ! The host arrays are about to be modified: call a%sync() first when
+  ! a%is_dev() reports true.
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) then
+    call a%make_nonunit()
+  end if
+
+  a%val(:,:) = a%val(:,:) * d
+
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  ! Pair the psb_erractionsave above with a restore on the success path,
+  ! exactly as psb_c_cuda_elg_scal does; without it the saved error
+  ! action is never put back.
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_elg_scals
diff --git a/cuda/impl/psb_c_cuda_elg_to_gpu.F90 b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 new file mode 100644 index 00000000..495207c7 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +
+!
+! psb_c_cuda_elg_to_gpu: write the host ELL arrays of A to a%deviceMat,
+! (re)allocating the device matrix when its pitch or maximum row size
+! differs from the values FgetEllDeviceParams reports.
+!
+!   a    - matrix; a%val / a%ja may be reallocated to (pitch,maxrowsize)
+!   info - 0 on success; otherwise the first non-zero code returned by
+!          FallocEllDevice, psb_realloc or writeEllDevice
+!   nzrm - optional lower bound on the second dimension used for the
+!          device allocation (combined with size(a%val,2) through max)
+!
+! Returns immediately with info = 0 when a%val or a%ja is not allocated.
+!
+subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)          :: info
+  integer(psb_ipk_), intent(in), optional :: nzrm
+
+  integer(psb_ipk_)  :: m, nzm, n, pitch,maxrowsize, nzt
+  type(elldev_parms) :: gpu_parms
+
+  info = 0
+
+  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
+
+  m   = a%get_nrows()
+  nzm = psb_size(a%val,2)
+  n   = a%get_ncols()
+  nzt = a%get_nzeros()
+  if (present(nzrm)) nzm = max(nzm,nzrm)
+
+  gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_float,1)
+
+  ! Geometry of the existing device matrix, or -1/-1 to force allocation.
+  if (c_associated(a%deviceMat)) then
+    pitch      = getEllDevicePitch(a%deviceMat)
+    maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
+  else
+    pitch      = -1
+    maxrowsize = -1
+  end if
+
+  if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
+    if (c_associated(a%deviceMat)) then
+      call freeEllDevice(a%deviceMat)
+    endif
+    info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_float,1)
+    ! Only query the new device matrix when the allocation succeeded;
+    ! after a failure a%deviceMat must not be handed to the accessors.
+    if (info == 0) then
+      pitch      = getEllDevicePitch(a%deviceMat)
+      maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
+    end if
+  end if
+
+  ! Make the host arrays match the device layout before copying.
+  if (info == 0) then
+    if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
+      call psb_realloc(pitch,maxrowsize,a%val,info)
+      if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
+    end if
+  end if
+  if (info == 0) info = &
+       & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)
+  ! NOTE(review): set_sync is called even when info /= 0, as in the
+  ! original code - confirm that marking the matrix in sync after a
+  ! failed upload is intended.
+  call a%set_sync()
+
+end subroutine psb_c_cuda_elg_to_gpu
diff --git a/cuda/impl/psb_c_cuda_elg_trim.f90 b/cuda/impl/psb_c_cuda_elg_trim.f90 new file mode 100644 index 00000000..78dbe193 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_trim.f90 @@
-0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +
+!
+! psb_c_cuda_elg_trim: shrink the host ELL arrays of A to the sizes
+! actually in use.
+!
+! a%irn and a%idiag are resized to max(1,nrows); a%ja and a%val are
+! resized to (max(1,size(a%ja,1)), max(1,maxval(a%irn(1:nrows)))).
+! Failures of psb_realloc are routed to psb_error_handler.
+!
+subroutine psb_c_cuda_elg_trim(a)
+
+  use psb_base_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_trim
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_)  :: err_act, info, nrows, lead_dim, max_row_nz
+  character(len=20)  :: name='trim'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! Target sizes, each clamped to at least 1.
+  nrows      = max(1_psb_ipk_,a%get_nrows())
+  lead_dim   = max(1_psb_ipk_,size(a%ja,1))
+  max_row_nz = max(1_psb_ipk_,maxval(a%irn(1:nrows)))
+
+  ! Resize in sequence, stopping at the first failure.
+  call psb_realloc(nrows,a%irn,info)
+  if (info == psb_success_) call psb_realloc(nrows,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(lead_dim,max_row_nz,a%ja,info)
+  if (info == psb_success_) call psb_realloc(lead_dim,max_row_nz,a%val,info)
+
+  if (info == psb_success_) then
+    call psb_erractionrestore(err_act)
+    return
+  end if
+
+  call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_elg_trim
diff --git a/cuda/impl/psb_c_cuda_elg_vect_mv.F90 b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 new file mode 100644 index 00000000..9da6a34a --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +
+!
+! psb_c_cuda_elg_vect_mv: sparse matrix-vector product on vector objects.
+!
+!   alpha, beta - complex scalars passed to the product routines
+!   a           - assembled ELL matrix (error if a%is_asb() is false)
+!   x, y        - input and input/output vectors
+!   info        - psb_success_ on success, a PSBLAS error code otherwise
+!   trans       - optional, 'N' when absent; 'T' or 'C' (any case)
+!                 selects the transposed path
+!
+! Transposed products go through the parent psb_c_ell_sparse_mat%spmm.
+! Non-transposed products call spmvEllDevice when both x and y are of
+! type psb_c_vect_cuda, and otherwise fall back to a%spmm on plain
+! arrays obtained with get_vect.
+!
+subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv
+  use psb_c_cuda_vect_mod
+  implicit none
+  class(psb_c_cuda_elg_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in)                :: alpha, beta
+  class(psb_c_base_vect_type), intent(inout)   :: x
+  class(psb_c_base_vect_type), intent(inout)   :: y
+  integer(psb_ipk_), intent(out)               :: info
+  character, optional, intent(in)              :: trans
+  complex(psb_spk_), allocatable :: rx(:), ry(:)
+  logical            :: tra, ran_on_device
+  character          :: trans_
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='c_cuda_elg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  trans_ = 'N'
+  if (present(trans)) trans_ = trans
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  select case (psb_toupper(trans_))
+  case ('T','C')
+    tra = .true.
+  case default
+    tra = .false.
+  end select
+
+  if (tra) then
+    ! Transposed product: use the parent ELL implementation.
+    if (a%is_dev()) call a%sync()
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= czero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    ran_on_device = .false.
+    select type (xx => x)
+    type is (psb_c_vect_cuda)
+      select type (yy => y)
+      type is (psb_c_vect_cuda)
+        ! Both vectors are CUDA vectors: run the device kernel.
+        ran_on_device = .true.
+        if (a%is_host()) call a%sync()
+        if (xx%is_host()) call xx%sync()
+        if (beta /= czero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      end select
+    end select
+
+    if (.not.ran_on_device) then
+      ! At least one vector is not a CUDA vector: multiply plain arrays.
+      if (a%is_dev()) call a%sync()
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end if
+
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_elg_vect_mv
diff --git a/cuda/impl/psb_c_cuda_hdiag_csmv.F90 b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 new file mode 100644 index 00000000..36928062 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv + implicit none + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_cuda_hdiag_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hdiag_mold + implicit none + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hdiag_mold diff --git a/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 new file mode 100644 index 00000000..8d1b61a1 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! 
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+!
+! psb_c_cuda_hdiag_to_gpu: (re)create the device HDIAG matrix of A and
+! copy a%val, a%diaOffsets and a%hackOffsets into it.
+!
+!   a    - matrix; any existing a%deviceMat is freed first
+!   info - 0 on success
+!          -1 a%hackOffsets not allocated
+!          -2 a%diaOffsets  not allocated
+!          -3 a%val         not allocated
+!          otherwise the code returned by FAllocHdiagDevice or
+!          writeHdiagDevice
+!
+subroutine psb_c_cuda_hdiag_to_gpu(a,info)
+
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_to_gpu
+  use iso_c_binding
+  implicit none
+  class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_)    :: nr, nc, hacksize, hackcount, allocheight
+  type(hdiagdev_parms) :: gpu_parms
+
+  info = 0
+
+  nr        = a%get_nrows()
+  nc        = a%get_ncols()
+  hacksize  = a%hackSize
+  hackCount = a%nhacks
+  if (.not.allocated(a%hackOffsets)) then
+    info = -1
+    return
+  end if
+  ! The entry after the last hack offset is used as the allocation height.
+  allocheight = a%hackOffsets(hackCount+1)
+  if (.not.allocated(a%diaOffsets)) then
+    info = -2
+    return
+  end if
+  if (.not.allocated(a%val)) then
+    info = -3
+    return
+  end if
+
+  if (c_associated(a%deviceMat)) then
+    call freeHdiagDevice(a%deviceMat)
+  endif
+
+  ! a%val holds single-precision complex coefficients, so the device
+  ! matrix must be tagged spgpu_type_complex_float (as the ELL variant
+  ! psb_c_cuda_elg_to_gpu does) rather than spgpu_type_double.
+  info = FAllocHdiagDevice(a%deviceMat,nr,nc,&
+       & allocheight,hacksize,hackCount,spgpu_type_complex_float)
+  if (info == 0) info = &
+       & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets)
+
+end subroutine psb_c_cuda_hdiag_to_gpu
diff --git a/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 new file mode 100644 index 00000000..0c7ce856 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +!
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+!
+! psb_c_cuda_hdiag_vect_mv: sparse matrix-vector product on vector objects.
+!
+!   alpha, beta - complex scalars passed to the product routines
+!   a           - assembled HDIAG matrix (error if a%is_asb() is false)
+!   x, y        - input and input/output vectors
+!   info        - psb_success_ on success, a PSBLAS error code otherwise
+!   trans       - optional, 'N' when absent; 'T' or 'C' (any case)
+!                 selects the transposed path
+!
+! Transposed products go through the parent psb_c_hdia_sparse_mat%spmm.
+! Non-transposed products call spmvHdiagDevice when both x and y are of
+! type psb_c_vect_cuda, and otherwise fall back to a%spmm on plain
+! arrays obtained with get_vect.
+!
+subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_vect_mv
+  use psb_c_cuda_vect_mod
+  implicit none
+  class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in)                  :: alpha, beta
+  class(psb_c_base_vect_type), intent(inout)     :: x
+  class(psb_c_base_vect_type), intent(inout)     :: y
+  integer(psb_ipk_), intent(out)                 :: info
+  character, optional, intent(in)                :: trans
+  complex(psb_spk_), allocatable :: rx(:), ry(:)
+  logical            :: tra
+  character          :: trans_
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='c_cuda_hdiag_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  if (tra) then
+    ! Transposed product: use the parent HDIA implementation.
+    if (.not.x%is_host()) call x%sync()
+    ! beta is complex(psb_spk_): compare against the complex zero czero,
+    ! as psb_c_cuda_elg_vect_mv does, not the real double dzero.
+    if (beta /= czero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_c_vect_cuda)
+      select type(yy => y)
+      type is (psb_c_vect_cuda)
+        ! Both vectors are CUDA vectors: run the device kernel.
+        if (xx%is_host()) call xx%sync()
+        if (beta /= czero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        ! y is not a CUDA vector: multiply plain arrays.
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      ! x is not a CUDA vector: multiply plain arrays.
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_hdiag_vect_mv
diff --git a/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..1b41f132 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +
+!
+! psb_c_cuda_hlg_allocate_mnnz: allocate an M x N HLL matrix through the
+! parent psb_c_hll_sparse_mat%allocate, then call a%to_gpu.
+!
+!   m, n - matrix dimensions, forwarded to the parent allocate
+!   a    - matrix to allocate
+!   nz   - optional total number of nonzeros, forwarded to the parent
+!          allocate as-is
+!
+! Errors from a%to_gpu are routed to psb_error_handler.
+!
+subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz)
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  Integer(psb_ipk_)  :: err_act, info, nz_
+  character(len=20)  :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! nz_ was previously passed to to_gpu without ever being assigned.
+  ! Give it a defined value: the average number of entries per row,
+  ! rounded up, computed as psb_c_cuda_elg_reallocate_nz does (with the
+  ! row count clamped to 1 to avoid dividing by zero), or 1 when nz is
+  ! absent.
+  ! NOTE(review): assumes the HLL to_gpu treats nzrm as a per-row lower
+  ! bound like the ELL variant - confirm against psb_c_cuda_hlg_to_gpu.
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + max(m,ione) - 1)/max(m,ione)
+  else
+    nz_ = ione
+  end if
+
+  call a%psb_c_hll_sparse_mat%allocate(m,n,nz)
+
+  call a%to_gpu(info,nzrm=nz_)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_hlg_allocate_mnnz
diff --git a/cuda/impl/psb_c_cuda_hlg_csmm.F90 b/cuda/impl/psb_c_cuda_hlg_csmm.F90 new file mode 100644 index 00000000..88aa53a8 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_csmm.F90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmm + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hlg_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hlg_csmv + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_cuda_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hlg_from_gpu + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call 
psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_c_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..87d7c662 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+  ! Vector-object "sv" entry for the CUDA HLG matrix: forwards to the parent HLL inner_spsm.
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_inner_vect_sv
+  use psb_c_cuda_vect_mod
+  implicit none
+  class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in) :: alpha, beta
+  class(psb_c_base_vect_type), intent(inout) :: x, y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+
+  integer(psb_ipk_) :: err_act
+  character(len=24) :: name='c_cuda_hlg_inner_vect_sv'
+  logical, parameter :: debug=.false.
+  ! name identifies THIS routine in error reports (it used to carry the base-class name).
+
+  call psb_get_erraction(err_act)
+  info = psb_success_
+  ! Sync both vectors, and the matrix when it is flagged as device-resident, before calling
+  ! the parent-type kernel (sync presumably refreshes the host copy -- TODO confirm).
+  call x%sync()
+  call y%sync()
+  if (a%is_dev()) call a%sync()
+  call a%psb_c_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
+  call y%set_host()
+
+  if (info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name, a_err='inner_spsm')
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_hlg_inner_vect_sv
diff --git a/cuda/impl/psb_c_cuda_hlg_mold.F90 b/cuda/impl/psb_c_cuda_hlg_mold.F90
new file mode 100644
index 00000000..a702e211
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hlg_mold.F90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! 
notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_hlg_mold(a,b,info)
+  ! (Re)allocate b with dynamic type psb_c_cuda_hlg_sparse_mat; a is not referenced in the body.
+  use psb_base_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_mold
+  implicit none
+  class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a
+  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='hlg_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+  ! Free and deallocate any previous b; allocate the new one only if that succeeded.
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_c_cuda_hlg_sparse_mat :: b, stat=info)
+
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info, name)
+    goto 9999
+  end if
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+end subroutine psb_c_cuda_hlg_mold
diff --git a/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90
new file mode 100644
index 00000000..2ec5fa2c
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90
@@ -0,0 +1,60 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_hlg_reallocate_nz(nz,a)
+  ! Reallocate through the parent HLL type for nz entries, then call a%to_gpu.
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_reallocate_nz
+  use iso_c_binding
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: err_act, info
+  character(len=32) :: name='c_cuda_hlg_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  call a%psb_c_hll_sparse_mat%reallocate(nz)
+  ! No info argument here: a to_gpu failure is recorded on the error stack before the handler.
+  call a%to_gpu(info)
+  if (info /= 0) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='to_gpu')
+    goto 9999
+  end if
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_cuda_hlg_reallocate_nz
diff --git a/cuda/impl/psb_c_cuda_hlg_scal.F90 b/cuda/impl/psb_c_cuda_hlg_scal.F90
new file mode 100644
index 00000000..770d1734
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hlg_scal.F90
@@ -0,0 +1,68 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_hlg_scal(d,a,info,side)
+  ! Scale a by the vector d through the parent HLL scal, then call a%to_gpu.
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scal
+  implicit none
+  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, intent(in), optional :: side
+
+
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='scal'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! A matrix flagged as unit is switched to non-unit before scaling.
+  if (a%is_unit()) call a%make_nonunit()
+
+
+  call a%psb_c_hll_sparse_mat%scal(d,info,side)
+  if (info == psb_success_) then
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(err_act)
+      return
+    end if
+  end if
+
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_cuda_hlg_scal
diff --git a/cuda/impl/psb_c_cuda_hlg_scals.F90 b/cuda/impl/psb_c_cuda_hlg_scals.F90
new file mode 100644
index 00000000..ef6bc1e3
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hlg_scals.F90
@@ -0,0 +1,66 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! 
Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_hlg_scals(d,a,info)
+  ! Scale a by the scalar d through the parent HLL scal, then call a%to_gpu.
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scals
+  use iso_c_binding
+  implicit none
+  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in) :: d
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='scal'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! A matrix flagged as unit is switched to non-unit before scaling.
+  if (a%is_unit()) call a%make_nonunit()
+
+
+  call a%psb_c_hll_sparse_mat%scal(d,info)
+  if (info == psb_success_) then
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(err_act)
+      return
+    end if
+  end if
+
+  call psb_error_handler(err_act)
+  return
+end subroutine psb_c_cuda_hlg_scals
diff --git a/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90
new file mode 100644
index 00000000..d7d179e7
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90
@@ -0,0 +1,61 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm)
+  ! Allocate a%deviceMat and write the HLL arrays to it; nzrm is accepted but not referenced.
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_to_gpu
+  use iso_c_binding
+  implicit none
+  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_), intent(in), optional :: nzrm
+
+  integer(psb_ipk_) :: nrows, alloc_sz, nnz
+
+  info = 0
+  ! Return with info = 0 unless both val and ja are allocated.
+  if (.not.(allocated(a%val).and.allocated(a%ja))) return
+
+  nrows = a%get_nrows()
+  alloc_sz = a%get_size()
+  nnz = a%get_nzeros()
+
+  ! Release a device matrix that is already associated before allocating a new one.
+  if (c_associated(a%deviceMat)) call freehllDevice(a%deviceMat)
+  info = FallochllDevice(a%deviceMat,a%hksz,nrows,nnz,alloc_sz,spgpu_type_complex_float,1)
+  if (info == 0) info = &
+       & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag)
+  ! Failures are reported through info only; nothing is pushed on the error stack here.
+
+end subroutine psb_c_cuda_hlg_to_gpu
diff --git a/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90
new file mode 100644
index 00000000..3789ef17
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90
@@ -0,0 +1,119 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 
3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans)
+  ! Matrix-vector product on vector objects: spmvhllDevice when possible, else parent/host spmm.
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_vect_mv
+  use psb_c_cuda_vect_mod
+  implicit none
+  class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in) :: alpha, beta
+  class(psb_c_base_vect_type), intent(inout) :: x
+  class(psb_c_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  complex(psb_spk_), allocatable :: rx(:), ry(:)
+  logical :: tra
+  character :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='c_cuda_hlg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+  ! The matrix must be in the assembled state.
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+  ! 'T' and 'C' go through the parent HLL spmm; the device kernel is used only otherwise.
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  if (tra) then
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= czero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    if (a%is_dev()) call a%sync()
+    call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_c_vect_cuda)
+      select type(yy => y)
+      type is (psb_c_vect_cuda)
+        if (xx%is_host()) call xx%sync()
+        if (beta /= czero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        if (a%is_dev()) call a%sync()
+        call a%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      rx = x%get_vect()
+      ry = y%get_vect()
+      if (a%is_dev()) call a%sync()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_hlg_vect_mv
diff --git a/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90
new file mode 100644
index 00000000..dbbeb1f2
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz)
+  ! Allocate through the parent CSR type, call initFcusparse, then call a%to_gpu.
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  integer(psb_ipk_) :: err_act, info
+  character(len=20) :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  call a%psb_c_csr_sparse_mat%allocate(m,n,nz)
+  ! Keep the initFcusparse status: to_gpu runs only when it returned 0, so it is not overwritten.
+  info = initFcusparse()
+  if (info == 0) call a%to_gpu(info,nzrm=nz)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_hybg_allocate_mnnz
+#endif
diff --git a/cuda/impl/psb_c_cuda_hybg_csmm.F90 b/cuda/impl/psb_c_cuda_hybg_csmm.F90
new file mode 100644
index 00000000..8899f54e
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hybg_csmm.F90
@@ -0,0 +1,126 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmm + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hybg_csmv + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hybg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hybg_inner_vect_sv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. 
+ info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=czero)) then + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hybg_inner_vect_sv +#endif diff --git a/cuda/impl/psb_c_cuda_hybg_mold.F90 b/cuda/impl/psb_c_cuda_hybg_mold.F90 new file mode 100644 index 00000000..ba0e31a0 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! 
modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_c_cuda_hybg_mold(a,b,info)
+  ! (Re)allocate b with dynamic type psb_c_cuda_hybg_sparse_mat; a is not referenced in the body.
+  use psb_base_mod
+  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_mold
+  implicit none
+  class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a
+  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='hybg_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_c_cuda_hybg_sparse_mat :: b, stat=info)
+
+  ! Success: b now has the requested dynamic type.
+  if (info == psb_success_) return
+
+  ! Failure of either the deallocate or the allocate is reported as one
+  ! allocation/deallocation error.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_hybg_mold
+#endif
diff --git a/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90
new file mode 100644
index 00000000..c3bd728b
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_c_cuda_hybg_reallocate_nz(nz,a)
+  ! Reallocate through the parent CSR type for nz entries, then call a%to_gpu.
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
+  ! No info argument here: a to_gpu failure is recorded on the error stack before the handler.
+  integer(psb_ipk_) :: err_act, info
+  character(len=32) :: name='c_cuda_hybg_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  !
+  ! What should this really do???
+  !
+  call a%psb_c_csr_sparse_mat%reallocate(nz)
+
+  call a%to_gpu(info,nzrm=nz)
+  if (info /= 0) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='to_gpu')
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_cuda_hybg_reallocate_nz
+#endif
diff --git a/cuda/impl/psb_c_cuda_hybg_scal.F90 b/cuda/impl/psb_c_cuda_hybg_scal.F90
new file mode 100644
index 00000000..d7e36019
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hybg_scal.F90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! 
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! psb_c_cuda_hybg_scal
+!   Scales a HYBG matrix by the vector d using the parent CSR scal (the
+!   optional side argument is forwarded unchanged), then refreshes the
+!   device copy with to_gpu.  A matrix flagged as unit is switched to
+!   non-unit with make_nonunit() before scaling.
+!
+! Arguments:
+!   d    - scaling vector.
+!   a    - the HYBG matrix, updated in place.
+!   info - return status; non-zero when the CSR scal or to_gpu fails.
+!   side - optional, passed through to the CSR scal.
+!
+subroutine psb_c_cuda_hybg_scal(d,a,info,side)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scal
+  implicit none
+  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in)      :: d(:)
+  integer(psb_ipk_), intent(out)     :: info
+  character, intent(in), optional    :: side
+
+  integer(psb_ipk_) :: saved_action
+
+  info = psb_success_
+  call psb_erractionsave(saved_action)
+
+  if (a%is_unit()) call a%make_nonunit()
+
+  ! Scale on the host first; only rebuild the device image on success.
+  call a%psb_c_csr_sparse_mat%scal(d,info,side=side)
+  if (info == 0) call a%to_gpu(info)
+
+  if (info == 0) then
+    call psb_erractionrestore(saved_action)
+  else
+    call psb_error_handler(saved_action)
+  end if
+
+  return
+
+end subroutine psb_c_cuda_hybg_scal
+#endif
diff --git a/cuda/impl/psb_c_cuda_hybg_scals.F90 b/cuda/impl/psb_c_cuda_hybg_scals.F90
new file mode 100644
index 00000000..03106f7a
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hybg_scals.F90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! psb_c_cuda_hybg_scals
+!   Scalar counterpart of the HYBG scal: scales the matrix by the single
+!   value d through the parent CSR scal and then refreshes the device copy
+!   with to_gpu.  A matrix flagged as unit is switched to non-unit first.
+!
+! Arguments:
+!   d    - scaling factor.
+!   a    - the HYBG matrix, updated in place.
+!   info - return status; non-zero when the CSR scal or to_gpu fails.
+!
+subroutine psb_c_cuda_hybg_scals(d,a,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scals
+  implicit none
+  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in)      :: d
+  integer(psb_ipk_), intent(out)     :: info
+
+  integer(psb_ipk_) :: prev_action
+
+  info = psb_success_
+  call psb_erractionsave(prev_action)
+
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_c_csr_sparse_mat%scal(d,info)
+  ! The device image is rebuilt only when the host scaling succeeded.
+  if (info == 0) call a%to_gpu(info)
+
+  if (info /= 0) then
+    call psb_error_handler(prev_action)
+    return
+  end if
+
+  call psb_erractionrestore(prev_action)
+  return
+
+end subroutine psb_c_cuda_hybg_scals
+#endif
diff --git a/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90
new file mode 100644
index 00000000..a424c795
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90
@@ -0,0 +1,148 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! psb_c_cuda_hybg_to_gpu
+!   (Re)builds the device image of a HYBG matrix from its host CSR arrays
+!   (irp, ja, val).  Any existing device matrix is released first.
+!
+!   For a matrix flagged as unit, a temporary CSR copy with an explicit
+!   diagonal of ones is assembled and uploaded with diag type non-unit;
+!   otherwise the host arrays are uploaded as they are.  Triangular
+!   matrices are then tagged as such (with their fill mode) and the
+!   triangular-solve analysis is run.
+!
+! Arguments:
+!   a    - the HYBG matrix; a%deviceMat is rebuilt.
+!   info - 0 on success, otherwise the first non-zero status returned by
+!          an allocation or by one of the HYBG device calls.  A message
+!          is also written to unit 0 on failure.
+!   nzrm - optional; accepted for interface compatibility, not used here.
+!
+! Returns immediately with info = 0 when a%val or a%ja is not allocated.
+!
+subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_to_gpu
+  implicit none
+  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)                   :: info
+  integer(psb_ipk_), intent(in), optional          :: nzrm
+
+  integer(psb_ipk_) :: m, n, nz
+  integer(psb_ipk_) :: nzdi,i,j,nrz
+  integer(psb_ipk_), allocatable :: irpdi(:),jadi(:)
+  complex(psb_spk_), allocatable :: valdi(:)
+
+  info = 0
+
+  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
+
+  m  = a%get_nrows()
+  n  = a%get_ncols()
+  nz = a%get_nzeros()
+  if (c_associated(a%deviceMat%Mat)) then
+    info = HYBGDeviceFree(a%deviceMat)
+  end if
+  if (a%is_unit()) then
+    !
+    ! CUSPARSE has the habit of storing the diagonal and then ignoring,
+    ! whereas we do not store it. Hence this adapter code.
+    !
+    nzdi = nz + m
+    if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi)
+    if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
+    ! We are explicitly adding the diagonal
+    if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+    ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when
+    ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with
+    ! TRIANGULAR if needed. Weird, but works.
+    if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general)
+    if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
+    if (info == 0) then
+      irpdi(1) = 1
+      if (a%is_triangle().and.a%is_upper()) then
+        ! Upper triangle: the unit diagonal entry comes first in each row.
+        do i=1,m
+          j        = irpdi(i)
+          jadi(j)  = i
+          valdi(j) = cone
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+1:j+nrz)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
+          irpdi(i+1) = j + nrz + 1
+        end do
+      else
+        ! Otherwise the unit diagonal entry is appended at the end of the row.
+        do i=1,m
+          j   = irpdi(i)
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+0:j+nrz-1)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
+          jadi(j+nrz)  = i
+          valdi(j+nrz) = cone
+          irpdi(i+1) = j + nrz + 1
+        end do
+      end if
+    end if
+    if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
+
+  else
+
+    if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz)
+    if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
+    ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when
+    ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with
+    ! TRIANGULAR if needed. Weird, but works.
+    if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general)
+    ! This branch is only reached when a%is_unit() is false, so the
+    ! diagonal type is always non-unit here.
+    if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+
+    if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
+
+  endif
+
+  ! Common tail for both branches: once the data is on the device, tag
+  ! triangular matrices and run the solve analysis.  Each step runs only
+  ! if the previous one succeeded, so a failure in SetMatType can no
+  ! longer be overwritten by the status of SetMatFillMode.
+  if ((info == 0) .and. a%is_triangle()) then
+    info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+    if (info == 0) then
+      if (a%is_upper()) then
+        info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else
+        info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+    if (info == 0) info = HYBGDeviceHybsmAnalysis(a%deviceMat)
+  end if
+
+  if (info /= 0) then
+    write(0,*) 'Error in HYBG_TO_GPU ',info
+  end if
+
+end subroutine psb_c_cuda_hybg_to_gpu
+#endif
diff --git a/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90
new file mode 100644
index 00000000..cb38321f
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90
@@ -0,0 +1,118 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! psb_c_cuda_hybg_vect_mv
+!   Sparse matrix-vector product for a HYBG matrix on encapsulated vectors
+!   (presumably y <- alpha*op(A)*x + beta*y, the usual spmm contract --
+!   TODO confirm against the base-class documentation).
+!
+!   Dispatch:
+!     * trans = 'T' or 'C' (case-insensitive): computed on the host through
+!       the parent CSR spmm, after syncing x (and y when beta /= czero);
+!       y is then marked as host-current.
+!     * otherwise, when both x and y are psb_c_vect_cuda: computed on the
+!       device with spmvHYBGDevice; y is then marked as device-current.
+!     * otherwise: x and y are copied out with get_vect, the product is
+!       done by the parent CSR spmm on plain arrays and y is rebuilt from
+!       the result with bld.
+!
+! Arguments:
+!   alpha, beta - scalar coefficients.
+!   a           - the HYBG matrix; must be in the assembled state.
+!   x, y        - input and input/output vectors.
+!   info        - return status.
+!   trans       - optional; defaults to 'N' when absent.
+!
+! Errors (pushed on the PSBLAS error stack):
+!   psb_err_invalid_mat_state_   - a is not assembled.
+!   psb_err_from_subroutine_ai_  - spmvHYBGDevice returned non-zero.
+!
+subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use cusparse_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_vect_mv
+  use psb_c_cuda_vect_mod
+  implicit none
+  class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in) :: alpha, beta
+  class(psb_c_base_vect_type), intent(inout) :: x
+  class(psb_c_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  complex(psb_spk_), allocatable :: rx(:), ry(:)
+  logical :: tra
+  character :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='c_cuda_hybg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  ! Resolve the optional argument to a local copy with default 'N'.
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+
+  if (tra) then
+    ! Transposed products are delegated to the host CSR implementation.
+    if (.not.x%is_host()) call x%sync()
+    ! y is only synced when beta /= czero.
+    if (beta /= czero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    ! tra can only be true when trans is present, so passing it on is safe.
+    call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_c_vect_cuda)
+      select type(yy => y)
+      type is (psb_c_vect_cuda)
+        ! Both vectors are CUDA vectors: sync them and multiply on the GPU.
+        if (xx%is_host()) call xx%sync()
+        if (beta /= czero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        ! y is not a CUDA vector: fall back to the host product on arrays.
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      ! x is not a CUDA vector: fall back to the host product on arrays.
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_c_cuda_hybg_vect_mv
+#endif
diff --git a/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90
new file mode 100644
index 00000000..f0a74c09
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90
@@ -0,0 +1,58 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_c_cuda_mv_csrg_from_coo
+!   Moves a COO matrix into a CSRG matrix: the parent CSR mv_from_coo does
+!   the host-side conversion, then to_gpu builds the device copy.
+!
+! Arguments:
+!   a    - destination CSRG matrix.
+!   b    - source COO matrix, handed to the parent mv_from_coo.
+!   info - psb_success_ on success; any failure of either step is reported
+!          as psb_err_alloc_dealloc_.
+!
+subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_coo
+  implicit none
+
+  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout)       :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+
+  ! Host conversion first; the upload is attempted only if it worked.
+  call a%psb_c_csr_sparse_mat%mv_from_coo(b,info)
+  if (info == 0) call a%to_gpu(info)
+
+  ! Every failure is collapsed onto the same error code.
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+  return
+
+end subroutine psb_c_cuda_mv_csrg_from_coo
diff --git a/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90
new file mode 100644
index 00000000..eb3698b0
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+!
(C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+!
+! psb_c_cuda_mv_csrg_from_fmt
+!   Moves a matrix in any storage format into a CSRG matrix.
+!   A COO source is routed through mv_from_coo; any other format goes
+!   through the parent CSR mv_from_fmt, followed by to_gpu to build the
+!   device copy.
+!
+! Arguments:
+!   a    - destination CSRG matrix.
+!   b    - source matrix.
+!   info - return status of the conversion / upload.  Declared with kind
+!          psb_ipk_ like every other routine in this family, so it also
+!          matches when psb_ipk_ is not the default integer kind.
+!
+subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_fmt
+  implicit none
+
+  class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_c_base_sparse_mat), intent(inout)      :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  !locals
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_c_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+  class default
+    call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info)
+    if (info /= 0) return
+    call a%to_gpu(info)
+  end select
+
+end subroutine psb_c_cuda_mv_csrg_from_fmt
diff --git a/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90
new file mode 100644
index 00000000..c1ee2ba9
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_c_cuda_mv_diag_from_coo
+!   Moves a COO matrix into a CUDA DIAG matrix: the source is fixed first
+!   when it is not flagged as ordered by rows, copied with cp_from_coo,
+!   and freed only after the copy succeeded.
+!
+! Arguments:
+!   a    - destination DIAG matrix.
+!   b    - source COO matrix; freed on success, left alone on failure.
+!   info - psb_success_ on success; any failure is reported as
+!          psb_err_alloc_dealloc_.
+!
+subroutine psb_c_cuda_mv_diag_from_coo(a,b,info)
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_mv_diag_from_coo
+
+  implicit none
+
+  class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout)       :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+
+  if (info == psb_success_) then
+    call a%cp_from_coo(b,info)
+    if (info == 0) then
+      ! Copy succeeded: the source can now be released.
+      call b%free()
+      return
+    end if
+  end if
+
+  ! Reached only when the fix or the copy failed.
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_c_cuda_mv_diag_from_coo
diff --git a/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90
new file mode 100644
index 00000000..f9555729
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90
@@ -0,0 +1,55 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+!
documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_c_cuda_mv_elg_from_coo
+!   Moves a COO matrix into a CUDA ELG matrix.  The source is fixed first
+!   when it is not flagged as ordered by rows, synced when it reports
+!   is_dev(), copied with cp_from_coo and then freed.
+!
+! Arguments:
+!   a    - destination ELG matrix.
+!   b    - source COO matrix; freed once the copy has been attempted.
+!   info - status of b%fix when that fails, otherwise of cp_from_coo.
+!
+subroutine psb_c_cuda_mv_elg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_coo
+  implicit none
+
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout)      :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+
+  ! Nothing else is touched when the fix step failed.
+  if (info == psb_success_) then
+    if (b%is_dev()) call b%sync()
+    call a%cp_from_coo(b,info)
+    call b%free()
+  end if
+
+  return
+
+end subroutine psb_c_cuda_mv_elg_from_coo
diff --git a/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90
new file mode 100644
index 00000000..59615e25
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90
@@ -0,0 +1,86 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+!
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+!
+! psb_c_cuda_mv_elg_from_fmt
+!   Moves a matrix in any storage format into a CUDA ELG matrix.
+!     * COO source: delegated to mv_from_coo.
+!     * ELL (or derived) source: the index/value arrays are re-laid out to
+!       the pitch and row width returned by FgetEllDeviceParams, the
+!       remaining components are moved over, the source is freed and the
+!       device copy is built with to_gpu.
+!     * anything else: converted to a temporary COO and then moved.
+!
+! Arguments:
+!   a    - destination ELG matrix.
+!   b    - source matrix; synced first when it reports is_dev().
+!   info - return status.  In the ELL path a failure of psb_realloc or
+!          deallocate is preserved: to_gpu is only attempted when the host
+!          side was set up correctly, so it can no longer overwrite the
+!          earlier error.
+!
+subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_fmt
+  implicit none
+
+  class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_c_base_sparse_mat), intent(inout)     :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  !locals
+  type(psb_c_coo_sparse_mat) :: tmp
+  integer(psb_ipk_)          :: nza, nc, ld, nzm, m
+  type(elldev_parms)         :: gpu_parms
+
+  info = psb_success_
+
+  if (b%is_dev()) call b%sync()
+  select type (b)
+  type is (psb_c_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+  class is (psb_c_ell_sparse_mat)
+    nzm = size(b%ja,2)
+    m   = b%get_nrows()
+    nc  = b%get_ncols()
+    nza = b%get_nzeros()
+    ! NOTE(review): spgpu_type_double is used for a single-precision
+    ! complex matrix; confirm whether a complex-float type tag is meant.
+    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+    ld  = gpu_parms%pitch
+    nzm = gpu_parms%maxRowSize
+    ! NOTE(review): the copies below assume maxRowSize does not exceed
+    ! size(b%ja,2) -- confirm against FgetEllDeviceParams.
+    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+    call move_alloc(b%irn,   a%irn)
+    call move_alloc(b%idiag, a%idiag)
+    call psb_realloc(ld,nzm,a%ja,info)
+    if (info == 0) then
+      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+      deallocate(b%ja,stat=info)
+    end if
+    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+    if (info == 0) then
+      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+      deallocate(b%val,stat=info)
+    end if
+    a%nzt = nza
+    call b%free()
+    ! Upload only a consistent host image, and keep any earlier error.
+    if (info == 0) call a%to_gpu(info)
+
+  class default
+    call b%mv_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_c_cuda_mv_elg_from_fmt
diff --git a/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90
new file mode 100644
index 00000000..21ee731d
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1.
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+!
+! psb_c_cuda_mv_hdiag_from_coo
+!   Moves a COO matrix into a CUDA HDIAG matrix.  The hack size is set to
+!   the value returned by psb_cuda_WarpSize(), the parent HDIA mv_from_coo
+!   does the host-side conversion, and to_gpu builds the device copy.
+!
+! Arguments:
+!   a    - destination HDIAG matrix.
+!   b    - source COO matrix, handed to the parent mv_from_coo.
+!   info - psb_success_ on success; a failure of either the host
+!          conversion or the upload is reported as psb_err_alloc_dealloc_.
+!          The conversion status is now checked before to_gpu is called,
+!          so it is no longer overwritten by the upload status.
+!
+subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info)
+
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_mv_hdiag_from_coo
+  use psb_cuda_env_mod
+
+  implicit none
+
+  class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout)        :: b
+  integer(psb_ipk_), intent(out)                    :: info
+
+  info = psb_success_
+
+  a%hacksize = psb_cuda_WarpSize()
+
+  call a%psb_c_hdia_sparse_mat%mv_from_coo(b,info)
+  ! Do not upload a matrix whose host conversion failed.
+  if (info /= 0) goto 9999
+
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_c_cuda_mv_hdiag_from_coo
diff --git a/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90
new file mode 100644
index 00000000..50c20fad
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90
@@ -0,0 +1,55 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_c_cuda_mv_hlg_from_coo
+!   Moves a COO matrix into a CUDA HLG matrix.  The source is fixed first
+!   when it is not flagged as ordered by rows, copied with cp_from_coo and
+!   then freed.
+!
+! Arguments:
+!   a    - destination HLG matrix.
+!   b    - source COO matrix; freed once the copy has been attempted.
+!   info - status of b%fix when that fails, otherwise of cp_from_coo.
+!
+subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_cuda_env_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_coo
+  implicit none
+
+  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout)      :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+
+  ! Copy and release the source only when the fix step did not fail.
+  if (info == psb_success_) then
+    call a%cp_from_coo(b,info)
+    call b%free()
+  end if
+
+  return
+
+end subroutine psb_c_cuda_mv_hlg_from_coo
diff --git a/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90
new file mode 100644
index 00000000..3fba905a
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info)
+  !
+  ! Move a matrix b of any storage format into the CUDA HLG matrix a.
+  !
+  ! A COO source goes straight to a%mv_from_coo. Any other format is
+  ! first moved into a temporary COO matrix with b%mv_to_coo, and that
+  ! temporary is then moved into a.
+  !
+  ! Arguments:
+  !   a    - destination matrix.
+  !   b    - source matrix (moved from).
+  !   info - psb_success_, or the code set by the conversion that failed.
+  !
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_fmt
+  implicit none
+
+  class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_c_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  ! Staging matrix used when b is not already in COO format
+  type(psb_c_coo_sparse_mat) :: coo_buf
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_c_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+  class default
+    call b%mv_to_coo(coo_buf,info)
+    if (info /= psb_success_) return
+    call a%mv_from_coo(coo_buf,info)
+  end select
+
+end subroutine psb_c_cuda_mv_hlg_from_fmt
diff --git a/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90
new file mode 100644
index 00000000..72837aa6
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90
@@ -0,0 +1,59 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+!
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info)
+  !
+  ! Move the COO matrix b into the CUDA HYBG matrix a.
+  ! Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
+  !
+  ! The parent CSR component performs the host-side move; a%to_gpu is
+  ! called afterwards, and only if that move succeeded.
+  !
+  ! Arguments:
+  !   a    - destination matrix.
+  !   b    - source COO matrix (moved from).
+  !   info - psb_success_ on success, psb_err_alloc_dealloc_ when either
+  !          step reports a non-zero code.
+  !
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_coo
+  implicit none
+
+  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  call a%psb_c_csr_sparse_mat%mv_from_coo(b,info)
+  if (info == 0) call a%to_gpu(info)
+
+  ! Any failure is reported with the same generic error code
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+end subroutine psb_c_cuda_mv_hybg_from_coo
+#endif
diff --git a/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90
new file mode 100644
index 00000000..da910175
--- /dev/null
+++ b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info)
+  !
+  ! Move a matrix b of any storage format into the CUDA HYBG matrix a.
+  ! Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
+  !
+  ! A COO source is forwarded to a%mv_from_coo. Every other format is
+  ! moved through the parent CSR component and, if that succeeds,
+  ! a%to_gpu is called.
+  !
+  ! Arguments:
+  !   a    - destination matrix.
+  !   b    - source matrix (moved from).
+  !   info - psb_success_, or the code set by the step that failed.
+  !
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_fmt
+  implicit none
+
+  class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
+  class(psb_c_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_c_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+  class default
+    call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info)
+    ! Skip the device step when the host-side move failed
+    if (info == 0) call a%to_gpu(info)
+  end select
+
+end subroutine psb_c_cuda_mv_hybg_from_fmt
+#endif
diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90
new file mode 100644
index 00000000..ab3a7256
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info)
+  !
+  ! Copy the COO matrix b into the CUDA CSRG matrix a.
+  !
+  ! The parent CSR component performs the host-side copy; a%to_gpu is
+  ! called afterwards, and only if that copy succeeded.
+  !
+  ! Arguments:
+  !   a    - destination matrix.
+  !   b    - source COO matrix (read only).
+  !   info - psb_success_ on success, psb_err_alloc_dealloc_ when either
+  !          step reports a non-zero code.
+  !
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_coo
+  implicit none
+
+  class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  call a%psb_d_csr_sparse_mat%cp_from_coo(b,info)
+  if (info == 0) call a%to_gpu(info)
+
+  ! Any failure is reported with the same generic error code
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+end subroutine psb_d_cuda_cp_csrg_from_coo
diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90
new file mode 100644
index 00000000..8f8e8cbe
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90
@@ -0,0 +1,55 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+!
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info)
+  !
+  ! Copy a matrix b of any storage format into the CUDA CSRG matrix a.
+  !
+  ! A COO source is forwarded to a%cp_from_coo. Every other format is
+  ! copied through the parent CSR component and, if that succeeds,
+  ! a%to_gpu is called.
+  !
+  ! Arguments:
+  !   a    - destination matrix.
+  !   b    - source matrix (read only).
+  !   info - psb_success_, or the code set by the step that failed.
+  !
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_fmt
+  implicit none
+
+  class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_d_base_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_d_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+  class default
+    call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info)
+    ! Skip the device step when the host-side copy failed
+    if (info == 0) call a%to_gpu(info)
+  end select
+
+end subroutine psb_d_cuda_cp_csrg_from_fmt
diff --git a/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90
new file mode 100644
index 00000000..dc0401d5
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90
@@ -0,0 +1,58 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cuda_cp_diag_from_coo(a,b,info)
+  !
+  ! Copy the COO matrix b into the CUDA DIAG matrix a.
+  !
+  ! The host-side conversion is delegated to the parent DIA component
+  ! (a%psb_d_dia_sparse_mat%cp_from_coo); a%to_gpu is then called to
+  ! build the device copy.
+  !
+  ! Arguments:
+  !   a    - destination matrix.
+  !   b    - source COO matrix (read only).
+  !   info - psb_success_ on success, psb_err_alloc_dealloc_ when either
+  !          the host conversion or a%to_gpu reports a non-zero code.
+  !
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_cp_diag_from_coo
+  implicit none
+
+  class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  ! Host-side conversion first. Stop on failure so that a%to_gpu is not
+  ! run on an incomplete matrix and the failure is actually reported.
+  call a%psb_d_dia_sparse_mat%cp_from_coo(b,info)
+  if (info /= 0) goto 9999
+
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_d_cuda_cp_diag_from_coo
diff --git a/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90
new file mode 100644
index 00000000..890bdc39
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90
@@ -0,0 +1,161 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_d_cuda_cp_elg_from_coo(a,b,info)
+  !
+  ! Copy the COO matrix b into the CUDA ELG matrix a, filling the device
+  ! storage directly from the COO arrays.
+  !
+  ! Per-row entry counts (a%irn) and row displacements are computed on
+  ! the host; the device matrix is then (re)allocated with
+  ! FallocEllDevice and filled by psi_CopyCooToElg. When b is not sorted
+  ! by rows, a temporary copy is made and put in row order first.
+  !
+  ! Arguments:
+  !   a    - destination matrix; an existing device matrix is released.
+  !   b    - source COO matrix.
+  !   info - psb_success_ on success, psb_err_alloc_dealloc_ on failure
+  !          of any step.
+  !
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_coo
+  use psi_ext_util_mod
+  use psb_cuda_env_mod
+  implicit none
+
+  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  integer(psb_ipk_) :: hacksize
+  type(psb_d_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  ! Never let the padding unit drop below one row
+  hacksize = max(1,psb_cuda_WarpSize())
+  if (b%is_dev()) call b%sync()
+
+  if (b%is_by_rows()) then
+    call elg_upload_sorted_coo(b,info)
+  else
+    ! The upload addresses the entries through per-row displacements,
+    ! so it needs a source sorted by rows: work on a copy and make sure
+    ! the copy is in row order before using it.
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) then
+      if (.not.tmp%is_by_rows()) call tmp%fix(info)
+    end if
+    if (info == psb_success_) call elg_upload_sorted_coo(tmp,info)
+  end if
+
+  if (info /= psb_success_) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+
+  !
+  ! Build the device matrix of a from src, which must be sorted by rows.
+  ! Sizes are always taken from src itself, i.e. from the matrix whose
+  ! ja/val arrays are handed to psi_CopyCooToElg.
+  !
+  subroutine elg_upload_sorted_coo(src,info)
+    class(psb_d_coo_sparse_mat), intent(in) :: src
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: nr, nc, nza, ldv, nzm
+    integer(psb_ipk_), allocatable :: idisp(:)
+
+    call psi_d_count_ell_from_coo(a,src,idisp,ldv,nzm,info,hacksize=hacksize)
+    ! Without the row counts there is nothing valid to allocate or copy
+    if (info /= psb_success_) return
+
+    if (c_associated(a%deviceMat)) then
+      call freeEllDevice(a%deviceMat)
+    endif
+
+    nr = src%get_nrows()
+    nc = src%get_ncols()
+    nza = src%get_nzeros()
+    info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1)
+
+    if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
+         & a%irn,idisp,src%ja,src%val, a%deviceMat)
+    call a%set_dev()
+
+  end subroutine elg_upload_sorted_coo
+
+  !
+  ! Count the entries of each row of b.
+  !
+  ! On return:
+  !   a%irn(1:nr)   - number of entries in each row
+  !   a%nzt         - sum of the row counts
+  !   idisp(1:nr+1) - running sum of the row counts, idisp(1) = 0
+  !   nzm           - largest row count
+  !   ldv           - number of rows rounded up to a multiple of hacksize
+  !   info          - code of the failed reallocation, if any
+  ! The base-class component of a is overwritten with that of b.
+  !
+  subroutine psi_d_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize)
+
+    use psb_base_mod
+    use psi_ext_util_mod
+    implicit none
+
+    class(psb_d_ell_sparse_mat), intent(inout) :: a
+    class(psb_d_coo_sparse_mat), intent(in) :: b
+    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
+    integer(psb_ipk_), intent(out) :: info, nzm, ldv
+    integer(psb_ipk_), intent(in), optional :: hacksize
+
+    !locals
+    integer(psb_ipk_) :: nza, nr, i, ir, hsz_
+
+    info = psb_success_
+
+    nr = b%get_nrows()
+    nza = b%get_nzeros()
+
+    hsz_ = 1
+    if (present(hacksize)) then
+      if (hacksize> 1) hsz_ = hacksize
+    end if
+    ! Make ldv a multiple of hacksize
+    ldv = ((nr+hsz_-1)/hsz_)*hsz_
+
+    a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+
+    ! First compute the number of nonzeros in each row.
+    call psb_realloc(nr,a%irn,info)
+    if (info == psb_success_) call psb_realloc(nr+1,idisp,info)
+    if (info /= psb_success_) return
+
+    a%irn = 0
+    do i=1, nza
+      ir = b%ia(i)
+      a%irn(ir) = a%irn(ir) + 1
+    end do
+
+    ! Then accumulate them into displacements, tracking the longest row.
+    nzm = 0
+    a%nzt = 0
+    idisp(1) = 0
+    do i=1,nr
+      nzm = max(nzm,a%irn(i))
+      a%nzt = a%nzt + a%irn(i)
+      idisp(i+1) = a%nzt
+    end do
+
+  end subroutine psi_d_count_ell_from_coo
+
+end subroutine psb_d_cuda_cp_elg_from_coo
diff --git a/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90
new file mode 100644
index 00000000..7beea7f1
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90
@@ -0,0 +1,89 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info)
+  !
+  ! Copy a matrix b of any storage format into the CUDA ELG matrix a.
+  !
+  !   COO source   - forwarded to a%cp_from_coo.
+  !   ELL source   - host arrays are copied and resized to the pitch and
+  !                  row size reported by FgetEllDeviceParams, then
+  !                  a%to_gpu is called.
+  !   other source - copied to a temporary COO matrix, which is then
+  !                  moved into a.
+  !
+  ! Arguments:
+  !   a    - destination matrix.
+  !   b    - source matrix.
+  !   info - psb_success_, or the code set by the step that failed.
+  !
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_fmt
+  implicit none
+
+  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_d_base_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  type(psb_d_coo_sparse_mat) :: tmp
+  integer(psb_ipk_) :: nza, nc, ld, nzm, m
+  type(elldev_parms) :: gpu_parms
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_d_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+  class is (psb_d_ell_sparse_mat)
+    nzm = psb_size(b%ja,2)
+    m = b%get_nrows()
+    nc = b%get_ncols()
+    nza = b%get_nzeros()
+    ! Ask the device layer which leading dimension and row size to use
+    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+    ld = gpu_parms%pitch
+    nzm = gpu_parms%maxRowSize
+    a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
+    if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
+    if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
+    if (info == 0) call psb_safe_cpy( b%val, a%val , info)
+    ! NOTE(review): the copies below read b%ja/b%val up to column
+    ! nzm = gpu_parms%maxRowSize; this presumes maxRowSize does not
+    ! exceed size(b%ja,2) - confirm against FgetEllDeviceParams.
+    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
+    if (info == 0) then
+      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+    end if
+    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+    if (info == 0) then
+      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+    end if
+    a%nzt = nza
+    ! Only go to the device when the host arrays were built, so that an
+    ! earlier failure code is returned to the caller as it is.
+    if (info == 0) call a%to_gpu(info)
+
+  class default
+
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_d_cuda_cp_elg_from_fmt
diff --git a/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90
new file mode 100644
index 00000000..82ef4876
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info)
+  !
+  ! Copy the COO matrix b into the CUDA HDIAG matrix a.
+  !
+  ! The host-side conversion is delegated to the parent HDIA component
+  ! (a%psb_d_hdia_sparse_mat%cp_from_coo); a%to_gpu is then called to
+  ! build the device copy.
+  !
+  ! Arguments:
+  !   a    - destination matrix; a%hacksize is set from psb_cuda_WarpSize().
+  !   b    - source COO matrix (read only).
+  !   info - psb_success_ on success, psb_err_alloc_dealloc_ when either
+  !          the host conversion or a%to_gpu reports a non-zero code.
+  !
+
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_cp_hdiag_from_coo
+  use psb_cuda_env_mod
+  implicit none
+
+  class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  a%hacksize = psb_cuda_WarpSize()
+
+  ! Host-side conversion first. Stop on failure so that a%to_gpu is not
+  ! run on an incomplete matrix and the failure is actually reported.
+  call a%psb_d_hdia_sparse_mat%cp_from_coo(b,info)
+  if (info /= 0) goto 9999
+
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_d_cuda_cp_hdiag_from_coo
diff --git a/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90
new file mode 100644
index 00000000..34b999a9
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90
@@ -0,0 +1,190 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info)
+  !
+  ! Copy the COO matrix b into the CUDA HLG matrix a, filling the device
+  ! storage directly from the COO arrays.
+  !
+  ! Row counts, row displacements and hack offsets are computed on the
+  ! host by psi_compute_hckoff_from_coo; the device matrix is then
+  ! (re)allocated with FallochllDevice and filled by psi_CopyCooToHlg.
+  ! When b is not sorted by rows, a fixed temporary copy is used instead.
+  !
+  ! Arguments:
+  !   a    - destination matrix; an existing device matrix is released.
+  !   b    - source COO matrix (read only).
+  !   info - psb_success_ on success. A failure inside
+  !          psi_compute_hckoff_from_coo returns that routine's code;
+  !          every other failure returns psb_err_alloc_dealloc_.
+  !
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_cuda_env_mod
+  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_coo
+  implicit none
+
+  class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  type(psb_d_coo_sparse_mat) :: tmp
+  integer(psb_ipk_) :: debug_level, debug_unit, hksz
+  integer(psb_ipk_), allocatable :: idisp(:)
+  integer(psb_ipk_) :: nza, nr, nc, isz, noffs
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  debug_unit = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ! Never let the hack size drop below one row
+  hksz = max(1,psb_cuda_WarpSize())
+
+  if (b%is_by_rows()) then
+
+    nr = b%get_nrows()
+    nc = b%get_ncols()
+    nza = b%get_nzeros()
+    if (debug) write(0,*) 'Copying through GPU',nza
+    call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
+    if (info /=0) then
+      write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
+      return
+    end if
+    if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
+
+    if (c_associated(a%deviceMat)) then
+      call freeHllDevice(a%deviceMat)
+    endif
+    info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
+    if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
+         & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat)
+    call a%set_dev()
+  else
+    ! This is to guarantee tmp%is_by_rows(). Do not fix, or go on to use,
+    ! a copy that could not be made.
+    call b%cp_to_coo(tmp,info)
+    if (info == 0) call tmp%fix(info)
+    if (info /= 0) goto 9999
+
+    nr = tmp%get_nrows()
+    nc = tmp%get_ncols()
+    nza = tmp%get_nzeros()
+    if (debug) write(0,*) 'Copying through GPU'
+    call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info)
+    if (info /=0) then
+      write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
+      return
+    end if
+    if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
+
+    if (c_associated(a%deviceMat)) then
+      call freeHllDevice(a%deviceMat)
+    endif
+    info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
+    if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
+         & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat)
+
+    call tmp%free()
+    call a%set_dev()
+  end if
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+  !
+  ! Compute, for the COO matrix b, the row counts and hack offsets
+  ! needed to lay out a as an HLL matrix with hack size hksz.
+  !
+  ! On return:
+  !   a%irn(1:nr)         - number of entries in each row
+  !   a%nzt               - number of entries of b
+  !   a%hkoffs(1:noffs+1) - running offsets; each hack of up to hksz rows
+  !                         adds (longest row in the hack)*hksz
+  !   idisp(1:nr+1)       - running sum of the row counts, idisp(1) = 0
+  !   noffs               - number of hacks, ceiling(nr/hksz)
+  !   isz                 - a%hkoffs(noffs+1), the allocation size
+  !   info                - code of the failed reallocation, if any
+  ! The base-class component of a is overwritten with that of b.
+  !
+  subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
+    use psb_base_mod
+    use psi_ext_util_mod
+    implicit none
+    class(psb_d_hll_sparse_mat), intent(inout) :: a
+    class(psb_d_coo_sparse_mat), intent(in) :: b
+    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
+    integer(psb_ipk_), intent(in) :: hksz
+    integer(psb_ipk_), intent(out) :: info, noffs, isz
+
+    !locals
+    integer(psb_ipk_) :: nza, nr, nc, i, j, k, ir, mxrwl
+    logical, parameter :: debug=.false.
+
+    info = 0
+    nr = b%get_nrows()
+    nc = b%get_ncols()
+    nza = b%get_nzeros()
+
+    a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+    if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza
+    ! First compute the number of nonzeros in each row.
+    call psb_realloc(nr,a%irn,info)
+    if (info == 0) call psb_realloc(nr+1,idisp,info)
+    if (info /= 0) return
+    a%irn = 0
+    if (debug) then
+      ! Debug build of the loop: range-check every row index
+      do i=1, nza
+        if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then
+          a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
+        else
+          write(0,*) 'Out of bouds IA ',i,b%ia(i),nr
+        end if
+      end do
+    else
+      do i=1, nza
+        a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
+      end do
+    end if
+    a%nzt = nza
+
+
+    ! Second. Figure out the block offsets.
+    call a%set_hksz(hksz)
+    noffs = (nr+hksz-1)/hksz
+    call psb_realloc(noffs+1,a%hkoffs,info)
+    if (debug) write(0,*) ' noffsets ',noffs,info
+    if (info /= 0) return
+    a%hkoffs(1) = 0
+    j=1
+    idisp(1) = 0
+    do i=1,nr,hksz
+      ! ir: number of rows in this hack (the last one may be shorter)
+      ir = min(hksz,nr-i+1)
+      mxrwl = a%irn(i)
+      idisp(i+1) = idisp(i) + a%irn(i)
+      do k=1,ir-1
+        idisp(i+k+1) = idisp(i+k) + a%irn(i+k)
+        mxrwl = max(mxrwl,a%irn(i+k))
+      end do
+      a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
+      j = j + 1
+    end do
+
+    !
+    ! At this point a%hkoffs(noffs+1) contains the allocation
+    ! size a%ja a%val.
+    !
+    isz = a%hkoffs(noffs+1)
+  end subroutine psi_compute_hckoff_from_coo
+
+end subroutine psb_d_cuda_cp_hlg_from_coo
diff --git a/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90
new file mode 100644
index 00000000..ecb15157
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90
@@ -0,0 +1,62 @@
+!
Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
!
! psb_d_cuda_cp_hlg_from_fmt
!
! Copy the sparse matrix B, whatever its storage format, into the
! CUDA HLG matrix A.
!
! Arguments:
!   a    (inout) destination matrix.
!   b    (in)    source matrix.
!   info (out)   psb_success_ when everything went well; any failure,
!                whatever its origin, is reported as
!                psb_err_alloc_dealloc_.
!
subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info)

  use psb_base_mod
  use hlldev_mod
  use psb_vectordev_mod
  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_fmt
  implicit none

  class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
  class(psb_d_base_sparse_mat), intent(in)        :: b
  integer(psb_ipk_), intent(out)                  :: info

  info = psb_success_

  select type(b)
  type is (psb_d_coo_sparse_mat)
    ! COO source: use the dedicated COO copy routine of this type.
    call a%cp_from_coo(b,info)
  class default
    ! Any other source format: convert through the psb_d_hll_sparse_mat
    ! component, and call to_gpu only when that conversion succeeded.
    call a%psb_d_hll_sparse_mat%cp_from_fmt(b,info)
    if (info == 0) call a%to_gpu(info)
  end select

  ! All failures are collapsed onto the generic alloc/dealloc error code.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_d_cuda_cp_hlg_from_fmt
!
! psb_d_cuda_cp_hybg_from_coo
!
! Copy the COO matrix B into the CUDA HYBG matrix A.  The routine is
! only compiled when PSB_CUDA_SHORT_VERSION <= 10.
!
! Arguments:
!   a    (inout) destination matrix.
!   b    (in)    source matrix in COO format.
!   info (out)   psb_success_ on success; any failure is reported as
!                psb_err_alloc_dealloc_.
!
#if PSB_CUDA_SHORT_VERSION <= 10

subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_coo
  implicit none

  class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_d_coo_sparse_mat), intent(in)          :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  ! Convert through the psb_d_csr_sparse_mat component first; to_gpu is
  ! attempted only if that conversion returned no error.
  call a%psb_d_csr_sparse_mat%cp_from_coo(b,info)
  if (info == 0) call a%to_gpu(info)

  ! Failures of either step are mapped onto the same error code.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_d_cuda_cp_hybg_from_coo
#endif
!
! psb_d_cuda_cp_hybg_from_fmt
!
! Copy the sparse matrix B, whatever its storage format, into the
! CUDA HYBG matrix A.  The routine is only compiled when
! PSB_CUDA_SHORT_VERSION <= 10.
!
! Arguments:
!   a    (inout) destination matrix.
!   b    (inout) source matrix.
!   info (out)   psb_success_ on success, otherwise the error code
!                returned by the step that failed.
!
#if PSB_CUDA_SHORT_VERSION <= 10

subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_fmt
  implicit none

  class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_d_base_sparse_mat), intent(inout)      :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  select type(b)
  type is (psb_d_coo_sparse_mat)
    ! COO source: use the dedicated COO copy routine of this type.
    call a%cp_from_coo(b,info)
  class default
    ! Any other source format: convert through the psb_d_csr_sparse_mat
    ! component, and call to_gpu only when that conversion succeeded.
    call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info)
    if (info == 0) call a%to_gpu(info)
  end select

end subroutine psb_d_cuda_cp_hybg_from_fmt
#endif
!
! psb_d_cuda_csrg_allocate_mnnz
!
! Allocate the CUDA CSRG matrix A for M rows and N columns, with an
! optional size hint NZ for the number of nonzeros, then initialise
! CUSPARSE (initFcusparse) and call to_gpu.
!
! Arguments:
!   m, n (in)           matrix dimensions handed to the CSR allocator.
!   a    (inout)        matrix to be allocated.
!   nz   (in, optional) nonzero count; forwarded as is (possibly
!                       absent) to the CSR allocator and to to_gpu.
!
! Error handling:
!   The routine has no INFO dummy argument, so a failure can only be
!   reported through the PSBLAS error stack: each failing step pushes
!   an entry naming the failing call before the error handler runs.
!
subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use cusparse_mod
  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)                    :: m,n
  class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional          :: nz

  integer(psb_ipk_) :: err_act, info
  character(len=20) :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_

  ! Host-side allocation through the psb_d_csr_sparse_mat component.
  call a%psb_d_csr_sparse_mat%allocate(m,n,nz)

  info = initFcusparse()
  if (info /= 0) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name,a_err='initFcusparse')
    goto 9999
  end if

  call a%to_gpu(info,nzrm=nz)
  if (info /= 0) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name,a_err='to_gpu')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_d_cuda_csrg_allocate_mnnz
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmm + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_csrg_csmv + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_csrg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_csrg_from_gpu + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (PSB_CUDA_SHORT_VERSION <= 10) || (PSB_CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() + +end subroutine psb_d_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..9a45ee17 --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
!
! psb_d_cuda_csrg_inner_vect_sv
!
! Triangular-solve kernel for a CUDA CSRG matrix on encapsulated
! vectors, called as y <- (alpha, a, x, beta, y).
!
! Arguments:
!   alpha, beta (in)    scalar coefficients.
!   a     (in)          matrix; must be in the assembled state.
!   x, y  (inout)       input and output vectors.
!   info  (out)         psb_success_ or a PSBLAS error code.
!   trans (in,optional) 'N' (default), 'T' or 'C', case-insensitive.
!
! Execution paths:
!   * trans is 'T'/'C', or beta /= 0: both vectors are synced and the
!     solve is done by the psb_d_csr_sparse_mat component; y is then
!     flagged as host-resident.
!   * otherwise, when both x and y are psb_d_vect_cuda: the solve is
!     done by spsvCSRGDevice and y is flagged as device-resident.
!   * otherwise: the vector contents are extracted with get_vect, the
!     solve is done by the psb_d_csr_sparse_mat component on plain
!     arrays, and y is rebuilt from the result.
!
subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_inner_vect_sv
  use psb_d_cuda_vect_mod
  implicit none
  class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(in)                    :: alpha, beta
  class(psb_d_base_vect_type), intent(inout)    :: x, y
  integer(psb_ipk_), intent(out)                :: info
  character, optional, intent(in)               :: trans

  real(psb_dpk_), allocatable :: rx(:), ry(:)
  logical                     :: tra
  character                   :: trans_
  integer(psb_ipk_)           :: err_act
  ! Assumed-length constant: the routine name is 25 characters long and
  ! would be cut short by a fixed len=20 declaration.
  character(len=*), parameter :: name='d_cuda_csrg_inner_vect_sv'

  call psb_get_erraction(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  if (tra.or.(beta/=dzero)) then
    ! Transposed solve or nonzero beta: use the CSR component.
    call x%sync()
    call y%sync()
    call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    ! From here on beta == dzero and the solve is not transposed.
    select type (xx => x)
    type is (psb_d_vect_cuda)
      select type(yy => y)
      type is (psb_d_vect_cuda)
        if (xx%is_host()) call xx%sync()
        ! No sync of yy is done here: beta is zero in this branch, so
        ! the former "beta /= dzero" guard around it could never fire.
        info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
      class default
        ! x is a CUDA vector but y is not: work on plain arrays.
        rx = xx%get_vect()
        ry = y%get_vect()
        call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
        call y%bld(ry)
      end select
    class default
      ! x is not a CUDA vector: work on plain arrays.
      rx = x%get_vect()
      ry = y%get_vect()
      call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
      call y%bld(ry)
    end select
  end if
  if (info /= psb_success_) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name, a_err='csrg_vect_sv')
    goto 9999
  end if


  call psb_erractionrestore(err_act)
  return


9999 call psb_error_handler(err_act)

  return

end subroutine psb_d_cuda_csrg_inner_vect_sv
!
! psb_d_cuda_csrg_mold
!
! Give B the dynamic type psb_d_cuda_csrg_sparse_mat.  If B is already
! allocated it is freed and deallocated first.
!
! Arguments:
!   a    (in)    matrix acting as the mold; its contents are not read.
!   b    (inout) allocatable matrix that receives the new dynamic type.
!   info (out)   psb_success_ on success, psb_err_alloc_dealloc_ when
!                the deallocation or the allocation fails.
!
subroutine psb_d_cuda_csrg_mold(a,b,info)

  use psb_base_mod
  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_mold
  implicit none
  class(psb_d_cuda_csrg_sparse_mat), intent(in)            :: a
  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='csrg_mold'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)

  info = 0

  ! Release whatever B currently holds.
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if

  ! Allocate with the new dynamic type only if the release went well.
  if (info == 0) allocate(psb_d_cuda_csrg_sparse_mat :: b, stat=info)

  ! Record the failure on the error stack and hand over to the handler.
  if (info /= psb_success_) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    call psb_error_handler(err_act)
  end if

end subroutine psb_d_cuda_csrg_mold
!
! psb_d_cuda_csrg_reallocate_nz
!
! Reallocate the storage of the CUDA CSRG matrix A for NZ nonzeros
! through the psb_d_csr_sparse_mat component, then call to_gpu with
! the same size.
!
! Arguments:
!   nz (in)    requested number of nonzeros.
!   a  (inout) matrix to be resized.
!
! Error handling:
!   The routine has no INFO dummy argument, so a failure of to_gpu can
!   only be reported through the PSBLAS error stack: an entry is pushed
!   before the error handler runs.
!
! NOTE(review): the original author left the question "What should
! this really do???" on this routine; the intended semantics are still
! to be confirmed.
!
subroutine psb_d_cuda_csrg_reallocate_nz(nz,a)

  use psb_base_mod
  use cusparse_mod
  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_reallocate_nz
  implicit none
  integer(psb_ipk_), intent(in)                    :: nz
  class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a

  integer(psb_ipk_)           :: err_act, info
  ! Assumed-length constant: the routine name is 25 characters long and
  ! would be cut short by a fixed len=20 declaration.
  character(len=*), parameter :: name='d_cuda_csrg_reallocate_nz'

  call psb_erractionsave(err_act)
  info = psb_success_

  call a%psb_d_csr_sparse_mat%reallocate(nz)

  call a%to_gpu(info,nzrm=nz)
  if (info /= 0) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name,a_err='to_gpu')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_d_cuda_csrg_reallocate_nz
!
! psb_d_cuda_csrg_scal
!
! Scale the CUDA CSRG matrix A by the vector D.  The scaling itself is
! done by the psb_d_csr_sparse_mat component; to_gpu is called
! afterwards.
!
! Arguments:
!   d    (in)           scaling vector.
!   a    (inout)        matrix to be scaled.
!   info (out)          psb_success_ or the error code of the failing
!                       step.
!   side (in, optional) forwarded unchanged to the CSR scal routine.
!
subroutine psb_d_cuda_csrg_scal(d,a,info,side)

  use psb_base_mod
  use cusparse_mod
  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scal
  implicit none
  class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
  real(psb_dpk_), intent(in)                       :: d(:)
  integer(psb_ipk_), intent(out)                   :: info
  character, intent(in), optional                  :: side

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)

  ! If the matrix is flagged as device-resident, sync it before the
  ! scaling (presumably to refresh the host copy -- confirm in sync()).
  if (a%is_dev()) call a%sync()

  call a%psb_d_csr_sparse_mat%scal(d,info,side=side)

  ! to_gpu is attempted only when the scaling succeeded.
  if (info == 0) call a%to_gpu(info)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

end subroutine psb_d_cuda_csrg_scal
!
! psb_d_cuda_csrg_scals
!
! Scale the CUDA CSRG matrix A by the scalar D.  The scaling itself is
! done by the psb_d_csr_sparse_mat component; to_gpu is called
! afterwards.
!
! Arguments:
!   d    (in)    scalar factor.
!   a    (inout) matrix to be scaled.
!   info (out)   psb_success_ or the error code of the failing step.
!
subroutine psb_d_cuda_csrg_scals(d,a,info)

  use psb_base_mod
  use cusparse_mod
  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scals
  implicit none
  class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
  real(psb_dpk_), intent(in)                       :: d
  integer(psb_ipk_), intent(out)                   :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)

  ! If the matrix is flagged as device-resident, sync it before the
  ! scaling (presumably to refresh the host copy -- confirm in sync()).
  if (a%is_dev()) call a%sync()

  call a%psb_d_csr_sparse_mat%scal(d,info)

  ! to_gpu is attempted only when the scaling succeeded.
  if (info == 0) call a%to_gpu(info)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

end subroutine psb_d_cuda_csrg_scals
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_d_cuda_csrg_to_gpu
+!
+! Copy the host CSR storage of A (a%irp, a%ja, a%val) into its device
+! counterpart a%deviceMat.
+!
+!   a     (inout) matrix whose device image is (re)built; an existing
+!                 device matrix is released first.
+!   info  (out)   0 on success, otherwise the first non-zero code returned
+!                 by the CSRG* wrappers or by ALLOCATE.
+!   nzrm  (in)    optional, not referenced by this implementation.
+!
+! The routine returns immediately (info = 0) when a%val or a%ja are not
+! allocated.  The body is selected at preprocessing time according to the
+! CUDA version macros; in every variant a unit-diagonal matrix is copied
+! through temporary arrays in which the diagonal is stored explicitly.
+!
+subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_to_gpu
+  implicit none
+  class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)             :: info
+  integer(psb_ipk_), intent(in), optional    :: nzrm
+
+  integer(psb_ipk_) :: m, n, nz
+  integer(psb_ipk_) :: nzdi,i,j,nrz
+  integer(psb_ipk_), allocatable :: irpdi(:),jadi(:)
+  real(psb_dpk_), allocatable :: valdi(:)
+
+  info = 0
+
+  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
+
+  m  = a%get_nrows()
+  n  = a%get_ncols()
+  nz = a%get_nzeros()
+  ! Release a previously built device matrix before allocating a new one.
+  if (c_associated(a%deviceMat%Mat)) then
+    info = CSRGDeviceFree(a%deviceMat)
+  end if
+#if (PSB_CUDA_SHORT_VERSION <= 10 )
+  if (a%is_unit()) then
+    !
+    !     CUSPARSE has the habit of storing the diagonal and then ignoring,
+    !     whereas we do not store it. Hence this adapter code.
+    !
+    nzdi = nz + m
+    if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
+    if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
+    if (info == 0) then
+      if (a%is_unit()) then
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
+      else
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+      end if
+    end if
+    !!! We are explicitly adding the diagonal
+    !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+    if ((info == 0) .and. a%is_triangle()) then
+      !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+      if ((info == 0).and.a%is_upper()) then
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+    if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
+    if (info == 0) then
+      irpdi(1) = 1
+      if (a%is_triangle().and.a%is_upper()) then
+        ! Upper triangle: the diagonal entry opens each row.
+        do i=1,m
+          j = irpdi(i)
+          jadi(j)  = i
+          valdi(j) = done
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+1:j+nrz)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      else
+        ! Otherwise the diagonal entry closes each row.
+        do i=1,m
+          j = irpdi(i)
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+0:j+nrz-1)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
+          jadi(j+nrz)  = i
+          valdi(j+nrz) = done
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      end if
+    end if
+    if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
+
+  else
+
+    if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
+    if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
+!!$    if (info == 0) then
+!!$      if (a%is_unit()) then
+!!$        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
+!!$      else
+!!$        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+!!$      end if
+!!$    end if
+    if ((info == 0) .and. a%is_triangle()) then
+      !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+      if ((info == 0).and.a%is_upper()) then
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+
+    if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
+  endif
+
+  if ((info == 0) .and. a%is_triangle()) then
+    info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
+  end if
+
+#elif PSB_CUDA_VERSION <  11030
+  ! This variant hands zero-based indices to the device.
+  if (a%is_unit()) then
+    !
+    !     CUSPARSE has the habit of storing the diagonal and then ignoring,
+    !     whereas we do not store it. Hence this adapter code.
+    !
+    nzdi = nz + m
+    if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
+!!$    write(0,*) 'Done deviceAlloc'
+    if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero)
+!!$    write(0,*) 'Done SetIndexBase'
+    if (info == 0) then
+      if (a%is_unit()) then
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
+      else
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+      end if
+    end if
+    !!! We are explicitly adding the diagonal
+    !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+    if ((info == 0) .and. a%is_triangle()) then
+      info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+      ! Choose a fill mode only if the matrix type was accepted, so that a
+      ! failure above is not overwritten by the fill-mode return code.
+      if (info == 0) then
+        if (a%is_upper()) then
+          info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+        else
+          info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+        end if
+      end if
+    end if
+    if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info)
+    if (info == 0) then
+      irpdi(1) = 0
+      if (a%is_triangle().and.a%is_upper()) then
+        do i=1,m
+          j = irpdi(i)
+          ! Column indices are zero-based here, hence i-1 for the diagonal.
+          jadi(j)  = i-1
+          valdi(j) = done
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+1:j+nrz)  = a%ja(a%irp(i):a%irp(i+1)-1)-1
+          valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      else
+        do i=1,m
+          j = irpdi(i)
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+0:j+nrz-1)  = a%ja(a%irp(i):a%irp(i+1)-1)-1
+          valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
+          ! Column indices are zero-based here, hence i-1 for the diagonal.
+          jadi(j+nrz)  = i-1
+          valdi(j+nrz) = done
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      end if
+    end if
+    if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
+
+  else
+
+    if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
+!!$    write(0,*) 'Done deviceAlloc', info
+    if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,&
+         & cusparse_index_base_zero)
+!!$    write(0,*) 'Done setIndexBase', info
+    if (info == 0) then
+      if (a%is_unit()) then
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
+      else
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+      end if
+    end if
+    if ((info == 0) .and. a%is_triangle()) then
+      info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+      ! Same guard as above: keep a matrix-type failure visible.
+      if (info == 0) then
+        if (a%is_upper()) then
+          info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+        else
+          info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+        end if
+      end if
+    end if
+    ! Shift row pointers and column indices to zero base in scratch copies.
+    nzdi=a%irp(m+1)-1
+    if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info)
+    if (info == 0) then
+      irpdi(1:m+1) = a%irp(1:m+1) -1
+      jadi(1:nzdi) = a%ja(1:nzdi) -1
+    end if
+    if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val)
+!!$    write(0,*) 'Done Host2Device', info
+  endif
+
+
+#elif 0
+
+  if (a%is_unit()) then
+    !
+    !     CUSPARSE has the habit of storing the diagonal and then ignoring,
+    !     whereas we do not store it. Hence this adapter code.
+    !
+    nzdi = nz + m
+    if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
+    if (info == 0) then
+      if (a%is_unit()) then
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
+      else
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+      end if
+    end if
+    !!! We are explicitly adding the diagonal
+    !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+    if ((info == 0) .and. a%is_triangle()) then
+!!$      info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+      if ((info == 0).and.a%is_upper()) then
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+    if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
+    if (info == 0) then
+      irpdi(1) = 1
+      if (a%is_triangle().and.a%is_upper()) then
+        do i=1,m
+          j = irpdi(i)
+          jadi(j)  = i
+          valdi(j) = done
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+1:j+nrz)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      else
+        do i=1,m
+          j = irpdi(i)
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+0:j+nrz-1)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
+          jadi(j+nrz)  = i
+          valdi(j+nrz) = done
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      end if
+    end if
+    if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
+
+  else
+
+    if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
+!!$    if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
+    if (info == 0) then
+      if (a%is_unit()) then
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
+      else
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+      end if
+    end if
+    if ((info == 0) .and. a%is_triangle()) then
+!!$      info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+      if ((info == 0).and.a%is_upper()) then
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+
+    if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
+  endif
+
+!!$  if ((info == 0) .and. a%is_triangle()) then
+!!$    info = CSRGDeviceCsrsmAnalysis(a%deviceMat)
+!!$  end if
+
+#else
+
+  if (a%is_unit()) then
+    !
+    !     CUSPARSE has the habit of storing the diagonal and then ignoring,
+    !     whereas we do not store it. Hence this adapter code.
+    !
+    nzdi = nz + m
+    if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi)
+    if (info == 0) then
+      if (a%is_unit()) then
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
+      else
+        info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+      end if
+    end if
+    !!! We are explicitly adding the diagonal
+    if ((info == 0) .and. a%is_triangle()) then
+      if ((info == 0).and.a%is_upper()) then
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+    if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
+    if (info == 0) then
+      irpdi(1) = 1
+      if (a%is_triangle().and.a%is_upper()) then
+        do i=1,m
+          j = irpdi(i)
+          jadi(j)  = i
+          valdi(j) = done
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+1:j+nrz)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      else
+        do i=1,m
+          j = irpdi(i)
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+0:j+nrz-1)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
+          jadi(j+nrz)  = i
+          valdi(j+nrz) = done
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      end if
+    end if
+    if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
+
+  else
+
+    if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz)
+    if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
+  endif
+
+#endif
+  ! Host and device copies are flagged as in sync even when info /= 0
+  ! (behaviour unchanged from the original).
+  call a%set_sync()
+
+  if (info /= 0) then
+    write(0,*) 'Error in CSRG_TO_GPU ',info
+  end if
+
+end subroutine psb_d_cuda_csrg_to_gpu
diff --git a/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90
new file mode 100644
index 00000000..03fefbdd
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90
@@ -0,0 +1,117 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+!
+! psb_d_cuda_csrg_vect_mv
+!
+! Compute y = alpha*op(A)*x + beta*y for a CSRG matrix, where op is chosen
+! by TRANS ('N' when absent; 'T' or 'C', in either case, select the
+! transposed path).
+!
+! The transposed product is delegated to the parent CSR implementation.
+! The plain product runs through spmvCSRGDevice when both x and y are
+! psb_d_vect_cuda; otherwise it is computed by the parent CSR
+! implementation on array copies of x and y.
+!
+! info is psb_success_ on success; failures are pushed on the PSBLAS error
+! stack and handled through psb_error_handler.
+!
+subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use cusparse_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_vect_mv
+  use psb_d_cuda_vect_mod
+  implicit none
+  class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in)                    :: alpha, beta
+  class(psb_d_base_vect_type), intent(inout)    :: x
+  class(psb_d_base_vect_type), intent(inout)    :: y
+  integer(psb_ipk_), intent(out)                :: info
+  character, optional, intent(in)               :: trans
+
+  real(psb_dpk_), allocatable :: host_x(:), host_y(:)
+  logical           :: use_transpose, ran_on_device
+  character         :: op
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='d_cuda_csrg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! Without an explicit request the plain product is computed.
+  op = 'N'
+  if (present(trans)) op = trans
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  use_transpose = (psb_toupper(op) == 'T').or.(psb_toupper(op)=='C')
+
+  if (.not.use_transpose) then
+    if (a%is_host()) call a%sync()
+
+    ! Try the device kernel first; it needs both vectors to be CUDA vectors.
+    ran_on_device = .false.
+    select type (xx => x)
+    type is (psb_d_vect_cuda)
+      select type(yy => y)
+      type is (psb_d_vect_cuda)
+        if (xx%is_host()) call xx%sync()
+        if (beta /= dzero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+        ran_on_device = .true.
+      end select
+    end select
+
+    ! Any other combination of vector types goes through array copies.
+    if (.not.ran_on_device) then
+      host_x = x%get_vect()
+      host_y = y%get_vect()
+      call a%psb_d_csr_sparse_mat%spmm(alpha,host_x,beta,host_y,info)
+      call y%bld(host_y)
+    end if
+  else
+    ! Transposed product: handled by the parent CSR implementation.
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= dzero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+end subroutine psb_d_cuda_csrg_vect_mv
diff --git a/cuda/impl/psb_d_cuda_diag_csmv.F90 b/cuda/impl/psb_d_cuda_diag_csmv.F90
new file mode 100644
index 00000000..0317a369
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_diag_csmv.F90
@@ -0,0 +1,127 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_csmv + implicit none + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_cuda_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_diag_mold + implicit none + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_cuda_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_diag_mold diff --git a/cuda/impl/psb_d_cuda_diag_to_gpu.F90 b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 new file mode 100644 index 00000000..9b648962 --- /dev/null +++ b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_d_cuda_diag_to_gpu
+!
+! (Re)build the device image of a DIAG matrix from the host arrays a%data
+! and a%offset.  Nothing is done (info = 0) when either array is not
+! allocated.  An existing device matrix is freed before the new one is
+! allocated.  info carries the return code of FallocDiagDevice or, when
+! that succeeds, of writeDiagDevice.  nzrm is not referenced.
+!
+subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm)
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_to_gpu
+  use iso_c_binding
+  implicit none
+  class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)                   :: info
+  integer(psb_ipk_), intent(in), optional          :: nzrm
+
+  integer(psb_ipk_)   :: m, nzm, pitch, maxrowsize
+  integer(psb_ipk_)   :: data_rows, data_cols, mat_cols
+  type(diagdev_parms) :: gpu_parms
+
+  info = 0
+
+  ! Both host arrays must exist, otherwise there is nothing to copy.
+  if (.not.(allocated(a%data).and.allocated(a%offset))) return
+
+  mat_cols  = a%get_ncols()
+  data_rows = size(a%data,1)
+  data_cols = size(a%data,2)
+  !allocsize = a%get_size()
+  !write(*,*) 'Create the DIAG matrix'
+  gpu_parms = FgetDiagDeviceParams(data_rows,mat_cols,data_cols,spgpu_type_double)
+
+  ! Drop a previous device copy before allocating the new one.
+  if (c_associated(a%deviceMat)) call freeDiagDevice(a%deviceMat)
+
+  info = FallocDiagDevice(a%deviceMat,data_rows,mat_cols,data_cols,spgpu_type_double)
+  if (info == 0) then
+    info = writeDiagDevice(a%deviceMat,a%data,a%offset,data_rows)
+  end if
+!  if (info /= 0) goto 9999
+
+end subroutine psb_d_cuda_diag_to_gpu
diff --git a/cuda/impl/psb_d_cuda_diag_vect_mv.F90 b/cuda/impl/psb_d_cuda_diag_vect_mv.F90
new file mode 100644
index 00000000..3bc2372d
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_diag_vect_mv.F90
@@ -0,0 +1,116 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+!
+! psb_d_cuda_diag_vect_mv
+!
+! Compute y = alpha*op(A)*x + beta*y for a DIAG matrix, where op is chosen
+! by TRANS ('N' when absent; 'T' or 'C', in either case, select the
+! transposed path).
+!
+! The transposed product is delegated to the parent DIA implementation.
+! The plain product runs through spmvDiagDevice when both x and y are
+! psb_d_vect_cuda; otherwise it is computed through a%spmm on array copies
+! of x and y.
+!
+! info is psb_success_ on success; failures are pushed on the PSBLAS error
+! stack and handled through psb_error_handler.
+!
+subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_vect_mv
+  use psb_d_cuda_vect_mod
+  implicit none
+  class(psb_d_cuda_diag_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in)       :: alpha, beta
+  class(psb_d_base_vect_type), intent(inout) :: x
+  class(psb_d_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out)             :: info
+  character, optional, intent(in)  :: trans
+  real(psb_dpk_), allocatable      :: rx(:), ry(:)
+  logical           :: tra
+  character         :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='d_cuda_diag_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  if (tra) then
+    ! Transposed product: handled by the parent DIA implementation.
+    if (.not.x%is_host()) call x%sync()
+    ! y only needs to be current when it contributes to the result;
+    ! compare against the double precision zero, as in the device branch.
+    if (beta /= dzero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_d_vect_cuda)
+      select type(yy => y)
+      type is (psb_d_vect_cuda)
+        ! Both operands are CUDA vectors: use the device kernel.
+        if (xx%is_host()) call xx%sync()
+        if (beta /= dzero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        ! y is not a CUDA vector: work on array copies.
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      ! x is not a CUDA vector: work on array copies.
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_diag_vect_mv
diff --git a/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90
new file mode 100644
index 00000000..e4419d81
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90
@@ -0,0 +1,416 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!
POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_d_cuda_dnsg_vect_mv
+!
+! Compute y = alpha*op(A)*x + beta*y for a dense (DNSG) matrix.  TRANS is
+! upper-cased and defaults to 'N'; any other value swaps the row and
+! column counts passed to the device kernel.
+!
+! The product runs through spmvDnsDevice when both x and y are
+! psb_d_vect_cuda; otherwise it is computed through a%spmm on array copies
+! of x and y, forwarding the same TRANS value.
+!
+! info is psb_success_ on success; failures are pushed on the PSBLAS error
+! stack and handled through psb_error_handler.
+!
+subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans)
+  use psb_base_mod
+  use psb_d_cuda_vect_mod
+  use dnsdev_mod
+  use psb_d_vectordev_mod
+  use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_vect_mv
+  implicit none
+  class(psb_d_cuda_dnsg_sparse_mat), intent(in)    :: a
+  real(psb_dpk_), intent(in)                 :: alpha, beta
+  class(psb_d_base_vect_type), intent(inout) :: x
+  class(psb_d_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out)             :: info
+  character, optional, intent(in)            :: trans
+  logical           :: tra
+  character         :: trans_
+  real(psb_dpk_), allocatable :: rx(:), ry(:)
+  Integer(Psb_ipk_) :: err_act, m, n, k
+  character(len=20) :: name='d_cuda_dnsg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (present(trans)) then
+    trans_ = psb_toupper(trans)
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  ! Dimensions of op(A) as seen by the device kernel; x and y are single
+  ! vectors, hence n = 1.
+  if (trans_ =='N') then
+    m = a%get_nrows()
+    n = 1
+    k = a%get_ncols()
+  else
+    m = a%get_ncols()
+    n = 1
+    k = a%get_nrows()
+  end if
+  select type (xx => x)
+  type is (psb_d_vect_cuda)
+    select type(yy => y)
+    type is (psb_d_vect_cuda)
+      if (a%is_host()) call a%sync()
+      if (xx%is_host()) call xx%sync()
+      if (beta /= dzero) then
+        if (yy%is_host()) call yy%sync()
+      end if
+      info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,&
+           & xx%deviceVect,beta,yy%deviceVect)
+      if (info /= 0) then
+        call psb_errpush(psb_err_from_subroutine_ai_,name,&
+             & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/))
+        info = psb_err_from_subroutine_ai_
+        goto 9999
+      end if
+      call yy%set_dev()
+    class default
+      ! y is not a CUDA vector: work on array copies.  The requested
+      ! operation must be forwarded, otherwise a transposed product
+      ! would silently be computed as a plain one.
+      if (a%is_dev()) call a%sync()
+      rx = xx%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info,trans_)
+      call y%bld(ry)
+    end select
+  class default
+    ! x is not a CUDA vector: same array-copy path, same TRANS forwarding.
+    if (a%is_dev()) call a%sync()
+    rx = x%get_vect()
+    ry = y%get_vect()
+    call a%spmm(alpha,rx,beta,ry,info,trans_)
+    call y%bld(ry)
+  end select
+
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_dnsg_vect_mv
+
+
+!
+! psb_d_cuda_dnsg_mold
+!
+! Make b an empty psb_d_cuda_dnsg_sparse_mat.  A previously allocated b is
+! freed and deallocated first.  info is psb_success_ on success and
+! psb_err_alloc_dealloc_ when the (de)allocation fails.
+!
+subroutine psb_d_cuda_dnsg_mold(a,b,info)
+  use psb_base_mod
+  use psb_d_cuda_vect_mod
+  use dnsdev_mod
+  use psb_d_vectordev_mod
+  use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_mold
+  implicit none
+  class(psb_d_cuda_dnsg_sparse_mat), intent(in)               :: a
+  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                    :: info
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='dnsg_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_d_cuda_dnsg_sparse_mat :: b, stat=info)
+
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info, name)
+    goto 9999
+  end if
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_dnsg_mold
+
+
+!!$
+!!$  interface
+!!$    subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+!!$      import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a
+!!$      real(psb_dpk_), intent(in) :: alpha, beta
+!!$      class(psb_d_base_vect_type), intent(inout) :: x, y
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$      character, optional, intent(in) :: trans
+!!$    end subroutine psb_d_cuda_dnsg_inner_vect_sv
+!!$  end interface
+
+!!$  interface
+!!$    subroutine psb_d_cuda_dnsg_reallocate_nz(nz,a)
+!!$      import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_
+!!$      integer(psb_ipk_), intent(in) :: nz
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$    end subroutine psb_d_cuda_dnsg_reallocate_nz
+!!$  end interface
+!!$
+!!$  interface
+!!$    subroutine psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz)
+!!$      import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_
+!!$
integer(psb_ipk_), intent(in) :: m,n
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$      integer(psb_ipk_), intent(in), optional :: nz
+!!$    end subroutine psb_d_cuda_dnsg_allocate_mnnz
+!!$  end interface
+
+!
+! psb_d_cuda_dnsg_to_gpu
+!
+! Allocate the device image of a dense matrix with FallocDnsDevice and
+! fill it from a%val with writeDnsDevice.  info is psb_success_ on
+! success; a non-zero code is routed through psb_error_handler.
+!
+subroutine psb_d_cuda_dnsg_to_gpu(a,info)
+  use psb_base_mod
+  use psb_d_cuda_vect_mod
+  use dnsdev_mod
+  use psb_d_vectordev_mod
+  use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_to_gpu
+  implicit none
+  class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)             :: info
+  Integer(Psb_ipk_)  :: err_act
+  logical, parameter :: debug=.false.
+  character(len=20)  :: name='d_cuda_dnsg_to_gpu'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2)
+  info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),&
+       & spgpu_type_double,1)
+  if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2))
+  if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_dnsg_to_gpu
+
+
+
+!
+! psb_d_cuda_cp_dnsg_from_coo
+!
+! Copy the COO matrix b into a: the host part is built by the parent DNS
+! cp_from_coo, then the result is transferred with a%to_gpu.  b is synced
+! first when b%is_dev() is true.
+!
+subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info)
+  use psb_base_mod
+  use psb_d_cuda_vect_mod
+  use dnsdev_mod
+  use psb_d_vectordev_mod
+  use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_coo
+  implicit none
+
+  class(psb_d_cuda_dnsg_sparse_mat), intent(inout)  :: a
+  class(psb_d_coo_sparse_mat), intent(in)     :: b
+  integer(psb_ipk_), intent(out)              :: info
+  Integer(Psb_ipk_)  :: err_act
+  ! len=* keeps the full 23-character name (len=20 truncated it).
+  character(len=*), parameter :: name='d_cuda_dnsg_cp_from_coo'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  call a%psb_d_dns_sparse_mat%cp_from_coo(b,info)
+  if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info
+  if (info == 0) call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_cp_dnsg_from_coo
+
+!
+! psb_d_cuda_cp_dnsg_from_fmt
+!
+! Copy a matrix b of any storage format into a.  A COO input is copied
+! directly; every other format is first copied to a temporary COO matrix,
+! which is then moved into a.
+!
+subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info)
+  use psb_base_mod
+  use psb_d_cuda_vect_mod
+  use dnsdev_mod
+  use psb_d_vectordev_mod
+  use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_fmt
+  implicit none
+
+  class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_d_base_sparse_mat), intent(in)   :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  type(psb_d_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_) :: err_act
+  ! len=* keeps the full 23-character name (len=20 truncated it).
+  character(len=*), parameter :: name='d_cuda_dnsg_cp_from_fmt'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_d_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+!!$  class is (psb_d_ell_sparse_mat)
+!!$    nzm = psb_size(b%ja,2)
+!!$    m   = b%get_nrows()
+!!$    nc  = b%get_ncols()
+!!$    nza = b%get_nzeros()
+!!$    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+!!$    ld  = gpu_parms%pitch
+!!$    nzm = gpu_parms%maxRowSize
+!!$    a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+!!$    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
+!!$    if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
+!!$    if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
+!!$    if (info == 0) call psb_safe_cpy( b%val, a%val , info)
+!!$    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
+!!$    if (info == 0) then
+!!$      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+!!$    end if
+!!$    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+!!$    if (info == 0) then
+!!$      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+!!$    end if
+!!$    a%nzt = nza
+!!$    call a%to_gpu(info)
+
+  class default
+
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_cp_dnsg_from_fmt
+
+
+
+!
+! psb_d_cuda_mv_dnsg_from_coo
+!
+! Move the COO matrix b into a: b is fixed when it is not ordered by rows,
+! copied into a with cp_from_coo, and then freed.
+!
+subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info)
+  use psb_base_mod
+  use psb_d_cuda_vect_mod
+  use dnsdev_mod
+  use psb_d_vectordev_mod
+  use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_coo
+  implicit none
+
+  class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  Integer(Psb_ipk_)  :: err_act
+  logical, parameter :: debug=.false.
+  ! len=* keeps the full 23-character name (len=20 truncated it).
+  character(len=*), parameter :: name='d_cuda_dnsg_mv_from_coo'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+  ! Leave through the error handler so that the error action saved above
+  ! is honoured, as on every other failure path of this routine.
+  if (info /= psb_success_) goto 9999
+  if (b%is_dev()) call b%sync()
+  call a%cp_from_coo(b,info)
+  if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info
+  call b%free()
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_mv_dnsg_from_coo
+
+subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info)
+  use psb_base_mod
+  use psb_d_cuda_vect_mod
+  use dnsdev_mod
+  use psb_d_vectordev_mod
+  use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_fmt
+  implicit none
+  class(psb_d_cuda_dnsg_sparse_mat), intent(inout)  :: a
+  class(psb_d_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+
+  type(psb_d_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='d_cuda_dnsg_cp_from_fmt'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_d_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+!!$  class is (psb_d_ell_sparse_mat)
+!!$    nzm = psb_size(b%ja,2)
+!!$
m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$ a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$ call a%to_gpu(info) + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_d_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..6db20c96 --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. 
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz)
+  ! Size the host arrays of a for an m x n matrix, reset its state, then call a%to_gpu.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n              ! rows, columns; negative values are rejected
+  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz     ! total nonzeros to reserve room for
+  integer(psb_ipk_) :: err_act, info, nz_,ld        ! nz_ = entries per row, ld = leading dimension
+  character(len=20) :: name='allocate_mnz'
+  ! gpu_parms receives the pitch and maxRowSize computed by FgetEllDeviceParams.
+  type(elldev_parms) :: gpu_parms
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + m -1 )/max(m,ione)   ! ceil(nz/m); divisor clamped because m == 0 is accepted above
+  else
+    nz_ = (max(7*m,7*n,ione)+m-1)/max(m,ione)
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+
+  gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_double,1)
+  ld = gpu_parms%pitch
+  nz_ = gpu_parms%maxRowSize
+
+  if (info == psb_success_) call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info)
+  if (info == psb_success_) then
+    a%irn = 0
+    a%idiag = 0
+    a%nzt = 0
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+    call a%set_bld()
+    call a%set_triangle(.false.)
+    call a%set_unit(.false.)
+    call a%set_dupl(psb_dupl_def_)
+  end if
+  ! Skip the upload after a failed allocation so the realloc error code is the one reported.
+  if (info == psb_success_) call a%to_gpu(info,nzrm=nz_)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_elg_allocate_mnnz
diff --git a/cuda/impl/psb_d_cuda_elg_asb.f90 b/cuda/impl/psb_d_cuda_elg_asb.f90
new file mode 100644
index 00000000..7d510ee2
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_elg_asb.f90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+!
notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_elg_asb(a)
+  ! Assembly step: sync a when it reports is_host(), then flag it as assembled.
+  use psb_base_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_asb
+  implicit none
+
+  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+
+  character(len=20) :: name='elg_asb'
+  integer(psb_ipk_) :: info, err_act
+  !
+  ! err_act carries the error action saved on entry and restored on exit.
+  ! info is set for symmetry with the sibling routines; nothing below
+  ! changes it.
+  !
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  !
+  ! sync() is requested only while the matrix reports is_host().
+  !
+  if (a%is_host()) call a%sync()
+  call a%set_asb()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_elg_asb
diff --git a/cuda/impl/psb_d_cuda_elg_csmm.F90 b/cuda/impl/psb_d_cuda_elg_csmm.F90
new file mode 100644
index 00000000..f77d72d8
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_elg_csmm.F90
@@ -0,0 +1,124 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmm + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_elg_csmv + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_elg_csput_a + implicit none + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! 
Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_csput_a + + + +subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding + use elldev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_v + use psb_d_cuda_vect_mod + implicit none + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! 
write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_cuda) + select type (ja) + class is (psb_i_vect_cuda) + select type (val) + class is (psb_d_vect_cuda) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceDouble(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. 
+ info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_d_cuda_elg_csput_v diff --git a/cuda/impl/psb_d_cuda_elg_from_gpu.F90 b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 new file mode 100644 index 00000000..720a6d73 --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_elg_from_gpu(a,info)
+  ! Read the device matrix a%deviceMat back into the host arrays of a.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_from_gpu
+  implicit none
+  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out) :: info   ! 0 on success
+
+  integer(psb_ipk_) :: pitch, maxrowsize   ! device-side dimensions of val/ja
+
+  info = 0
+
+  ! Without a device matrix there is nothing to read: free a and leave.
+  if (.not.(c_associated(a%deviceMat))) then
+    call a%free()
+    return
+  end if
+
+  pitch = getEllDevicePitch(a%deviceMat)
+  maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
+
+  ! Resize the host arrays when their shape differs from the device layout.
+  ! NOTE(review): a%idiag is read below but never resized here - confirm it is always large enough.
+  if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
+    call psb_realloc(pitch,maxrowsize,a%val,info)
+    if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
+    if (info == 0) call psb_realloc(pitch,a%irn,info)
+  end if
+  if (info == 0) info = &
+       & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag)
+  if (info == 0) call a%set_sync()   ! mark as synchronized only after a successful read
+
+end subroutine psb_d_cuda_elg_from_gpu
diff --git a/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90
new file mode 100644
index 00000000..5e5d72ef
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90
@@ -0,0 +1,84 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+  ! Delegates to the parent psb_d_ell_sparse_mat inner_spsm on vectors x and y.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_inner_vect_sv
+  use psb_d_cuda_vect_mod
+  implicit none
+  class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in) :: alpha, beta
+  class(psb_d_base_vect_type), intent(inout) :: x, y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+
+  integer(psb_ipk_) :: err_act
+  character(len=*), parameter :: name='d_cuda_elg_inner_vect_sv'   ! len=20 used to truncate this literal
+  logical, parameter :: debug=.false.
+  real(psb_dpk_), allocatable :: rx(:), ry(:)
+
+  call psb_get_erraction(err_act)
+  ! No device kernel is invoked in this routine: a, x and y are synced,
+  ! the parent inner_spsm is run, and y is then flagged with set_host().
+  ! The branch under if (.false.) below is never executed.
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  if (.false.) then
+    rx = x%get_vect()
+    ry = y%get_vect()
+    call a%inner_spsm(alpha,rx,beta,ry,info,trans)
+    call y%bld(ry)
+  else
+    call x%sync()
+    call y%sync()
+    call a%psb_d_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  end if
+
+  if (info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name, a_err='inner_cssm')
+    goto 9999
+  end if
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_elg_inner_vect_sv
diff --git a/cuda/impl/psb_d_cuda_elg_mold.F90 b/cuda/impl/psb_d_cuda_elg_mold.F90
new file mode 100644
index 00000000..107f19af
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_elg_mold.F90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_d_cuda_elg_mold(a,b,info)
+  ! Re-create b as a new psb_d_cuda_elg_sparse_mat; a is not referenced in the body.
+  use psb_base_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_mold
+  implicit none
+  class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  character(len=20) :: name='elg_mold'
+  integer(psb_ipk_) :: err_act
+  ! info is used as the stat= target of both deallocate and allocate.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) then
+    allocate(psb_d_cuda_elg_sparse_mat :: b, stat=info)
+  end if
+  ! Normal exit: b now has the requested dynamic type.
+  if (info == psb_success_) return
+
+  ! Any failure above is reported as an allocation/deallocation error.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_elg_mold
diff --git a/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90
new file mode 100644
index 00000000..47464760
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90
@@ -0,0 +1,72 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_elg_reallocate_nz(nz,a)
+  ! Resize a%ja and a%val to ceil(nz/rows) columns per row, then call a%to_gpu.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz             ! total number of entries to make room for
+  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: m, nzrm,ld                 ! rows, entries per row, leading dimension
+  integer(psb_ipk_) :: err_act, info
+  character(len=*), parameter :: name='d_cuda_elg_reallocate_nz'
+  ! name is a len=* constant because len=20 truncated the 24-character literal.
+
+  call psb_erractionsave(err_act)
+
+  !
+  ! What should this really do???
+  !
+  if (a%is_dev()) call a%sync()
+  m = a%get_nrows()
+  nzrm = (max(nz,ione)+m-1)/max(m,ione)   ! divisor clamped: get_nrows() may be 0
+  ld = size(a%ja,1)
+  call psb_realloc(ld,nzrm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    goto 9999
+  end if
+
+  call a%to_gpu(info,nzrm=nzrm)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_elg_reallocate_nz
diff --git a/cuda/impl/psb_d_cuda_elg_scal.F90 b/cuda/impl/psb_d_cuda_elg_scal.F90
new file mode 100644
index 00000000..420c710e
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_elg_scal.F90
@@ -0,0 +1,71 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_elg_scal(d,a,info,side)
+  ! Scale a by the vector d through the parent ELL scal, then call a%to_gpu.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scal
+  implicit none
+  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+  real(psb_dpk_), intent(in) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, intent(in), optional :: side
+
+  character(len=20) :: name='scal'
+  integer(psb_ipk_) :: err_act
+  ! d, info and side are handed unchanged to the parent
+  ! psb_d_ell_sparse_mat scal.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+
+  ! A matrix flagged as unit is turned into a non-unit one
+  ! before the scaling is applied.
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_d_ell_sparse_mat%scal(d,info,side)
+  if (info /= psb_success_) goto 9999
+
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_elg_scal
diff --git a/cuda/impl/psb_d_cuda_elg_scals.F90 b/cuda/impl/psb_d_cuda_elg_scals.F90
new file mode 100644
index 00000000..ff22002e
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_elg_scals.F90
@@ -0,0 +1,66 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
subroutine psb_d_cuda_elg_scals(d,a,info)
  !
  ! Scale every stored coefficient of the ELG matrix A by the scalar D
  ! and refresh the device copy through a%to_gpu.
  !
  ! Arguments:
  !   d     - scalar multiplier applied to all entries of a%val
  !   a     - matrix to be scaled (updated in place)
  !   info  - psb_success_ on success, otherwise the code returned by
  !           a%to_gpu
  !
  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scals
  implicit none
  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
  real(psb_dpk_), intent(in)      :: d
  integer(psb_ipk_), intent(out)  :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'

  info  = psb_success_
  call psb_erractionsave(err_act)

  ! presumably brings the host arrays up to date when the device copy
  ! is the current one -- TODO confirm against the sync implementation
  if (a%is_dev()) call a%sync()
  if (a%is_unit()) then
    call a%make_nonunit()
  end if

  ! Nothing to scale on a matrix whose coefficient array was never
  ! allocated; a%to_gpu also treats that case as a no-op.
  if (allocated(a%val)) a%val(:,:) = a%val(:,:) * d

  call a%to_gpu(info)
  if (info /= 0) goto 9999

  ! Pair the psb_erractionsave above on the success path as well,
  ! exactly as psb_d_cuda_elg_scal does.
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_d_cuda_elg_scals
subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm)
  !
  ! Upload the host ELL arrays of A (val, ja, irn, idiag) to the device
  ! matrix a%deviceMat, (re)allocating the device matrix when its
  ! pitch / maximum row size do not match what FgetEllDeviceParams
  ! reports for the current dimensions.
  !
  ! Arguments:
  !   a     - matrix whose device image is refreshed
  !   info  - 0 on success, otherwise the code of the failing
  !           allocation / reallocation / write step
  !   nzrm  - optional lower bound for the number of entries per row
  !           used when sizing the device matrix
  !
  ! If a%val or a%ja is not allocated the routine returns immediately
  ! with info = 0 and touches nothing.
  !
  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_to_gpu
  implicit none
  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)          :: info
  integer(psb_ipk_), intent(in), optional :: nzrm

  integer(psb_ipk_)  :: m, nzm, n, pitch, maxrowsize, nzt
  type(elldev_parms) :: gpu_parms

  info = 0

  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return

  m   = a%get_nrows()
  nzm = psb_size(a%val,2)
  n   = a%get_ncols()
  nzt = a%get_nzeros()
  if (present(nzrm)) nzm = max(nzm,nzrm)

  gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_double,1)

  ! Layout of the device matrix currently attached, if any.
  if (c_associated(a%deviceMat)) then
    pitch      = getEllDevicePitch(a%deviceMat)
    maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
  else
    pitch      = -1
    maxrowsize = -1
  end if

  if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
    if (c_associated(a%deviceMat)) then
      call freeEllDevice(a%deviceMat)
    end if
    info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_double,1)
    ! Only interrogate the handle when the allocation succeeded; after
    ! a failure its contents cannot be relied upon.
    if (info == 0) then
      pitch      = getEllDevicePitch(a%deviceMat)
      maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
    end if
  end if

  ! Make the host arrays match the device layout before copying.
  if (info == 0) then
    if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
      call psb_realloc(pitch,maxrowsize,a%val,info)
      if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
    end if
  end if

  if (info == 0) info = &
       & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)

  ! Flag host and device as consistent only when the upload really
  ! happened; on failure the previous state flag is left untouched.
  if (info == 0) call a%set_sync()

end subroutine psb_d_cuda_elg_to_gpu
subroutine psb_d_cuda_elg_trim(a)
  !
  ! Shrink the host ELL arrays of A to the minimum size needed:
  !   irn, idiag -> max(1, number of rows)
  !   ja, val    -> (current leading dimension) x (longest row in irn)
  !
  ! Errors are reported through the PSBLAS error stack; the routine has
  ! no info argument.
  !
  ! NOTE(review): only host arrays are touched here; the device copy is
  ! neither synchronised beforehand nor refreshed afterwards -- confirm
  ! that callers take care of that.
  !
  use psb_base_mod
  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_trim
  implicit none
  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a

  integer(psb_ipk_)  :: err_act, info, nr, m, nzm, ld
  character(len=20)  :: name='trim'

  call psb_erractionsave(err_act)
  info = psb_success_

  nr = a%get_nrows()
  m  = max(1_psb_ipk_,nr)

  ! Leading dimension of the index array, clamped to at least 1.
  ld = 1_psb_ipk_
  if (allocated(a%ja)) ld = max(1_psb_ipk_,int(size(a%ja,1),psb_ipk_))

  ! Longest row.  Only the rows that actually exist are inspected: the
  ! clamped value m must not be used as an upper bound here, because
  ! for an empty matrix it would address a%irn(1) out of bounds.
  nzm = 1_psb_ipk_
  if (allocated(a%irn)) then
    nr = min(nr,int(size(a%irn),psb_ipk_))
    if (nr > 0) nzm = max(1_psb_ipk_,maxval(a%irn(1:nr)))
  end if

  call psb_realloc(m,a%irn,info)
  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
  if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info)
  if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info)

  if (info /= psb_success_) then
    ! Leave a trace on the error stack so the handler has something
    ! to report.
    call psb_errpush(psb_err_alloc_dealloc_,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_d_cuda_elg_trim
subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)
  !
  ! Sparse matrix-vector product on encapsulated vectors for the ELG
  ! format, delegating to spmm / spmvEllDevice with (alpha,x,beta,y).
  !
  ! Three execution paths:
  !   * trans = 'T' or 'C': delegated to the parent ELL spmm; y is
  !     flagged with set_host() afterwards.
  !   * x and y both of type psb_d_vect_cuda: spmvEllDevice is called
  !     on the device handles; y is flagged with set_dev().
  !   * any other vector type: the vectors are extracted with
  !     get_vect(), multiplied through a%spmm on plain arrays and y is
  !     rebuilt with bld().
  !
  ! Arguments:
  !   alpha, beta - scalar coefficients
  !   a           - matrix; must be in the assembled state
  !   x, y        - input / input-output vectors
  !   info        - psb_success_ on success
  !   trans       - optional, 'N' (default), 'T' or 'C'
  !
  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_vect_mv
  use psb_d_cuda_vect_mod
  implicit none
  class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(in)                   :: alpha, beta
  class(psb_d_base_vect_type), intent(inout)   :: x
  class(psb_d_base_vect_type), intent(inout)   :: y
  integer(psb_ipk_), intent(out)               :: info
  character, optional, intent(in)              :: trans

  real(psb_dpk_), allocatable :: rx(:), ry(:)
  logical            :: tra, done_on_device
  character          :: trans_
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='d_cuda_elg_vect_mv'

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  if (tra) then
    ! Transposed product: handled by the parent ELL implementation.
    if (a%is_dev()) call a%sync()
    if (.not.x%is_host()) call x%sync()
    if (beta /= dzero) then
      if (.not.y%is_host()) call y%sync()
    end if
    call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    ! One synchronisation of the matrix is enough for the device path.
    if (a%is_host()) call a%sync()

    done_on_device = .false.
    select type (xx => x)
    type is (psb_d_vect_cuda)
      select type (yy => y)
      type is (psb_d_vect_cuda)
        if (xx%is_host()) call xx%sync()
        ! y is only read when beta is non-zero
        if (beta /= dzero) then
          if (yy%is_host()) call yy%sync()
        end if
        info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
        done_on_device = .true.
      end select
    end select

    if (.not.done_on_device) then
      ! Generic fallback for any vector pair that is not CUDA/CUDA:
      ! work on plain arrays and rebuild y from the result.
      if (a%is_dev()) call a%sync()
      rx = x%get_vect()
      ry = y%get_vect()
      call a%spmm(alpha,rx,beta,ry,info)
      call y%bld(ry)
    end if
  end if

  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_d_cuda_elg_vect_mv
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_csmv + implicit none + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_cuda_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hdiag_mold + implicit none + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_cuda_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hdiag_mold diff --git a/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 new file mode 100644 index 00000000..73c4a47d --- /dev/null +++ b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
subroutine psb_d_cuda_hdiag_to_gpu(a,info)
  !
  ! Build the device image of the HDIAG matrix A: any device matrix
  ! already attached is released, a new one is allocated with
  ! FAllocHdiagDevice and the host arrays val, diaOffsets and
  ! hackOffsets are copied with writeHdiagDevice.
  !
  ! info on return:
  !    0  success
  !   -1  a%hackOffsets not allocated
  !   -2  a%diaOffsets  not allocated
  !   -3  a%val         not allocated
  !   any other value comes from the allocation / write routines
  !
  use psb_base_mod
  use hdiagdev_mod
  use psb_vectordev_mod
  use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_to_gpu
  use iso_c_binding
  implicit none
  class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)                    :: info

  integer(psb_ipk_) :: n_rows, n_cols, hack_sz, n_hacks, height

  info    = 0
  n_rows  = a%get_nrows()
  n_cols  = a%get_ncols()
  hack_sz = a%hackSize
  n_hacks = a%nhacks

  ! All three host arrays must exist before anything is sent over.
  if (.not.allocated(a%hackOffsets)) then
    info = -1
  else if (.not.allocated(a%diaOffsets)) then
    info = -2
  else if (.not.allocated(a%val)) then
    info = -3
  end if
  if (info /= 0) return

  ! The entry following the last hack offset is used as the
  ! allocation height of the device matrix.
  height = a%hackOffsets(n_hacks+1)

  if (c_associated(a%deviceMat)) call freeHdiagDevice(a%deviceMat)

  info = FAllocHdiagDevice(a%deviceMat,n_rows,n_cols,&
       & height,hack_sz,n_hacks,spgpu_type_double)
  if (info /= 0) return

  info = writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets)

end subroutine psb_d_cuda_hdiag_to_gpu
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
  !
  ! Sparse matrix-vector product on encapsulated vectors for the HDIAG
  ! format, delegating to spmm / spmvHdiagDevice with (alpha,x,beta,y).
  !
  !   * trans = 'T' or 'C': delegated to the parent HDIA spmm, after
  !     which y is flagged with set_host().
  !   * x and y both psb_d_vect_cuda: spmvHdiagDevice on the device
  !     handles, after which y is flagged with set_dev().
  !   * otherwise: plain-array product through a%spmm on copies
  !     obtained with get_vect(); y is rebuilt with bld().
  !
  ! The matrix must be assembled, otherwise
  ! psb_err_invalid_mat_state_ is raised.
  !
  use psb_base_mod
  use hdiagdev_mod
  use psb_vectordev_mod
  use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_vect_mv
  use psb_d_cuda_vect_mod
  implicit none
  class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(in)                     :: alpha, beta
  class(psb_d_base_vect_type), intent(inout)     :: x
  class(psb_d_base_vect_type), intent(inout)     :: y
  integer(psb_ipk_), intent(out)                 :: info
  character, optional, intent(in)                :: trans

  real(psb_dpk_), allocatable :: xhost(:), yhost(:)
  logical            :: transposed, ran_on_gpu
  character          :: op
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='d_cuda_hdiag_vect_mv'

  call psb_erractionsave(err_act)
  info = psb_success_

  op = 'N'
  if (present(trans)) op = trans

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  end if

  select case (psb_toupper(op))
  case ('T','C')
    transposed = .true.
  case default
    transposed = .false.
  end select

  if (transposed) then
    if (.not.x%is_host()) call x%sync()
    if (beta /= dzero) then
      if (.not.y%is_host()) call y%sync()
    end if
    call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    if (a%is_host()) call a%sync()

    ! Try the device kernel first; it needs both vectors to be of the
    ! CUDA vector type.
    ran_on_gpu = .false.
    select type (xx => x)
    type is (psb_d_vect_cuda)
      select type (yy => y)
      type is (psb_d_vect_cuda)
        ran_on_gpu = .true.
        if (xx%is_host()) call xx%sync()
        if (beta /= dzero) then
          if (yy%is_host()) call yy%sync()
        end if
        info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
      end select
    end select

    ! Any other combination of vector types goes through plain arrays.
    if (.not.ran_on_gpu) then
      xhost = x%get_vect()
      yhost = y%get_vect()
      call a%spmm(alpha,xhost,beta,yhost,info)
      call y%bld(yhost)
    end if
  end if

  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_d_cuda_hdiag_vect_mv
subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz)
  !
  ! Allocate an M x N HLG matrix: the host side is set up by the parent
  ! HLL allocate, then the device image is created through a%to_gpu.
  !
  ! Arguments:
  !   m, n - forwarded to the parent allocate
  !   a    - matrix being allocated
  !   nz   - optional size hint, forwarded both to the parent allocate
  !          and to a%to_gpu (as nzrm)
  !
  ! Errors are reported through the PSBLAS error handler; the routine
  ! has no info argument.
  !
  use psb_base_mod
  use hlldev_mod
  use psb_vectordev_mod
  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)                   :: m,n
  class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional         :: nz

  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_

  call a%psb_d_hll_sparse_mat%allocate(m,n,nz)

  ! Forward the caller's nz itself (present or absent) instead of a
  ! local copy that was never assigned.
  ! NOTE(review): relies on nzrm being an optional dummy of to_gpu, as
  ! it is in the ELG counterpart -- confirm for the HLG interface.
  call a%to_gpu(info,nzrm=nz)
  if (info /= 0) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_d_cuda_hlg_allocate_mnnz
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmm + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hlg_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hlg_csmv + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_cuda_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hlg_from_gpu + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call 
psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_d_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..f4a0424d --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
  !
  ! inner_spsm step for the HLG format on encapsulated vectors.
  !
  ! No device kernel is used here: x, y and (when a%is_dev() holds) the
  ! matrix are synchronised, the work is delegated to the parent HLL
  ! inner_spsm, and y is flagged with set_host().
  !
  ! Arguments:
  !   alpha, beta - scalar coefficients, forwarded to inner_spsm
  !   a           - matrix
  !   x, y        - input / input-output vectors
  !   info        - psb_success_, or psb_err_from_subroutine_ when the
  !                 parent solve reports a failure
  !   trans       - optional, forwarded to inner_spsm
  !
  use psb_base_mod
  use hlldev_mod
  use psb_vectordev_mod
  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_inner_vect_sv
  use psb_d_cuda_vect_mod
  implicit none
  class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(in)                   :: alpha, beta
  class(psb_d_base_vect_type), intent(inout)   :: x, y
  integer(psb_ipk_), intent(out)               :: info
  character, optional, intent(in)              :: trans

  integer(psb_ipk_)  :: act
  ! NOTE(review): the label below and the 'inner_cssm' tag further down
  ! look inherited from another routine; kept as they are because they
  ! appear in run-time error messages.
  character(len=20)  :: name='d_base_inner_vect_sv'

  call psb_get_erraction(act)
  info = psb_success_

  call x%sync()
  call y%sync()
  if (a%is_dev()) call a%sync()

  call a%psb_d_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
  call y%set_host()

  if (info == psb_success_) then
    call psb_erractionrestore(act)
  else
    info = psb_err_from_subroutine_
    call psb_errpush(info,name, a_err='inner_cssm')
    call psb_error_handler(act)
  end if
  return

end subroutine psb_d_cuda_hlg_inner_vect_sv
notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_hlg_mold(a,b,info)
+  !
+  ! Make b a freshly allocated object of dynamic type
+  ! psb_d_cuda_hlg_sparse_mat.
+  !
+  ! If b is already allocated it is freed (b%free) and deallocated first.
+  ! The body never reads a: it only fixes the type to allocate.
+  !
+  ! Arguments:
+  !   a     matrix whose type is replicated (contents not used)
+  !   b     on return, an allocated psb_d_cuda_hlg_sparse_mat
+  !   info  0 on success, psb_err_alloc_dealloc_ on failure
+  !
+  use psb_base_mod
+  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_mold
+  implicit none
+  class(psb_d_cuda_hlg_sparse_mat), intent(in)             :: a
+  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+  ! Use the library integer kind, as psb_d_cuda_hybg_mold and every other
+  ! routine in this plugin do; a default-kind integer mismatches callers
+  ! when psb_ipk_ is not the default kind.
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='hlg_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    ! Release whatever b holds before dropping the object itself.
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_d_cuda_hlg_sparse_mat :: b, stat=info)
+
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info, name)
+    goto 9999
+  end if
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+end subroutine psb_d_cuda_hlg_mold
diff --git a/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90
new file mode 100644
index 00000000..e696d304
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90
@@ -0,0 +1,60 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_hlg_reallocate_nz(nz,a)
+  !
+  ! Reallocate the host HLL storage through the parent type's reallocate(nz),
+  ! then rebuild the device copy with to_gpu.
+  !
+  ! Arguments:
+  !   nz  size request forwarded to psb_d_hll_sparse_mat%reallocate
+  !   a   matrix to be reallocated
+  !
+  ! There is no info argument: a to_gpu failure is pushed on the PSBLAS
+  ! error stack and handed to psb_error_handler.
+  !
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_reallocate_nz
+  use iso_c_binding
+  implicit none
+  integer(psb_ipk_), intent(in)                   :: nz
+  class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: err_act, info
+  ! Sized to hold the whole 24-character name (len=20 truncated it).
+  character(len=24) :: name='d_cuda_hlg_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  call a%psb_d_hll_sparse_mat%reallocate(nz)
+
+  call a%to_gpu(info)
+  if (info /= 0) then
+    ! Record the failure so the handler has something to report.
+    call psb_errpush(psb_err_from_subroutine_,name, a_err='to_gpu')
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_hlg_reallocate_nz
diff --git a/cuda/impl/psb_d_cuda_hlg_scal.F90 b/cuda/impl/psb_d_cuda_hlg_scal.F90
new file mode 100644
index 00000000..042e5805
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hlg_scal.F90
@@ -0,0 +1,68 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_hlg_scal(d,a,info,side)
+  !
+  ! Scale the matrix by the vector d using the host implementation of the
+  ! parent psb_d_hll_sparse_mat type, then refresh the device copy.
+  !
+  ! Arguments:
+  !   d     scaling vector, forwarded to the parent scal
+  !   a     matrix; a unit-diagonal matrix is first made non-unit
+  !   info  psb_success_ on success, otherwise the status returned by the
+  !         parent scal or by to_gpu
+  !   side  optional, forwarded to the parent scal
+  !
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scal
+  implicit none
+  class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
+  real(psb_dpk_), intent(in)                      :: d(:)
+  integer(psb_ipk_), intent(out)                  :: info
+  character, intent(in), optional                 :: side
+
+  integer(psb_ipk_) :: saved_act
+
+  info = psb_success_
+  call psb_erractionsave(saved_act)
+
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_d_hll_sparse_mat%scal(d,info,side)
+  if (info == psb_success_) then
+    ! Host data changed: push it to the device.
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(saved_act)
+      return
+    end if
+  end if
+
+  ! Either step failed.
+  call psb_error_handler(saved_act)
+  return
+
+end subroutine psb_d_cuda_hlg_scal
diff --git a/cuda/impl/psb_d_cuda_hlg_scals.F90 b/cuda/impl/psb_d_cuda_hlg_scals.F90
new file mode 100644
index 00000000..4c81faa9
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hlg_scals.F90
@@ -0,0 +1,66 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! 
Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_hlg_scals(d,a,info)
+  !
+  ! Scale the matrix by the scalar d using the host implementation of the
+  ! parent psb_d_hll_sparse_mat type, then refresh the device copy.
+  !
+  ! Arguments:
+  !   d     scalar factor, forwarded to the parent scal
+  !   a     matrix; a unit-diagonal matrix is first made non-unit
+  !   info  psb_success_ on success, otherwise the status returned by the
+  !         parent scal or by to_gpu
+  !
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scals
+  use iso_c_binding
+  implicit none
+  class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
+  real(psb_dpk_), intent(in)                      :: d
+  integer(psb_ipk_), intent(out)                  :: info
+
+  integer(psb_ipk_) :: saved_act
+
+  info = psb_success_
+  call psb_erractionsave(saved_act)
+
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_d_hll_sparse_mat%scal(d,info)
+  if (info == psb_success_) then
+    ! Host data changed: push it to the device.
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(saved_act)
+      return
+    end if
+  end if
+
+  ! Either step failed.
+  call psb_error_handler(saved_act)
+  return
+end subroutine psb_d_cuda_hlg_scals
diff --git a/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90
new file mode 100644
index 00000000..566c94bd
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90
@@ -0,0 +1,61 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm)
+  !
+  ! (Re)create the device-side HLL matrix from the host arrays of a.
+  !
+  ! Returns immediately with info = 0 when a%val or a%ja is not allocated.
+  ! Otherwise any existing device matrix is freed, a new one is allocated
+  ! with FallochllDevice and the host arrays are copied with writehllDevice.
+  !
+  ! Arguments:
+  !   a     matrix whose deviceMat handle is rebuilt
+  !   info  0 on success, otherwise the status of the failing device call
+  !   nzrm  optional; accepted but not referenced by this implementation
+  !
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_to_gpu
+  use iso_c_binding
+  implicit none
+  class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)                  :: info
+  integer(psb_ipk_), intent(in), optional         :: nzrm
+
+  integer(psb_ipk_) :: nrows, nnz, hll_size
+
+  info = 0
+
+  ! Nothing to upload until both host arrays exist.
+  if (.not.(allocated(a%val).and.allocated(a%ja))) return
+
+  nrows    = a%get_nrows()
+  hll_size = a%get_size()
+  nnz      = a%get_nzeros()
+
+  ! Drop a previous device image before allocating the new one.
+  if (c_associated(a%deviceMat)) call freehllDevice(a%deviceMat)
+
+  info = FallochllDevice(a%deviceMat,a%hksz,nrows,nnz,hll_size,spgpu_type_double,1)
+  if (info /= 0) return
+
+  info = writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag)
+
+end subroutine psb_d_cuda_hlg_to_gpu
diff --git a/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90
new file mode 100644
index 00000000..cccba74b
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90
@@ -0,0 +1,119 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. 
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans)
+  !
+  ! Matrix-vector product driver for the CUDA HLG format.
+  ! NOTE(review): presumably y = alpha*op(A)*x + beta*y; the operands are
+  ! only forwarded to spmm / spmvhllDevice here -- confirm against those.
+  !
+  ! Dispatch:
+  !   * trans is 'T' or 'C' (case-insensitive): handled on the host by the
+  !     parent psb_d_hll_sparse_mat%spmm; y is then flagged host-side.
+  !   * otherwise, when both x and y are psb_d_vect_cuda: spmvhllDevice is
+  !     called on the device handles and y is flagged device-side.
+  !   * otherwise: x and y are extracted with get_vect(), a%spmm runs on
+  !     plain arrays and y is rebuilt from the result with bld().
+  !
+  ! Arguments:
+  !   alpha, beta  scalars forwarded to the product
+  !   a            matrix; must be assembled (is_asb()), otherwise
+  !                psb_err_invalid_mat_state_ is raised
+  !   x, y         input / input-output vectors
+  !   info         psb_success_ on success, a PSBLAS error code otherwise
+  !   trans        optional, defaults to 'N'
+  !
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_vect_mv
+  use psb_d_cuda_vect_mod
+  implicit none
+  class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in) :: alpha, beta
+  class(psb_d_base_vect_type), intent(inout) :: x
+  class(psb_d_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  real(psb_dpk_), allocatable :: rx(:), ry(:)
+  logical :: tra
+  character :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='d_cuda_hlg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! Default to the non-transposed product.
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  if (tra) then
+    ! Transposed case: computed by the parent (host) implementation.
+    if (.not.x%is_host()) call x%sync()
+    ! y is only synchronized when beta is non-zero.
+    if (beta /= dzero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    if (a%is_dev()) call a%sync()
+    call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_d_vect_cuda)
+      select type(yy => y)
+      type is (psb_d_vect_cuda)
+        ! Both operands are CUDA vectors: run on the device handles.
+        if (xx%is_host()) call xx%sync()
+        if (beta /= dzero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          ! The raw device status travels in i_err; info becomes the
+          ! generic PSBLAS code.
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        ! y is not a CUDA vector: fall back to plain arrays.
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        if (a%is_dev()) call a%sync()
+        call a%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      ! x is not a CUDA vector: fall back to plain arrays.
+      rx = x%get_vect()
+      ry = y%get_vect()
+      if (a%is_dev()) call a%sync()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+
+  end if
+  ! Failures of the spmm paths arrive here without a pushed error.
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_hlg_vect_mv
diff --git a/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90
new file mode 100644
index 00000000..b9c5d3d0
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz)
+  !
+  ! Allocate host CSR storage through the parent type's allocate(m,n,nz),
+  ! initialize cuSPARSE, and build the device copy with to_gpu.
+  !
+  ! Arguments:
+  !   m, n  dimensions forwarded to psb_d_csr_sparse_mat%allocate
+  !   a     matrix to be allocated
+  !   nz    optional size hint, forwarded to allocate and, as nzrm, to to_gpu
+  !
+  ! There is no info argument: a failure of initFcusparse or to_gpu is
+  ! handed to psb_error_handler.
+  !
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in)                    :: m,n
+  class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional          :: nz
+  integer(psb_ipk_) :: err_act, info
+  character(len=20) :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  call a%psb_d_csr_sparse_mat%allocate(m,n,nz)
+
+  info = initFcusparse()
+  ! Only go on to the device upload when cuSPARSE initialization succeeded;
+  ! previously to_gpu overwrote this status before it was ever examined.
+  if (info == 0) call a%to_gpu(info,nzrm=nz)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_hybg_allocate_mnnz
+#endif
diff --git a/cuda/impl/psb_d_cuda_hybg_csmm.F90 b/cuda/impl/psb_d_cuda_hybg_csmm.F90
new file mode 100644
index 00000000..dbf33990
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hybg_csmm.F90
@@ -0,0 +1,126 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmm + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hybg_csmv + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hybg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hybg_inner_vect_sv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. 
+ info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hybg_inner_vect_sv +#endif diff --git a/cuda/impl/psb_d_cuda_hybg_mold.F90 b/cuda/impl/psb_d_cuda_hybg_mold.F90 new file mode 100644 index 00000000..3e51acf6 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! 
modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_d_cuda_hybg_mold(a,b,info)
+  !
+  ! Make b a freshly allocated object of dynamic type
+  ! psb_d_cuda_hybg_sparse_mat.
+  !
+  ! If b is already allocated it is freed (b%free) and deallocated first.
+  ! The body never reads a: it only fixes the type to allocate.
+  !
+  ! Arguments:
+  !   a     matrix whose type is replicated (contents not used)
+  !   b     on return, an allocated psb_d_cuda_hybg_sparse_mat
+  !   info  0 on success, psb_err_alloc_dealloc_ on failure
+  !
+  use psb_base_mod
+  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_mold
+  implicit none
+  class(psb_d_cuda_hybg_sparse_mat), intent(in)            :: a
+  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+
+  integer(psb_ipk_) :: cur_act
+  character(len=20) :: name='hybg_mold'
+
+  call psb_get_erraction(cur_act)
+
+  info = 0
+  if (allocated(b)) then
+    ! Release whatever b holds before dropping the object itself.
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_d_cuda_hybg_sparse_mat :: b, stat=info)
+
+  ! Normal exit.
+  if (info == psb_success_) return
+
+  ! Allocation or deallocation failed.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(cur_act)
+
+  return
+
+end subroutine psb_d_cuda_hybg_mold
+#endif
diff --git a/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90
new file mode 100644
index 00000000..e9699e77
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_d_cuda_hybg_reallocate_nz(nz,a)
+  !
+  ! Reallocate the host CSR storage through the parent type's reallocate(nz),
+  ! then rebuild the device copy with to_gpu (nz is passed on as nzrm).
+  !
+  ! Arguments:
+  !   nz  size request forwarded to psb_d_csr_sparse_mat%reallocate
+  !   a   matrix to be reallocated
+  !
+  ! There is no info argument: a to_gpu failure is pushed on the PSBLAS
+  ! error stack and handed to psb_error_handler.
+  !
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in)                    :: nz
+  class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: err_act, info
+  ! Sized to hold the whole 25-character name (len=20 truncated it).
+  character(len=25) :: name='d_cuda_hybg_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! NOTE(review): the original author left the intended semantics of this
+  ! operation for the HYB device format as an open question.
+  call a%psb_d_csr_sparse_mat%reallocate(nz)
+
+  call a%to_gpu(info,nzrm=nz)
+  if (info /= 0) then
+    ! Record the failure so the handler has something to report.
+    call psb_errpush(psb_err_from_subroutine_,name, a_err='to_gpu')
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_hybg_reallocate_nz
+#endif
diff --git a/cuda/impl/psb_d_cuda_hybg_scal.F90 b/cuda/impl/psb_d_cuda_hybg_scal.F90
new file mode 100644
index 00000000..15a33d7b
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hybg_scal.F90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! 
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_d_cuda_hybg_scal(d,a,info,side)
+  !
+  ! Scale the matrix by the vector d using the host implementation of the
+  ! parent psb_d_csr_sparse_mat type, then refresh the device copy.
+  !
+  ! Arguments:
+  !   d     scaling vector, forwarded to the parent scal
+  !   a     matrix; a unit-diagonal matrix is first made non-unit
+  !   info  psb_success_ on success, otherwise the status returned by the
+  !         parent scal or by to_gpu
+  !   side  optional, forwarded to the parent scal
+  !
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scal
+  implicit none
+  class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+  real(psb_dpk_), intent(in)                       :: d(:)
+  integer(psb_ipk_), intent(out)                   :: info
+  character, intent(in), optional                  :: side
+
+  integer(psb_ipk_) :: saved_act
+
+  info = psb_success_
+  call psb_erractionsave(saved_act)
+
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_d_csr_sparse_mat%scal(d,info,side=side)
+  if (info == 0) then
+    ! Host data changed: push it to the device.
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(saved_act)
+      return
+    end if
+  end if
+
+  ! Either step failed.
+  call psb_error_handler(saved_act)
+  return
+
+end subroutine psb_d_cuda_hybg_scal
+#endif
diff --git a/cuda/impl/psb_d_cuda_hybg_scals.F90 b/cuda/impl/psb_d_cuda_hybg_scals.F90
new file mode 100644
index 00000000..34faa15d
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hybg_scals.F90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! Subroutine: psb_d_cuda_hybg_scals
+!   Scalar variant of the scaling routine: forwards the scalar D to the
+!   parent CSR scal() on the host copy and, if that succeeds, sends the
+!   matrix to the device again with to_gpu().  A matrix flagged as
+!   unit-diagonal is switched to non-unit with make_nonunit() first.
+!
+! Arguments:
+!   d    - real, input.        Scalar forwarded to CSR scal().
+!   a    - the HYBG matrix, input/output.
+!   info - integer, output.    Return code of the last step executed.
+!
+subroutine psb_d_cuda_hybg_scals(d,a,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scals
+  implicit none
+  class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+  real(psb_dpk_), intent(in) :: d
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: err_act
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_d_csr_sparse_mat%scal(d,info)
+  ! The device copy is rebuilt only after a successful host scaling
+  if (info == 0) call a%to_gpu(info)
+
+  if (info == 0) then
+    call psb_erractionrestore(err_act)
+  else
+    call psb_error_handler(err_act)
+  end if
+
+  return
+
+end subroutine psb_d_cuda_hybg_scals
+#endif
diff --git a/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90
new file mode 100644
index 00000000..ba859622
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90
@@ -0,0 +1,148 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!    1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions, and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!    3.
+!    The name of the PSBLAS group or the names of its contributors may
+!    not be used to endorse or promote products derived from this
+!    software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! Subroutine: psb_d_cuda_hybg_to_gpu
+!   (Re)builds the device-side HYB matrix from the host CSR arrays
+!   irp/ja/val of A.  An existing device matrix is freed first.
+!
+!   For a unit-diagonal matrix the diagonal is not stored on the host,
+!   so a temporary CSR copy with an explicit diagonal of ones is
+!   assembled and uploaded instead (placed first in each row for an
+!   upper triangular matrix, last otherwise).
+!
+!   For a triangular matrix the device descriptor is then switched to
+!   TRIANGULAR with the matching fill mode and the triangular-solve
+!   analysis is run.
+!
+! Arguments:
+!   a    - the HYBG matrix, input/output.
+!   info - integer, output.  Zero on success, otherwise the code of the
+!          first device call (or allocation) that failed.  Returns zero
+!          immediately when a%val or a%ja is not allocated.
+!   nzrm - integer, optional, input.  Accepted for interface
+!          compatibility; not referenced here.
+!
+subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_to_gpu
+  implicit none
+  class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_), intent(in), optional :: nzrm
+
+  integer(psb_ipk_) :: m, n, nz
+  integer(psb_ipk_) :: nzdi, i, j, nrz
+  integer(psb_ipk_), allocatable :: irpdi(:), jadi(:)
+  real(psb_dpk_), allocatable :: valdi(:)
+
+  info = 0
+
+  ! Nothing to upload yet
+  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
+
+  m  = a%get_nrows()
+  n  = a%get_ncols()
+  nz = a%get_nzeros()
+  if (c_associated(a%deviceMat%Mat)) then
+    info = HYBGDeviceFree(a%deviceMat)
+  end if
+
+  if (a%is_unit()) then
+    !
+    ! CUSPARSE has the habit of storing the diagonal and then ignoring,
+    ! whereas we do not store it. Hence this adapter code.
+    !
+    nzdi = nz + m
+    if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi)
+    if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
+    ! We are explicitly adding the diagonal
+    if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+    ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when
+    ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards
+    ! overwrite with TRIANGULAR if needed. Weird, but works.
+    if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general)
+    if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
+    if (info == 0) then
+      irpdi(1) = 1
+      if (a%is_triangle().and.a%is_upper()) then
+        ! Upper triangle: diagonal entry first, then the stored row
+        do i=1,m
+          j        = irpdi(i)
+          jadi(j)  = i
+          valdi(j) = done
+          nrz      = a%irp(i+1)-a%irp(i)
+          jadi(j+1:j+nrz)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
+          irpdi(i+1) = j + nrz + 1
+        end do
+      else
+        ! Otherwise: the stored row first, diagonal entry last
+        do i=1,m
+          j   = irpdi(i)
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+0:j+nrz-1)  = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
+          jadi(j+nrz)  = i
+          valdi(j+nrz) = done
+          irpdi(i+1)   = j + nrz + 1
+        end do
+      end if
+    end if
+    if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
+
+  else
+
+    if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz)
+    if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
+    ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when
+    ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards
+    ! overwrite with TRIANGULAR if needed. Weird, but works.
+    if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general)
+    ! On this path a%is_unit() is false, so the diagonal type is non-unit
+    if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+
+    if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
+
+  endif
+
+  !
+  ! Common tail for both paths: descriptor fix-up and solve analysis for
+  ! triangular matrices.  Every step runs only while info is still zero,
+  ! so the first failure is the one reported to the caller.
+  !
+  if ((info == 0) .and. a%is_triangle()) then
+    info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+    if (info == 0) then
+      if (a%is_upper()) then
+        info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else
+        info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+    if (info == 0) info = HYBGDeviceHybsmAnalysis(a%deviceMat)
+  end if
+
+  if (info /= 0) then
+    write(0,*) 'Error in HYBG_TO_GPU ',info
+  end if
+
+end subroutine psb_d_cuda_hybg_to_gpu
+#endif
diff --git a/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90
new file mode 100644
index 00000000..49060c3c
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90
@@ -0,0 +1,118 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!    1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions, and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!    3.
+!    The name of the PSBLAS group or the names of its contributors may
+!    not be used to endorse or promote products derived from this
+!    software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! Subroutine: psb_d_cuda_hybg_vect_mv
+!   Matrix-vector product on encapsulated vectors for the HYBG format.
+!   NOTE(review): presumably y = alpha*op(A)*x + beta*y, as for the
+!   other spmm/spmv kernels -- confirm against spmvHYBGDevice.
+!
+!   Three execution paths are visible below:
+!     * trans is 'T' or 'C' (any case): the parent CSR spmm is called
+!       with the vector objects and y is flagged with set_host();
+!     * no transpose, x and y both of type psb_d_vect_cuda:
+!       spmvHYBGDevice is called on the device handles and y is
+!       flagged with set_dev();
+!     * no transpose, any other vector type: plain arrays are extracted
+!       with get_vect(), the parent CSR spmm is called on them and y is
+!       rebuilt from the result with bld().
+!
+! Arguments:
+!   alpha, beta - real, input.
+!   a     - the HYBG matrix, input.  Must be in the assembled state,
+!           otherwise psb_err_invalid_mat_state_ is raised.
+!   x, y  - vectors, input/output (x may be synchronised).
+!   info  - integer, output.  psb_success_ or an error code.
+!   trans - character, optional, input.  Defaults to 'N'.
+!
+subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use cusparse_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_vect_mv
+  use psb_d_cuda_vect_mod
+  implicit none
+  class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in) :: alpha, beta
+  class(psb_d_base_vect_type), intent(inout) :: x
+  class(psb_d_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  real(psb_dpk_), allocatable :: rx(:), ry(:)
+  logical :: tra
+  character :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='d_cuda_hybg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  ! Resolve the optional argument to a local with default 'N'
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+
+  if (tra) then
+    ! Transposed product: delegated to the parent CSR implementation.
+    ! x (and y, when beta is nonzero so its old contents matter) are
+    ! synchronised first if they are not flagged as host-resident.
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= dzero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    ! Non-transposed product: sync the matrix if it is flagged as host
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_d_vect_cuda)
+      select type(yy => y)
+      type is (psb_d_vect_cuda)
+        ! Both vectors are CUDA vectors: run the device kernel
+        if (xx%is_host()) call xx%sync()
+        if (beta /= dzero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        ! y is not a CUDA vector: fall back to plain arrays + CSR spmm
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      ! x is not a CUDA vector: fall back to plain arrays + CSR spmm
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_hybg_vect_mv
+#endif
diff --git a/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90
new file mode 100644
index 00000000..559bfb2c
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90
@@ -0,0 +1,58 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!    1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!
+!    notice, this list of conditions, and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!    3. The name of the PSBLAS group or the names of its contributors may
+!    not be used to endorse or promote products derived from this
+!    software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! Subroutine: psb_d_cuda_mv_csrg_from_coo
+!   Moves the COO matrix B into A: the parent CSR mv_from_coo() does the
+!   host-side conversion, then to_gpu() sends the result to the device.
+!
+! Arguments:
+!   a    - the CSRG matrix, input/output.
+!   b    - the COO source matrix, input/output.
+!   info - integer, output.  psb_success_ when both steps succeed;
+!          any failure is reported as psb_err_alloc_dealloc_.
+!
+subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_coo
+  implicit none
+
+  class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  call a%psb_d_csr_sparse_mat%mv_from_coo(b,info)
+  ! The upload is attempted only after a successful host conversion
+  if (info == 0) call a%to_gpu(info)
+
+  ! Whatever went wrong, the caller sees a single error code
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+  return
+
+end subroutine psb_d_cuda_mv_csrg_from_coo
diff --git a/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90
new file mode 100644
index 00000000..c2411e90
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90
@@ -0,0 +1,56 @@
+!    Parallel Sparse BLAS GPU plugin
+!
(C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+!
+! Subroutine: psb_d_cuda_mv_csrg_from_fmt
+!   Moves a matrix B of arbitrary storage format into A.
+!     * B is COO: handled by a%mv_from_coo().
+!     * otherwise: the parent CSR mv_from_fmt() converts on the host
+!       and, if that succeeds, to_gpu() sends the result to the device.
+!
+! Arguments:
+!   a    - the CSRG matrix, input/output.
+!   b    - the source matrix, input/output.
+!   info - integer(psb_ipk_), output.  Return code of the last step
+!          executed.
+!
+subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_fmt
+  implicit none
+
+  class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_d_base_sparse_mat), intent(inout) :: b
+  ! Same integer kind as the sibling routines and as the mv_from_coo /
+  ! to_gpu calls this value is forwarded to.
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_d_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+  class default
+    call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info)
+    if (info /= 0) return
+    call a%to_gpu(info)
+  end select
+
+end subroutine psb_d_cuda_mv_csrg_from_fmt
diff --git a/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90
new file mode 100644
index 00000000..a6a39a1c
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90
@@ -0,0 +1,64 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!    1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions, and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!    3. The name of the PSBLAS group or the names of its contributors may
+!    not be used to endorse or promote products derived from this
+!    software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! Subroutine: psb_d_cuda_mv_diag_from_coo
+!   Moves the COO matrix B into A.  B is first put through fix() when
+!   it is not ordered by rows, then copied with a%cp_from_coo() and,
+!   on success, released with b%free().
+!
+! Arguments:
+!   a    - the DIAG matrix, input/output.
+!   b    - the COO source matrix, input/output; freed on success.
+!   info - integer, output.  psb_success_ on success; any failure is
+!          reported as psb_err_alloc_dealloc_ (B is then not freed).
+!
+subroutine psb_d_cuda_mv_diag_from_coo(a,b,info)
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_mv_diag_from_coo
+
+  implicit none
+
+  class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+
+  if (info == psb_success_) then
+    call a%cp_from_coo(b,info)
+    if (info == 0) then
+      ! Copy done: the source is no longer needed
+      call b%free()
+      return
+    end if
+  end if
+
+  ! Reached only when fix() or cp_from_coo() failed
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_d_cuda_mv_diag_from_coo
diff --git a/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90
new file mode 100644
index 00000000..9886e90c
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90
@@ -0,0 +1,55 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!    1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions, and the following disclaimer in the
+!
+!    documentation and/or other materials provided with the distribution.
+!    3. The name of the PSBLAS group or the names of its contributors may
+!    not be used to endorse or promote products derived from this
+!    software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! Subroutine: psb_d_cuda_mv_elg_from_coo
+!   Moves the COO matrix B into A.  B is put through fix() when it is
+!   not ordered by rows and through sync() when it is flagged as
+!   device-resident; it is then copied with a%cp_from_coo() and
+!   released with b%free().
+!
+! Arguments:
+!   a    - the ELG matrix, input/output.
+!   b    - the COO source matrix, input/output.  Freed after the copy
+!          attempt, whether or not the copy succeeded.
+!   info - integer, output.  Code from fix() if that failed (nothing
+!          else is done in that case), otherwise from cp_from_coo().
+!
+subroutine psb_d_cuda_mv_elg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_coo
+  implicit none
+
+  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+
+  if (info == psb_success_) then
+    if (b%is_dev()) call b%sync()
+    call a%cp_from_coo(b,info)
+    call b%free()
+  end if
+
+  return
+
+end subroutine psb_d_cuda_mv_elg_from_coo
diff --git a/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90
new file mode 100644
index 00000000..da2d47a1
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90
@@ -0,0 +1,86 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+!
+! Subroutine: psb_d_cuda_mv_elg_from_fmt
+!   Moves a matrix B of arbitrary storage format into A.
+!     * B is COO: handled by a%mv_from_coo().
+!     * B is (an extension of) ELL: the base-type data are copied,
+!       irn/idiag are moved with move_alloc, ja/val are copied into
+!       arrays re-dimensioned to the (pitch, maxRowSize) returned by
+!       FgetEllDeviceParams, B is freed and, if every host step
+!       succeeded, the result is sent to the device with to_gpu().
+!     * anything else: B is moved to a temporary COO matrix and then
+!       handled by a%mv_from_coo().
+!
+! Arguments:
+!   a    - the ELG matrix, input/output.
+!   b    - the source matrix, input/output.
+!   info - integer, output.  psb_success_ or the code of the first
+!          step that failed.
+!
+subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_fmt
+  implicit none
+
+  class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_d_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  type(psb_d_coo_sparse_mat) :: tmp
+  integer(psb_ipk_) :: nza, nc, ld, nzm, m
+  type(elldev_parms) :: gpu_parms
+
+  info = psb_success_
+
+  if (b%is_dev()) call b%sync()
+  select type (b)
+  type is (psb_d_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+  class is (psb_d_ell_sparse_mat)
+    nzm = size(b%ja,2)
+    m   = b%get_nrows()
+    nc  = b%get_ncols()
+    nza = b%get_nzeros()
+    ! Ask the device layer for the leading dimension (pitch) and row
+    ! width it wants for a matrix of this shape
+    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+    ld  = gpu_parms%pitch
+    nzm = gpu_parms%maxRowSize
+    a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+    call move_alloc(b%irn,   a%irn)
+    call move_alloc(b%idiag, a%idiag)
+    call psb_realloc(ld,nzm,a%ja,info)
+    if (info == 0) then
+      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+      deallocate(b%ja,stat=info)
+    end if
+    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+    if (info == 0) then
+      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+      deallocate(b%val,stat=info)
+    end if
+    a%nzt = nza
+    call b%free()
+    ! Upload only a consistent host copy; this also keeps an earlier
+    ! allocation error from being overwritten by the to_gpu status.
+    if (info == 0) call a%to_gpu(info)
+
+  class default
+    call b%mv_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_d_cuda_mv_elg_from_fmt
diff --git a/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90
new file mode 100644
index 00000000..b3c4f650
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90
@@ -0,0 +1,64 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!    1.
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+!
+! Subroutine: psb_d_cuda_mv_hdiag_from_coo
+!   Moves the COO matrix B into A.  The hack size of A is set to the
+!   value returned by psb_cuda_WarpSize(), the parent HDIA
+!   mv_from_coo() performs the host-side conversion and, if that
+!   succeeds, to_gpu() sends the result to the device.
+!
+! Arguments:
+!   a    - the HDIAG matrix, input/output.
+!   b    - the COO source matrix, input/output.
+!   info - integer, output.  psb_success_ when both steps succeed;
+!          any failure is reported as psb_err_alloc_dealloc_.
+!
+subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info)
+
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_mv_hdiag_from_coo
+  use psb_cuda_env_mod
+
+  implicit none
+
+  class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  a%hacksize = psb_cuda_WarpSize()
+
+  call a%psb_d_hdia_sparse_mat%mv_from_coo(b,info)
+  ! Do not upload (and do not lose the error) if the conversion failed
+  if (info /= 0) goto 9999
+
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_d_cuda_mv_hdiag_from_coo
diff --git a/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90
new file mode 100644
index 00000000..95e86293
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90
@@ -0,0 +1,55 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!    1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions, and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!    3. The name of the PSBLAS group or the names of its contributors may
+!    not be used to endorse or promote products derived from this
+!    software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED.
+!    IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! Subroutine: psb_d_cuda_mv_hlg_from_coo
+!   Moves the COO matrix B into A.  B is put through fix() when it is
+!   not ordered by rows, then copied with a%cp_from_coo() and released
+!   with b%free().
+!
+! Arguments:
+!   a    - the HLG matrix, input/output.
+!   b    - the COO source matrix, input/output.  Freed after the copy
+!          attempt, whether or not the copy succeeded.
+!   info - integer, output.  Code from fix() if that failed (nothing
+!          else is done in that case), otherwise from cp_from_coo().
+!
+subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_cuda_env_mod
+  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_coo
+  implicit none
+
+  class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+
+  if (info == psb_success_) then
+    call a%cp_from_coo(b,info)
+    call b%free()
+  end if
+
+  return
+
+end subroutine psb_d_cuda_mv_hlg_from_coo
diff --git a/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90
new file mode 100644
index 00000000..02578b19
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90
@@ -0,0 +1,56 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!    1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions, and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!    3.
+!    The name of the PSBLAS group or the names of its contributors may
+!    not be used to endorse or promote products derived from this
+!    software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! Subroutine: psb_d_cuda_mv_hlg_from_fmt
+!   Moves a matrix B of arbitrary storage format into A.  A COO source
+!   goes straight to a%mv_from_coo(); any other format is first moved
+!   into a temporary COO matrix with mv_to_coo() and, if that succeeds,
+!   the temporary is handed to a%mv_from_coo().
+!
+! Arguments:
+!   a    - the HLG matrix, input/output.
+!   b    - the source matrix, input/output.
+!   info - integer, output.  Return code of the last step executed.
+!
+subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_fmt
+  implicit none
+
+  class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_d_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  ! Staging area used when B is not already in COO format
+  type(psb_d_coo_sparse_mat) :: coo_stage
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_d_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+  class default
+    call b%mv_to_coo(coo_stage,info)
+    if (info == psb_success_) then
+      call a%mv_from_coo(coo_stage,info)
+    end if
+  end select
+
+end subroutine psb_d_cuda_mv_hlg_from_fmt
diff --git a/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90
new file mode 100644
index 00000000..42f3aca3
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90
@@ -0,0 +1,59 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!    Salvatore Filippone
+!    Alessandro Fanfarillo
+!
+!
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! psb_d_cuda_mv_hybg_from_coo: move a double precision COO matrix into a
+! CUDA HYBG matrix.  Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
+!
+!   a    - destination; its CSR parent component is filled by
+!          mv_from_coo, then a%to_gpu is called.
+!   b    - source COO matrix, consumed by the parent mv_from_coo.
+!   info - psb_success_ on success; any failure of either step is
+!          reported as psb_err_alloc_dealloc_.
+!
+subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_coo
+  implicit none
+
+  class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout)       :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+
+  ! Host-side conversion first, device step only if that worked.
+  call a%psb_d_csr_sparse_mat%mv_from_coo(b,info)
+  if (info == 0) call a%to_gpu(info)
+
+  ! Whatever went wrong, the caller sees a single error code.
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+end subroutine psb_d_cuda_mv_hybg_from_coo
+#endif
diff --git a/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90
new file mode 100644
index 00000000..f9c34f35
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90
@@ -0,0 +1,56 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! psb_d_cuda_mv_hybg_from_fmt: move a double precision matrix in any base
+! format into a CUDA HYBG matrix.  Only compiled when
+! PSB_CUDA_SHORT_VERSION <= 10.
+!
+!   a    - destination matrix.
+!   b    - source; COO sources go through a%mv_from_coo, everything else
+!          through the CSR parent's mv_from_fmt followed by a%to_gpu.
+!   info - psb_success_ or the code returned by the failing step.
+!
+subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_fmt
+  implicit none
+
+  class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+  class(psb_d_base_sparse_mat), intent(inout)      :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_d_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+  class default
+    call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info)
+    ! The device step is skipped when the host conversion failed.
+    if (info == 0) call a%to_gpu(info)
+  end select
+
+end subroutine psb_d_cuda_mv_hybg_from_fmt
+#endif
diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90
new file mode 100644
index 00000000..0e3f9113
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90
@@ -0,0 +1,56 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3.
The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_cp_csrg_from_coo: copy a single precision COO matrix into a
+! CUDA CSRG matrix.
+!
+!   a    - destination; its CSR parent component is filled by
+!          cp_from_coo, then a%to_gpu is called.
+!   b    - source COO matrix (not modified).
+!   info - psb_success_ on success; any failure of either step is
+!          reported as psb_err_alloc_dealloc_.
+!
+subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_coo
+  implicit none
+
+  class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in)          :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+
+  ! Host-side conversion first, device step only if that worked.
+  call a%psb_s_csr_sparse_mat%cp_from_coo(b,info)
+  if (info == 0) call a%to_gpu(info)
+
+  ! Whatever went wrong, the caller sees a single error code.
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+end subroutine psb_s_cuda_cp_csrg_from_coo
diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90
new file mode 100644
index 00000000..76871b59
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90
@@ -0,0 +1,55 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+!
+! psb_s_cuda_cp_csrg_from_fmt: copy a single precision matrix in any base
+! format into a CUDA CSRG matrix.
+!
+!   a    - destination matrix.
+!   b    - source (not modified); COO sources go through a%cp_from_coo,
+!          everything else through the CSR parent's cp_from_fmt followed
+!          by a%to_gpu.
+!   info - psb_success_ or the code returned by the failing step.
+!
+subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_fmt
+  !use iso_c_binding
+  implicit none
+
+  class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_s_base_sparse_mat), intent(in)         :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_s_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+  class default
+    call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info)
+    ! The device step is skipped when the host conversion failed.
+    if (info == 0) call a%to_gpu(info)
+  end select
+
+end subroutine psb_s_cuda_cp_csrg_from_fmt
diff --git a/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90
new file mode 100644
index 00000000..07025d77
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90
@@ -0,0 +1,58 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cuda_cp_diag_from_coo: copy a single precision COO matrix into a
+! CUDA DIAG matrix.
+!
+!   a    - destination; its DIA parent component is filled by
+!          cp_from_coo, then a%to_gpu is called.
+!   b    - source COO matrix (not modified).
+!   info - psb_success_ on success; any failure of either step is
+!          reported as psb_err_alloc_dealloc_.
+!
+subroutine psb_s_cuda_cp_diag_from_coo(a,b,info)
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_cp_diag_from_coo
+  implicit none
+
+  class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in)          :: b
+  integer(psb_ipk_), intent(out)                   :: info
+
+  info = psb_success_
+
+  call a%psb_s_dia_sparse_mat%cp_from_coo(b,info)
+  ! Do not run the device step on a half-built host matrix: without this
+  ! check the status of the host conversion would be overwritten by
+  ! to_gpu and a failure could go unnoticed.
+  if (info /= 0) goto 9999
+
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_s_cuda_cp_diag_from_coo
diff --git a/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90
new file mode 100644
index 00000000..66abf76a
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90
@@ -0,0 +1,161 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+! psb_s_cuda_cp_elg_from_coo: copy a single precision COO matrix into a
+! CUDA ELG matrix, building the device image straight from the COO data.
+!
+!   a    - destination; a%irn, a%nzt and the base component are set on
+!          the host, a%deviceMat is (re)allocated and filled.
+!   b    - source COO matrix.  When it is not by rows a temporary copy
+!          is made and, if still needed, fixed before use.
+!   info - psb_success_ on success; any failure is reported as
+!          psb_err_alloc_dealloc_.
+!
+subroutine psb_s_cuda_cp_elg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_coo
+  use psi_ext_util_mod
+  use psb_cuda_env_mod
+  implicit none
+
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in)         :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  !locals
+  integer(psb_ipk_)          :: hacksize
+  type(psb_s_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  hacksize = max(1,psb_cuda_WarpSize())
+  if (b%is_dev()) call b%sync()
+
+  if (b%is_by_rows()) then
+    call coo_to_device(b,info)
+  else
+    ! Work on a by-rows copy: the row displacements computed below are
+    ! only meaningful if entries of the same row are contiguous.
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) then
+      if (.not.tmp%is_by_rows()) call tmp%fix(info)
+    end if
+    if (info == psb_success_) call coo_to_device(tmp,info)
+  end if
+
+  if (info /= psb_success_) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+
+  !
+  ! Count the row lengths of src, (re)allocate the device matrix of the
+  ! host-associated a and copy src into it.  Sizes are always taken from
+  ! src, i.e. from the very matrix whose ja/val arrays are passed on.
+  !
+  subroutine coo_to_device(src,info)
+    implicit none
+    class(psb_s_coo_sparse_mat), intent(in) :: src
+    integer(psb_ipk_), intent(out)          :: info
+
+    integer(psb_ipk_) :: nr, nc, nza, nzm, ldv
+    integer(psb_ipk_), allocatable :: idisp(:)
+
+    call psi_s_count_ell_from_coo(a,src,idisp,ldv,nzm,info,hacksize=hacksize)
+    ! A failed count leaves nzm/idisp unusable; stop before touching
+    ! the device.
+    if (info /= psb_success_) return
+
+    if (c_associated(a%deviceMat)) then
+      call freeEllDevice(a%deviceMat)
+    endif
+
+    nr  = src%get_nrows()
+    nc  = src%get_ncols()
+    nza = src%get_nzeros()
+    ! NOTE(review): spgpu_type_double is requested in a single precision
+    ! routine - confirm whether the float type constant is intended.
+    info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1)
+
+    if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
+         & a%irn,idisp,src%ja,src%val, a%deviceMat)
+    ! Flag the device copy as current only if it was actually built.
+    if (info == 0) call a%set_dev()
+
+  end subroutine coo_to_device
+
+  !
+  ! Fill a%irn with the number of entries of each row of b, a%nzt with
+  ! their total, idisp with the running sum of the row lengths
+  ! (idisp(1) = 0) and nzm with the longest row; ldv is the row count
+  ! rounded up to a multiple of the hack size.
+  !
+  subroutine psi_s_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize)
+
+    use psb_base_mod
+    use psi_ext_util_mod
+    implicit none
+
+    class(psb_s_ell_sparse_mat), intent(inout)  :: a
+    class(psb_s_coo_sparse_mat), intent(in)     :: b
+    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
+    integer(psb_ipk_), intent(out)              :: info, nzm, ldv
+    integer(psb_ipk_), intent(in), optional     :: hacksize
+
+    !locals
+    integer(psb_ipk_) :: nza, nr, i, ir, hsz_
+
+    info = psb_success_
+
+    nr  = b%get_nrows()
+    nza = b%get_nzeros()
+
+    hsz_ = 1
+    if (present(hacksize)) then
+      if (hacksize> 1) hsz_ = hacksize
+    end if
+    ! Make ldv a multiple of hacksize
+    ldv = ((nr+hsz_-1)/hsz_)*hsz_
+
+    ! If it is sorted then we can lessen memory impact
+    a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
+
+    ! First compute the number of nonzeros in each row.
+    call psb_realloc(nr,a%irn,info)
+    if (info == psb_success_) call psb_realloc(nr+1,idisp,info)
+    if (info /= psb_success_) return
+
+    a%irn = 0
+    do i=1, nza
+      ir = b%ia(i)
+      a%irn(ir) = a%irn(ir) + 1
+    end do
+    nzm = 0
+    a%nzt = 0
+    idisp(1) = 0
+    do i=1,nr
+      nzm = max(nzm,a%irn(i))
+      a%nzt = a%nzt + a%irn(i)
+      idisp(i+1) = a%nzt
+    end do
+
+  end subroutine psi_s_count_ell_from_coo
+
+end subroutine psb_s_cuda_cp_elg_from_coo
diff --git a/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90
new file mode 100644
index 00000000..77df12b3
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90
@@ -0,0 +1,89 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cuda_cp_elg_from_fmt: copy a single precision matrix in any base
+! format into a CUDA ELG matrix.
+!
+!   a    - destination matrix.
+!   b    - source.  COO sources go through a%cp_from_coo; ELL sources
+!          have their host arrays copied and padded to the pitch and
+!          maximum row size reported by FgetEllDeviceParams, followed by
+!          a%to_gpu; anything else is converted to a temporary COO.
+!   info - psb_success_ or the code returned by the failing step.
+!
+subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_fmt
+  implicit none
+
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_s_base_sparse_mat), intent(in)        :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  !locals
+  type(psb_s_coo_sparse_mat) :: tmp
+  integer(psb_ipk_)          :: nza, nc, ld, nzm, m
+  type(elldev_parms)         :: gpu_parms
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_s_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+  class is (psb_s_ell_sparse_mat)
+    nzm = psb_size(b%ja,2)
+    m   = b%get_nrows()
+    nc  = b%get_ncols()
+    nza = b%get_nzeros()
+    ! NOTE(review): spgpu_type_double is requested in a single precision
+    ! routine - confirm whether the float type constant is intended.
+    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+    ld  = gpu_parms%pitch
+    ! NOTE(review): b%ja/b%val are read up to column maxRowSize below;
+    ! presumably this never exceeds size(b%ja,2) - confirm.
+    nzm = gpu_parms%maxRowSize
+    a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
+    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
+    if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
+    if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
+    if (info == 0) call psb_safe_cpy( b%val, a%val , info)
+    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
+    if (info == 0) then
+      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+    end if
+    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+    if (info == 0) then
+      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+    end if
+    ! Only move to the device a host copy that was built completely;
+    ! calling to_gpu unconditionally would also overwrite the error code.
+    if (info == 0) then
+      a%nzt = nza
+      call a%to_gpu(info)
+    end if
+
+  class default
+
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_s_cuda_cp_elg_from_fmt
diff --git a/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90
new file mode 100644
index 00000000..75210478
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90
@@ -0,0 +1,63 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_s_cuda_cp_hdiag_from_coo: copy a single precision COO matrix into a
+! CUDA HDIAG matrix.
+!
+!   a    - destination; a%hacksize is set to the CUDA warp size, the
+!          HDIA parent component is filled by cp_from_coo, then a%to_gpu
+!          is called.
+!   b    - source COO matrix (not modified).
+!   info - psb_success_ on success; any failure of either step is
+!          reported as psb_err_alloc_dealloc_.
+!
+subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info)
+
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_cp_hdiag_from_coo
+  use psb_cuda_env_mod
+  implicit none
+
+  class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in)           :: b
+  integer(psb_ipk_), intent(out)                    :: info
+
+  info = psb_success_
+
+  a%hacksize = psb_cuda_WarpSize()
+
+  call a%psb_s_hdia_sparse_mat%cp_from_coo(b,info)
+  ! Do not run the device step on a half-built host matrix: without this
+  ! check the status of the host conversion would be overwritten by
+  ! to_gpu and a failure could go unnoticed.
+  if (info /= 0) goto 9999
+
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_s_cuda_cp_hdiag_from_coo
diff --git a/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90
new file mode 100644
index 00000000..c254b15a
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90
@@ -0,0 +1,190 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cuda_cp_hlg_from_coo: copy a single precision COO matrix into a
+! CUDA HLG matrix, building the device image straight from the COO data.
+!
+!   a    - destination; a%irn, a%nzt, a%hkoffs, the hack size and the
+!          base component are set on the host, a%deviceMat is
+!          (re)allocated and filled.
+!   b    - source COO matrix.  When it is not by rows, a temporary copy
+!          is made and fixed before use.
+!   info - psb_success_ on success; any failure (including one in the
+!          offset computation, which is also reported on unit 0) is
+!          returned as psb_err_alloc_dealloc_.
+!
+subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info)
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_cuda_env_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_coo
+  implicit none
+
+  class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in)         :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  !locals
+  type(psb_s_coo_sparse_mat) :: tmp
+  integer(psb_ipk_)          :: hksz
+  logical, parameter         :: debug=.false.
+
+  info = psb_success_
+  hksz = max(1,psb_cuda_WarpSize())
+
+  if (b%is_by_rows()) then
+    call coo_to_device(b,info)
+  else
+    ! This is to guarantee tmp%is_by_rows()
+    call b%cp_to_coo(tmp,info)
+    if (info == 0) call tmp%fix(info)
+    ! Only build the device matrix from a copy that was really fixed.
+    if (info == 0) call coo_to_device(tmp,info)
+    call tmp%free()
+  end if
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+
+  !
+  ! Compute the hack offsets for the by-rows matrix src, (re)allocate the
+  ! device matrix of the host-associated a and copy src into it.
+  !
+  subroutine coo_to_device(src,info)
+    implicit none
+    class(psb_s_coo_sparse_mat), intent(in) :: src
+    integer(psb_ipk_), intent(out)          :: info
+
+    integer(psb_ipk_) :: nr, nc, nza, noffs, isz
+    integer(psb_ipk_), allocatable :: idisp(:)
+
+    nr  = src%get_nrows()
+    nc  = src%get_ncols()
+    nza = src%get_nzeros()
+    if (debug) write(0,*) 'Copying through GPU',nza
+    call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,src,info)
+    if (info /=0) then
+      write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
+      return
+    end if
+    if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
+
+    if (c_associated(a%deviceMat)) then
+      call freeHllDevice(a%deviceMat)
+    endif
+    ! NOTE(review): spgpu_type_double is requested in a single precision
+    ! routine - confirm whether the float type constant is intended.
+    info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
+    if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
+         & a%irn,a%hkoffs,idisp,src%ja, src%val, a%deviceMat)
+    ! Flag the device copy as current only if it was actually built.
+    if (info == 0) call a%set_dev()
+
+  end subroutine coo_to_device
+
+  !
+  ! Fill a%irn with the number of entries of each row of b, idisp with
+  ! the running sum of the row lengths (idisp(1) = 0) and a%hkoffs with
+  ! the offset of each block of hksz rows, every block taking
+  ! hksz * (longest row in the block) entries.  noffs is the number of
+  ! blocks and isz = a%hkoffs(noffs+1) the total storage required.
+  !
+  subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
+    use psb_base_mod
+    use psi_ext_util_mod
+    implicit none
+    class(psb_s_hll_sparse_mat), intent(inout)  :: a
+    class(psb_s_coo_sparse_mat), intent(in)     :: b
+    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
+    integer(psb_ipk_), intent(in)               :: hksz
+    integer(psb_ipk_), intent(out)              :: info, noffs, isz
+
+    !locals
+    integer(psb_ipk_) :: nza, nr, nc, i, j, k, ir, mxrwl
+    logical, parameter :: debug=.false.
+
+    info = 0
+    nr  = b%get_nrows()
+    nc  = b%get_ncols()
+    nza = b%get_nzeros()
+
+    ! If it is sorted then we can lessen memory impact
+    a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
+    if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza
+    ! First compute the number of nonzeros in each row.
+    call psb_realloc(nr,a%irn,info)
+    if (info == 0) call psb_realloc(nr+1,idisp,info)
+    if (info /= 0) return
+    a%irn = 0
+    if (debug) then
+      do i=1, nza
+        if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then
+          a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
+        else
+          write(0,*) 'Out of bouds IA ',i,b%ia(i),nr
+        end if
+      end do
+    else
+      do i=1, nza
+        a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
+      end do
+    end if
+    a%nzt = nza
+
+
+    ! Second. Figure out the block offsets.
+    call a%set_hksz(hksz)
+    noffs = (nr+hksz-1)/hksz
+    call psb_realloc(noffs+1,a%hkoffs,info)
+    if (debug) write(0,*) ' noffsets ',noffs,info
+    if (info /= 0) return
+    a%hkoffs(1) = 0
+    j=1
+    idisp(1) = 0
+    do i=1,nr,hksz
+      ! ir is the number of rows in this block (the last may be short)
+      ir    = min(hksz,nr-i+1)
+      mxrwl = a%irn(i)
+      idisp(i+1) = idisp(i) + a%irn(i)
+      do k=1,ir-1
+        idisp(i+k+1) = idisp(i+k) + a%irn(i+k)
+        mxrwl = max(mxrwl,a%irn(i+k))
+      end do
+      a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
+      j = j + 1
+    end do
+
+    !
+    ! At this point a%hkoffs(noffs+1) contains the allocation
+    ! size a%ja a%val.
+    !
+    isz = a%hkoffs(noffs+1)
+!!$    write(*,*) 'End of psi_comput_hckoff ',info
+  end subroutine psi_compute_hckoff_from_coo
+
+end subroutine psb_s_cuda_cp_hlg_from_coo
diff --git a/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90
new file mode 100644
index 00000000..f04b65e5
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90
@@ -0,0 +1,62 @@
+!
Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+!
+! psb_s_cuda_cp_hlg_from_fmt: copy a single precision matrix in any base
+! format into a CUDA HLG matrix.
+!
+!   a    - destination matrix.
+!   b    - source (not modified); COO sources go through a%cp_from_coo,
+!          everything else through the HLL parent's cp_from_fmt followed
+!          by a%to_gpu.
+!   info - psb_success_ on success; any failure is reported as
+!          psb_err_alloc_dealloc_.
+!
+subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_fmt
+  implicit none
+
+  class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_s_base_sparse_mat), intent(in)        :: b
+  integer(psb_ipk_), intent(out)                  :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_s_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+  class default
+    call a%psb_s_hll_sparse_mat%cp_from_fmt(b,info)
+    if (info == 0) call a%to_gpu(info)
+  end select
+
+  ! Whatever went wrong, the caller sees a single error code.
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+end subroutine psb_s_cuda_cp_hlg_from_fmt
diff --git a/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90
new file mode 100644
index 00000000..bf33a7a4
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90
@@ -0,0 +1,58 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED.
#if PSB_CUDA_SHORT_VERSION <= 10

!
! psb_s_cuda_cp_hybg_from_coo
!
!   Copy the COO matrix B into the HYBG matrix A, leaving B untouched.
!   Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
!
!   Arguments:
!     a    - destination matrix, overwritten.
!     b    - COO source matrix, read only.
!     info - psb_success_ when everything went fine; any failure in the
!            conversion or in to_gpu is reported as psb_err_alloc_dealloc_.
!
subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_coo
  implicit none

  class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_s_coo_sparse_mat), intent(in)          :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  ! First fill the psb_s_csr_sparse_mat component of A, then call
  ! to_gpu; the second step is skipped if the first one failed.
  call a%psb_s_csr_sparse_mat%cp_from_coo(b,info)
  if (info == 0) call a%to_gpu(info)

  ! Every failure is collapsed onto one error code for the caller.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_s_cuda_cp_hybg_from_coo
#endif
#if PSB_CUDA_SHORT_VERSION <= 10

!
! psb_s_cuda_cp_hybg_from_fmt
!
!   Copy the contents of the sparse matrix B (any storage format) into
!   the HYBG matrix A.
!   Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
!
!   Arguments:
!     a    - destination matrix, overwritten.
!     b    - source matrix.
!     info - psb_success_ when everything went fine; otherwise the code
!            returned by the failing step is passed back unchanged.
!
!   NOTE(review): b is declared intent(inout) although this routine only
!   passes it on to copy routines; the HLG counterpart declares it
!   intent(in). Confirm against the interface in psb_s_cuda_hybg_mat_mod.
!
subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_fmt
  implicit none

  class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_s_base_sparse_mat), intent(inout)      :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  select type(b)
  type is (psb_s_coo_sparse_mat)
    ! COO source: the dedicated COO conversion does the whole job.
    call a%cp_from_coo(b,info)
  class default
    ! Any other format: convert through the psb_s_csr_sparse_mat
    ! component of A, then call to_gpu only if that step succeeded.
    call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info)
    if (info == 0) call a%to_gpu(info)
  end select

end subroutine psb_s_cuda_cp_hybg_from_fmt
#endif
!
! psb_s_cuda_csrg_allocate_mnnz
!
!   Allocate the storage of the CSRG matrix A for an M x N matrix.
!
!   Arguments:
!     m, n - matrix dimensions, forwarded to the CSR allocate.
!     a    - matrix to be allocated.
!     nz   - optional size hint, forwarded as-is (possibly absent) to the
!            CSR allocate and to to_gpu (as nzrm).
!
!   There is no info argument, so the error stack is the only way to
!   report a failure: an entry is pushed before invoking the error
!   handler, otherwise the failure would go unnoticed by the caller.
!
subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use cusparse_mod
  use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)                    :: m,n
  class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional          :: nz
  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='allocate_mnz'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  ! Host side allocation through the psb_s_csr_sparse_mat component.
  call a%psb_s_csr_sparse_mat%allocate(m,n,nz)

  info = initFcusparse()
  if (info /= 0) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name,a_err='initFcusparse')
    goto 9999
  end if

  call a%to_gpu(info,nzrm=nz)
  if (info /= 0) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name,a_err='to_gpu')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_s_cuda_csrg_allocate_mnnz
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmm + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_csrg_csmv + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_csrg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_csrg_from_gpu + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (PSB_CUDA_SHORT_VERSION <= 10) || (PSB_CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() + +end subroutine psb_s_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..df11952c --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
!
! psb_s_cuda_csrg_inner_vect_sv
!
!   Triangular solve kernel for a CSRG matrix acting on encapsulated
!   vectors X and Y, with scalars alpha and beta.
!
!   Three execution paths:
!     1. transposed solve, or beta /= 0: X and Y are synced and the
!        psb_s_csr_sparse_mat implementation is applied to the vector
!        objects; Y is then marked via set_host.
!     2. both X and Y are psb_s_vect_cuda: spsvCSRGDevice is called on
!        the device vectors; Y is then marked via set_dev.
!     3. otherwise: the vectors are extracted with get_vect, the
!        psb_s_csr_sparse_mat implementation is applied to the plain
!        arrays and Y is rebuilt from the result with bld.
!
!   Arguments:
!     alpha, beta - scalars.
!     a           - the matrix; must be in the assembled state.
!     x, y        - input and input/output vectors.
!     info        - psb_success_ or an error code.
!     trans       - optional; 'N' (default), 'T' or 'C', case insensitive.
!
subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_inner_vect_sv
  use psb_s_cuda_vect_mod
  implicit none
  class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(in)                    :: alpha, beta
  class(psb_s_base_vect_type), intent(inout)    :: x, y
  integer(psb_ipk_), intent(out)                :: info
  character, optional, intent(in)               :: trans

  real(psb_spk_), allocatable :: rx(:), ry(:)
  logical            :: tra, solved_on_dev
  character          :: trans_
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='s_cuda_csrg_inner_vect_sv'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)
  info = psb_success_

  ! Default is the non-transposed solve.
  trans_ = 'N'
  if (present(trans)) trans_ = trans

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  select case (psb_toupper(trans_))
  case ('T','C')
    tra = .true.
  case default
    tra = .false.
  end select

  if (tra.or.(beta/=dzero)) then
    !
    ! Path 1: work on the vector objects through the CSR code.
    !
    call x%sync()
    call y%sync()
    call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    !
    ! Path 2: device solve, only when both vectors are psb_s_vect_cuda.
    !
    solved_on_dev = .false.
    select type (xx => x)
    type is (psb_s_vect_cuda)
      select type (yy => y)
      type is (psb_s_vect_cuda)
        solved_on_dev = .true.
        if (xx%is_host()) call xx%sync()
        if (beta /= dzero) then
          if (yy%is_host()) call yy%sync()
        end if
        info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
      end select
    end select

    !
    ! Path 3: at least one vector is not a psb_s_vect_cuda, go through
    ! plain arrays.
    !
    if (.not.solved_on_dev) then
      rx = x%get_vect()
      ry = y%get_vect()
      call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
      call y%bld(ry)
    end if
  end if

  if (info /= psb_success_) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name, a_err='csrg_vect_sv')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_s_cuda_csrg_inner_vect_sv
!
! psb_s_cuda_csrg_mold
!
!   Make B a freshly allocated object of dynamic type
!   psb_s_cuda_csrg_sparse_mat. If B is already allocated it is freed
!   and deallocated first.
!
!   Arguments:
!     a    - only used to select this routine; never referenced.
!     b    - on exit, allocated as psb_s_cuda_csrg_sparse_mat.
!     info - psb_success_, or psb_err_alloc_dealloc_ if the
!            deallocate/allocate failed.
!
subroutine psb_s_cuda_csrg_mold(a,b,info)

  use psb_base_mod
  use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_mold
  implicit none
  class(psb_s_cuda_csrg_sparse_mat), intent(in)            :: a
  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='csrg_mold'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)

  info = 0
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if
  ! Allocate with the new dynamic type only if the cleanup succeeded.
  if (info == 0) allocate(psb_s_cuda_csrg_sparse_mat :: b, stat=info)

  if (info == psb_success_) return

  ! Failure: record it on the error stack and hand over to the handler.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, name)
  call psb_error_handler(err_act)

  return

end subroutine psb_s_cuda_csrg_mold
!
! psb_s_cuda_csrg_reallocate_nz
!
!   Resize the storage of the CSRG matrix A to NZ entries.
!
!   Arguments:
!     nz - requested size, forwarded to the CSR reallocate and to
!          to_gpu (as nzrm).
!     a  - matrix to be resized.
!
!   There is no info argument, so the error stack is the only way to
!   report a failure: an entry is pushed before invoking the error
!   handler, otherwise the failure would go unnoticed by the caller.
!
subroutine psb_s_cuda_csrg_reallocate_nz(nz,a)

  use psb_base_mod
  use cusparse_mod
  use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_reallocate_nz
  implicit none
  integer(psb_ipk_), intent(in)                    :: nz
  class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='s_cuda_csrg_reallocate_nz'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  !
  ! Open question inherited from the original author: what should
  ! this really do? For now: resize on the host through the
  ! psb_s_csr_sparse_mat component, then call to_gpu.
  !
  call a%psb_s_csr_sparse_mat%reallocate(nz)

  call a%to_gpu(info,nzrm=nz)
  if (info /= 0) then
    info = psb_err_from_subroutine_
    call psb_errpush(info,name,a_err='to_gpu')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_s_cuda_csrg_reallocate_nz
!
! psb_s_cuda_csrg_scal
!
!   Scale the CSRG matrix A with the vector D. The work is done by the
!   scal method of the psb_s_csr_sparse_mat component; the result is
!   then passed to to_gpu.
!
!   Arguments:
!     d    - scaling vector.
!     a    - matrix, updated in place.
!     info - psb_success_ or the error code of the failing step.
!     side - optional, forwarded untouched to the CSR scal.
!
subroutine psb_s_cuda_csrg_scal(d,a,info,side)

  use psb_base_mod
  use cusparse_mod
  use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scal
  implicit none
  class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
  real(psb_spk_), intent(in)                       :: d(:)
  integer(psb_ipk_), intent(out)                   :: info
  character, intent(in), optional                  :: side

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)

  ! If is_dev() reports true, sync before touching the CSR data.
  if (a%is_dev()) call a%sync()

  ! Scale through the CSR component, then call to_gpu only on success.
  call a%psb_s_csr_sparse_mat%scal(d,info,side=side)
  if (info == 0) call a%to_gpu(info)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

  return

end subroutine psb_s_cuda_csrg_scal
!
! psb_s_cuda_csrg_scals
!
!   Scale the CSRG matrix A with the scalar D. The work is done by the
!   scal method of the psb_s_csr_sparse_mat component; the result is
!   then passed to to_gpu.
!
!   Arguments:
!     d    - scalar factor.
!     a    - matrix, updated in place.
!     info - psb_success_ or the error code of the failing step.
!
subroutine psb_s_cuda_csrg_scals(d,a,info)

  use psb_base_mod
  use cusparse_mod
  use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scals
  implicit none
  class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
  real(psb_spk_), intent(in)                       :: d
  integer(psb_ipk_), intent(out)                   :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)

  ! If is_dev() reports true, sync before touching the CSR data.
  if (a%is_dev()) call a%sync()

  ! Scale through the CSR component, then call to_gpu only on success.
  call a%psb_s_csr_sparse_mat%scal(d,info)
  if (info == 0) call a%to_gpu(info)

  if (info == 0) then
    call psb_erractionrestore(err_act)
  else
    call psb_error_handler(err_act)
  end if

  return

end subroutine psb_s_cuda_csrg_scals
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_to_gpu + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_spk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if (PSB_CUDA_SHORT_VERSION <= 10 ) + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. 
a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if + if ((info == 0) .and. 
a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif PSB_CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! 
write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#elif 0 + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! 
+ nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! 
write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. 
a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if + +end subroutine psb_s_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 new file mode 100644 index 00000000..52820436 --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! 
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+!
+! psb_s_cuda_csrg_vect_mv
+!
+!   Matrix-vector product driver for the CSRG storage class operating on
+!   encapsulated vectors.
+!
+!   Arguments:
+!     alpha, beta : scalar coefficients handed to the product kernel.
+!     a           : the matrix; must be assembled (a%is_asb()).
+!     x, y        : encapsulated input / input-output vectors.
+!     info        : psb_success_ on success, an error code otherwise.
+!     trans       : optional; 'T' or 'C' (any case) selects the host CSR
+!                   code path, anything else (default 'N') the device path.
+!
+!   Dispatch, as implemented below:
+!     * transposed      -> parent CSR spmm on the encapsulated vectors.
+!     * x and y are both psb_s_vect_cuda -> spmvCSRGDevice.
+!     * otherwise       -> parent CSR spmm on plain array copies of x and y,
+!                          with the result written back through y%bld.
+!
+subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use cusparse_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_vect_mv
+  use psb_s_cuda_vect_mod
+  implicit none
+  class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in)                    :: alpha, beta
+  class(psb_s_base_vect_type), intent(inout)    :: x
+  class(psb_s_base_vect_type), intent(inout)    :: y
+  integer(psb_ipk_), intent(out)                :: info
+  character, optional, intent(in)               :: trans
+
+  real(psb_spk_), allocatable :: xhost(:), yhost(:)
+  logical            :: transposed, ran_on_device
+  character          :: op
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='s_cuda_csrg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! Default to the non-transposed operation when the caller is silent.
+  op = 'N'
+  if (present(trans)) op = trans
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  select case (psb_toupper(op))
+  case ('T','C')
+    transposed = .true.
+  case default
+    transposed = .false.
+  end select
+
+  if (.not.transposed) then
+    ! NOTE(review): is_host()/sync() look like "host copy is newer, push it
+    ! to the device" here -- confirm against the vector/matrix classes.
+    if (a%is_host()) call a%sync()
+
+    ran_on_device = .false.
+    select type (xx => x)
+    type is (psb_s_vect_cuda)
+      select type (yy => y)
+      type is (psb_s_vect_cuda)
+        ran_on_device = .true.
+        if (xx%is_host()) call xx%sync()
+        ! y's current contents only matter when they are scaled by beta.
+        if (beta /= szero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      end select
+    end select
+
+    if (.not.ran_on_device) then
+      ! At least one vector is not a CUDA vector: work on array copies
+      ! with the parent CSR kernel and rebuild y from the result.
+      xhost = x%get_vect()
+      yhost = y%get_vect()
+      call a%psb_s_csr_sparse_mat%spmm(alpha,xhost,beta,yhost,info)
+      call y%bld(yhost)
+    end if
+  else
+    ! Transposed product: handled by the parent CSR implementation.
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= szero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  end if
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+end subroutine psb_s_cuda_csrg_vect_mv
diff --git a/cuda/impl/psb_s_cuda_diag_csmv.F90 b/cuda/impl/psb_s_cuda_diag_csmv.F90
new file mode 100644
index 00000000..016b82bc
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_diag_csmv.F90
@@ -0,0 +1,127 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_csmv + implicit none + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_cuda_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_diag_mold + implicit none + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_cuda_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_diag_mold diff --git a/cuda/impl/psb_s_cuda_diag_to_gpu.F90 b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 new file mode 100644 index 00000000..c8578e75 --- /dev/null +++ b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cuda_diag_to_gpu
+!
+!   (Re)creates the device-side image of a DIAG matrix from the host
+!   arrays a%data and a%offset.
+!
+!   Arguments:
+!     a     : the matrix; a%deviceMat is released first if already
+!             associated, then allocated again and filled.
+!     info  : 0 on success, otherwise the code returned by
+!             FallocDiagDevice / writeDiagDevice.
+!     nzrm  : optional, accepted for interface compatibility; not used here.
+!
+!   If either a%data or a%offset is not allocated the routine returns
+!   immediately with info = 0 and leaves a%deviceMat untouched.
+!
+subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm)
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_to_gpu
+  use iso_c_binding
+  implicit none
+  class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)                   :: info
+  integer(psb_ipk_), intent(in), optional          :: nzrm
+
+  ! n, d : extents of a%data along dimensions 1 and 2; c : column count.
+  integer(psb_ipk_)   :: n, c, d
+  type(diagdev_parms) :: gpu_parms
+
+  info = 0
+
+  ! Nothing to upload yet.
+  if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return
+
+  n = size(a%data,1)
+  d = size(a%data,2)
+  c = a%get_ncols()
+
+  ! NOTE(review): gpu_parms is never read afterwards; the call is kept in
+  ! case FgetDiagDeviceParams has side effects -- confirm and drop if not.
+  gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_float)
+
+  ! Release a previous device image before allocating a new one.
+  if (c_associated(a%deviceMat)) then
+    call freeDiagDevice(a%deviceMat)
+  endif
+  info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_float)
+  if (info == 0) info = &
+       & writeDiagDevice(a%deviceMat,a%data,a%offset,n)
+
+end subroutine psb_s_cuda_diag_to_gpu
diff --git a/cuda/impl/psb_s_cuda_diag_vect_mv.F90 b/cuda/impl/psb_s_cuda_diag_vect_mv.F90
new file mode 100644
index 00000000..d68e5193
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_diag_vect_mv.F90
@@ -0,0 +1,116 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+!
+! psb_s_cuda_diag_vect_mv
+!
+!   Matrix-vector product driver for the DIAG storage class operating on
+!   encapsulated vectors.
+!
+!   Arguments:
+!     alpha, beta : single precision scalar coefficients.
+!     a           : the matrix; must be assembled (a%is_asb()).
+!     x, y        : encapsulated input / input-output vectors.
+!     info        : psb_success_ on success, an error code otherwise.
+!     trans       : optional; 'T' or 'C' (any case) selects the host DIA
+!                   code path, anything else (default 'N') the device path.
+!
+!   Dispatch, as implemented below:
+!     * transposed      -> parent DIA spmm on the encapsulated vectors.
+!     * x and y are both psb_s_vect_cuda -> spmvDiagDevice.
+!     * otherwise       -> a%spmm on plain array copies of x and y, with
+!                          the result written back through y%bld.
+!
+subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_vect_mv
+  use psb_s_cuda_vect_mod
+  implicit none
+  class(psb_s_cuda_diag_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in) :: alpha, beta
+  class(psb_s_base_vect_type), intent(inout) :: x
+  class(psb_s_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  real(psb_spk_), allocatable :: rx(:), ry(:)
+  logical :: tra
+  character :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='s_cuda_diag_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  if (tra) then
+    ! Transposed product: handled by the parent DIA implementation.
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= szero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_s_vect_cuda)
+      select type(yy => y)
+      type is (psb_s_vect_cuda)
+        if (xx%is_host()) call xx%sync()
+        ! beta is real(psb_spk_): compare against the single precision
+        ! zero, as the transposed branch above already does.
+        if (beta /= szero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        ! y is not a CUDA vector: work on array copies and rebuild y.
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      ! x is not a CUDA vector: work on array copies and rebuild y.
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_diag_vect_mv
diff --git a/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90
new file mode 100644
index 00000000..3f12b293
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90
@@ -0,0 +1,416 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!
POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_dnsg_vect_mv
+!
+!   Matrix-vector product driver for the dense-on-GPU (DNSG) storage class
+!   operating on encapsulated vectors.
+!
+!   Arguments:
+!     alpha, beta : single precision scalar coefficients.
+!     a           : the matrix; must be assembled (a%is_asb()).
+!     x, y        : encapsulated input / input-output vectors.
+!     info        : psb_success_ on success, an error code otherwise.
+!     trans       : optional operation selector, upper-cased on entry;
+!                   defaults to 'N'.
+!
+!   Dispatch, as implemented below:
+!     * x and y are both psb_s_vect_cuda -> spmvDnsDevice, for any trans.
+!     * otherwise -> a%spmm on plain array copies of x and y, with the
+!       result written back through y%bld.  Because this fallback is
+!       reached for every value of trans, the operation selector is
+!       forwarded to it.
+!
+subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans)
+  use psb_base_mod
+  use psb_s_cuda_vect_mod
+  use dnsdev_mod
+  use psb_s_vectordev_mod
+  use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_vect_mv
+  implicit none
+  class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in) :: alpha, beta
+  class(psb_s_base_vect_type), intent(inout) :: x
+  class(psb_s_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  character :: trans_
+  real(psb_spk_), allocatable :: rx(:), ry(:)
+  Integer(Psb_ipk_) :: err_act, m, n, k
+  character(len=20) :: name='s_cuda_dnsg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (present(trans)) then
+    trans_ = psb_toupper(trans)
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  ! Dimensions handed to the device kernel: a single right-hand side
+  ! (n = 1), with the roles of rows and columns swapped unless trans_ is 'N'.
+  if (trans_ =='N') then
+    m = a%get_nrows()
+    n = 1
+    k = a%get_ncols()
+  else
+    m = a%get_ncols()
+    n = 1
+    k = a%get_nrows()
+  end if
+  select type (xx => x)
+  type is (psb_s_vect_cuda)
+    select type(yy => y)
+    type is (psb_s_vect_cuda)
+      if (a%is_host()) call a%sync()
+      if (xx%is_host()) call xx%sync()
+      ! y's current contents only matter when they are scaled by beta.
+      if (beta /= szero) then
+        if (yy%is_host()) call yy%sync()
+      end if
+      info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,&
+           & xx%deviceVect,beta,yy%deviceVect)
+      if (info /= 0) then
+        call psb_errpush(psb_err_from_subroutine_ai_,name,&
+             & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/))
+        info = psb_err_from_subroutine_ai_
+        goto 9999
+      end if
+      call yy%set_dev()
+    class default
+      ! y is not a CUDA vector: work on array copies and rebuild y.
+      if (a%is_dev()) call a%sync()
+      rx = xx%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info,trans_)
+      call y%bld(ry)
+    end select
+  class default
+    ! x is not a CUDA vector: work on array copies and rebuild y.
+    if (a%is_dev()) call a%sync()
+    rx = x%get_vect()
+    ry = y%get_vect()
+    call a%spmm(alpha,rx,beta,ry,info,trans_)
+    call y%bld(ry)
+  end select
+
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_dnsg_vect_mv
+
+
+!
+! psb_s_cuda_dnsg_mold
+!
+!   Makes b an empty object of dynamic type psb_s_cuda_dnsg_sparse_mat.
+!   If b is already allocated it is freed and deallocated first.
+!
+!   Arguments:
+!     a    : only supplies the dynamic type for dispatch; not read.
+!     b    : allocatable target, (re)allocated on exit.
+!     info : 0 on success, psb_err_alloc_dealloc_ if the deallocation or
+!            the allocation fails.
+!
+subroutine psb_s_cuda_dnsg_mold(a,b,info)
+  use psb_base_mod
+  use psb_s_cuda_vect_mod
+  use dnsdev_mod
+  use psb_s_vectordev_mod
+  use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_mold
+  implicit none
+  class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a
+  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='dnsg_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_s_cuda_dnsg_sparse_mat :: b, stat=info)
+
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info, name)
+    goto 9999
+  end if
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_dnsg_mold
+
+
+!!$
+!!$  interface
+!!$    subroutine psb_s_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+!!$      import :: psb_ipk_, psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type
+!!$      class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a
+!!$      real(psb_spk_), intent(in) :: alpha, beta
+!!$      class(psb_s_base_vect_type), intent(inout) :: x, y
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$      character, optional, intent(in) :: trans
+!!$    end subroutine psb_s_cuda_dnsg_inner_vect_sv
+!!$  end interface
+
+!!$  interface
+!!$    subroutine psb_s_cuda_dnsg_reallocate_nz(nz,a)
+!!$      import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_
+!!$      integer(psb_ipk_), intent(in) :: nz
+!!$      class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$    end subroutine psb_s_cuda_dnsg_reallocate_nz
+!!$  end interface
+!!$
+!!$  interface
+!!$    subroutine psb_s_cuda_dnsg_allocate_mnnz(m,n,a,nz)
+!!$      import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_
+!!$
integer(psb_ipk_), intent(in) :: m,n
+!!$      class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$      integer(psb_ipk_), intent(in), optional :: nz
+!!$    end subroutine psb_s_cuda_dnsg_allocate_mnnz
+!!$  end interface
+
+!
+! psb_s_cuda_dnsg_to_gpu
+!
+!   Allocates the device-side dense matrix for a and copies a%val into it.
+!
+!   Arguments:
+!     a    : the matrix; a%deviceMat is (re)allocated with the sizes
+!            reported by a%get_nrows() / a%get_ncols().
+!     info : psb_success_ on success, otherwise the code returned by
+!            FallocDnsDevice / writeDnsDevice (routed through the PSBLAS
+!            error handler).
+!
+subroutine psb_s_cuda_dnsg_to_gpu(a,info)
+  use psb_base_mod
+  use psb_s_cuda_vect_mod
+  use dnsdev_mod
+  use psb_s_vectordev_mod
+  use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_to_gpu
+  implicit none
+  class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out) :: info
+  Integer(Psb_ipk_) :: err_act
+  logical, parameter :: debug=.false.
+  character(len=20) :: name='s_cuda_dnsg_to_gpu'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2)
+  info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),&
+       & spgpu_type_float,1)
+  ! Only upload when the device allocation succeeded.
+  if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2))
+  if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_dnsg_to_gpu
+
+
+
+!
+! psb_s_cuda_cp_dnsg_from_coo
+!
+!   Builds a from a copy of the COO matrix b: the parent DNS conversion
+!   fills the host data, then a%to_gpu uploads it.
+!
+!   Arguments:
+!     a    : destination matrix.
+!     b    : source COO matrix; synchronised first if b%is_dev() is true.
+!     info : psb_success_ on success, an error code otherwise.
+!
+subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info)
+  use psb_base_mod
+  use psb_s_cuda_vect_mod
+  use dnsdev_mod
+  use psb_s_vectordev_mod
+  use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_coo
+  implicit none
+
+  class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+  Integer(Psb_ipk_) :: err_act
+  ! Wide enough for the literal; len=20 silently truncated it.
+  character(len=32) :: name='s_cuda_dnsg_cp_from_coo'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  call a%psb_s_dns_sparse_mat%cp_from_coo(b,info)
+  if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info
+  if (info == 0) call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_cp_dnsg_from_coo
+
+!
+! psb_s_cuda_cp_dnsg_from_fmt
+!
+!   Builds a from a copy of a matrix b in any storage format.  A COO
+!   source is converted directly; any other format is first copied to a
+!   temporary COO matrix which is then moved into a.
+!
+!   Arguments:
+!     a    : destination matrix.
+!     b    : source matrix; synchronised first if b%is_dev() is true.
+!     info : psb_success_ on success, an error code otherwise.
+!
+subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info)
+  use psb_base_mod
+  use psb_s_cuda_vect_mod
+  use dnsdev_mod
+  use psb_s_vectordev_mod
+  use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_fmt
+  implicit none
+
+  class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_s_base_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  type(psb_s_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_) :: err_act
+  ! Wide enough for the literal; len=20 silently truncated it.
+  character(len=32) :: name='s_cuda_dnsg_cp_from_fmt'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_s_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+!!$  class is (psb_s_ell_sparse_mat)
+!!$    nzm = psb_size(b%ja,2)
+!!$    m = b%get_nrows()
+!!$    nc = b%get_ncols()
+!!$    nza = b%get_nzeros()
+!!$    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+!!$    ld = gpu_parms%pitch
+!!$    nzm = gpu_parms%maxRowSize
+!!$    a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
+!!$    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
+!!$    if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
+!!$    if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
+!!$    if (info == 0) call psb_safe_cpy( b%val, a%val , info)
+!!$    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
+!!$    if (info == 0) then
+!!$      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+!!$    end if
+!!$    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+!!$    if (info == 0) then
+!!$      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+!!$    end if
+!!$    a%nzt = nza
+!!$    call a%to_gpu(info)
+
+  class default
+
+    ! Generic path: go through a temporary COO copy, then move it in.
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_cp_dnsg_from_fmt
+
+
+
+!
+! psb_s_cuda_mv_dnsg_from_coo
+!
+!   Builds a from the COO matrix b and then frees b.
+!
+!   Arguments:
+!     a    : destination matrix.
+!     b    : source COO matrix; fixed (b%fix) if it is not sorted by rows,
+!            synchronised if b%is_dev() is true, and freed once the copy
+!            has been attempted.
+!     info : psb_success_ on success, an error code otherwise.  Every
+!            failure, including one from b%fix, leaves through the common
+!            error handler so the saved error action is always restored.
+!
+subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info)
+  use psb_base_mod
+  use psb_s_cuda_vect_mod
+  use dnsdev_mod
+  use psb_s_vectordev_mod
+  use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_coo
+  implicit none
+
+  class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  Integer(Psb_ipk_) :: err_act
+  logical, parameter :: debug=.false.
+  ! Wide enough for the literal; len=20 silently truncated it.
+  character(len=32) :: name='s_cuda_dnsg_mv_from_coo'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+  ! A bare return here would skip psb_erractionrestore; use the handler.
+  if (info /= psb_success_) goto 9999
+  if (b%is_dev()) call b%sync()
+  call a%cp_from_coo(b,info)
+  if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info
+  ! b is released whether or not the copy succeeded.
+  call b%free()
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_mv_dnsg_from_coo
+
+subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info)
+  use psb_base_mod
+  use psb_s_cuda_vect_mod
+  use dnsdev_mod
+  use psb_s_vectordev_mod
+  use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_fmt
+  implicit none
+  class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_s_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+
+  type(psb_s_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='s_cuda_dnsg_cp_from_fmt'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_s_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+!!$  class is (psb_s_ell_sparse_mat)
+!!$    nzm = psb_size(b%ja,2)
+!!$
m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$ a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$ call a%to_gpu(info) + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_s_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..b771ca1b --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. 
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_s_cuda_elg_allocate_mnnz
+!   Allocate the host arrays of an m x n matrix, put it in the build state
+!   and create the device copy through a%to_gpu.
+!
+!   m, n - matrix dimensions; a negative value is rejected with
+!          psb_err_iarg_neg_.
+!   a    - matrix being allocated.
+!   nz   - optional total number of entries to make room for. It is turned
+!          into a per-row count by a ceiling division by m; when absent the
+!          total defaults to max(7*m,7*n,1).
+!
+!   There is no info argument: failures are pushed on the error stack and
+!   handed to psb_error_handler.
+!
+subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  Integer(Psb_ipk_)  :: err_act, info, nz_, ld, mrows
+  character(len=20)  :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+  type(elldev_parms) :: gpu_parms
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+
+  ! m == 0 passes the checks above, so clamp the divisor: the ceiling
+  ! divisions below would otherwise divide by zero. Nothing changes for
+  ! m >= 1.
+  mrows = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + mrows - 1)/mrows
+  else
+    nz_ = (max(7*m,7*n,ione) + mrows - 1)/mrows
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+
+  ! The device layer decides the leading dimension (pitch) and the row width
+  ! actually used; the host arrays are sized to match.
+  gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_float,1)
+  ld  = gpu_parms%pitch
+  nz_ = gpu_parms%maxRowSize
+
+  if (info == psb_success_) call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info)
+  ! Report an allocation failure here: calling to_gpu regardless would
+  ! overwrite info and hide it.
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  a%irn   = 0
+  a%idiag = 0
+  a%nzt   = 0
+  call a%set_nrows(m)
+  call a%set_ncols(n)
+  call a%set_bld()
+  call a%set_triangle(.false.)
+  call a%set_unit(.false.)
+  call a%set_dupl(psb_dupl_def_)
+
+  call a%to_gpu(info,nzrm=nz_)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_elg_allocate_mnnz
diff --git a/cuda/impl/psb_s_cuda_elg_asb.f90 b/cuda/impl/psb_s_cuda_elg_asb.f90
new file mode 100644
index 00000000..53a17a32
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_asb.f90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! 
notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_elg_asb
+!   Put matrix A in the assembled state. If A reports is_host(), it is
+!   synced first; then a%set_asb() is called.
+!
+!   a - matrix to assemble.
+!
+subroutine psb_s_cuda_elg_asb(a)
+
+  use psb_base_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_asb
+  implicit none
+
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+
+  integer(psb_ipk_) :: saved_action
+
+  call psb_erractionsave(saved_action)
+
+  ! sync() is only needed when the host side is the one flagged.
+  if (a%is_host()) call a%sync()
+  call a%set_asb()
+
+  call psb_erractionrestore(saved_action)
+  return
+
+end subroutine psb_s_cuda_elg_asb
diff --git a/cuda/impl/psb_s_cuda_elg_csmm.F90 b/cuda/impl/psb_s_cuda_elg_csmm.F90
new file mode 100644
index 00000000..ff7b7848
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_csmm.F90
@@ -0,0 +1,124 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmm + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_elg_csmv + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_elg_csput_a + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! 
Build phase should only ever be in COO
+    info = psb_err_invalid_mat_state_
+
+  else if (a%is_upd()) then
+!!$ write(*,*) 'elg_csput_a '
+    if (a%is_dev()) call a%sync()
+    call a%psb_s_ell_sparse_mat%csput(nz,ia,ja,val,&
+         & imin,imax,jmin,jmax,info)
+    if (info /= psb_success_) then
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    call a%set_host()
+  else
+    ! State is wrong.
+    info = psb_err_invalid_mat_state_
+  end if
+  if (info /= psb_success_) then
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_elg_csput_a
+
+
+
+!
+! psb_s_cuda_elg_csput_v
+!   Insert nz entries, given as index/value vectors, into matrix A while it
+!   is in the update state.
+!
+!   nz        - number of entries; must be positive.
+!   ia, ja    - index vectors; each must report at least nz rows.
+!   val       - value vector; must report at least nz rows.
+!   a         - matrix being updated.
+!   imin,imax,
+!   jmin,jmax - forwarded unchanged to the parent csput on the host path;
+!               not used on the device path.
+!   info      - psb_success_ on success. psb_err_iarg_neg_ or
+!               psb_err_input_asize_invalid_i_ for bad arguments,
+!               psb_err_invalid_mat_state_ when A is in the build state or in
+!               any state other than update, psb_err_internal_error_ when the
+!               insertion itself fails.
+!
+!   When ia, ja and val are all CUDA vectors the insertion is done by
+!   csputEllDeviceFloat and A is flagged set_dev(); otherwise the parent
+!   psb_s_ell_sparse_mat csput is used and A is flagged set_host().
+!
+subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+
+  use psb_base_mod
+  use iso_c_binding
+  use elldev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_v
+  use psb_s_cuda_vect_mod
+  implicit none
+
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_s_base_vect_type), intent(inout)      :: val
+  class(psb_i_base_vect_type), intent(inout)      :: ia, ja
+  integer(psb_ipk_), intent(in)                   :: nz, imin,imax,jmin,jmax
+  integer(psb_ipk_), intent(out)                  :: info
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='s_cuda_elg_csput_v'
+  integer(psb_ipk_)  :: int_err(5), debug_level, debug_unit
+  logical            :: gpu_invoked
+
+  call psb_erractionsave(err_act)
+  info        = psb_success_
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ! The whole array is handed to psb_errpush below, so define every element,
+  ! not just the first one.
+  int_err(:)  = 0
+
+  ! nz == 0 is rejected together with negative values, so no separate
+  ! "nothing to do" return is needed further down.
+  if (nz <= 0) then
+    info = psb_err_iarg_neg_
+    int_err(1)=1
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (ia%get_nrows() < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=2
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (ja%get_nrows() < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=3
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (val%get_nrows() < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=4
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_bld()) then
+    ! Build phase should only ever be in COO
+    info = psb_err_invalid_mat_state_
+
+  else if (a%is_upd()) then
+
+    gpu_invoked = .false.
+    ! The device path is taken only when all three vectors are CUDA vectors;
+    ! any other combination falls through with gpu_invoked still .false.
+    select type (ia)
+    class is (psb_i_vect_cuda)
+      select type (ja)
+      class is (psb_i_vect_cuda)
+        select type (val)
+        class is (psb_s_vect_cuda)
+          if (a%is_host())   call a%sync()
+          if (val%is_host()) call val%sync()
+          if (ia%is_host())  call ia%sync()
+          if (ja%is_host())  call ja%sync()
+          info = csputEllDeviceFloat(a%deviceMat,nz,&
+               & ia%deviceVect,ja%deviceVect,val%deviceVect)
+          call a%set_dev()
+          gpu_invoked=.true.
+        end select
+      end select
+    end select
+    if (.not.gpu_invoked) then
+      ! Host fallback through the parent ELL implementation.
+      if (a%is_dev()) call a%sync()
+      call a%psb_s_ell_sparse_mat%csput(nz,ia,ja,val,&
+           & imin,imax,jmin,jmax,info)
+      call a%set_host()
+    end if
+
+    ! Whatever the failing path returned, report a single error code.
+    if (info /= 0) then
+      info = psb_err_internal_error_
+    end if
+
+  else
+    ! State is wrong.
+    info = psb_err_invalid_mat_state_
+  end if
+  if (info /= psb_success_) then
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+
+end subroutine psb_s_cuda_elg_csput_v
diff --git a/cuda/impl/psb_s_cuda_elg_from_gpu.F90 b/cuda/impl/psb_s_cuda_elg_from_gpu.F90
new file mode 100644
index 00000000..d995157e
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_from_gpu.F90
@@ -0,0 +1,67 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_elg_from_gpu
+!   Copy the device matrix of A back into the host arrays a%val, a%ja,
+!   a%irn and a%idiag.
+!
+!   a    - matrix; if a%deviceMat is not associated the matrix is freed and
+!          the routine returns with info = 0.
+!   info - 0 on success, otherwise the code from psb_realloc or
+!          readEllDevice.
+!
+subroutine psb_s_cuda_elg_from_gpu(a,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_from_gpu
+  implicit none
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)                  :: info
+
+  integer(psb_ipk_) :: pitch, maxrowsize
+
+  info = 0
+
+  if (.not.(c_associated(a%deviceMat))) then
+    call a%free()
+    return
+  end if
+
+  pitch      = getEllDevicePitch(a%deviceMat)
+  maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
+
+  ! Resize the host arrays when their shape differs from the device layout.
+  ! NOTE(review): a%irn is resized to pitch here but a%idiag is not, although
+  ! both are passed to readEllDevice; confirm a%idiag is always large enough.
+  if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
+    call psb_realloc(pitch,maxrowsize,a%val,info)
+    if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
+    if (info == 0) call psb_realloc(pitch,a%irn,info)
+  end if
+  if (info == 0)  info = &
+       & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag)
+  ! Only flag the two copies as in sync when the read actually succeeded;
+  ! after a failure the host arrays do not hold the device contents.
+  if (info == 0) call a%set_sync()
+
+end subroutine psb_s_cuda_elg_from_gpu
diff --git a/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90
new file mode 100644
index 00000000..537365a6
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90
@@ -0,0 +1,84 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. 
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_elg_inner_vect_sv
+!   Triangular-solve kernel on vector objects. There is no device kernel
+!   here: A, x and y are synced and the work is delegated to the parent
+!   psb_s_ell_sparse_mat inner_spsm; y is then flagged set_host().
+!
+!   alpha, beta - scalars forwarded to inner_spsm.
+!   a           - matrix.
+!   x, y        - input and input/output vectors.
+!   info        - psb_success_ on success; psb_err_from_subroutine_ when
+!                 inner_spsm reports a failure.
+!   trans       - optional, forwarded unchanged to inner_spsm.
+!
+subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_inner_vect_sv
+  use psb_s_cuda_vect_mod
+  implicit none
+  class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in)                   :: alpha, beta
+  class(psb_s_base_vect_type), intent(inout)   :: x, y
+  integer(psb_ipk_), intent(out)               :: info
+  character, optional, intent(in)              :: trans
+
+  integer(psb_ipk_)  :: err_act
+  ! len=24: the 24-character routine name does not fit in the usual len=20.
+  character(len=24)  :: name='s_cuda_elg_inner_vect_sv'
+
+  call psb_get_erraction(err_act)
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  call x%sync()
+  call y%sync()
+  call a%psb_s_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
+  ! The result was produced on the host side.
+  call y%set_host()
+
+  if (info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name, a_err='inner_cssm')
+    goto 9999
+  end if
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_elg_inner_vect_sv
diff --git a/cuda/impl/psb_s_cuda_elg_mold.F90 b/cuda/impl/psb_s_cuda_elg_mold.F90
new file mode 100644
index 00000000..7ff9c7ae
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_mold.F90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_s_cuda_elg_mold
+!   (Re)allocate B as an empty psb_s_cuda_elg_sparse_mat. Matrix A is not
+!   read; it only selects this implementation. An already allocated B is
+!   freed and deallocated first.
+!
+!   a    - matrix whose dynamic type selects this routine.
+!   b    - allocatable matrix that receives the new object.
+!   info - 0 on success, psb_err_alloc_dealloc_ if the deallocation or the
+!          allocation fails.
+!
+subroutine psb_s_cuda_elg_mold(a,b,info)
+
+  use psb_base_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_mold
+  implicit none
+  class(psb_s_cuda_elg_sparse_mat), intent(in)             :: a
+  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+  Integer(Psb_ipk_)  :: saved_action
+  character(len=20)  :: name='elg_mold'
+
+  call psb_get_erraction(saved_action)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  ! Only allocate when the deallocation above (if any) went through.
+  if (info == 0) allocate(psb_s_cuda_elg_sparse_mat :: b, stat=info)
+
+  ! Success: nothing else to do.
+  if (info == psb_success_) return
+
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(saved_action)
+
+  return
+
+end subroutine psb_s_cuda_elg_mold
diff --git a/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90
new file mode 100644
index 00000000..5b99b9ad
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90
@@ -0,0 +1,72 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_elg_reallocate_nz
+!   Resize the second dimension of a%ja and a%val so that the matrix can hold
+!   nz entries in total, then rebuild the device copy with a%to_gpu.
+!
+!   nz - requested total number of entries; it is turned into a per-row
+!        count by a ceiling division by the number of rows.
+!   a  - matrix to resize.
+!
+!   There is no info argument: failures are pushed on the error stack and
+!   handed to psb_error_handler.
+!
+subroutine psb_s_cuda_elg_reallocate_nz(nz,a)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_)  :: m, nzrm, ld, mrows
+  Integer(Psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='s_cuda_elg_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  !
+  ! What should this really do???
+  !
+  if (a%is_dev()) call a%sync()
+  m     = a%get_nrows()
+  ! A matrix with zero rows would make the ceiling division below divide by
+  ! zero; clamp the divisor. Nothing changes for m >= 1.
+  mrows = max(m,ione)
+  nzrm  = (max(nz,ione)+mrows-1)/mrows
+  ! Keep the current leading dimension, only the row width changes.
+  ld    = size(a%ja,1)
+  call psb_realloc(ld,nzrm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    goto 9999
+  end if
+
+  call a%to_gpu(info,nzrm=nzrm)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_elg_reallocate_nz
diff --git a/cuda/impl/psb_s_cuda_elg_scal.F90 b/cuda/impl/psb_s_cuda_elg_scal.F90
new file mode 100644
index 00000000..dfa99cf1
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_scal.F90
@@ -0,0 +1,71 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_elg_scal
+!   Scale matrix A by the vector D. The scaling itself is done on the host
+!   by the parent psb_s_ell_sparse_mat scal; the device copy is then rebuilt
+!   with a%to_gpu.
+!
+!   d    - scaling vector, forwarded to the parent scal.
+!   a    - matrix to scale; made non-unit first if it reports is_unit().
+!   info - psb_success_ on success, otherwise the code from the parent scal
+!          or from to_gpu.
+!   side - optional, forwarded unchanged to the parent scal.
+!
+subroutine psb_s_cuda_elg_scal(d,a,info,side)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scal
+  implicit none
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in)                      :: d(:)
+  integer(psb_ipk_), intent(out)                  :: info
+  character, intent(in), optional                 :: side
+
+  Integer(Psb_ipk_)  :: saved_action
+  character(len=20)  :: name='scal'
+
+  info = psb_success_
+  call psb_erractionsave(saved_action)
+
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_s_ell_sparse_mat%scal(d,info,side)
+  if (info == psb_success_) then
+    ! Host data changed: push it to the device again.
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(saved_action)
+      return
+    end if
+  end if
+
+  ! Either the host scaling or the upload failed.
+  call psb_error_handler(saved_action)
+
+  return
+
+end subroutine psb_s_cuda_elg_scal
diff --git a/cuda/impl/psb_s_cuda_elg_scals.F90 b/cuda/impl/psb_s_cuda_elg_scals.F90
new file mode 100644
index 00000000..f0aa2504
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_scals.F90
@@ -0,0 +1,66 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_elg_scals: multiply every stored coefficient by the scalar d.
+!
+!   d    - scalar factor applied to the whole a%val array
+!   a    - matrix to be scaled; host and device copies are both updated
+!   info - psb_success_ on success, otherwise the code returned by to_gpu
+!
+subroutine psb_s_cuda_elg_scals(d,a,info)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scals
+  implicit none
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in)             :: d
+  integer(psb_ipk_), intent(out)         :: info
+
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='scal'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  ! The scaling is done on the host array, so fetch the device copy
+  ! first if that is where the current data lives.
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) then
+    call a%make_nonunit()
+  end if
+
+  a%val(:,:) = a%val(:,:) * d
+
+  ! Push the scaled host data back to the device.
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  ! Undo psb_erractionsave on the success path as well, like every
+  ! other routine of this family; otherwise the saved error action is
+  ! never put back after a successful call.
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_elg_scals
diff --git a/cuda/impl/psb_s_cuda_elg_to_gpu.F90 b/cuda/impl/psb_s_cuda_elg_to_gpu.F90
new file mode 100644
index 00000000..9c16ea8d
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_to_gpu.F90
@@ -0,0 +1,84 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_elg_to_gpu: copy the host ELL arrays to the device matrix,
+! (re)allocating the device storage when its layout no longer matches.
+!
+!   a    - matrix whose val/ja/irn/idiag host arrays are uploaded
+!   info - 0 on success, otherwise the code returned by the device
+!          allocation, by psb_realloc or by writeEllDevice
+!   nzrm - optional lower bound on the row width used to size the
+!          device storage
+!
+! Nothing is done (info = 0) when a%val or a%ja is not allocated.
+!
+subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_to_gpu
+  implicit none
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)             :: info
+  integer(psb_ipk_), intent(in), optional    :: nzrm
+
+  integer(psb_ipk_)   :: m, nzm, n, pitch,maxrowsize, nzt
+  type(elldev_parms)  :: gpu_parms
+
+  info = 0
+
+  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
+
+  m   = a%get_nrows()
+  nzm = psb_size(a%val,2)
+  n   = a%get_ncols()
+  nzt = a%get_nzeros()
+  if (present(nzrm)) nzm = max(nzm,nzrm)
+
+  ! Layout the device would choose for a matrix of this size.
+  gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_float,1)
+
+  ! Layout of the existing device matrix; -1 forces an allocation when
+  ! there is none.
+  if (c_associated(a%deviceMat)) then
+    pitch      = getEllDevicePitch(a%deviceMat)
+    maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
+  else
+    pitch      = -1
+    maxrowsize = -1
+  end if
+
+  if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
+    if (c_associated(a%deviceMat)) then
+      call freeEllDevice(a%deviceMat)
+    endif
+    info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_float,1)
+    ! Query the new layout only when the allocation succeeded: after a
+    ! failure the handle cannot be assumed to reference a valid device
+    ! matrix.  pitch/maxrowsize are not used below when info /= 0.
+    if (info == 0) then
+      pitch      = getEllDevicePitch(a%deviceMat)
+      maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
+    end if
+  end if
+  if (info == 0) then
+    ! Host arrays must have the same pitch x maxrowsize shape as the
+    ! device storage before they are written out.
+    if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
+      call psb_realloc(pitch,maxrowsize,a%val,info)
+      if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
+    end if
+  end if
+  if (info == 0)  info = &
+       & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)
+  ! NOTE(review): the matrix is flagged as synchronized even when info
+  ! reports a failure; kept as in the original, confirm this is intended.
+  call a%set_sync()
+
+end subroutine psb_s_cuda_elg_to_gpu
diff --git a/cuda/impl/psb_s_cuda_elg_trim.f90 b/cuda/impl/psb_s_cuda_elg_trim.f90
new file mode 100644
index 00000000..2d390343
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_trim.f90
@@ -0,0 +1,61 @@
+! 
Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_elg_trim: shrink the host arrays to the size actually needed.
+!
+! irn and idiag are resized to the number of rows (at least 1); ja and
+! val keep their leading dimension and have their second dimension
+! reduced to the largest row length found in irn (at least 1).
+!
+!   a - matrix whose host arrays are resized in place
+!
+! NOTE(review): only the host arrays are touched here; no sync with the
+! device copy is performed before or after - confirm callers expect that.
+!
+subroutine psb_s_cuda_elg_trim(a)
+
+  use psb_base_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_trim
+  implicit none
+  class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+  Integer(psb_ipk_)  :: err_act, info, nr, m, nzm,ld
+  character(len=20)  :: name='trim'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  nr   = a%get_nrows()
+  m    = max(1_psb_ipk_,nr)
+  ld   = max(1_psb_ipk_,size(a%ja,1))
+  ! Scan only the rows that exist: with zero rows the section is empty,
+  ! maxval returns the most negative integer and the max() clamps the
+  ! result to 1, instead of reading irn(1) out of bounds.
+  nzm  = max(1_psb_ipk_,maxval(a%irn(1:nr)))
+
+  call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info)
+
+  if (info /= psb_success_) then
+    ! Record the failure before handing over to the error handler, as
+    ! the reallocate_nz routine of this family does.
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    goto 9999
+  end if
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_elg_trim
diff --git a/cuda/impl/psb_s_cuda_elg_vect_mv.F90 b/cuda/impl/psb_s_cuda_elg_vect_mv.F90
new file mode 100644
index 00000000..6c898fda
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_elg_vect_mv.F90
@@ -0,0 +1,121 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! 
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_elg_vect_mv: matrix-vector product on encapsulated vectors,
+! presumably y = alpha*op(A)*x + beta*y (the arithmetic itself lives in
+! spmvEllDevice / the parent spmm, not in this file).
+!
+!   alpha, beta - scalar coefficients
+!   a           - assembled matrix; otherwise info is set to
+!                 psb_err_invalid_mat_state_
+!   x, y        - input / result vectors
+!   info        - psb_success_ on success, error code otherwise
+!   trans       - optional, default 'N'; 'T' or 'C' (any case) selects
+!                 the transposed path
+!
+! Three execution paths:
+!   * transposed: everything is synced to the host and the parent ELL
+!     spmm is used; y is then flagged as host-resident;
+!   * non-transposed with x and y both psb_s_vect_cuda: the product is
+!     computed by spmvEllDevice and y is flagged as device-resident;
+!   * otherwise: x and y are extracted into plain arrays, the array spmm
+!     is called and y is rebuilt from the result.
+!
+subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_vect_mv
+  use psb_s_cuda_vect_mod
+  implicit none
+  class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in)                :: alpha, beta
+  class(psb_s_base_vect_type), intent(inout) :: x
+  class(psb_s_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out)            :: info
+  character, optional, intent(in)           :: trans
+  real(psb_spk_), allocatable               :: rx(:), ry(:)
+  logical            :: tra
+  character          :: trans_
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='s_cuda_elg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  if (tra) then
+    ! Transposed product: handled by the parent ELL implementation on
+    ! host data.
+    if (a%is_dev()) call a%sync()
+    if (.not.x%is_host()) call x%sync()
+    ! y only needs to be current when it contributes to the result.
+    if (beta /= szero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_s_vect_cuda)
+      select type(yy => y)
+      type is (psb_s_vect_cuda)
+        ! Both vectors are CUDA vectors: compute through the device
+        ! kernel.
+        ! NOTE(review): a was already synced just above the select type;
+        ! this second check looks redundant.
+        if (a%is_host()) call a%sync()
+        if (xx%is_host()) call xx%sync()
+        if (beta /= szero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        ! y is not a CUDA vector: fall back to the plain-array product.
+        if (a%is_dev()) call a%sync()
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      ! x is not a CUDA vector: fall back to the plain-array product.
+      if (a%is_dev()) call a%sync()
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_elg_vect_mv
diff --git a/cuda/impl/psb_s_cuda_hdiag_csmv.F90 b/cuda/impl/psb_s_cuda_hdiag_csmv.F90
new file mode 100644
index 00000000..3f34c2e7
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hdiag_csmv.F90
@@ -0,0 +1,126 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_csmv + implicit none + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_cuda_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hdiag_mold + implicit none + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_cuda_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hdiag_mold diff --git a/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 new file mode 100644 index 00000000..bc3fa325 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_hdiag_to_gpu: (re)create the device copy of the matrix from
+! the host hackOffsets / diaOffsets / val arrays.
+!
+!   a    - matrix to upload; an existing device matrix is freed first
+!   info - 0 on success;
+!          -1 if a%hackOffsets is not allocated,
+!          -2 if a%diaOffsets is not allocated,
+!          -3 if a%val is not allocated;
+!          otherwise the code returned by FAllocHdiagDevice or
+!          writeHdiagDevice
+!
+subroutine psb_s_cuda_hdiag_to_gpu(a,info)
+
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_to_gpu
+  use iso_c_binding
+  implicit none
+  class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)             :: info
+  integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight
+  type(hdiagdev_parms) :: gpu_parms
+
+  info = 0
+
+  nr        = a%get_nrows()
+  nc        = a%get_ncols()
+  hacksize  = a%hackSize
+  hackCount = a%nhacks
+  if (.not.allocated(a%hackOffsets)) then
+    info = -1
+    return
+  end if
+  ! The entry past the last hack is used as the allocation height.
+  allocheight = a%hackOffsets(hackCount+1)
+!!$  write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,&
+!!$       & size(a%hackoffsets),size(a%diaoffsets), size(a%val)
+  if (.not.allocated(a%diaOffsets)) then
+    info = -2
+    return
+  end if
+  if (.not.allocated(a%val)) then
+    info = -3
+    return
+  end if
+
+  if (c_associated(a%deviceMat)) then
+    call freeHdiagDevice(a%deviceMat)
+  endif
+
+  ! a%val is single precision, so the device matrix is created with the
+  ! float element type, as the single-precision ELL upload does.
+  info = FAllocHdiagDevice(a%deviceMat,nr,nc,&
+       & allocheight,hacksize,hackCount,spgpu_type_float)
+  if (info == 0) info = &
+       & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets)
+
+end subroutine psb_s_cuda_hdiag_to_gpu
diff --git a/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90
new file mode 100644
index 00000000..03215047
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90
@@ -0,0 +1,117 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. 
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+!
+! psb_s_cuda_hdiag_vect_mv: matrix-vector product on encapsulated vectors,
+! presumably y = alpha*op(A)*x + beta*y (the arithmetic itself lives in
+! spmvHdiagDevice / the parent spmm, not in this file).
+!
+!   alpha, beta - scalar coefficients
+!   a           - assembled matrix; otherwise info is set to
+!                 psb_err_invalid_mat_state_
+!   x, y        - input / result vectors
+!   info        - psb_success_ on success, error code otherwise
+!   trans       - optional, default 'N'; 'T' or 'C' (any case) selects
+!                 the transposed path
+!
+! The transposed product goes through the parent HDIA spmm on host data.
+! The non-transposed product runs on the device when x and y are both
+! psb_s_vect_cuda, and through the plain-array spmm otherwise.
+!
+subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_vect_mv
+  use psb_s_cuda_vect_mod
+  implicit none
+  class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in)                :: alpha, beta
+  class(psb_s_base_vect_type), intent(inout) :: x
+  class(psb_s_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out)            :: info
+  character, optional, intent(in)           :: trans
+  real(psb_spk_), allocatable               :: rx(:), ry(:)
+  logical            :: tra
+  character          :: trans_
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='s_cuda_hdiag_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  if (tra) then
+    if (.not.x%is_host()) call x%sync()
+    ! y only needs to be current when it contributes to the result.
+    ! beta is single precision, so it is compared with the
+    ! single-precision zero, as in the ELL routine.
+    if (beta /= szero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_s_vect_cuda)
+      select type(yy => y)
+      type is (psb_s_vect_cuda)
+        ! Both vectors are CUDA vectors: compute through the device
+        ! kernel.
+        if (xx%is_host()) call xx%sync()
+        if (beta /= szero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        ! y is not a CUDA vector: fall back to the plain-array product.
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      ! x is not a CUDA vector: fall back to the plain-array product.
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_hdiag_vect_mv
diff --git a/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90
new file mode 100644
index 00000000..480f6677
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90
@@ -0,0 +1,62 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_hlg_allocate_mnnz: allocate an m x n matrix on the host
+! through the parent HLL allocate, then create its device copy.
+!
+!   m, n - requested number of rows and columns
+!   a    - matrix to allocate
+!   nz   - optional total number of nonzeros, forwarded unchanged to the
+!          parent allocate
+!
+! There is no info argument: failures of the device upload are reported
+! through psb_error_handler.
+!
+subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz)
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in)                   :: m,n
+  class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional         :: nz
+  Integer(psb_ipk_)   :: err_act, info, nz_,ld
+  character(len=20)   :: name='allocate_mnz'
+  logical, parameter  :: debug=.false.
+  type(hlldev_parms)  :: gpu_parms
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  call a%psb_s_hll_sparse_mat%allocate(m,n,nz)
+
+  ! nz_ used to be passed to to_gpu without ever being assigned.  Give
+  ! it a defined value: the per-row average of the requested nonzeros,
+  ! rounded up (same formula as the ELL reallocate_nz routine), or the
+  ! minimum of one entry per row when nz is not supplied.
+  ! NOTE(review): how the HLL to_gpu uses nzrm is not visible from here;
+  ! confirm this is the value it expects.
+  ld = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione)+ld-1)/ld
+  else
+    nz_ = ione
+  end if
+
+  call a%to_gpu(info,nzrm=nz_)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_s_cuda_hlg_allocate_mnnz
diff --git a/cuda/impl/psb_s_cuda_hlg_csmm.F90 b/cuda/impl/psb_s_cuda_hlg_csmm.F90
new file mode 100644
index 00000000..0dc28c7f
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hlg_csmm.F90
@@ -0,0 +1,122 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmm + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hlg_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hlg_csmv + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_cuda_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hlg_from_gpu + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call 
psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_s_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..2985a1ab --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_s_cuda_hlg_inner_vect_sv: triangular solve on encapsulated vectors.
+! There is no device kernel involved here: x, y and (if needed) the
+! matrix are synchronized and the work is delegated to the inner_spsm of
+! the parent HLL type; y is then flagged as host-resident.
+!
+!   alpha, beta - scalar coefficients, forwarded to inner_spsm
+!   a           - matrix
+!   x, y        - right-hand side / result vectors
+!   info        - psb_success_ on success, psb_err_from_subroutine_ if
+!                 inner_spsm reports an error
+!   trans       - optional, forwarded unchanged to inner_spsm
+!
+subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_inner_vect_sv
+  use psb_s_cuda_vect_mod
+  implicit none
+  class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in)                   :: alpha, beta
+  class(psb_s_base_vect_type), intent(inout)   :: x, y
+  integer(psb_ipk_), intent(out)               :: info
+  character, optional, intent(in)              :: trans
+
+  real(psb_spk_), allocatable :: rx(:), ry(:)
+  logical, parameter  :: debug=.false.
+  character(len=20)   :: name='s_base_inner_vect_sv'
+  integer(psb_ipk_)   :: cur_act
+
+  call psb_get_erraction(cur_act)
+  info = psb_success_
+
+  ! Synchronize the operands, then solve with the parent implementation.
+  call x%sync()
+  call y%sync()
+  if (a%is_dev()) call a%sync()
+  call a%psb_s_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
+  ! The result now lives in the host copy of y (set even on failure).
+  call y%set_host()
+
+  if (info == psb_success_) then
+    call psb_erractionrestore(cur_act)
+    return
+  end if
+
+  ! The parent solve failed: record it and hand over to the handler.
+  info = psb_err_from_subroutine_
+  call psb_errpush(info,name, a_err='inner_cssm')
+  call psb_error_handler(cur_act)
+
+  return
+
+end subroutine psb_s_cuda_hlg_inner_vect_sv
diff --git a/cuda/impl/psb_s_cuda_hlg_mold.F90 b/cuda/impl/psb_s_cuda_hlg_mold.F90
new file mode 100644
index 00000000..89e329e7
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hlg_mold.F90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! 
notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_s_cuda_hlg_mold(a,b,info)
+  ! Make b a freshly allocated psb_s_cuda_hlg_sparse_mat.
+  ! An object already held by b is freed and deallocated first.
+  !   a    : not referenced in the body
+  !   b    : allocatable matrix to (re)create
+  !   info : 0 on success, psb_err_alloc_dealloc_ otherwise
+  use psb_base_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_mold
+  implicit none
+  class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
+  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='hlg_mold'
+
+  call psb_get_erraction(err_act)
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_s_cuda_hlg_sparse_mat :: b, stat=info)
+
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info, name)
+    goto 9999
+  end if
+  return
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_cuda_hlg_mold
diff --git a/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90
new file mode 100644
index 00000000..03742958
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90
@@ -0,0 +1,60 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_s_cuda_hlg_reallocate_nz(nz,a)
+  ! Resize the storage of a through the parent HLL reallocate(nz),
+  ! then call a%to_gpu(info).
+  !   nz : size request, forwarded unchanged to the parent reallocate
+  !   a  : matrix to resize
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_reallocate_nz
+  use iso_c_binding
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: err_act, info
+  ! Sized to hold the full routine name (24 characters) untruncated.
+  character(len=24) :: name='s_cuda_hlg_reallocate_nz'
+
+  call psb_erractionsave(err_act)
+
+  call a%psb_s_hll_sparse_mat%reallocate(nz)
+  call a%to_gpu(info)
+  ! to_gpu is the only step here that reports a status.
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_cuda_hlg_reallocate_nz
diff --git a/cuda/impl/psb_s_cuda_hlg_scal.F90 b/cuda/impl/psb_s_cuda_hlg_scal.F90
new file mode 100644
index 00000000..7074b8b6
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hlg_scal.F90
@@ -0,0 +1,68 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_s_cuda_hlg_scal(d,a,info,side)
+  ! Run the parent HLL scal with the vector d, then call a%to_gpu.
+  ! a%to_gpu is skipped when the scal step reports an error.
+  !   d    : scaling factors, forwarded to the parent scal
+  !   info : status from the parent scal, or from to_gpu after it
+  !   side : optional selector, forwarded to the parent scal
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scal
+  implicit none
+  class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, intent(in), optional :: side
+
+  integer(psb_ipk_) :: saved_act
+
+  info = psb_success_
+  call psb_erractionsave(saved_act)
+
+  ! A matrix reporting is_unit() gets make_nonunit() before the scaling.
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_s_hll_sparse_mat%scal(d,info,side)
+  if (info == psb_success_) then
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(saved_act)
+      return
+    end if
+  end if
+
+  ! Reached only when scal or to_gpu reported an error.
+  call psb_error_handler(saved_act)
+  return
+end subroutine psb_s_cuda_hlg_scal
diff --git a/cuda/impl/psb_s_cuda_hlg_scals.F90 b/cuda/impl/psb_s_cuda_hlg_scals.F90
new file mode 100644
index 00000000..2c9f5ae8
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hlg_scals.F90
@@ -0,0 +1,66 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+!
Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_s_cuda_hlg_scals(d,a,info)
+  ! Run the parent HLL scal with the scalar d, then call a%to_gpu.
+  ! a%to_gpu is skipped when the scal step reports an error.
+  !   d    : scalar factor, forwarded to the parent scal
+  !   info : status from the parent scal, or from to_gpu after it
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scals
+  use iso_c_binding
+  implicit none
+  class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in) :: d
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: saved_act
+
+  info = psb_success_
+  call psb_erractionsave(saved_act)
+
+  ! A matrix reporting is_unit() gets make_nonunit() before the scaling.
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_s_hll_sparse_mat%scal(d,info)
+  if (info == psb_success_) then
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(saved_act)
+      return
+    end if
+  end if
+  ! Reached only when scal or to_gpu reported an error.
+  call psb_error_handler(saved_act)
+  return
+end subroutine psb_s_cuda_hlg_scals
diff --git a/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90
new file mode 100644
index 00000000..91cfd5ad
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90
@@ -0,0 +1,61 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm)
+  ! (Re)create a%deviceMat and write the arrays of a into it.
+  ! Nothing is done (info = 0) when a%val or a%ja is not allocated.
+  ! An already associated a%deviceMat is released before the new allocation.
+  !   info : 0, or the nonzero code of FallochllDevice / writehllDevice
+  !   nzrm : optional, not referenced in this routine
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_to_gpu
+  use iso_c_binding
+  implicit none
+  class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_), intent(in), optional :: nzrm
+
+  integer(psb_ipk_) :: nrows, nnz, total_size
+
+  info = 0
+  if (allocated(a%val) .and. allocated(a%ja)) then
+    nrows = a%get_nrows()
+    total_size = a%get_size()
+    nnz = a%get_nzeros()
+    if (c_associated(a%deviceMat)) call freehllDevice(a%deviceMat)
+    info = FallochllDevice(a%deviceMat,a%hksz,nrows,nnz,total_size,spgpu_type_float,1)
+    if (info == 0) then
+      info = writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag)
+    end if
+  end if
+end subroutine psb_s_cuda_hlg_to_gpu
diff --git a/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90
new file mode 100644
index 00000000..94696949
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90
@@ -0,0 +1,119 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans)
+  ! Matrix-vector product driver for the CUDA HLG format.
+  ! trans = T or C (any case): the parent HLL spmm is used, then y%set_host().
+  ! Otherwise spmvhllDevice runs when x and y are both psb_s_vect_cuda;
+  ! any other vector type goes through a%spmm on plain arrays and y%bld.
+  !   info : psb_success_, psb_err_invalid_mat_state_ when a%is_asb() is
+  !          false, or a nonzero code coming from the routines called here
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_vect_mv
+  use psb_s_cuda_vect_mod
+  implicit none
+  class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in) :: alpha, beta
+  class(psb_s_base_vect_type), intent(inout) :: x
+  class(psb_s_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  real(psb_spk_), allocatable :: rx(:), ry(:)
+  logical :: tra
+  character :: trans_
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='s_cuda_hlg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  if (tra) then
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= szero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    if (a%is_dev()) call a%sync()
+    call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_s_vect_cuda)
+      select type(yy => y)
+      type is (psb_s_vect_cuda)
+        if (xx%is_host()) call xx%sync()
+        ! beta is real(psb_spk_): compare with the single precision zero.
+        if (beta /= szero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        if (a%is_dev()) call a%sync()
+        call a%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      rx = x%get_vect()
+      ry = y%get_vect()
+      if (a%is_dev()) call a%sync()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_cuda_hlg_vect_mv
diff --git a/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90
new file mode 100644
index 00000000..a8e31ad8
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz)
+  ! Allocate the parent CSR storage with allocate(m,n,nz), call
+  ! initFcusparse, then call a%to_gpu(info,nzrm=nz).
+  !   m, n : dimensions forwarded to the parent CSR allocate
+  !   nz   : optional size argument, forwarded to allocate and to_gpu
+  use psb_base_mod
+  use cusparse_mod
+  use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  integer(psb_ipk_) :: err_act, info
+  character(len=20) :: name='allocate_mnz'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  call a%psb_s_csr_sparse_mat%allocate(m,n,nz)
+  info = initFcusparse()
+  ! Keep a failed initialization visible: to_gpu would overwrite info.
+  if (info == 0) call a%to_gpu(info,nzrm=nz)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_cuda_hybg_allocate_mnnz
+#endif
diff --git a/cuda/impl/psb_s_cuda_hybg_csmm.F90 b/cuda/impl/psb_s_cuda_hybg_csmm.F90
new file mode 100644
index 00000000..85250fe3
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hybg_csmm.F90
@@ -0,0 +1,126 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmm + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hybg_csmv + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hybg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hybg_inner_vect_sv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. 
+ info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=szero)) then + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_inner_vect_sv +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_mold.F90 b/cuda/impl/psb_s_cuda_hybg_mold.F90 new file mode 100644 index 00000000..d1b41c2b --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! 
modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_s_cuda_hybg_mold(a,b,info)
+  ! Make b a freshly allocated psb_s_cuda_hybg_sparse_mat; a is not
+  ! referenced in the body.
+  !   info : 0 on success, psb_err_alloc_dealloc_ otherwise
+  use psb_base_mod
+  use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_mold
+  implicit none
+  class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a
+  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: cur_act
+  character(len=20) :: name='hybg_mold'
+
+  call psb_get_erraction(cur_act)
+
+  ! Release whatever b currently holds before allocating the new type.
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+
+  if (info == 0) then
+    allocate(psb_s_cuda_hybg_sparse_mat :: b, stat=info)
+  end if
+  if (info == psb_success_) return
+  ! Failure path: record the allocation error and hand over to the handler.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(cur_act)
+  return
+end subroutine psb_s_cuda_hybg_mold
+#endif
diff --git a/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90
new file mode 100644
index 00000000..d43da4d9
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_s_cuda_hybg_reallocate_nz(nz,a)
+  ! Resize the storage of a through the parent CSR reallocate(nz),
+  ! then call a%to_gpu(info,nzrm=nz).
+  !   nz : size request, forwarded unchanged to both calls
+  !   a  : matrix to resize
+  use psb_base_mod
+  use cusparse_mod
+  use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: err_act, info
+  ! Sized to hold the full routine name (25 characters) untruncated.
+  character(len=25) :: name='s_cuda_hybg_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! Open question left by the original author: what should this really do?
+  call a%psb_s_csr_sparse_mat%reallocate(nz)
+
+  call a%to_gpu(info,nzrm=nz)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+end subroutine psb_s_cuda_hybg_reallocate_nz
+#endif
diff --git a/cuda/impl/psb_s_cuda_hybg_scal.F90 b/cuda/impl/psb_s_cuda_hybg_scal.F90
new file mode 100644
index 00000000..b1e24426
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hybg_scal.F90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_s_cuda_hybg_scal(d,a,info,side)
+  ! Run the parent CSR scal with the vector d, then call a%to_gpu.
+  ! a%to_gpu is skipped when the scal step reports an error.
+  !   d    : scaling factors, forwarded to the parent scal
+  !   a    : matrix to scale; make_nonunit is applied first if is_unit
+  !   info : status from the parent scal, or from to_gpu after it
+  !   side : optional selector, forwarded to the parent scal
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scal
+  implicit none
+  class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, intent(in), optional :: side
+
+  integer(psb_ipk_) :: saved_act
+
+  info = psb_success_
+  call psb_erractionsave(saved_act)
+
+  ! A matrix reporting is_unit() gets make_nonunit() before the scaling.
+  if (a%is_unit()) call a%make_nonunit()
+
+  call a%psb_s_csr_sparse_mat%scal(d,info,side=side)
+  if (info == 0) then
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(saved_act)
+      return
+    end if
+  end if
+  ! Reached only when scal or to_gpu reported an error.
+  call psb_error_handler(saved_act)
+  return
+end subroutine psb_s_cuda_hybg_scal
+#endif
diff --git a/cuda/impl/psb_s_cuda_hybg_scals.F90 b/cuda/impl/psb_s_cuda_hybg_scals.F90
new file mode 100644
index 00000000..93c74756
--- /dev/null
+++ b/cuda/impl/psb_s_cuda_hybg_scals.F90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_scals(d,a,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scals + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_s_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_scals +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 new file mode 100644 index 00000000..3f85a5c6 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. 
The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +! +! psb_s_cuda_hybg_to_gpu: rebuild a%deviceMat from the host CSR arrays +! (a%irp, a%ja, a%val) through the HYBGDevice* wrappers. +! +! a (inout) matrix; its deviceMat handle is freed and re-created +! info (out) 0 on success, otherwise the status of the first failing +! step (also written to unit 0) +! nzrm (in) optional; accepted but not referenced by this routine +! +! Returns immediately with info = 0 when a%val or a%ja is not allocated. +! +subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_to_gpu + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, n, nz + integer(psb_ipk_) :: nzdi,i,j,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_spk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + ! Release the handle already held in a%deviceMat, if any. + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! 
+ nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + ! Upper triangle: the added diagonal entry goes first in each row. + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + ! Otherwise the added diagonal entry goes last in each row. + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + ! Choose the fill mode only when the type change succeeded, so that a + ! failure status is never overwritten by the next call. + if (info == 0) then + if (a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! 
Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + ! Choose the fill mode only when the type change succeeded, so that a + ! failure status is never overwritten by the next call. + if (info == 0) then + if (a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if + +end subroutine psb_s_cuda_hybg_to_gpu +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 new file mode 100644 index 00000000..e786afe1 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 @@ -0,0 +1,118 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. 
The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +! +! psb_s_cuda_hybg_vect_mv: matrix-vector product on encapsulated vectors, +! presumably y := alpha*op(A)*x + beta*y (the arithmetic lives in the callees). +! +! alpha, beta (in) scalar coefficients passed on to the kernel / CSR spmm +! a (in) matrix; must be assembled (a%is_asb()) +! x, y (inout) vectors; y receives the result +! info (out) psb_success_, psb_err_invalid_mat_state_ when A is not +! assembled, psb_err_from_subroutine_ai_ when +! spmvHYBGDevice fails, or the status of the CSR spmm +! trans (in) optional, 'N' when absent; 'T' or 'C' (any case) selects +! the parent CSR spmm instead of the device kernel +! +! The device kernel is used only when trans is neither 'T' nor 'C' and both +! x and y are psb_s_vect_cuda; any other vector type goes through plain +! arrays obtained with get_vect and is written back with y%bld. +! +subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_vect_mv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = 
(psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + ! Transposed case: handled by the parent CSR spmm. trans is necessarily + ! present here, since an absent trans defaults to 'N' and gives tra = .false. + if (.not.x%is_host()) call x%sync() + ! y is synchronised only when beta /= szero (presumably because its old + ! content is otherwise not needed -- confirm against spmm). + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + ! NOTE(review): a is intent(in) yet sync() is invoked on it -- confirm the + ! binding accepts a non-definable object. + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + ! Both vectors are CUDA vectors: run the device kernel. + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + ! y is not a CUDA vector: go through plain arrays and the CSR spmm. + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + ! x is not a CUDA vector: go through plain arrays and the CSR spmm. + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + ! A non-zero status from the CSR spmm paths reaches the handler without an errpush. + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_vect_mv +#endif diff --git a/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..b61e94d6 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +! +! psb_s_cuda_mv_csrg_from_coo: move the COO matrix B into A through the +! parent CSR mv_from_coo, then upload A with a%to_gpu. +! +! a (inout) destination matrix +! b (inout) source COO matrix, handed to the parent mv_from_coo +! info (out) psb_success_, or psb_err_alloc_dealloc_ when either step +! reports a non-zero status (the original code is not kept) +! +subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_coo + implicit none + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + ! Host conversion first; upload only when it succeeded. + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info == 0) call a%to_gpu(info) + + ! Any failure is reported under the single alloc/dealloc code. + if (info /= 0) info = psb_err_alloc_dealloc_ + + return + +end subroutine psb_s_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..52643a10 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! 
(C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ +! +! psb_s_cuda_mv_csrg_from_fmt: move a matrix B of any storage format into A. +! +! a (inout) destination matrix +! b (inout) source matrix; a COO source goes through a%mv_from_coo, any +! other format through the parent CSR mv_from_fmt + a%to_gpu +! info (out) status of the last step executed; to_gpu is skipped when the +! parent mv_from_fmt fails +! +subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_fmt + implicit none + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + ! Same kind as in the sibling mv_*_from_* routines: info is passed on to + ! callees that take integer(psb_ipk_), which need not be the default kind. + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_s_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 new file mode 100644 index 00000000..1b2fe8a1 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +! +! psb_s_cuda_mv_diag_from_coo: move the COO matrix B into A. B is fixed +! first when it is not flagged by-rows, copied with a%cp_from_coo and freed +! only after a successful copy. +! +! a (inout) destination matrix +! b (inout) source COO matrix; left unfreed when a step fails +! info (out) psb_success_, or psb_err_alloc_dealloc_ when b%fix or +! cp_from_coo fails (the original code is not kept) +! +subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_mv_diag_from_coo + + implicit none + + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + + if (info == psb_success_) then + call a%cp_from_coo(b,info) + if (info == 0) then + ! Copy succeeded: the source is no longer needed. + call b%free() + return + end if + end if + + ! Reached only on failure of either step. + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 new file mode 100644 index 00000000..e8dbbabf --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! 
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +! +! psb_s_cuda_mv_elg_from_coo: move the COO matrix B into A. B is fixed +! first when it is not flagged by-rows, synchronised when b%is_dev() holds, +! copied with a%cp_from_coo and then freed whatever the copy status. +! +! a (inout) destination matrix +! b (inout) source COO matrix; untouched further when b%fix fails +! info (out) status of b%fix when it fails, else status of cp_from_coo +! +subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_coo + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + + if (info == psb_success_) then + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + ! The source is released even when the copy reports an error. + call b%free() + end if + + return + +end subroutine psb_s_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..21d9a339 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! 
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ +! +! psb_s_cuda_mv_elg_from_fmt: move a matrix B of any storage format into A. +! +! a (inout) destination matrix +! b (inout) source matrix, synchronised first when b%is_dev() holds: +! - COO: delegated to a%mv_from_coo +! - ELL (or extension): arrays are taken over directly, with +! ja/val re-dimensioned to the sizes returned by +! FgetEllDeviceParams, then uploaded with a%to_gpu +! - anything else: converted to a temporary COO first +! info (out) status of the first failing step, psb_success_ otherwise +! +subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_fmt + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nc, ld, nzm, m + type(elldev_parms) :: gpu_parms + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_s_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! NOTE(review): spgpu_type_double is requested in this single-precision + ! routine -- confirm whether the float type constant was intended. + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + ! NOTE(review): the copies below assume maxRowSize does not exceed size(b%ja,2). + nzm = gpu_parms%maxRowSize + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() + ! Upload only a fully built host copy; an earlier failure must reach the + ! caller instead of being overwritten by the status of to_gpu. + if (info == 0) call a%to_gpu(info) + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..b0370ebb --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. 
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+! +! psb_s_cuda_mv_hdiag_from_coo: move the COO matrix B into A through the +! parent HDIA mv_from_coo, using psb_cuda_WarpSize() as hack size, then +! upload A with a%to_gpu. +! +! a (inout) destination matrix; a%hacksize is overwritten +! b (inout) source COO matrix, handed to the parent mv_from_coo +! info (out) psb_success_, or psb_err_alloc_dealloc_ when either step +! reports a non-zero status (the original code is not kept) +! +subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod + + implicit none + + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + + a%hacksize = psb_cuda_WarpSize() + + call a%psb_s_hdia_sparse_mat%mv_from_coo(b,info) + ! Stop here on failure: to_gpu would otherwise overwrite the status and + ! upload an incomplete host copy. + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..4c8aab71 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! psb_s_cuda_mv_hlg_from_coo: move the COO matrix B into A. B is fixed +! first when it is not flagged by-rows, copied with a%cp_from_coo and then +! freed whatever the copy status. +! +! a (inout) destination matrix +! b (inout) source COO matrix; untouched further when b%fix fails +! info (out) status of b%fix when it fails, else status of cp_from_coo +! +subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_cuda_env_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_coo + implicit none + + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + + if (info == psb_success_) then + call a%cp_from_coo(b,info) + ! The source is released even when the copy reports an error. + call b%free() + end if + + return + +end subroutine psb_s_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..a162e2aa --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. 
The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! psb_s_cuda_mv_hlg_from_fmt: move a matrix B of any storage format into A. +! A COO source is passed straight to a%mv_from_coo; every other format is +! first moved into a temporary COO matrix. +! +! a (inout) destination matrix +! b (inout) source matrix +! info (out) status of mv_to_coo when it fails, else status of mv_from_coo +! +subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_fmt + implicit none + + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + ! Intermediate COO copy, used only for non-COO sources. + type(psb_s_coo_sparse_mat) :: staging + + info = psb_success_ + + select type(src => b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(src,info) + class default + call src%mv_to_coo(staging,info) + if (info == psb_success_) then + call a%mv_from_coo(staging,info) + end if + end select + +end subroutine psb_s_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..a22741fc --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 @@ -0,0 +1,59 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! 
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
#if PSB_CUDA_SHORT_VERSION <= 10

!
! psb_s_cuda_mv_hybg_from_coo
!
!   Move a single-precision COO matrix into the CUDA HYBG container.
!   Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
!
!   Arguments:
!     a     (inout)  destination; its CSR parent component receives the
!                    data, then a%to_gpu is invoked
!     b     (inout)  source COO matrix, consumed by the parent mv_from_coo
!     info  (out)    psb_success_ on success; any failure of either step
!                    is reported as psb_err_alloc_dealloc_
!
subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_coo
  implicit none

  class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_s_coo_sparse_mat), intent(inout)       :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  ! Host-side conversion first; the device upload only runs if it worked.
  call a%psb_s_csr_sparse_mat%mv_from_coo(b,info)
  if (info == 0) call a%to_gpu(info)

  ! Collapse every failure into the single error code callers expect.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_s_cuda_mv_hybg_from_coo
#endif
#if PSB_CUDA_SHORT_VERSION <= 10

!
! psb_s_cuda_mv_hybg_from_fmt
!
!   Move a single-precision matrix held in any storage format into the
!   CUDA HYBG container.  Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
!
!   Arguments:
!     a     (inout)  destination matrix
!     b     (inout)  source matrix; COO input is delegated to
!                    a%mv_from_coo, anything else goes through the CSR
!                    parent's mv_from_fmt followed by a%to_gpu
!     info  (out)    code returned by the last step executed
!
subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_fmt
  implicit none

  class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
  class(psb_s_base_sparse_mat), intent(inout)      :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  select type(b)
  type is (psb_s_coo_sparse_mat)
    call a%mv_from_coo(b,info)

  class default
    ! Host conversion, then upload only when the conversion succeeded.
    call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info)
    if (info == 0) then
      call a%to_gpu(info)
    end if
  end select

end subroutine psb_s_cuda_mv_hybg_from_fmt
#endif
!
! psb_z_cuda_cp_csrg_from_coo
!
!   Copy a double-complex COO matrix into the CUDA CSRG container.
!
!   Arguments:
!     a     (inout)  destination; its CSR parent component receives the
!                    data, then a%to_gpu is invoked
!     b     (in)     source COO matrix
!     info  (out)    psb_success_ on success; any failure of either step
!                    is reported as psb_err_alloc_dealloc_
!
subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_coo
  implicit none

  class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(in)          :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  ! Host-side conversion first; the device upload only runs if it worked.
  call a%psb_z_csr_sparse_mat%cp_from_coo(b,info)
  if (info == 0) call a%to_gpu(info)

  ! Collapse every failure into the single error code callers expect.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_z_cuda_cp_csrg_from_coo
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
!
! psb_z_cuda_cp_csrg_from_fmt
!
!   Copy a double-complex matrix held in any storage format into the
!   CUDA CSRG container.
!
!   Arguments:
!     a     (inout)  destination matrix
!     b     (in)     source matrix; COO input is delegated to
!                    a%cp_from_coo, anything else goes through the CSR
!                    parent's cp_from_fmt followed by a%to_gpu
!     info  (out)    code returned by the last step executed
!
subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info)

  use psb_base_mod
  use cusparse_mod
  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_fmt
  implicit none

  class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
  class(psb_z_base_sparse_mat), intent(in)         :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  select type(b)
  type is (psb_z_coo_sparse_mat)
    call a%cp_from_coo(b,info)

  class default
    ! Host conversion, then upload only when the conversion succeeded.
    call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info)
    if (info == 0) then
      call a%to_gpu(info)
    end if
  end select

end subroutine psb_z_cuda_cp_csrg_from_fmt
!
! psb_z_cuda_cp_diag_from_coo
!
!   Copy a double-complex COO matrix into the CUDA DIAG container.
!
!   Arguments:
!     a     (inout)  destination; its DIA parent component receives the
!                    data, then a%to_gpu is invoked
!     b     (in)     source COO matrix
!     info  (out)    psb_success_ on success; any failure of either step
!                    is reported as psb_err_alloc_dealloc_
!
subroutine psb_z_cuda_cp_diag_from_coo(a,b,info)

  use psb_base_mod
  use diagdev_mod
  use psb_vectordev_mod
  use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_cp_diag_from_coo
  implicit none

  class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(in)          :: b
  integer(psb_ipk_), intent(out)                   :: info

  info = psb_success_

  ! Host-side conversion.  Its status must be checked before uploading:
  ! otherwise a%to_gpu would overwrite info and hide the failure, and
  ! would be run on a matrix whose host data was not built correctly.
  call a%psb_z_dia_sparse_mat%cp_from_coo(b,info)
  if (info == 0) call a%to_gpu(info)

  ! Same error convention as the other CUDA copy routines.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_z_cuda_cp_diag_from_coo
!
! psb_z_cuda_cp_elg_from_coo
!
!   Copy a double-complex COO matrix into the CUDA ELG container,
!   building the device matrix directly from the COO arrays.
!
!   Arguments:
!     a     (inout)  destination matrix; any existing device matrix is
!                    released and a new one is allocated
!     b     (in)     source COO matrix
!     info  (out)    psb_success_ on success; every failure is reported
!                    as psb_err_alloc_dealloc_
!
!   When b is not row ordered, a row-ordered working copy is built and
!   all sizes are taken from that copy, so that the dimensions passed to
!   the device routines always describe the arrays actually handed over.
!
subroutine psb_z_cuda_cp_elg_from_coo(a,b,info)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_coo
  use psi_ext_util_mod
  use psb_cuda_env_mod
  implicit none

  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(in)         :: b
  integer(psb_ipk_), intent(out)                  :: info

  !locals
  integer(psb_ipk_)          :: hacksize
  type(psb_z_coo_sparse_mat) :: tmp

  info     = psb_success_
  ! Guard against a non-positive value from psb_cuda_WarpSize().
  hacksize = max(1,psb_cuda_WarpSize())
  if (b%is_dev()) call b%sync()

  if (b%is_by_rows()) then
    call psi_z_coo_to_elg_device(a,b,hacksize,info)
  else
    ! Work on a copy that is guaranteed to be row ordered; every step is
    ! checked so an earlier failure is never overwritten by a later call.
    call b%cp_to_coo(tmp,info)
    if (info == psb_success_) then
      if (.not. tmp%is_by_rows()) call tmp%fix(info)
    end if
    if (info == psb_success_) call psi_z_coo_to_elg_device(a,tmp,hacksize,info)
    call tmp%free()
  end if

  if (info /= psb_success_) info = psb_err_alloc_dealloc_

  return

contains

  !
  ! Build the device ELL matrix of a from the row-ordered COO matrix src.
  ! Sizes are always read from src, the matrix whose ja/val are uploaded.
  !
  subroutine psi_z_coo_to_elg_device(a,src,hacksize,info)

    implicit none

    class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
    class(psb_z_coo_sparse_mat), intent(in)         :: src
    integer(psb_ipk_), intent(in)                   :: hacksize
    integer(psb_ipk_), intent(out)                  :: info

    !locals
    integer(psb_ipk_)              :: nr, nc, nza, ldv, nzm
    integer(psb_ipk_), allocatable :: idisp(:)

    ! Per-row counts (a%irn), row displacements (idisp), padded leading
    ! dimension (ldv) and maximum row length (nzm).
    call psi_z_count_ell_from_coo(a,src,idisp,ldv,nzm,info,hacksize=hacksize)
    if (info /= psb_success_) return

    if (c_associated(a%deviceMat)) then
      call freeEllDevice(a%deviceMat)
    endif

    nr  = src%get_nrows()
    nc  = src%get_ncols()
    nza = src%get_nzeros()
    info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1)

    if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, &
         & a%irn,idisp,src%ja,src%val, a%deviceMat)

    ! As in the original code, the matrix is flagged as device-resident
    ! once the allocation/copy has been attempted.
    call a%set_dev()

  end subroutine psi_z_coo_to_elg_device

  !
  ! Count the entries of each row of the COO matrix b.
  !
  !   a         (inout)  receives b's base-class data, the per-row counts
  !                      in a%irn and the total in a%nzt
  !   b         (in)     source COO matrix
  !   idisp     (out)    size nr+1; idisp(i) is the number of entries in
  !                      rows 1..i-1
  !   ldv       (out)    nr rounded up to a multiple of the hack size
  !   nzm       (out)    maximum number of entries in any row
  !   info      (out)    psb_success_ or the psb_realloc error code
  !   hacksize  (in,opt) rounding unit for ldv; values <= 1 mean 1
  !
  subroutine psi_z_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize)

    use psb_base_mod
    use psi_ext_util_mod
    implicit none

    class(psb_z_ell_sparse_mat), intent(inout)  :: a
    class(psb_z_coo_sparse_mat), intent(in)     :: b
    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
    integer(psb_ipk_), intent(out)              :: info, nzm, ldv
    integer(psb_ipk_), intent(in), optional     :: hacksize

    !locals
    integer(psb_ipk_) :: nza, nr, i, ir, hsz_

    info = psb_success_

    nr  = b%get_nrows()
    nza = b%get_nzeros()

    hsz_ = 1
    if (present(hacksize)) then
      if (hacksize> 1) hsz_ = hacksize
    end if
    ! Make ldv a multiple of hacksize
    ldv = ((nr+hsz_-1)/hsz_)*hsz_

    ! Carry over the base-class description of b.
    a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat

    ! First compute the number of nonzeros in each row.
    call psb_realloc(nr,a%irn,info)
    if (info == psb_success_) call psb_realloc(nr+1,idisp,info)
    if (info /= psb_success_) return

    a%irn = 0
    do i=1, nza
      ir = b%ia(i)
      a%irn(ir) = a%irn(ir) + 1
    end do

    ! Running sum of the row counts gives the displacements.
    nzm      = 0
    a%nzt    = 0
    idisp(1) = 0
    do i=1,nr
      nzm        = max(nzm,a%irn(i))
      a%nzt      = a%nzt + a%irn(i)
      idisp(i+1) = a%nzt
    end do

  end subroutine psi_z_count_ell_from_coo

end subroutine psb_z_cuda_cp_elg_from_coo
!
! psb_z_cuda_cp_elg_from_fmt
!
!   Copy a double-complex matrix held in any storage format into the
!   CUDA ELG container.
!
!   Arguments:
!     a     (inout)  destination matrix
!     b     (in)     source matrix.  COO input is delegated to
!                    a%cp_from_coo; ELL input (or an extension of it) is
!                    copied array by array and resized to the device
!                    layout; anything else goes through a temporary COO
!     info  (out)    psb_success_ on success, otherwise the code of the
!                    first step that failed
!
subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_fmt
  implicit none

  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
  class(psb_z_base_sparse_mat), intent(in)        :: b
  integer(psb_ipk_), intent(out)                  :: info

  !locals
  type(psb_z_coo_sparse_mat) :: tmp
  integer(psb_ipk_)          :: nza, nc, ld, nzm, m
  type(elldev_parms)         :: gpu_parms

  info = psb_success_
  if (b%is_dev()) call b%sync()

  select type (b)
  type is (psb_z_coo_sparse_mat)
    call a%cp_from_coo(b,info)

  class is (psb_z_ell_sparse_mat)
    nzm = psb_size(b%ja,2)
    m   = b%get_nrows()
    nc  = b%get_ncols()
    nza = b%get_nzeros()
    ! Ask the device layer which leading dimension (pitch) and row
    ! length it wants for a matrix of this size.
    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
    ld  = gpu_parms%pitch
    nzm = gpu_parms%maxRowSize
    a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
    if (info == 0) call psb_safe_cpy( b%irn,   a%irn   , info)
    if (info == 0) call psb_safe_cpy( b%ja ,   a%ja    , info)
    if (info == 0) call psb_safe_cpy( b%val,   a%val   , info)
    ! Resize to the device layout and refill the m x nzm active part.
    ! NOTE(review): this assumes gpu_parms%maxRowSize does not exceed
    ! size(b%ja,2); confirm against FgetEllDeviceParams.
    if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
    if (info == 0) then
      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
    end if
    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
    if (info == 0) then
      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
    end if
    a%nzt = nza
    ! Upload only if the host-side build succeeded; an unconditional call
    ! would overwrite info and hide an earlier copy/realloc failure.
    if (info == 0) call a%to_gpu(info)

  class default

    call b%cp_to_coo(tmp,info)
    if (info == psb_success_) call a%mv_from_coo(tmp,info)
  end select

end subroutine psb_z_cuda_cp_elg_from_fmt
!
! psb_z_cuda_cp_hdiag_from_coo
!
!   Copy a double-complex COO matrix into the CUDA HDIAG container.
!
!   Arguments:
!     a     (inout)  destination; a%hacksize is set from
!                    psb_cuda_WarpSize(), its HDIA parent component
!                    receives the data, then a%to_gpu is invoked
!     b     (in)     source COO matrix
!     info  (out)    psb_success_ on success; any failure of either step
!                    is reported as psb_err_alloc_dealloc_
!
subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info)

  use psb_base_mod
  use hdiagdev_mod
  use psb_vectordev_mod
  use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_cp_hdiag_from_coo
  use psb_cuda_env_mod
  implicit none

  class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(in)           :: b
  integer(psb_ipk_), intent(out)                    :: info

  info = psb_success_

  ! Must be set before the host conversion, which is performed by the
  ! HDIA parent on the same object.
  a%hacksize = psb_cuda_WarpSize()

  ! Host-side conversion.  Its status must be checked before uploading:
  ! otherwise a%to_gpu would overwrite info and hide the failure.
  call a%psb_z_hdia_sparse_mat%cp_from_coo(b,info)
  if (info == 0) call a%to_gpu(info)

  ! Same error convention as the other CUDA copy routines.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_z_cuda_cp_hdiag_from_coo
!
! psb_z_cuda_cp_hlg_from_coo
!
!   Copy a double-complex COO matrix into the CUDA HLG container,
!   building the device matrix directly from the COO arrays.
!
!   Arguments:
!     a     (inout)  destination matrix; any existing device matrix is
!                    released and a new one is allocated
!     b     (in)     source COO matrix
!     info  (out)    psb_success_ on success.  A failure while computing
!                    the hack offsets returns that routine's code (after
!                    printing a message on unit 0); every other failure
!                    is reported as psb_err_alloc_dealloc_
!
!   When b is not row ordered, a fixed working copy is used instead.
!
subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info)

  use psb_base_mod
  use hlldev_mod
  use psb_vectordev_mod
  use psb_cuda_env_mod
  use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_coo
  implicit none

  class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(in)         :: b
  integer(psb_ipk_), intent(out)                  :: info

  !locals
  type(psb_z_coo_sparse_mat) :: tmp
  integer(psb_ipk_)          :: hksz
  logical, parameter         :: debug=.false.

  info = psb_success_
  ! Guard against a non-positive value from psb_cuda_WarpSize().
  hksz = max(1,psb_cuda_WarpSize())

  if (b%is_by_rows()) then
    call psi_z_coo_to_hlg_device(a,b,hksz,info)
  else
    ! This is to guarantee tmp%is_by_rows().  The copy status is checked
    ! before fixing so that tmp%fix cannot overwrite a copy failure.
    call b%cp_to_coo(tmp,info)
    if (info == 0) call tmp%fix(info)
    if (info == 0) then
      call psi_z_coo_to_hlg_device(a,tmp,hksz,info)
    else
      info = psb_err_alloc_dealloc_
    end if
    call tmp%free()
  end if

  return

contains

  !
  ! Build the device HLL matrix of a from the row-ordered COO matrix src.
  ! On return info is 0, the code from psi_compute_hckoff_from_coo, or
  ! psb_err_alloc_dealloc_ for a device allocation/copy failure.
  !
  subroutine psi_z_coo_to_hlg_device(a,src,hksz,info)

    implicit none

    class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
    class(psb_z_coo_sparse_mat), intent(in)         :: src
    integer(psb_ipk_), intent(in)                   :: hksz
    integer(psb_ipk_), intent(out)                  :: info

    !locals
    integer(psb_ipk_)              :: nr, nc, nza, noffs, isz
    integer(psb_ipk_), allocatable :: idisp(:)

    nr  = src%get_nrows()
    nc  = src%get_ncols()
    nza = src%get_nzeros()
    if (debug) write(0,*) 'Copying through GPU',nza

    call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,src,info)
    if (info /=0) then
      write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz
      return
    end if
    if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))

    if (c_associated(a%deviceMat)) then
      call freeHllDevice(a%deviceMat)
    endif
    info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
    if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
         & a%irn,a%hkoffs,idisp,src%ja, src%val, a%deviceMat)

    ! As in the original code, the matrix is flagged as device-resident
    ! once the allocation/copy has been attempted.
    call a%set_dev()

    if (info /= 0) info = psb_err_alloc_dealloc_

  end subroutine psi_z_coo_to_hlg_device

  !
  ! Compute per-row counts, row displacements and hack offsets for b.
  !
  !   a      (inout)  receives b's base-class data, the per-row counts in
  !                   a%irn, the total in a%nzt, the hack size and the
  !                   offsets a%hkoffs(1:noffs+1)
  !   noffs  (out)    number of hacks, ceiling(nr/hksz)
  !   isz    (out)    a%hkoffs(noffs+1), the allocation size for ja/val
  !   hksz   (in)     number of rows per hack
  !   idisp  (out)    size nr+1; idisp(i) is the number of entries in
  !                   rows 1..i-1
  !   b      (in)     source COO matrix
  !   info   (out)    0 or the psb_realloc error code
  !
  subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info)
    use psb_base_mod
    use psi_ext_util_mod
    implicit none
    class(psb_z_hll_sparse_mat), intent(inout)  :: a
    class(psb_z_coo_sparse_mat), intent(in)     :: b
    integer(psb_ipk_), allocatable, intent(out) :: idisp(:)
    integer(psb_ipk_), intent(in)               :: hksz
    integer(psb_ipk_), intent(out)              :: info, noffs, isz

    !locals
    integer(psb_ipk_)  :: nza, nr, nc, i, j, k, ir, mxrwl
    logical, parameter :: debug=.false.

    info = 0
    nr   = b%get_nrows()
    nc   = b%get_ncols()
    nza  = b%get_nzeros()

    ! Carry over the base-class description of b.
    a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
    if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza

    ! First compute the number of nonzeros in each row.
    call psb_realloc(nr,a%irn,info)
    if (info == 0) call psb_realloc(nr+1,idisp,info)
    if (info /= 0) return
    a%irn = 0
    if (debug) then
      ! Debug build: validate every row index before counting it.
      do i=1, nza
        if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then
          a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
        else
          write(0,*) 'Out of bouds IA ',i,b%ia(i),nr
        end if
      end do
    else
      do i=1, nza
        a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
      end do
    end if
    a%nzt = nza

    ! Second. Figure out the block offsets.
    call a%set_hksz(hksz)
    noffs = (nr+hksz-1)/hksz
    call psb_realloc(noffs+1,a%hkoffs,info)
    if (debug) write(0,*) ' noffsets ',noffs,info
    if (info /= 0) return
    a%hkoffs(1) = 0
    j           = 1
    idisp(1)    = 0
    do i=1,nr,hksz
      ! ir is the number of rows in this hack (the last may be short).
      ir    = min(hksz,nr-i+1)
      mxrwl = a%irn(i)
      idisp(i+1) = idisp(i) + a%irn(i)
      do k=1,ir-1
        idisp(i+k+1) = idisp(i+k) + a%irn(i+k)
        mxrwl        = max(mxrwl,a%irn(i+k))
      end do
      ! Each hack reserves hksz rows of its longest row length.
      a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
      j = j + 1
    end do

    !
    ! At this point a%hkoffs(noffs+1) contains the allocation
    ! size a%ja a%val.
    !
    isz = a%hkoffs(noffs+1)
  end subroutine psi_compute_hckoff_from_coo

end subroutine psb_z_cuda_cp_hlg_from_coo
Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
!
! psb_z_cuda_cp_hlg_from_fmt
!
!   Copy a double-complex matrix held in any storage format into the
!   CUDA HLG container.
!
!   Arguments:
!     a     (inout)  destination matrix
!     b     (in)     source matrix; COO input is delegated to
!                    a%cp_from_coo, anything else goes through the HLL
!                    parent's cp_from_fmt followed by a%to_gpu
!     info  (out)    psb_success_ on success; any failure is reported as
!                    psb_err_alloc_dealloc_
!
subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info)

  use psb_base_mod
  use hlldev_mod
  use psb_vectordev_mod
  use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_fmt
  implicit none

  class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
  class(psb_z_base_sparse_mat), intent(in)        :: b
  integer(psb_ipk_), intent(out)                  :: info

  info = psb_success_

  select type(b)
  type is (psb_z_coo_sparse_mat)
    call a%cp_from_coo(b,info)

  class default
    ! Host conversion, then upload only when the conversion succeeded.
    call a%psb_z_hll_sparse_mat%cp_from_fmt(b,info)
    if (info == 0) then
      call a%to_gpu(info)
    end if
  end select

  ! Collapse every failure into the single error code callers expect.
  if (info /= 0) info = psb_err_alloc_dealloc_

end subroutine psb_z_cuda_cp_hlg_from_fmt
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+! Copy the COO matrix B into the CUDA HYBG matrix A (built only under the #if above).
+subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info)
+  ! info: a nonzero status from either step is reported as psb_err_alloc_dealloc_.
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_coo
+  implicit none
+
+  class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  ! Step 1 fills the parent psb_z_csr_sparse_mat part of A from B;
+  ! step 2 (to_gpu) runs only when step 1 reported no error.
+  call a%psb_z_csr_sparse_mat%cp_from_coo(b,info)
+  if (info == 0) call a%to_gpu(info)
+
+  ! Whichever step failed, the caller sees a single error code;
+  ! the callee's own status value is not propagated.
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+  return
+
+end subroutine psb_z_cuda_cp_hybg_from_coo
+#endif
diff --git a/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90
new file mode 100644
index 00000000..bc59cdba
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+!
documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+! Copy matrix B, whatever its dynamic type, into the CUDA HYBG matrix A.
+subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info)
+  ! info: the status of the last step executed is handed back unchanged.
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_fmt
+  implicit none
+
+  class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a
+  class(psb_z_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_z_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+  class default
+    ! Fill the parent psb_z_csr_sparse_mat part, then call to_gpu only on success.
+    call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info)
+    if (info == 0) call a%to_gpu(info)
+  end select
+
+end subroutine psb_z_cuda_cp_hybg_from_fmt
+#endif
diff --git a/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90
new file mode 100644
index 00000000..a7988dd3
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90
@@ -0,0 +1,62 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+!
Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Allocate the parent CSR storage of A for sizes m, n (and optional nz), call
+! initFcusparse, then build the device copy through to_gpu.
+subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz)
+  ! info is local here, so a failure is pushed on the PSBLAS error stack.
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  Integer(Psb_ipk_) :: err_act, info
+  character(len=20) :: name='csrg_allocate_mnnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  call a%psb_z_csr_sparse_mat%allocate(m,n,nz)
+  info = initFcusparse()
+  if (info == 0) call a%to_gpu(info,nzrm=nz)
+  if (info /= 0) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='initFcusparse/to_gpu')
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+end subroutine psb_z_cuda_csrg_allocate_mnnz
diff --git a/cuda/impl/psb_z_cuda_csrg_csmm.F90 b/cuda/impl/psb_z_cuda_csrg_csmm.F90
new file mode 100644
index 00000000..731b7417
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_csrg_csmm.F90
@@ -0,0 +1,126 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmm + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_csrg_csmv + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_csrg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_csrg_from_gpu + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (PSB_CUDA_SHORT_VERSION <= 10) || (PSB_CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() + +end subroutine psb_z_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..9a3f8281 --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! CSRG "sv" kernel on vector objects: spsvCSRGDevice when possible, host CSR otherwise.
+subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+  ! Device path requires: trans not 'T'/'C', beta == dzero, x and y of type psb_z_vect_cuda.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_inner_vect_sv
+  use psb_z_cuda_vect_mod
+  implicit none
+  class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a
+  complex(psb_dpk_), intent(in) :: alpha, beta
+  class(psb_z_base_vect_type), intent(inout) :: x, y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+
+  complex(psb_dpk_), allocatable :: rx(:), ry(:)
+  logical :: tra
+  character :: trans_
+  integer(psb_ipk_) :: err_act
+  character(len=*), parameter :: name='z_cuda_csrg_inner_vect_sv'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+  ! Start from a clean status, default trans to 'N' when it is absent,
+  ! and reject a matrix for which is_asb() is false with
+  ! psb_err_invalid_mat_state_.
+  info = psb_success_
+
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+
+  if (tra.or.(beta/=dzero)) then
+    call x%sync()
+    call y%sync()
+    call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    select type (xx => x)
+    type is (psb_z_vect_cuda)
+      select type(yy => y)
+      type is (psb_z_vect_cuda)
+        if (xx%is_host()) call xx%sync()
+        ! beta == dzero is guaranteed on this branch by the enclosing if,
+        ! so no beta-dependent sync of yy is performed before the call.
+
+        info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+  end if
+  if (info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name, a_err='csrg_vect_sv')
+    goto 9999
+  end if
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_csrg_inner_vect_sv
diff --git a/cuda/impl/psb_z_cuda_csrg_mold.F90 b/cuda/impl/psb_z_cuda_csrg_mold.F90
new file mode 100644
index 00000000..23bb658a
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_csrg_mold.F90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+!
modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Make B a freshly allocated psb_z_cuda_csrg_sparse_mat (A only supplies the type).
+! info: psb_err_alloc_dealloc_ if freeing the old B or allocating the new one fails.
+subroutine psb_z_cuda_csrg_mold(a,b,info)
+
+  use psb_base_mod
+  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_mold
+  implicit none
+  class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a
+  class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='csrg_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+  info = 0
+
+  ! Drop whatever B currently holds before giving it a new dynamic type.
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+
+  ! Allocate the new object only if the release above succeeded.
+  if (info == 0) allocate(psb_z_cuda_csrg_sparse_mat :: b, stat=info)
+
+  ! Either failure is pushed on the error stack and handed to the handler.
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info, name)
+    call psb_error_handler(err_act)
+  end if
+
+  return
+end subroutine psb_z_cuda_csrg_mold
diff --git a/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90
new file mode 100644
index 00000000..964cd84e
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Reallocate the parent CSR storage of A for nz entries, then rebuild the
+! device copy through to_gpu (passing nz as nzrm).
+subroutine psb_z_cuda_csrg_reallocate_nz(nz,a)
+  ! info is local here, so a to_gpu failure is pushed on the PSBLAS error stack.
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
+  Integer(Psb_ipk_) :: err_act, info
+  character(len=*), parameter :: name='z_cuda_csrg_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  !
+  ! What should this really do???
+  !
+  call a%psb_z_csr_sparse_mat%reallocate(nz)
+
+  call a%to_gpu(info,nzrm=nz)
+  if (info /= 0) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='to_gpu')
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+end subroutine psb_z_cuda_csrg_reallocate_nz
diff --git a/cuda/impl/psb_z_cuda_csrg_scal.F90 b/cuda/impl/psb_z_cuda_csrg_scal.F90
new file mode 100644
index 00000000..9d97433e
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_csrg_scal.F90
@@ -0,0 +1,67 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Scale A by the vector d (optional side selector) using the parent CSR scal,
+! then rebuild the device copy through to_gpu. info carries the callee status.
+subroutine psb_z_cuda_csrg_scal(d,a,info,side)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scal
+  implicit none
+  class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
+  complex(psb_dpk_), intent(in) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, intent(in), optional :: side
+
+
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='scal'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  ! When a%is_dev() holds, a%sync() is called before the host-side scaling.
+  if (a%is_dev()) call a%sync()
+
+  ! Scale through the parent type; refresh the device copy only on success.
+  call a%psb_z_csr_sparse_mat%scal(d,info,side=side)
+  if (info == 0) call a%to_gpu(info)
+
+  if (info /= 0) then
+    call psb_error_handler(err_act)
+  else
+    call psb_erractionrestore(err_act)
+  end if
+
+  return
+
+end subroutine psb_z_cuda_csrg_scal
diff --git a/cuda/impl/psb_z_cuda_csrg_scals.F90 b/cuda/impl/psb_z_cuda_csrg_scals.F90
new file mode 100644
index 00000000..1479ea3a
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_csrg_scals.F90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+!
(C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Scale A by the scalar d using the parent CSR scal, then rebuild the device
+! copy through to_gpu. info carries the status of the last step executed.
+subroutine psb_z_cuda_csrg_scals(d,a,info)
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scals
+  implicit none
+  class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
+  complex(psb_dpk_), intent(in) :: d
+  integer(psb_ipk_), intent(out) :: info
+
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='scal'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  ! When a%is_dev() holds, a%sync() is called before the host-side scaling.
+  if (a%is_dev()) call a%sync()
+  call a%psb_z_csr_sparse_mat%scal(d,info)
+
+  ! The device copy is rebuilt only if the scaling reported no error.
+  if (info == 0) call a%to_gpu(info)
+
+  if (info == 0) then
+    call psb_erractionrestore(err_act)
+  else
+    call psb_error_handler(err_act)
+  end if
+
+  return
+
+end subroutine psb_z_cuda_csrg_scals
diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90
new file mode 100644
index 00000000..154c91f8
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90
@@ -0,0 +1,378 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_to_gpu + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_dpk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if (PSB_CUDA_SHORT_VERSION <= 10 ) + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. 
a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if + if ((info == 0) .and. 
a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif PSB_CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! 
write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#elif 0 + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! 
+ nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! 
write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. 
a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if + +end subroutine psb_z_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 new file mode 100644 index 00000000..977d7ff9 --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! 
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+! Matrix-vector product driver for the CSRG storage format, acting on encapsulated vectors x and y.
+! Runs spmvCSRGDevice when trans is not 'T'/'C' and x,y are psb_z_vect_cuda; otherwise uses the parent CSR spmm.
+subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans)
+
+  use psb_base_mod
+  use cusparse_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_vect_mv
+  use psb_z_cuda_vect_mod
+  implicit none
+  class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a  ! matrix; must be assembled (a%is_asb() is checked below)
+  complex(psb_dpk_), intent(in) :: alpha, beta  ! scalars; presumably y := alpha*op(A)*x + beta*y -- TODO confirm
+  class(psb_z_base_vect_type), intent(inout) :: x  ! input vector; x%sync() may be invoked on it
+  class(psb_z_base_vect_type), intent(inout) :: y  ! result vector, updated in place
+  integer(psb_ipk_), intent(out) :: info  ! psb_success_ on normal exit, an error code otherwise
+  character, optional, intent(in) :: trans  ! 'N' (default when absent), 'T' or 'C'; case-insensitive
+
+  complex(psb_dpk_), allocatable :: rx(:), ry(:)  ! plain-array copies used by the fallback paths
+  logical :: tra
+  character :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='z_cuda_csrg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+
+  if (tra) then  ! transposed requests are delegated to the parent CSR implementation
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= zzero) then  ! y is only synchronized when beta is nonzero
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()  ! presumably flags the host copy of y as the current one -- confirm
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_z_vect_cuda)
+      select type(yy => y)
+      type is (psb_z_vect_cuda)
+        if (xx%is_host()) call xx%sync()
+        if (beta /= zzero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()  ! presumably flags the device copy of y as the current one -- confirm
+      class default  ! y is not a psb_z_vect_cuda: work on array copies and rebuild y from the result
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default  ! x is not a psb_z_vect_cuda: same array-based fallback
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+end subroutine psb_z_cuda_csrg_vect_mv
diff --git a/cuda/impl/psb_z_cuda_diag_csmv.F90 b/cuda/impl/psb_z_cuda_diag_csmv.F90
new file mode 100644
index 00000000..fde7147e
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_diag_csmv.F90
@@ -0,0 +1,127 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_csmv + implicit none + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_cuda_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_diag_mold + implicit none + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_diag_mold diff --git a/cuda/impl/psb_z_cuda_diag_to_gpu.F90 b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 new file mode 100644 index 00000000..672ce938 --- /dev/null +++ b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! (Re)creates the device copy of a DIAG matrix from the host arrays a%data and a%offset.
+! A device matrix already associated with a%deviceMat is freed first; nothing is done if the host arrays are unallocated.
+subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm)
+
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_to_gpu
+  use iso_c_binding
+  implicit none
+  class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a  ! matrix whose a%deviceMat handle is rebuilt
+  integer(psb_ipk_), intent(out) :: info  ! 0 on success, otherwise the code returned by the device routines
+  integer(psb_ipk_), intent(in), optional :: nzrm  ! not referenced in this routine
+
+  integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d  ! only n, c and d are used below
+  type(diagdev_parms) :: gpu_parms
+
+  info = 0
+
+  if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return
+
+  n = size(a%data,1)  ! first extent of the host data array
+  d = size(a%data,2)  ! second extent of the host data array
+  c = a%get_ncols()
+  !allocsize = a%get_size()
+  !write(*,*) 'Create the DIAG matrix'
+  gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_double)  ! NOTE(review): result is never read afterwards
+  if (c_associated(a%deviceMat)) then
+    call freeDiagDevice(a%deviceMat)
+  endif
+  info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_double)
+  if (info == 0) info = &
+       & writeDiagDevice(a%deviceMat,a%data,a%offset,n)
+!  if (info /= 0) goto 9999
+
+end subroutine psb_z_cuda_diag_to_gpu
diff --git a/cuda/impl/psb_z_cuda_diag_vect_mv.F90 b/cuda/impl/psb_z_cuda_diag_vect_mv.F90
new file mode 100644
index 00000000..c6d11f04
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_diag_vect_mv.F90
@@ -0,0 +1,116 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans)
+  ! Matrix-vector product driver for the DIAG format: spmvDiagDevice for the plain product, parent DIA spmm for 'T'/'C'.
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_vect_mv
+  use psb_z_cuda_vect_mod
+  implicit none
+  class(psb_z_cuda_diag_sparse_mat), intent(in) :: a  ! matrix; must be assembled (a%is_asb() is checked below)
+  complex(psb_dpk_), intent(in) :: alpha, beta  ! scalars; presumably y := alpha*op(A)*x + beta*y -- TODO confirm
+  class(psb_z_base_vect_type), intent(inout) :: x  ! input vector; x%sync() may be invoked on it
+  class(psb_z_base_vect_type), intent(inout) :: y  ! result vector, updated in place
+  integer(psb_ipk_), intent(out) :: info  ! psb_success_ on normal exit, an error code otherwise
+  character, optional, intent(in) :: trans  ! 'N' (default when absent), 'T' or 'C'; case-insensitive
+  complex(psb_dpk_), allocatable :: rx(:), ry(:)  ! plain-array copies used by the fallback paths
+  logical :: tra
+  character :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='z_cuda_diag_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  if (tra) then  ! transposed requests are delegated to the parent DIA implementation
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= zzero) then  ! beta is complex(psb_dpk_): test against the complex zero, not the real szero
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_z_vect_cuda)
+      select type(yy => y)
+      type is (psb_z_vect_cuda)
+        if (xx%is_host()) call xx%sync()
+        if (beta /= zzero) then  ! same complex zero as above (was the real dzero)
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default  ! y is not a psb_z_vect_cuda: work on array copies and rebuild y from the result
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default  ! x is not a psb_z_vect_cuda: same array-based fallback
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_diag_vect_mv
diff --git a/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90
new file mode 100644
index 00000000..7f41b22d
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90
@@ -0,0 +1,416 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!
POSSIBILITY OF SUCH DAMAGE.
+!
+! Matrix-vector product driver for the dense (DNSG) format: spmvDnsDevice for CUDA vectors, host a%spmm otherwise.
+subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans)
+  use psb_base_mod
+  use psb_z_cuda_vect_mod
+  use dnsdev_mod
+  use psb_z_vectordev_mod
+  use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_vect_mv
+  implicit none
+  class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a  ! matrix; must be assembled (a%is_asb() is checked below)
+  complex(psb_dpk_), intent(in) :: alpha, beta  ! scalars; presumably y := alpha*op(A)*x + beta*y -- TODO confirm
+  class(psb_z_base_vect_type), intent(inout) :: x  ! input vector; xx%sync() may be invoked on it
+  class(psb_z_base_vect_type), intent(inout) :: y  ! result vector, updated in place
+  integer(psb_ipk_), intent(out) :: info  ! psb_success_ on normal exit, an error code otherwise
+  character, optional, intent(in) :: trans  ! converted to upper case; 'N' when absent
+  logical :: tra  ! not referenced in this routine
+  character :: trans_
+  complex(psb_dpk_), allocatable :: rx(:), ry(:)  ! plain-array copies used by the fallback paths
+  Integer(Psb_ipk_) :: err_act, m, n, k
+  character(len=20) :: name='z_cuda_dnsg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (present(trans)) then
+    trans_ = psb_toupper(trans)
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  if (trans_ =='N') then  ! m, n, k are the dimensions handed to spmvDnsDevice; n is always 1
+    m = a%get_nrows()
+    n = 1
+    k = a%get_ncols()
+  else
+    m = a%get_ncols()
+    n = 1
+    k = a%get_nrows()
+  end if
+  select type (xx => x)
+  type is (psb_z_vect_cuda)
+    select type(yy => y)
+    type is (psb_z_vect_cuda)
+      if (a%is_host()) call a%sync()
+      if (xx%is_host()) call xx%sync()
+      if (beta /= zzero) then  ! y is only synchronized when beta is nonzero
+        if (yy%is_host()) call yy%sync()
+      end if
+      info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,&
+           & xx%deviceVect,beta,yy%deviceVect)
+      if (info /= 0) then
+        call psb_errpush(psb_err_from_subroutine_ai_,name,&
+             & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/))
+        info = psb_err_from_subroutine_ai_
+        goto 9999
+      end if
+      call yy%set_dev()
+    class default  ! y is not a psb_z_vect_cuda: host fallback on array copies
+      if (a%is_dev()) call a%sync()
+      rx = xx%get_vect()
+      ry = y%get_vect()
+      call a%spmm(alpha,rx,beta,ry,info,trans_)  ! forward trans_ so 'T'/'C' requests are honoured here too
+      call y%bld(ry)
+    end select
+  class default  ! x is not a psb_z_vect_cuda: host fallback on array copies
+    if (a%is_dev()) call a%sync()
+    rx = x%get_vect()
+    ry = y%get_vect()
+    call a%spmm(alpha,rx,beta,ry,info,trans_)  ! forward trans_ so 'T'/'C' requests are honoured here too
+    call y%bld(ry)
+  end select
+
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_dnsg_vect_mv
+
+! Allocates b as a psb_z_cuda_dnsg_sparse_mat, freeing and deallocating any previous content of b first.
+subroutine psb_z_cuda_dnsg_mold(a,b,info)
+  use psb_base_mod
+  use psb_z_cuda_vect_mod
+  use dnsdev_mod
+  use psb_z_vectordev_mod
+  use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_mold
+  implicit none
+  class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a  ! not referenced in the body
+  class(psb_z_base_sparse_mat), intent(inout), allocatable :: b  ! on exit holds a new psb_z_cuda_dnsg_sparse_mat
+  integer(psb_ipk_), intent(out) :: info  ! 0 on success, psb_err_alloc_dealloc_ on (de)allocation failure
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='dnsg_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_z_cuda_dnsg_sparse_mat :: b, stat=info)
+
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info, name)
+    goto 9999
+  end if
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_dnsg_mold
+
+
+!!$
+!!$ interface
+!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type
+!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a
+!!$ complex(psb_dpk_), intent(in) :: alpha, beta
+!!$ class(psb_z_base_vect_type), intent(inout) :: x, y
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character, optional, intent(in) :: trans
+!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv
+!!$ end interface
+
+!!$ interface
+!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a)
+!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_
+!!$ integer(psb_ipk_), intent(in) :: nz
+!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz)
+!!$ import :: psb_z_cuda_dnsg_sparse_mat,
psb_ipk_
+!!$ integer(psb_ipk_), intent(in) :: m,n
+!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$ integer(psb_ipk_), intent(in), optional :: nz
+!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz
+!!$ end interface
+! Allocates the device dense matrix (FallocDnsDevice) and uploads the host array a%val into it (writeDnsDevice).
+subroutine psb_z_cuda_dnsg_to_gpu(a,info)
+  use psb_base_mod
+  use psb_z_cuda_vect_mod
+  use dnsdev_mod
+  use psb_z_vectordev_mod
+  use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_to_gpu
+  class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a  ! NOTE(review): no IMPLICIT NONE here, unlike the sibling routines
+  integer(psb_ipk_), intent(out) :: info  ! psb_success_ on normal exit, otherwise the device routine's code
+  Integer(Psb_ipk_) :: err_act, pitch, lda  ! pitch and lda are not referenced
+  logical, parameter :: debug=.false.
+  character(len=20) :: name='z_cuda_dnsg_to_gpu'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2)
+  info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),&
+       & spgpu_type_complex_double,1)
+  if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2))
+  if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_dnsg_to_gpu
+
+
+! Copies the COO matrix b into a: host conversion through the parent DNS cp_from_coo, then upload with a%to_gpu.
+subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info)
+  use psb_base_mod
+  use psb_z_cuda_vect_mod
+  use dnsdev_mod
+  use psb_z_vectordev_mod
+  use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_coo
+  implicit none
+
+  class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a  ! destination matrix
+  class(psb_z_coo_sparse_mat), intent(in) :: b  ! source matrix; b%sync() is called when b%is_dev() is true
+  integer(psb_ipk_), intent(out) :: info  ! psb_success_ on normal exit, an error code otherwise
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='z_cuda_dnsg_cp_from_coo'
+  integer(psb_ipk_) :: debug_level, debug_unit  ! not referenced
+  logical, parameter :: debug=.false.
+  type(psb_z_coo_sparse_mat) :: tmp  ! not referenced
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  call a%psb_z_dns_sparse_mat%cp_from_coo(b,info)
+  if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info
+  if (info == 0) call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_cp_dnsg_from_coo
+! Copies a matrix of any storage format into a: COO sources go straight to cp_from_coo, others via a temporary COO copy.
+subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info)
+  use psb_base_mod
+  use psb_z_cuda_vect_mod
+  use dnsdev_mod
+  use psb_z_vectordev_mod
+  use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_fmt
+  implicit none
+
+  class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a  ! destination matrix
+  class(psb_z_base_sparse_mat), intent(in) :: b  ! source matrix, left unchanged apart from a possible b%sync()
+  integer(psb_ipk_), intent(out) :: info  ! psb_success_ on normal exit, an error code otherwise
+
+  type(psb_z_coo_sparse_mat) :: tmp  ! intermediate COO copy used for non-COO sources
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='z_cuda_dnsg_cp_from_fmt'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_z_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+!!$ class is (psb_z_ell_sparse_mat)
+!!$ nzm = psb_size(b%ja,2)
+!!$ m = b%get_nrows()
+!!$ nc = b%get_ncols()
+!!$ nza = b%get_nzeros()
+!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+!!$ ld = gpu_parms%pitch
+!!$ nzm = gpu_parms%maxRowSize
+!!$ a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
+!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info)
+!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info)
+!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info)
+!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info)
+!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info)
+!!$ if (info == 0) then
+!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+!!$ end if
+!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+!!$ if (info == 0) then
+!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+!!$ end if
+!!$ a%nzt = nza
+!!$ call a%to_gpu(info)
+
+  class default
+
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_cp_dnsg_from_fmt
+
+
+! Moves the COO matrix b into a: b is fixed if not already by rows, copied with a%cp_from_coo, then freed.
+subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info)
+  use psb_base_mod
+  use psb_z_cuda_vect_mod
+  use dnsdev_mod
+  use psb_z_vectordev_mod
+  use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_coo
+  implicit none
+
+  class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a  ! destination matrix
+  class(psb_z_coo_sparse_mat), intent(inout) :: b  ! source matrix; freed once the copy has been attempted
+  integer(psb_ipk_), intent(out) :: info  ! psb_success_ on normal exit, an error code otherwise
+
+  Integer(Psb_ipk_) :: err_act
+  logical, parameter :: debug=.false.
+  character(len=20) :: name='z_cuda_dnsg_mv_from_coo'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+  if (info /= psb_success_) goto 9999  ! use the common error exit (was a bare return that bypassed it)
+  if (b%is_dev()) call b%sync()
+  call a%cp_from_coo(b,info)
+  if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info
+  call b%free()  ! b is released even when cp_from_coo reported an error
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_mv_dnsg_from_coo
+
+subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info)
+  use psb_base_mod
+  use psb_z_cuda_vect_mod
+  use dnsdev_mod
+  use psb_z_vectordev_mod
+  use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_fmt
+  implicit none
+  class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a
+  class(psb_z_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+
+  type(psb_z_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='z_cuda_dnsg_cp_from_fmt'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  select type (b)
+  type is (psb_z_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+!!$ class is (psb_z_ell_sparse_mat)
+!!$ nzm = psb_size(b%ja,2)
+!!$
m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$ a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$ call a%to_gpu(info) + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_z_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..5c54d00b --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. 
The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz)
+  ! Size the host arrays of a for an m x n matrix, reset its state flags, then call a%to_gpu.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n                       ! rows / columns, must be >= 0
+  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz              ! total entry estimate
+  Integer(Psb_ipk_)  :: err_act, info, nz_,ld
+  character(len=20)  :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+  type(elldev_parms) :: gpu_parms
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+  if (present(nz)) then   ! entries per row, rounded up; max(m,ione) keeps the divisor nonzero for m == 0
+    nz_ = (max(nz,ione) + m -1 )/max(m,ione)
+  else
+    nz_ = (max(7*m,7*n,ione)+m-1)/max(m,ione)
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/))
+    goto 9999
+  endif
+  ! The leading dimension and row width actually used are those returned in gpu_parms.
+  gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_double,1)
+  ld = gpu_parms%pitch
+  nz_ = gpu_parms%maxRowSize
+
+  if (info == psb_success_) call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info)
+  if (info == psb_success_) then
+    a%irn = 0
+    a%idiag = 0
+    a%nzt = 0
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+    call a%set_bld()
+    call a%set_triangle(.false.)
+    call a%set_unit(.false.)
+    call a%set_dupl(psb_dupl_def_)
+  end if
+  ! to_gpu rewrites info, so it must not run after a failed reallocation.
+  if (info == psb_success_) call a%to_gpu(info,nzrm=nz_)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_elg_allocate_mnnz
diff --git a/cuda/impl/psb_z_cuda_elg_asb.f90 b/cuda/impl/psb_z_cuda_elg_asb.f90
new file mode 100644
index 00000000..65b58425
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_elg_asb.f90
@@ -0,0 +1,64 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        
notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_z_cuda_elg_asb(a)
+  !
+  ! Assembly step for the CUDA ELL matrix.
+  !
+  ! When a%is_host() is true the matrix is passed through a%sync()
+  ! first; it is then flagged as assembled via a%set_asb().
+  !
+  ! Arguments:
+  !   a  (inout)  matrix being assembled.
+  !
+
+  use psb_base_mod
+  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_asb
+  implicit none
+
+  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
+
+  integer(psb_ipk_) :: err_act
+
+  call psb_erractionsave(err_act)
+
+  ! Only call sync() if we are on host
+  if (a%is_host()) then
+    call a%sync()
+  end if
+  call a%set_asb()
+
+  ! No statement above reports a failure, so the routine has a single
+  ! exit and only needs to hand back the saved error action.
+  call psb_erractionrestore(err_act)
+  return
+
+end subroutine psb_z_cuda_elg_asb
diff --git a/cuda/impl/psb_z_cuda_elg_csmm.F90 b/cuda/impl/psb_z_cuda_elg_csmm.F90
new file mode 100644
index 00000000..4414f0e6
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_elg_csmm.F90
@@ -0,0 +1,124 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmm + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_elg_csmv + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_elg_csput_a + implicit none + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! 
Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_csput_a + + + +subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding + use elldev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_v + use psb_z_cuda_vect_mod + implicit none + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! 
write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_cuda) + select type (ja) + class is (psb_i_vect_cuda) + select type (val) + class is (psb_z_vect_cuda) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceDoubleComplex(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. 
+ info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_z_cuda_elg_csput_v diff --git a/cuda/impl/psb_z_cuda_elg_from_gpu.F90 b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 new file mode 100644 index 00000000..b1291ab2 --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_z_cuda_elg_from_gpu(a,info)
+  ! Read the device matrix back into the host arrays val, ja, irn and idiag of a.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_from_gpu
+  implicit none
+  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out)             :: info
+
+  integer(psb_ipk_) :: pitch, maxrowsize
+
+  info = 0
+
+  ! Without a device handle there is nothing to read back: the matrix
+  ! is released and info stays 0.
+  if (.not.(c_associated(a%deviceMat))) then
+    call a%free()
+    return
+  end if
+
+  pitch = getEllDevicePitch(a%deviceMat)
+  maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
+
+  ! Resize the host arrays when their shape differs from the device pitch / row width.
+  if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
+    call psb_realloc(pitch,maxrowsize,a%val,info)
+    if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
+    if (info == 0) call psb_realloc(pitch,a%irn,info)
+  end if
+  if (info == 0) info = &
+       & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag)
+  ! Flag the two copies as synchronized only when the read-back succeeded.
+  if (info == 0) call a%set_sync()
+
+end subroutine psb_z_cuda_elg_from_gpu
diff --git a/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90
new file mode 100644
index 00000000..443e7cbb
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90
@@ -0,0 +1,84 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. 
The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+  !
+  ! Solve step on encapsulated vectors, delegated to the inner_spsm of
+  ! the parent psb_z_ell_sparse_mat class.
+  !
+  ! Arguments:
+  !   alpha, beta  scalars forwarded unchanged to inner_spsm
+  !   a            the matrix
+  !   x, y         vectors forwarded to inner_spsm; y is flagged set_host() afterwards
+  !   info         psb_success_, or psb_err_from_subroutine_ if inner_spsm failed
+  !   trans        optional, forwarded unchanged
+  !
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_inner_vect_sv
+  use psb_z_cuda_vect_mod
+  implicit none
+  class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
+  complex(psb_dpk_), intent(in)       :: alpha, beta
+  class(psb_z_base_vect_type), intent(inout) :: x, y
+  integer(psb_ipk_), intent(out)             :: info
+  character, optional, intent(in)     :: trans
+
+  integer(psb_ipk_)  :: err_act
+  ! len=24 holds the full routine name, which is pushed on the error stack below.
+  character(len=24)  :: name='z_cuda_elg_inner_vect_sv'
+
+  call psb_get_erraction(err_act)
+  info = psb_success_
+
+  ! The matrix and both vectors go through their sync() before the parent-class call.
+  if (a%is_dev()) call a%sync()
+  call x%sync()
+  call y%sync()
+  call a%psb_z_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans)
+  call y%set_host()
+
+  if (info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name, a_err='inner_spsm')
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_elg_inner_vect_sv
diff --git a/cuda/impl/psb_z_cuda_elg_mold.F90 b/cuda/impl/psb_z_cuda_elg_mold.F90
new file mode 100644
index 00000000..e9a3891d
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_elg_mold.F90
@@ -0,0 +1,63 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_z_cuda_elg_mold(a,b,info)
+  ! Make b an empty matrix of dynamic type psb_z_cuda_elg_sparse_mat.
+  ! A b that is already allocated is freed and deallocated first.
+  ! info is 0 on success and psb_err_alloc_dealloc_ when either the
+  ! deallocation or the allocation fails.
+  use psb_base_mod
+  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_mold
+  implicit none
+  class(psb_z_cuda_elg_sparse_mat), intent(in)               :: a
+  class(psb_z_base_sparse_mat), intent(inout), allocatable  :: b
+  integer(psb_ipk_), intent(out)                    :: info
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='elg_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_z_cuda_elg_sparse_mat :: b, stat=info)
+
+  ! Normal exit; everything below is the failure path.
+  if (info == psb_success_) return
+
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_cuda_elg_mold
diff --git a/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90
new file mode 100644
index 00000000..3a8c2760
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90
@@ -0,0 +1,72 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_z_cuda_elg_reallocate_nz(nz,a)
+  ! Resize a%ja and a%val for about nz entries in total, then call a%to_gpu.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: m, nzrm,ld
+  Integer(Psb_ipk_)  :: err_act, info
+  character(len=24)  :: name='z_cuda_elg_reallocate_nz'   ! len=24: the full name reaches psb_errpush
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  ! The column count is nz spread over the m rows, rounded up; the leading
+  ! dimension of a%ja is kept.  max(m,ione) keeps the divisor nonzero when
+  ! the matrix has no rows.
+  if (a%is_dev()) call a%sync()
+  m = a%get_nrows()
+  nzrm = (max(nz,ione)+m-1)/max(m,ione)
+  ld = size(a%ja,1)
+  call psb_realloc(ld,nzrm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    goto 9999
+  end if
+
+  call a%to_gpu(info,nzrm=nzrm)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_elg_reallocate_nz
diff --git a/cuda/impl/psb_z_cuda_elg_scal.F90 b/cuda/impl/psb_z_cuda_elg_scal.F90
new file mode 100644
index 00000000..a9846362
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_elg_scal.F90
@@ -0,0 +1,71 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        notice, this list of conditions, and the following disclaimer in the
+!        documentation and/or other materials provided with the distribution.
+!     3. The name of the PSBLAS group or the names of its contributors may
+!        not be used to endorse or promote products derived from this
+!        software without specific written permission.
+!
+!   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!   PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!   BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!   
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!   POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_z_cuda_elg_scal(d,a,info,side)
+  ! Scale a by the vector d through the parent ELL class, then call a%to_gpu.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scal
+  implicit none
+  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
+  complex(psb_dpk_), intent(in)      :: d(:)
+  integer(psb_ipk_), intent(out)            :: info
+  character, intent(in), optional :: side
+
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='scal'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  ! Preparation: sync() when is_dev(), and drop the unit-diagonal
+  ! flag through make_nonunit() when is_unit().
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) call a%make_nonunit()
+
+  ! d and side are forwarded unchanged to the parent-class scal.
+  call a%psb_z_ell_sparse_mat%scal(d,info,side)
+  if (info == psb_success_) then
+    call a%to_gpu(info)
+    if (info == 0) then
+      call psb_erractionrestore(err_act)
+      return
+    end if
+  end if
+
+  ! Reached only when scal or to_gpu reported a failure.
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_cuda_elg_scal
diff --git a/cuda/impl/psb_z_cuda_elg_scals.F90 b/cuda/impl/psb_z_cuda_elg_scals.F90
new file mode 100644
index 00000000..b4462589
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_elg_scals.F90
@@ -0,0 +1,66 @@
+!   Parallel Sparse BLAS GPU plugin
+!   (C) Copyright 2013
+!
+!   Salvatore Filippone
+!   Alessandro Fanfarillo
+!
+!   Redistribution and use in source and binary forms, with or without
+!   modification, are permitted provided that the following conditions
+!   are met:
+!     1. Redistributions of source code must retain the above copyright
+!        notice, this list of conditions and the following disclaimer.
+!     2. Redistributions in binary form must reproduce the above copyright
+!        
!
! psb_z_cuda_elg_scals
!
!   Multiply every stored coefficient of an ELG (ELLPACK-on-GPU) matrix
!   by the scalar D, working on the host copy and then pushing the result
!   back to the device.
!
!   d     (in)     scalar factor
!   a     (inout)  matrix to be scaled
!   info  (out)    psb_success_ on success, otherwise the code returned
!                  by to_gpu
!
subroutine psb_z_cuda_elg_scals(d,a,info)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scals
  implicit none
  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
  complex(psb_dpk_), intent(in)      :: d
  integer(psb_ipk_), intent(out)     :: info

  Integer(Psb_ipk_)  :: err_act
  character(len=20)  :: name='scal'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)

  ! The scaling is done on the host arrays: bring them up to date first.
  if (a%is_dev()) call a%sync()
  ! A matrix flagged as unit is switched to non-unit before being scaled.
  if (a%is_unit()) then
    call a%make_nonunit()
  end if

  a%val(:,:) = a%val(:,:) * d

  ! Host data changed: refresh the device copy.
  call a%to_gpu(info)
  if (info /= 0) goto 9999

  ! Undo the psb_erractionsave above on the success path as well, as the
  ! sibling psb_z_cuda_elg_scal does; without it the saved error action
  ! is never reinstated.
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_z_cuda_elg_scals
!
! psb_z_cuda_elg_to_gpu
!
!   Write the host ELL arrays of A to its device-side matrix, allocating
!   (or re-allocating) the device matrix when its pitch / maximum row size
!   do not match the values required for the current dimensions.
!
!   a     (inout)  matrix; a%val and a%ja may be resized to the device
!                  pitch / max row size
!   info  (out)    0 on success, otherwise the code returned by the
!                  allocation, realloc or write step that failed
!   nzrm  (in,opt) lower bound on the number of entries per row used to
!                  size the device matrix
!
!   If a%val or a%ja is not allocated the routine returns immediately
!   with info = 0 and does nothing.
!
subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_to_gpu
  implicit none
  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)                  :: info
  integer(psb_ipk_), intent(in), optional         :: nzrm

  integer(psb_ipk_)  :: m, nzm, n, pitch,maxrowsize, nzt
  type(elldev_parms) :: gpu_parms

  info = 0

  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return

  m   = a%get_nrows()
  nzm = psb_size(a%val,2)
  n   = a%get_ncols()
  nzt = a%get_nzeros()
  ! The caller may ask for more room per row than the host array has.
  if (present(nzrm)) nzm = max(nzm,nzrm)

  ! Layout the device would use for these dimensions.
  gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_double,1)

  ! Layout of the existing device matrix, if any; -1 forces an allocation.
  if (c_associated(a%deviceMat)) then
    pitch      = getEllDevicePitch(a%deviceMat)
    maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
  else
    pitch      = -1
    maxrowsize = -1
  end if

  if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
    if (c_associated(a%deviceMat)) then
      call freeEllDevice(a%deviceMat)
    endif
    info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_double,1)
    ! Only query the new device matrix if the allocation succeeded; after
    ! a failure the handle cannot be assumed to be usable.
    if (info == 0) then
      pitch      = getEllDevicePitch(a%deviceMat)
      maxrowsize = getEllDeviceMaxRowSize(a%deviceMat)
    end if
  end if

  ! Make the host arrays match the device layout before writing them.
  if (info == 0) then
    if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then
      call psb_realloc(pitch,maxrowsize,a%val,info)
      if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info)
    end if
  end if
  if (info == 0)  info = &
       & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)
  ! NOTE(review): the sync flag is set even when info /= 0, as in the
  ! original code; callers are expected to check info.
  call a%set_sync()

end subroutine psb_z_cuda_elg_to_gpu
!
! psb_z_cuda_elg_trim
!
!   Shrink the host arrays of an ELG matrix to the sizes actually needed:
!   irn and idiag to the number of rows, ja and val to
!   (leading dimension of ja) x (largest value found in irn).
!   All sizes are clamped to at least 1.
!
!   a  (inout)  matrix whose host arrays are resized
!
!   Errors are reported through the PSBLAS error stack / error handler;
!   the routine has no info argument.
!
subroutine psb_z_cuda_elg_trim(a)

  use psb_base_mod
  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_trim
  implicit none
  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
  Integer(psb_ipk_)  :: err_act, info, nr, m, nzm, ld
  character(len=20)  :: name='trim'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_
  nr   = a%get_nrows()
  m    = max(1_psb_ipk_,nr)
  ld   = max(1_psb_ipk_,size(a%ja,1))
  ! Scan only the rows that exist. With nr == 0 the section is
  ! zero-sized and maxval yields the most negative integer, so the
  ! clamp gives 1; indexing irn(1:1) here would read out of bounds
  ! when irn is empty.
  nzm  = max(1_psb_ipk_,maxval(a%irn(1:nr)))

  call psb_realloc(m,a%irn,info)
  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
  if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info)
  if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info)

  if (info /= psb_success_) then
    ! Record the failure on the error stack before invoking the handler.
    call psb_errpush(psb_err_alloc_dealloc_,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_z_cuda_elg_trim
!
! psb_z_cuda_elg_vect_mv
!
!   Matrix-vector product with an ELG matrix and encapsulated vectors,
!   combining alpha, A, x, beta and y as done by spmvEllDevice on the
!   device or by the ELL spmm method on the host.
!
!   alpha, beta (in)     scalar coefficients
!   a           (in)     assembled ELG matrix
!   x, y        (inout)  operand and result vectors
!   info        (out)    psb_success_ on success
!   trans       (in,opt) 'N' (default), 'T' or 'C', case-insensitive
!
!   Dispatch:
!     * transposed product: always on the host through the parent ELL
!       spmm, after syncing a, x (and y when beta /= 0);
!     * non-transposed, both x and y of type psb_z_vect_cuda: on the
!       device through spmvEllDevice;
!     * any other vector type: on the host, through plain arrays
!       extracted from x and y, with y rebuilt from the result.
!
subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_vect_mv
  use psb_z_cuda_vect_mod
  implicit none
  class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
  complex(psb_dpk_), intent(in)                :: alpha, beta
  class(psb_z_base_vect_type), intent(inout)   :: x
  class(psb_z_base_vect_type), intent(inout)   :: y
  integer(psb_ipk_), intent(out)               :: info
  character, optional, intent(in)              :: trans

  complex(psb_dpk_), allocatable :: x_host(:), y_host(:)
  logical            :: use_transpose
  character          :: op
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='z_cuda_elg_vect_mv'

  call psb_erractionsave(err_act)
  info = psb_success_

  ! Default operation is the plain (non-transposed) product.
  op = 'N'
  if (present(trans)) op = trans

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  select case (psb_toupper(op))
  case ('T','C')
    use_transpose = .true.
  case default
    use_transpose = .false.
  end select

  if (use_transpose) then
    ! Host path: every operand that is read must be current on the host.
    if (a%is_dev()) call a%sync()
    if (.not.x%is_host()) call x%sync()
    if (beta /= zzero) then
      ! y is only read when beta is nonzero.
      if (.not.y%is_host()) call y%sync()
    end if
    call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    if (a%is_host()) call a%sync()
    select type (xdev => x)
    type is (psb_z_vect_cuda)
      select type (ydev => y)
      type is (psb_z_vect_cuda)
        ! Device path: operands must be current on the device.
        if (a%is_host()) call a%sync()
        if (xdev%is_host()) call xdev%sync()
        if (beta /= zzero) then
          if (ydev%is_host()) call ydev%sync()
        end if
        info = spmvEllDevice(a%deviceMat,alpha,xdev%deviceVect,&
             & beta,ydev%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spmvELLDevice',&
               & i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call ydev%set_dev()
      class default
        ! y is not a CUDA vector: fall back to plain host arrays.
        if (a%is_dev()) call a%sync()
        x_host = xdev%get_vect()
        y_host = y%get_vect()
        call a%spmm(alpha,x_host,beta,y_host,info)
        call y%bld(y_host)
      end select
    class default
      ! x is not a CUDA vector: fall back to plain host arrays.
      if (a%is_dev()) call a%sync()
      x_host = x%get_vect()
      y_host = y%get_vect()
      call a%spmm(alpha,x_host,beta,y_host,info)
      call y%bld(y_host)
    end select
  end if

  if (info /= 0) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_z_cuda_elg_vect_mv
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_csmv + implicit none + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_cuda_hdiag_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hdiag_mold + implicit none + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hdiag_mold diff --git a/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 new file mode 100644 index 00000000..a1140961 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! 
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
!
! psb_z_cuda_hdiag_to_gpu
!
!   (Re)create the device-side HDIAG matrix of A and write the host
!   arrays val, diaOffsets and hackOffsets to it.  Any existing device
!   matrix is freed first.
!
!   a     (inout)  matrix whose device copy is rebuilt
!   info  (out)    0 on success;
!                  -1 if a%hackOffsets is not allocated,
!                  -2 if a%diaOffsets  is not allocated,
!                  -3 if a%val         is not allocated,
!                  otherwise the code returned by FAllocHdiagDevice or
!                  writeHdiagDevice
!
subroutine psb_z_cuda_hdiag_to_gpu(a,info)

  use psb_base_mod
  use hdiagdev_mod
  use psb_vectordev_mod
  use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_to_gpu
  use iso_c_binding
  implicit none
  class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)                    :: info

  integer(psb_ipk_)    :: nr, nc, hacksize, hackcount, allocheight
  type(hdiagdev_parms) :: gpu_parms

  info = 0

  nr        = a%get_nrows()
  nc        = a%get_ncols()
  hacksize  = a%hackSize
  hackCount = a%nhacks
  if (.not.allocated(a%hackOffsets)) then
    info = -1
    return
  end if
  ! The entry following the last hack offset is used as the allocation
  ! height of the device storage.
  allocheight = a%hackOffsets(hackCount+1)
  if (.not.allocated(a%diaOffsets)) then
    info = -2
    return
  end if
  if (.not.allocated(a%val)) then
    info = -3
    return
  end if

  if (c_associated(a%deviceMat)) then
    call freeHdiagDevice(a%deviceMat)
  endif

  ! a%val is complex(psb_dpk_), so the device storage must be requested
  ! with the complex double element type (as the ELG to_gpu routine for
  ! this data type does), not with the real double one.
  ! NOTE(review): confirm that the HDIAG device layer accepts this
  ! element type.
  info = FAllocHdiagDevice(a%deviceMat,nr,nc,&
       & allocheight,hacksize,hackCount,spgpu_type_complex_double)
  if (info == 0) info = &
       & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets)

end subroutine psb_z_cuda_hdiag_to_gpu
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
!
! psb_z_cuda_hdiag_vect_mv
!
!   Matrix-vector product with an HDIAG matrix and encapsulated vectors,
!   combining alpha, A, x, beta and y as done by spmvHdiagDevice on the
!   device or by the HDIA spmm method on the host.
!
!   alpha, beta (in)     scalar coefficients
!   a           (in)     assembled HDIAG matrix
!   x, y        (inout)  operand and result vectors
!   info        (out)    psb_success_ on success
!   trans       (in,opt) 'N' (default), 'T' or 'C', case-insensitive
!
!   Dispatch:
!     * transposed product: on the host through the parent HDIA spmm,
!       after syncing x (and y when beta /= 0);
!     * non-transposed, both x and y of type psb_z_vect_cuda: on the
!       device through spmvHdiagDevice;
!     * any other vector type: on the host, through plain arrays
!       extracted from x and y, with y rebuilt from the result.
!
subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use hdiagdev_mod
  use psb_vectordev_mod
  use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_vect_mv
  use psb_z_cuda_vect_mod
  implicit none
  class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
  complex(psb_dpk_), intent(in)                  :: alpha, beta
  class(psb_z_base_vect_type), intent(inout)     :: x
  class(psb_z_base_vect_type), intent(inout)     :: y
  integer(psb_ipk_), intent(out)                 :: info
  character, optional, intent(in)                :: trans

  complex(psb_dpk_), allocatable :: rx(:), ry(:)
  logical            :: tra
  character          :: trans_
  Integer(Psb_ipk_)  :: err_act
  character(len=20)  :: name='z_cuda_hdiag_vect_mv'

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
  if (tra) then
    ! Host path: operands that are read must be current on the host.
    if (.not.x%is_host()) call x%sync()
    ! beta is complex: compare against the complex zero constant, as the
    ! ELG counterpart of this routine does.  y is only read when beta is
    ! nonzero.
    if (beta /= zzero) then
      if (.not.y%is_host()) call y%sync()
    end if
    call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    if (a%is_host()) call a%sync()
    select type (xx => x)
    type is (psb_z_vect_cuda)
      select type(yy => y)
      type is (psb_z_vect_cuda)
        ! Device path: operands must be current on the device.
        if (xx%is_host()) call xx%sync()
        if (beta /= zzero) then
          if (yy%is_host()) call yy%sync()
        end if
        info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect)
        if (info /= 0) then
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
      class default
        ! y is not a CUDA vector: fall back to plain host arrays.
        rx = xx%get_vect()
        ry = y%get_vect()
        call a%spmm(alpha,rx,beta,ry,info)
        call y%bld(ry)
      end select
    class default
      ! x is not a CUDA vector: fall back to plain host arrays.
      rx = x%get_vect()
      ry = y%get_vect()
      call a%spmm(alpha,rx,beta,ry,info)
      call y%bld(ry)
    end select

  end if
  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_z_cuda_hdiag_vect_mv
!
! psb_z_cuda_hlg_allocate_mnnz
!
!   Allocate an HLG (hacked ELLPACK on GPU) matrix: the host storage is
!   allocated by the parent HLL allocate method, then the result is
!   pushed to the device.
!
!   m, n  (in)     dimensions, forwarded to the parent allocate
!   a     (inout)  matrix being allocated
!   nz    (in,opt) requested number of nonzeros, forwarded to the parent
!                  allocate
!
!   Errors are reported through the PSBLAS error handler; the routine
!   has no info argument.
!
subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use hlldev_mod
  use psb_vectordev_mod
  use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)                   :: m,n
  class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional         :: nz

  Integer(psb_ipk_)  :: err_act, info, nz_, mrows
  character(len=20)  :: name='allocate_mnz'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  call a%psb_z_hll_sparse_mat%allocate(m,n,nz)

  ! nz_ is handed to to_gpu as the per-row size hint and must be
  ! defined before use.  When the caller supplies nz, use the average
  ! number of entries per row rounded up (guarding against m <= 0);
  ! otherwise fall back to the minimal hint of one entry per row.
  if (present(nz)) then
    mrows = max(m,1_psb_ipk_)
    nz_   = (max(nz,1_psb_ipk_)+mrows-1)/mrows
  else
    nz_   = 1_psb_ipk_
  end if

  call a%to_gpu(info,nzrm=nz_)
  if (info /= 0) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_z_cuda_hlg_allocate_mnnz
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmm + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hlg_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hlg_csmv + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_cuda_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hlg_from_gpu + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call 
psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_z_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..6a914fc0 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + ! Vector-object front end for inner_spsm on a CUDA HLL matrix: synchronises x, y and a, then delegates to the parent psb_z_hll_sparse_mat%inner_spsm. + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_inner_vect_sv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hlg_inner_vect_sv' ! label pushed on the error stack; kept within the 20 characters declared + logical, parameter :: debug=.false. + complex(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() ! mark y as host-updated after the parent-type call + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_spsm') ! report the routine that was actually called + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_z_cuda_hlg_mold.F90 b/cuda/impl/psb_z_cuda_hlg_mold.F90 new file mode 100644 index 00000000..bc631ece --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_mold.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +!
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_mold(a,b,info) + ! Replaces b with a freshly allocated, empty psb_z_cuda_hlg_sparse_mat; a is not read and only selects this implementation. + use psb_base_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_mold + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false.
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_hlg_sparse_mat :: b, stat=info) ! skipped when the deallocation above failed + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_cuda_hlg_mold diff --git a/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..ac3fbbd1 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) + ! Resizes the storage through the parent psb_z_hll_sparse_mat%reallocate(nz), then rebuilds the device copy with to_gpu. + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_reallocate_nz + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=24) :: name='z_cuda_hlg_reallocate_nz' ! length matches the literal so the name is no longer truncated + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_z_hll_sparse_mat%reallocate(nz) + + call a%to_gpu(info) ! info is first assigned here + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_z_cuda_hlg_scal.F90 b/cuda/impl/psb_z_cuda_hlg_scal.F90 new file mode 100644 index 00000000..7b9df998 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_scal.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_scal(d,a,info,side) + ! Scales the matrix by the vector d through the parent psb_z_hll_sparse_mat%scal, then re-uploads it with to_gpu. + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scal + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_hll_sparse_mat%scal(d,info,side) ! side is forwarded as given, possibly absent + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) ! refresh the device copy from the scaled arrays + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hlg_scal diff --git a/cuda/impl/psb_z_cuda_hlg_scals.F90 b/cuda/impl/psb_z_cuda_hlg_scals.F90 new file mode 100644 index 00000000..b867e3b8 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_scals.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +!
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_scals(d,a,info) + ! Scalar variant of the scaling routine: applies the single value d through the parent psb_z_hll_sparse_mat%scal, then re-uploads with to_gpu. + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scals + use iso_c_binding + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false.
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) ! refresh the device copy from the scaled arrays + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_cuda_hlg_scals diff --git a/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 new file mode 100644 index 00000000..8f81842a --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) + ! Frees any existing device matrix, allocates a new one with FallochllDevice and fills it from val, ja, hkoffs, irn and idiag; nzrm is accepted but not read here. + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_to_gpu + use iso_c_binding + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return ! nothing to upload yet; returns with info = 0 + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_complex_double,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 + +end subroutine psb_z_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 new file mode 100644 index 00000000..e2e93b85 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 @@ -0,0 +1,119 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +!
3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + ! Matrix-vector product on vector objects: transposed cases use the parent psb_z_hll_sparse_mat%spmm, other cases use spmvhllDevice when x and y are both psb_z_vect_cuda. + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_vect_mv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if
(tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then ! same complex zero constant as the transposed branch + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() ! y is not a CUDA vector: fall back to plain arrays + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() ! x is not a CUDA vector: fall back to plain arrays + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..dbfbfb91 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +!
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) + ! Allocates m-by-n storage through the parent psb_z_csr_sparse_mat%allocate, initialises cuSPARSE with initFcusparse and builds the device copy with to_gpu. + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false.
+ + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%allocate(m,n,nz) + + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) ! keep a failing initFcusparse code instead of overwriting it + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_allocate_mnnz +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_csmm.F90 b/cuda/impl/psb_z_cuda_hybg_csmm.F90 new file mode 100644 index 00000000..3c53acbd --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_csmm.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmm + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hybg_csmv + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hybg_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hybg_inner_vect_sv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. 
+ info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=zzero)) then + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_inner_vect_sv +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_mold.F90 b/cuda/impl/psb_z_cuda_hybg_mold.F90 new file mode 100644 index 00000000..236c8359 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! 
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_mold(a,b,info) + ! Replaces b with a freshly allocated, empty psb_z_cuda_hybg_sparse_mat; a is not read and only selects this implementation. + use psb_base_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_mold + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false.
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_hybg_sparse_mat :: b, stat=info) ! skipped when the deallocation above failed + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_mold +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..2479226c --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if PSB_CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) + ! Resizes the storage through the parent psb_z_csr_sparse_mat%reallocate(nz), then rebuilds the device copy with to_gpu. + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=25) :: name='z_cuda_hybg_reallocate_nz' ! length matches the literal so the name is no longer truncated + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_z_csr_sparse_mat%reallocate(nz) + + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_reallocate_nz +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_scal.F90 b/cuda/impl/psb_z_cuda_hybg_scal.F90 new file mode 100644 index 00000000..c365499f --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_scal.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+! Scale a HYBG matrix by the vector d on the host, then refresh its device copy.
+subroutine psb_z_cuda_hybg_scal(d,a,info,side)
+  ! d and side are forwarded unchanged to the CSR scal of the parent type;
+  ! info returns the status of that call or of the device upload.
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scal
+  implicit none
+  class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a
+  complex(psb_dpk_), intent(in) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, intent(in), optional :: side
+
+  logical, parameter :: debug=.false.
+  character(len=20) :: name='scal'
+  Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! A unit-diagonal matrix is switched to non-unit before its entries are scaled.
+  if (a%is_unit()) call a%make_nonunit()
+
+  ! Host-side scaling first; the device copy is rebuilt only if that succeeded.
+  call a%psb_z_csr_sparse_mat%scal(d,info,side=side)
+  if (info == 0) call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+  ! Error exit: hand control to the PSBLAS error handler.
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_hybg_scal
+#endif
diff --git a/cuda/impl/psb_z_cuda_hybg_scals.F90 b/cuda/impl/psb_z_cuda_hybg_scals.F90
new file mode 100644
index 00000000..95bb1273
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_hybg_scals.F90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+! Scale a HYBG matrix by the scalar d on the host, then refresh its device copy.
+subroutine psb_z_cuda_hybg_scals(d,a,info)
+  ! d is forwarded unchanged to the CSR scal of the parent type;
+  ! info returns the status of that call or of the device upload.
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scals
+  implicit none
+  class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a
+  complex(psb_dpk_), intent(in) :: d
+  integer(psb_ipk_), intent(out) :: info
+
+  logical, parameter :: debug=.false.
+  character(len=20) :: name='scal'
+  Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! A unit-diagonal matrix is switched to non-unit before its entries are scaled.
+  if (a%is_unit()) call a%make_nonunit()
+
+  ! Host-side scaling first; the device copy is rebuilt only if that succeeded.
+  call a%psb_z_csr_sparse_mat%scal(d,info)
+  if (info == 0) call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  ! Normal exit.
+  call psb_erractionrestore(err_act)
+  return
+
+  ! Error exit: hand control to the PSBLAS error handler.
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_hybg_scals
+#endif
diff --git a/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90
new file mode 100644
index 00000000..b75f9df7
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90
@@ -0,0 +1,148 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. 
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+! Rebuild the device (cuSPARSE HYB) copy of a from its host CSR arrays irp, ja, val.
+subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm)
+  ! info : 0 on success, otherwise the first non-zero status met along the way.
+  ! nzrm : accepted for interface compatibility, not referenced in this routine.
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_to_gpu
+  implicit none
+  class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_), intent(in), optional :: nzrm
+
+  integer(psb_ipk_) :: m, n, nz, nzdi, i, j, nrz
+  integer(psb_ipk_), allocatable :: irpdi(:),jadi(:)
+  complex(psb_dpk_), allocatable :: valdi(:)
+
+  info = 0
+  ! Nothing to upload until the host arrays exist.
+  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  nz = a%get_nzeros()
+  ! Drop any previous device image before building the new one.
+  if (c_associated(a%deviceMat%Mat)) then
+    info = HYBGDeviceFree(a%deviceMat)
+  end if
+  if (a%is_unit()) then
+    !
+    ! CUSPARSE has the habit of storing the diagonal and then ignoring,
+    ! whereas we do not store it. Hence this adapter code.
+    !
+    nzdi = nz + m
+    if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi)
+    if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
+    ! We are explicitly adding the diagonal
+    if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+    ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when
+    ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with
+    ! TRIANGULAR if needed. Weird, but works.
+    if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general)
+    if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
+    if (info == 0) then
+      irpdi(1) = 1
+      if (a%is_triangle().and.a%is_upper()) then
+        ! Upper triangle: the explicit unit diagonal entry leads each row.
+        do i=1,m
+          j = irpdi(i)
+          jadi(j) = i
+          valdi(j) = zone
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
+          irpdi(i+1) = j + nrz + 1
+        end do
+      else
+        ! Otherwise the explicit unit diagonal entry closes each row.
+        do i=1,m
+          j = irpdi(i)
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
+          jadi(j+nrz) = i
+          valdi(j+nrz) = zone
+          irpdi(i+1) = j + nrz + 1
+        end do
+      end if
+    end if
+    if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi)
+    if ((info == 0) .and. a%is_triangle()) then
+      info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+      ! The fill mode is set only if the type change succeeded, so a failure
+      ! status is never overwritten by a later successful call.
+      if ((info == 0).and.a%is_upper()) then
+        info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else if (info == 0) then
+        info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+
+  else
+
+    if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz)
+    if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one)
+    ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when
+    ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with
+    ! TRIANGULAR if needed. Weird, but works.
+    if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general)
+    ! This branch is reached only when a is not unit-diagonal, so the diagonal
+    ! type is always non-unit here.
+    if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
+
+    if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val)
+
+    if ((info == 0) .and. a%is_triangle()) then
+      info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular)
+      if ((info == 0).and.a%is_upper()) then
+        info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else if (info == 0) then
+        info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+
+  endif
+
+  ! Triangular matrices get the analysis step after the upload.
+  if ((info == 0) .and. a%is_triangle()) then
+    info = HYBGDeviceHybsmAnalysis(a%deviceMat)
+  end if
+
+  if (info /= 0) then
+    write(0,*) 'Error in HYBG_TO_GPU ',info
+  end if
+
+end subroutine psb_z_cuda_hybg_to_gpu
+#endif
diff --git a/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90
new file mode 100644
index 00000000..a5c082de
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90
@@ -0,0 +1,118 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. 
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+! Matrix-vector product for the HYBG format on encapsulated vectors x and y.
+subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans)
+  ! Presumably y := alpha*op(a)*x + beta*y, as usual for spmm -- TODO confirm.
+  use psb_base_mod
+  use cusparse_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_vect_mv
+  use psb_z_cuda_vect_mod
+  implicit none
+  class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a
+  complex(psb_dpk_), intent(in) :: alpha, beta
+  class(psb_z_base_vect_type), intent(inout) :: x
+  class(psb_z_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  complex(psb_dpk_), allocatable :: rx(:), ry(:)
+  logical :: tra
+  character :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='z_cuda_hybg_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+  ! Only an assembled matrix is accepted.
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+  ! T and C requests are served by the host CSR kernel of the parent type.
+  if (tra) then
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= zzero) then  ! y is synced only when beta is non-zero
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_z_vect_cuda)
+      select type(yy => y)
+      type is (psb_z_vect_cuda)  ! both vectors are CUDA vectors: device kernel
+        if (xx%is_host()) call xx%sync()
+        if (beta /= zzero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect)
+        if (info /= 0) then
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default  ! y is not a CUDA vector: CSR spmm on plain arrays, y rebuilt from ry
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default  ! x is not a CUDA vector: same fallback on plain arrays
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_z_cuda_hybg_vect_mv
+#endif
diff --git a/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90
new file mode 100644
index 00000000..7cfd4f19
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90
@@ -0,0 +1,58 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info)
+  ! Move the COO matrix b into a through the CSR conversion of the parent
+  ! type, then call to_gpu on the result.  On success info is psb_success_;
+  ! any failure is reported to the caller as psb_err_alloc_dealloc_.
+
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_coo
+  implicit none
+
+  class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  ! Host conversion first; the upload is attempted only if it succeeded.
+  call a%psb_z_csr_sparse_mat%mv_from_coo(b,info)
+  if (info == 0) call a%to_gpu(info)
+
+  ! Whatever went wrong, the specific status is replaced by the generic
+  ! allocation error code before returning.
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+  return
+
+end subroutine psb_z_cuda_mv_csrg_from_coo
diff --git a/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90
new file mode 100644
index 00000000..f03294c8
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! 
(C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info)
+  ! Move a matrix b of any storage format into the CSRG matrix a.
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_fmt
+  implicit none
+
+  class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
+  class(psb_z_base_sparse_mat), intent(inout) :: b
+  ! Kind psb_ipk_ as in the sibling mv_csrg_from_coo, which receives this
+  ! same variable below; a default-kind integer differs when psb_ipk_ is wider.
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_z_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+  class default
+    call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info)
+    if (info /= 0) return
+    call a%to_gpu(info)
+  end select
+
+end subroutine psb_z_cuda_mv_csrg_from_fmt
diff --git a/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90
new file mode 100644
index 00000000..8e702e7e
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_z_cuda_mv_diag_from_coo(a,b,info)
+  ! Move the COO matrix b into the DIAG matrix a: b is copied with cp_from_coo
+  ! and freed only when the copy succeeded; failures give psb_err_alloc_dealloc_.
+  use psb_base_mod
+  use diagdev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_mv_diag_from_coo
+
+  implicit none
+
+  class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  Integer(Psb_ipk_) :: err_act
+
+  info = psb_success_
+
+  ! b is fixed first when it is not ordered by rows.
+  if (.not.b%is_by_rows()) call b%fix(info)
+  if (info == psb_success_) then
+    call a%cp_from_coo(b,info)
+    if (info == 0) then
+      ! Success: the source is released, completing the move.
+      call b%free()
+      return
+    end if
+  end if
+
+  ! Either step failed: report the generic allocation error code.
+  info = psb_err_alloc_dealloc_
+  return
+end subroutine psb_z_cuda_mv_diag_from_coo
diff --git a/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90
new file mode 100644
index 00000000..f0cb23f3
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90
@@ -0,0 +1,55 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! 
documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_z_cuda_mv_elg_from_coo(a,b,info)
+  ! Move the COO matrix b into the ELG matrix a.  Once b is row-ordered it is
+  ! copied with cp_from_coo and then freed, whatever status the copy returned.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_coo
+  implicit none
+
+  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) call b%fix(info)
+  if (info == psb_success_) then
+    ! b is synced first when is_dev() reports it.
+    if (b%is_dev()) call b%sync()
+    call a%cp_from_coo(b,info)
+    call b%free()
+  end if
+
+end subroutine psb_z_cuda_mv_elg_from_coo
diff --git a/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90
new file mode 100644
index 00000000..29f63423
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90
@@ -0,0 +1,86 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! 
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info)
+  ! Move a matrix b of any storage format into the ELG matrix a.
+  ! COO input goes through mv_from_coo; ELL input has its arrays moved or
+  ! copied into storage sized by FgetEllDeviceParams; other formats pass through COO.
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_fmt
+  implicit none
+
+  class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
+  class(psb_z_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  type(psb_z_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_) :: nza, nc, ld, nzm, m
+  type(elldev_parms) :: gpu_parms
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+  select type (b)
+  type is (psb_z_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+  class is (psb_z_ell_sparse_mat)
+    nzm = size(b%ja,2)
+    m = b%get_nrows()
+    nc = b%get_ncols()
+    nza = b%get_nzeros()
+    ! NOTE(review): spgpu_type_double is passed although the values are complex(psb_dpk_); confirm.
+    gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1)
+    ld = gpu_parms%pitch
+    nzm = gpu_parms%maxRowSize
+    a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
+    call move_alloc(b%irn, a%irn)
+    call move_alloc(b%idiag, a%idiag)
+    call psb_realloc(ld,nzm,a%ja,info)
+    if (info == 0) then
+      a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm)
+      deallocate(b%ja,stat=info)
+    end if
+    if (info == 0) call psb_realloc(ld,nzm,a%val,info)
+    if (info == 0) then
+      a%val(1:m,1:nzm) = b%val(1:m,1:nzm)
+      deallocate(b%val,stat=info)
+    end if
+    a%nzt = nza
+    call b%free()
+    ! Upload only if the host arrays were built; otherwise keep the failure status.
+    if (info == 0) call a%to_gpu(info)
+  class default
+    call b%mv_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_z_cuda_mv_elg_from_fmt
diff --git a/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90
new file mode 100644
index 00000000..dd6dae1e
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. 
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info)
+  ! Move the COO matrix b into the HDIAG matrix a, then call to_gpu on the result.
+  use psb_base_mod
+  use hdiagdev_mod
+  use psb_vectordev_mod
+  use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_mv_hdiag_from_coo
+  use psb_cuda_env_mod
+
+  implicit none
+
+  class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  Integer(Psb_ipk_) :: err_act
+
+  info = psb_success_
+
+  ! The hack size is taken from psb_cuda_WarpSize before the host conversion.
+  a%hacksize = psb_cuda_WarpSize()
+
+  call a%psb_z_hdia_sparse_mat%mv_from_coo(b,info)
+  if (info /= 0) goto 9999   ! was unchecked before to_gpu reused info
+  call a%to_gpu(info)
+  if (info /= 0) goto 9999
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_z_cuda_mv_hdiag_from_coo
diff --git a/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90
new file mode 100644
index 00000000..609680b9
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90
@@ -0,0 +1,55 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info)
+  ! Move the COO matrix b into the HLG matrix a.  Once b is row-ordered it is
+  ! copied with cp_from_coo and then freed, whatever status the copy returned.
+  use psb_base_mod
+  use hlldev_mod
+  use psb_vectordev_mod
+  use psb_cuda_env_mod
+  use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_coo
+  implicit none
+
+  class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  ! b is fixed first when it is not ordered by rows.
+  if (.not.b%is_by_rows()) call b%fix(info)
+  if (info == psb_success_) then
+    ! info now carries the status of the copy back to the caller.
+    call a%cp_from_coo(b,info)
+    call b%free()
+  end if
+
+end subroutine psb_z_cuda_mv_hlg_from_coo
diff --git a/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90
new file mode 100644
index 00000000..e67c8d83
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. 
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info)
+  !
+  ! Move a matrix b of any storage format into the CUDA HLG matrix a.
+  !
+  ! A COO source is handed directly to a%mv_from_coo; any other format is
+  ! first moved into a temporary COO matrix, which is then moved into a.
+  !
+  ! Arguments:
+  !   a    - (inout) destination matrix.
+  !   b    - (inout) source matrix.
+  !   info - (out)   psb_success_, or the code returned by the failing
+  !                  conversion step.
+  !
+  use psb_base_mod
+  use psb_vectordev_mod
+  use hlldev_mod
+  use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_fmt
+  implicit none
+
+  class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
+  class(psb_z_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  ! Staging area used only for non-COO sources.
+  type(psb_z_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_z_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+  class default
+    call b%mv_to_coo(tmp,info)
+    ! Stop here if the intermediate conversion failed.
+    if (info /= psb_success_) return
+    call a%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_z_cuda_mv_hlg_from_fmt
diff --git a/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90
new file mode 100644
index 00000000..dbf0e512
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90
@@ -0,0 +1,59 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+!
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info)
+  !
+  ! Move the COO matrix b into the CUDA HYBG matrix a.
+  ! Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
+  !
+  ! The host-side conversion is delegated to the parent CSR type; the
+  ! result is then pushed to the device through a%to_gpu.
+  !
+  ! Arguments:
+  !   a    - (inout) destination matrix.
+  !   b    - (inout) source COO matrix.
+  !   info - (out)   psb_success_ on success; psb_err_alloc_dealloc_ when
+  !                  either the host conversion or the device copy fails.
+  !
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_coo
+  implicit none
+
+  class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  ! Host conversion first; the device copy is attempted only if it worked.
+  call a%psb_z_csr_sparse_mat%mv_from_coo(b,info)
+  if (info == 0) call a%to_gpu(info)
+
+  ! Every failure is reported with the same error code.
+  if (info /= 0) info = psb_err_alloc_dealloc_
+
+end subroutine psb_z_cuda_mv_hybg_from_coo
+#endif
diff --git a/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90
new file mode 100644
index 00000000..d39da291
--- /dev/null
+++ b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info)
+  !
+  ! Move a matrix b of any storage format into the CUDA HYBG matrix a.
+  ! Only compiled when PSB_CUDA_SHORT_VERSION <= 10.
+  !
+  ! A COO source goes through a%mv_from_coo; any other format is converted
+  ! by the parent CSR mv_from_fmt and then pushed to the device.
+  !
+  ! Arguments:
+  !   a    - (inout) destination matrix.
+  !   b    - (inout) source matrix.
+  !   info - (out)   psb_success_, or the code returned by the failing step.
+  !
+  use psb_base_mod
+  use cusparse_mod
+  use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_fmt
+  implicit none
+
+  class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a
+  class(psb_z_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  select type(b)
+  type is (psb_z_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+  class default
+    call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info)
+    ! Copy to the device only when the host conversion succeeded.
+    if (info == 0) call a%to_gpu(info)
+  end select
+
+end subroutine psb_z_cuda_mv_hybg_from_fmt
+#endif
diff --git a/cuda/ivectordev.c b/cuda/ivectordev.c
new file mode 100644
index 00000000..f908e391
--- /dev/null
+++ b/cuda/ivectordev.c
@@ -0,0 +1,178 @@
+ /* Parallel Sparse BLAS GPU plugin */
+ /* (C) Copyright 2013 */
+
+ /* Salvatore Filippone */
+ /* Alessandro Fanfarillo */
+
+ /* Redistribution and use in source and binary forms, with or without */
+ /* modification, are permitted provided that the following conditions */
+ /* are met: */
+ /* 1. Redistributions of source code must retain the above copyright */
+ /* notice, this list of conditions and the following disclaimer. */
+ /* 2. Redistributions in binary form must reproduce the above copyright */
+ /* notice, this list of conditions, and the following disclaimer in the */
+ /* documentation and/or other materials provided with the distribution. */
+ /* 3.
The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + + +#include +#include +//#include "utils.h" +//#include "common.h" +#include "ivectordev.h" + + +int registerMappedInt(void *buff, void **d_p, int n, int dummy) +{ + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(int)); +} + +int writeMultiVecDeviceInt(void* deviceVec, int* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(int)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + + return(i); +} + +int writeMultiVecDeviceIntR2(void* deviceVec, int* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceInt(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceIntR2",i); + } + return(i); +} + +int readMultiVecDeviceInt(void* deviceVec, int* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(int)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceInt",i); + } + return(i); +} + +int readMultiVecDeviceIntR2(void* deviceVec, int* hostVec, int ld) +{ int i; + i = readMultiVecDeviceInt(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceIntR2",i); + } + return(i); +} + + +int setscalMultiVecDeviceInt(int val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + 
int beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0; + else if (dupl == INS_ADD) + beta = 1; + else + beta = 0; + + spgpuIscat(handle, (int *) devVecX->v_, n, (int *)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceInt(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + int *hv = (int *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuIgath(handle,hv, n, idx,indexBase, (int *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, int beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceInt(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} 
+ +int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, int beta) +{ int i=0; + int *hv = (int *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuIscat(handle, (int *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} diff --git a/cuda/ivectordev.h b/cuda/ivectordev.h new file mode 100644 index 00000000..6f3a32a0 --- /dev/null +++ b/cuda/ivectordev.h @@ -0,0 +1,62 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" +#include "vector.h" + +int registerMappedInt(void *, void **, int, int); +int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); +int writeMultiVecDeviceIntR2(void* deviceMultiVec, int* hostMultiVec, int ld); +int readMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); +int readMultiVecDeviceIntR2(void* deviceMultiVec, int* hostMultiVec, int ld); + +int setscalMultiVecDeviceInt(int val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceInt(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, int beta); +int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, int beta); diff --git a/cuda/psb_base_vectordev_mod.F90 b/cuda/psb_base_vectordev_mod.F90 new file mode 100644 index 00000000..da02b2f3 
--- /dev/null +++ b/cuda/psb_base_vectordev_mod.F90 @@ -0,0 +1,97 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+module psb_base_vectordev_mod
+  !
+  ! Fortran bindings, through ISO_C_BINDING, for the C routines that
+  ! allocate, release and query a device multi-vector, plus the
+  ! interoperable descriptor type multivec_dev_parms.
+  !
+  use iso_c_binding
+  use core_mod
+
+  ! Interoperable record with four C ints: count, element_type, pitch
+  ! and size.
+  type, bind(c) :: multivec_dev_parms
+    integer(c_int) :: count
+    integer(c_int) :: element_type
+    integer(c_int) :: pitch
+    integer(c_int) :: size
+  end type multivec_dev_parms
+
+  interface
+
+    ! deviceVec is passed by reference (no VALUE attribute); count, Size
+    ! and elementType are passed by value.
+    function FallocMultiVecDevice(deviceVec,count,Size,elementType) &
+         & bind(c,name='FallocMultiVecDevice') result(res)
+      use iso_c_binding
+      type(c_ptr) :: deviceVec
+      integer(c_int), value :: count,Size,elementType
+      integer(c_int) :: res
+    end function FallocMultiVecDevice
+
+    ! Binding for the C routine unregisterMapped; buf is passed by value.
+    subroutine unregisterMapped(buf) &
+         & bind(c,name='unregisterMapped')
+      use iso_c_binding
+      type(c_ptr), value :: buf
+    end subroutine unregisterMapped
+
+    ! Binding for the C routine freeMultiVecDevice.
+    subroutine freeMultiVecDevice(deviceVec) &
+         & bind(c,name='freeMultiVecDevice')
+      use iso_c_binding
+      type(c_ptr), value :: deviceVec
+    end subroutine freeMultiVecDevice
+
+    ! The three getters below take the device handle by value and return
+    ! a C int.
+    function getMultiVecDeviceSize(deviceVec) &
+         & bind(c,name='getMultiVecDeviceSize') result(res)
+      use iso_c_binding
+      type(c_ptr), value :: deviceVec
+      integer(c_int) :: res
+    end function getMultiVecDeviceSize
+
+    function getMultiVecDeviceCount(deviceVec) &
+         & bind(c,name='getMultiVecDeviceCount') result(res)
+      use iso_c_binding
+      type(c_ptr), value :: deviceVec
+      integer(c_int) :: res
+    end function getMultiVecDeviceCount
+
+    function getMultiVecDevicePitch(deviceVec) &
+         & bind(c,name='getMultiVecDevicePitch') result(res)
+      use iso_c_binding
+      type(c_ptr), value :: deviceVec
+      integer(c_int) :: res
+    end function getMultiVecDevicePitch
+
+  end interface
+
+end module psb_base_vectordev_mod
diff --git a/cuda/psb_c_cuda_csrg_mat_mod.F90 b/cuda/psb_c_cuda_csrg_mat_mod.F90
new file mode 100644
index 00000000..1fdeec4a
--- /dev/null
+++ b/cuda/psb_c_cuda_csrg_mat_mod.F90
@@ -0,0 +1,375 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+!
Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+module psb_c_cuda_csrg_mat_mod
+  !
+  ! Single-precision complex CSR matrix type with a device-side (cuSPARSE)
+  ! counterpart. The type extends the host CSR type with a device matrix
+  ! handle and a flag recording whether the host copy, the device copy,
+  ! or both hold the current data.
+  !
+  ! Routines named psb_c_cuda_* are only declared here through interface
+  ! blocks; routines named c_cuda_csrg_* are implemented in the CONTAINS
+  ! section below.
+  !
+
+  use iso_c_binding
+  use psb_c_mat_mod
+  use cusparse_mod
+
+  ! Values of devstate: where the up-to-date copy of the data lives.
+  integer(psb_ipk_), parameter, private :: is_host = -1
+  integer(psb_ipk_), parameter, private :: is_sync = 0
+  integer(psb_ipk_), parameter, private :: is_dev = 1
+
+  type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_csrg_sparse_mat
+    !
+    ! cuSPARSE 4.0 CSR format.
+    !
+    ! deviceMat : handle to the device-side matrix.
+    ! devstate  : one of is_host / is_sync / is_dev; a new object starts
+    !             as is_host.
+    !
+    type(c_Cmat) :: deviceMat
+    integer(psb_ipk_) :: devstate = is_host
+
+  contains
+    procedure, nopass :: get_fmt => c_cuda_csrg_get_fmt
+    procedure, pass(a) :: sizeof => c_cuda_csrg_sizeof
+    procedure, pass(a) :: vect_mv => psb_c_cuda_csrg_vect_mv
+    procedure, pass(a) :: in_vect_sv => psb_c_cuda_csrg_inner_vect_sv
+    procedure, pass(a) :: csmm => psb_c_cuda_csrg_csmm
+    procedure, pass(a) :: csmv => psb_c_cuda_csrg_csmv
+    procedure, pass(a) :: scals => psb_c_cuda_csrg_scals
+    procedure, pass(a) :: scalv => psb_c_cuda_csrg_scal
+    procedure, pass(a) :: reallocate_nz => psb_c_cuda_csrg_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_c_cuda_csrg_allocate_mnnz
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_csrg_from_coo
+    procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_csrg_from_fmt
+    procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_csrg_from_coo
+    procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_csrg_from_fmt
+    procedure, pass(a) :: free => c_cuda_csrg_free
+    procedure, pass(a) :: mold => psb_c_cuda_csrg_mold
+    procedure, pass(a) :: is_host => c_cuda_csrg_is_host
+    procedure, pass(a) :: is_dev => c_cuda_csrg_is_dev
+    procedure, pass(a) :: is_sync => c_cuda_csrg_is_sync
+    procedure, pass(a) :: set_host => c_cuda_csrg_set_host
+    procedure, pass(a) :: set_dev => c_cuda_csrg_set_dev
+    procedure, pass(a) :: set_sync => c_cuda_csrg_set_sync
+    procedure, pass(a) :: sync => c_cuda_csrg_sync
+    procedure, pass(a) :: to_gpu => psb_c_cuda_csrg_to_gpu
+    procedure, pass(a) :: from_gpu => psb_c_cuda_csrg_from_gpu
+    final :: c_cuda_csrg_finalize
+  end type psb_c_cuda_csrg_sparse_mat
+
+  ! NOTE(review): c_cuda_csrg_get_nzeros, c_cuda_csrg_get_size and
+  ! c_cuda_csrg_get_nz_row are listed here but are not defined anywhere
+  ! in this module -- confirm whether these names are stale.
+  private :: c_cuda_csrg_get_nzeros, c_cuda_csrg_free, c_cuda_csrg_get_fmt, &
+       & c_cuda_csrg_get_size, c_cuda_csrg_sizeof, c_cuda_csrg_get_nz_row
+
+
+  ! Bound as in_vect_sv; takes vector objects for x and y.
+  interface
+    subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(in) :: alpha, beta
+      class(psb_c_base_vect_type), intent(inout) :: x
+      class(psb_c_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_c_cuda_csrg_inner_vect_sv
+  end interface
+
+
+  ! Bound as vect_mv; takes vector objects for x and y.
+  interface
+    subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(in) :: alpha, beta
+      class(psb_c_base_vect_type), intent(inout) :: x
+      class(psb_c_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_c_cuda_csrg_vect_mv
+  end interface
+
+  ! Bound as reallocate_nz.
+  interface
+    subroutine psb_c_cuda_csrg_reallocate_nz(nz,a)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+    end subroutine psb_c_cuda_csrg_reallocate_nz
+  end interface
+
+  ! Bound as allocate_mnnz; nz is optional.
+  interface
+    subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_c_cuda_csrg_allocate_mnnz
+  end interface
+
+  ! Bound as mold; b is an allocatable polymorphic output.
+  interface
+    subroutine psb_c_cuda_csrg_mold(a,b,info)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
+      class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cuda_csrg_mold
+  end interface
+
+  ! Bound as to_gpu; called by sync when the object is in host state.
+  interface
+    subroutine psb_c_cuda_csrg_to_gpu(a,info, nzrm)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_ipk_), intent(in), optional :: nzrm
+    end subroutine psb_c_cuda_csrg_to_gpu
+  end interface
+
+  ! Bound as from_gpu; called by sync when the object is in device state.
+  interface
+    subroutine psb_c_cuda_csrg_from_gpu(a,info)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cuda_csrg_from_gpu
+  end interface
+
+  ! Bound as cp_from_coo; the source b is intent(in).
+  interface
+    subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+      class(psb_c_coo_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cuda_cp_csrg_from_coo
+  end interface
+
+  ! Bound as cp_from_fmt; the source b is intent(in).
+  interface
+    subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+      class(psb_c_base_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cuda_cp_csrg_from_fmt
+  end interface
+
+  ! Bound as mv_from_coo; the source b is intent(inout).
+  interface
+    subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+      class(psb_c_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cuda_mv_csrg_from_coo
+  end interface
+
+  ! Bound as mv_from_fmt; the source b is intent(inout).
+  interface
+    subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+      class(psb_c_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cuda_mv_csrg_from_fmt
+  end interface
+
+  ! Bound as csmv; x and y are rank-1 arrays.
+  interface
+    subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(in) :: alpha, beta, x(:)
+      complex(psb_spk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_c_cuda_csrg_csmv
+  end interface
+  ! Bound as csmm; x and y are rank-2 arrays.
+  interface
+    subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+      complex(psb_spk_), intent(inout) :: y(:,:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_c_cuda_csrg_csmm
+  end interface
+
+  ! Bound as scalv; d is an array and side is optional.
+  interface
+    subroutine psb_c_cuda_csrg_scal(d,a,info,side)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+      complex(psb_spk_), intent(in) :: d(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, intent(in), optional :: side
+    end subroutine psb_c_cuda_csrg_scal
+  end interface
+
+  ! Bound as scals; d is a scalar.
+  interface
+    subroutine psb_c_cuda_csrg_scals(d,a,info)
+      import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+      complex(psb_spk_), intent(in) :: d
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cuda_csrg_scals
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  ! Getters
+  !
+  ! == ===================================
+
+
+  ! Size estimate of the host-side storage: 8, plus 2*psb_sizeof_sp per
+  ! entry of val, plus psb_sizeof_ip per entry of irp and ja.
+  ! The object is synchronised first when it is in device state.
+  function c_cuda_csrg_sizeof(a) result(res)
+    implicit none
+    class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    if (a%is_dev()) call a%sync()
+    res = 8
+    res = res + (2*psb_sizeof_sp) * size(a%val)
+    res = res + psb_sizeof_ip * size(a%irp)
+    res = res + psb_sizeof_ip * size(a%ja)
+    ! Should we account for the shadow data structure
+    ! on the GPU device side?
+    ! res = 2*res
+
+  end function c_cuda_csrg_sizeof
+
+  ! Format tag of this storage class: 'CSRG'.
+  function c_cuda_csrg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'CSRG'
+  end function c_cuda_csrg_get_fmt
+
+
+
+  ! == ===================================
+  !
+  ! Data management
+  !
+  ! == ===================================
+
+
+  ! The three setters only update the devstate flag; no data is moved.
+  subroutine c_cuda_csrg_set_host(a)
+    implicit none
+    class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine c_cuda_csrg_set_host
+
+  subroutine c_cuda_csrg_set_dev(a)
+    implicit none
+    class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_dev
+  end subroutine c_cuda_csrg_set_dev
+
+  subroutine c_cuda_csrg_set_sync(a)
+    implicit none
+    class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_sync
+  end subroutine c_cuda_csrg_set_sync
+
+  ! The three queries compare devstate against the matching constant.
+  function c_cuda_csrg_is_dev(a) result(res)
+    implicit none
+    class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_dev)
+  end function c_cuda_csrg_is_dev
+
+  function c_cuda_csrg_is_host(a) result(res)
+    implicit none
+    class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_host)
+  end function c_cuda_csrg_is_host
+
+  function c_cuda_csrg_is_sync(a) result(res)
+    implicit none
+    class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_sync)
+  end function c_cuda_csrg_is_sync
+
+
+  ! Bring host and device copies in line: call to_gpu when in host state,
+  ! from_gpu when in device state, then mark the object as in sync.
+  ! a is intent(in); the pointer tmpa is used to call the intent(inout)
+  ! methods on it.
+  ! NOTE(review): info from to_gpu/from_gpu is discarded and the state is
+  ! set to sync regardless of the outcome.
+  subroutine c_cuda_csrg_sync(a)
+    implicit none
+    class(psb_c_cuda_csrg_sparse_mat), target, intent(in) :: a
+    class(psb_c_cuda_csrg_sparse_mat), pointer :: tmpa
+    integer(psb_ipk_) :: info
+
+    tmpa => a
+    if (tmpa%is_host()) then
+      call tmpa%to_gpu(info)
+    else if (tmpa%is_dev()) then
+      call tmpa%from_gpu(info)
+    end if
+    call tmpa%set_sync()
+    return
+
+  end subroutine c_cuda_csrg_sync
+
+  ! Release the device matrix, then the host storage through the parent
+  ! free. The CSRGDeviceFree return code is not checked.
+  subroutine c_cuda_csrg_free(a)
+    use cusparse_mod
+    implicit none
+    integer(psb_ipk_) :: info
+
+    class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    info = CSRGDeviceFree(a%deviceMat)
+    call a%psb_c_csr_sparse_mat%free()
+
+    return
+
+  end subroutine c_cuda_csrg_free
+
+  ! Finalizer: releases only the device matrix. The CSRGDeviceFree return
+  ! code is not checked.
+  subroutine c_cuda_csrg_finalize(a)
+    use cusparse_mod
+    implicit none
+    integer(psb_ipk_) :: info
+
+    type(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    info = CSRGDeviceFree(a%deviceMat)
+
+    return
+
+  end subroutine c_cuda_csrg_finalize
+
+end module psb_c_cuda_csrg_mat_mod
diff --git a/cuda/psb_c_cuda_diag_mat_mod.F90 b/cuda/psb_c_cuda_diag_mat_mod.F90
new file mode 100644
index 00000000..d3232965
--- /dev/null
+++ b/cuda/psb_c_cuda_diag_mat_mod.F90
@@ -0,0 +1,287 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_c_dia_mat_mod + + type, extends(psb_c_dia_sparse_mat) :: psb_c_cuda_diag_sparse_mat + ! + ! DIA format, extended (the parent type is psb_c_dia_sparse_mat). + ! We are adding here the routines to create a copy of the data + ! into the GPU. deviceMat is a C pointer, c_null_ptr by default; + ! NOTE(review): presumably the handle of that GPU copy - confirm. + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => c_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_c_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_c_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_c_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_c_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_c_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_diag_from_coo +! 
procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => c_cuda_diag_free + procedure, pass(a) :: mold => psb_c_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_diag_to_gpu + final :: c_cuda_diag_finalize + end type psb_c_cuda_diag_sparse_mat + + private :: c_cuda_diag_get_nzeros, c_cuda_diag_free, c_cuda_diag_get_fmt, & + & c_cuda_diag_get_size, c_cuda_diag_sizeof, c_cuda_diag_get_nz_row + + + interface + subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_diag_vect_mv + end interface + + interface + subroutine psb_c_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_diag_inner_vect_sv + end interface + + interface + subroutine psb_c_cuda_diag_reallocate_nz(nz,a) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_diag_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_diag_allocate_mnnz + end 
interface + + interface + subroutine psb_c_cuda_diag_mold(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_diag_mold + end interface + + interface + subroutine psb_c_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_diag_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_diag_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_diag_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_diag_from_coo + end interface + + + interface + subroutine psb_c_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine 
psb_c_cuda_mv_diag_from_fmt + end interface + + interface + subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_diag_csmv + end interface + interface + subroutine psb_c_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_diag_csmm + end interface + + interface + subroutine psb_c_cuda_diag_scal(d,a,info, side) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_diag_scal + end interface + + interface + subroutine psb_c_cuda_diag_scals(d,a,info) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_diag_sizeof(a) result(res) + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! 
Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_diag_sizeof + ! Return the format identifier 'DIAG' in a len=5 character result. + function c_cuda_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function c_cuda_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + ! Free: release deviceMat via freeDiagDevice if associated, null it, then call the parent free(). + subroutine c_cuda_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_dia_sparse_mat%free() + + return + + end subroutine c_cuda_diag_free + ! Finalizer: release deviceMat via freeDiagDevice if associated and null it; no parent free() here. + subroutine c_cuda_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_cuda_diag_finalize + +end module psb_c_cuda_diag_mat_mod diff --git a/cuda/psb_c_cuda_dnsg_mat_mod.F90 b/cuda/psb_c_cuda_dnsg_mat_mod.F90 new file mode 100644 index 00000000..97bacb7d --- /dev/null +++ b/cuda/psb_c_cuda_dnsg_mat_mod.F90 @@ -0,0 +1,273 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! 
not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_dnsg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_dns_mat_mod + use dnsdev_mod + + type, extends(psb_c_dns_sparse_mat) :: psb_c_cuda_dnsg_sparse_mat + ! + ! DNS format, extended (the parent type is psb_c_dns_sparse_mat). + ! We are adding here the routines to create a copy of the data + ! into the GPU. deviceMat is a C pointer, c_null_ptr by default; + ! NOTE(review): presumably the handle of that GPU copy - confirm. + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_c_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_c_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_c_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_c_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_c_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_c_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_c_cuda_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! 
+ procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => c_cuda_dnsg_free + procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_dnsg_to_gpu + final :: c_cuda_dnsg_finalize + end type psb_c_cuda_dnsg_sparse_mat + + private :: c_cuda_dnsg_get_nzeros, c_cuda_dnsg_free, c_cuda_dnsg_get_fmt, & + & c_cuda_dnsg_get_size, c_cuda_dnsg_get_nz_row + + + interface + subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) 
+!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_c_cuda_dnsg_mold(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_dnsg_mold + end interface + + interface + subroutine psb_c_cuda_dnsg_to_gpu(a,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_dnsg_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_dnsg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) + import :: 
psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_cuda_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cuda_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! 
+ ! + ! == =================================== + + + ! Return the format identifier 'DNSG' in a len=5 character result. + function c_cuda_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function c_cuda_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + ! Free: release deviceMat via freeDnsDevice if associated, null it, then call the parent free(). + subroutine c_cuda_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_dns_sparse_mat%free() + + return + + end subroutine c_cuda_dnsg_free + ! Finalizer: release deviceMat via freeDnsDevice if associated and null it; no parent free() here. + subroutine c_cuda_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_cuda_dnsg_finalize + +end module psb_c_cuda_dnsg_mat_mod diff --git a/cuda/psb_c_cuda_elg_mat_mod.F90 b/cuda/psb_c_cuda_elg_mat_mod.F90 new file mode 100644 index 00000000..c507a6c7 --- /dev/null +++ b/cuda/psb_c_cuda_elg_mat_mod.F90 @@ -0,0 +1,454 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_elg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_ell_mat_mod + use psb_i_cuda_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_ell_sparse_mat) :: psb_c_cuda_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! 
+ type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_elg_scals + procedure, pass(a) :: scalv => psb_c_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => c_cuda_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => c_cuda_elg_free + procedure, pass(a) :: mold => psb_c_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_c_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_c_cuda_elg_csput_v + procedure, pass(a) :: is_host => c_cuda_elg_is_host + procedure, pass(a) :: is_dev => c_cuda_elg_is_dev + procedure, pass(a) :: is_sync => c_cuda_elg_is_sync + procedure, pass(a) :: set_host => c_cuda_elg_set_host + procedure, pass(a) :: set_dev => c_cuda_elg_set_dev + procedure, pass(a) :: set_sync => c_cuda_elg_set_sync + procedure, pass(a) :: sync => c_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_c_cuda_elg_asb + final :: c_cuda_elg_finalize + end type psb_c_cuda_elg_sparse_mat + + private :: c_cuda_elg_get_nzeros, c_cuda_elg_free, c_cuda_elg_get_fmt, & + & 
c_cuda_elg_get_size, c_cuda_elg_sizeof, c_cuda_elg_get_nz_row, c_cuda_elg_sync + + + interface + subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_elg_vect_mv + end interface + + interface + subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_elg_inner_vect_sv + end interface + + interface + subroutine psb_c_cuda_elg_reallocate_nz(nz,a) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_elg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_elg_mold(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_mold + end interface + + interface + subroutine 
psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_csput_a + end interface + + interface + subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& + & psb_i_base_vect_type + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_csput_v + end interface + + interface + subroutine psb_c_cuda_elg_from_gpu(a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_from_gpu + end interface + + interface + subroutine psb_c_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_elg_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_elg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), 
intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_elg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_elg_from_coo + end interface + + + interface + subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_elg_from_fmt + end interface + + interface + subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_elg_csmv + end interface + interface + subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_elg_csmm + end interface + + interface + subroutine psb_c_cuda_elg_scal(d,a,info, side) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end 
subroutine psb_c_cuda_elg_scal + end interface + + interface + subroutine psb_c_cuda_elg_scals(d,a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_scals + end interface + + interface + subroutine psb_c_cuda_elg_asb(a) + import :: psb_c_cuda_elg_sparse_mat + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_elg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_elg_sizeof + + function c_cuda_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function c_cuda_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine c_cuda_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! 
do nothing here: fall through to the common exit below (no early + ! return, so the error action saved by psb_erractionsave is restored) + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + if (clear_) a%val(:,:) = czero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ! Common exit: undo the psb_erractionsave done on entry. + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine c_cuda_elg_reinit + + subroutine c_cuda_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine c_cuda_elg_free + + subroutine c_cuda_elg_sync(a) + implicit none + class(psb_c_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_cuda_elg_sync + + subroutine c_cuda_elg_set_host(a) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_cuda_elg_set_host + + subroutine c_cuda_elg_set_dev(a) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_cuda_elg_set_dev + + subroutine c_cuda_elg_set_sync(a) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_cuda_elg_set_sync + + function c_cuda_elg_is_dev(a) result(res) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_cuda_elg_is_dev + + function c_cuda_elg_is_host(a) result(res) + implicit none + 
class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_cuda_elg_is_host + + function c_cuda_elg_is_sync(a) result(res) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_cuda_elg_is_sync + + subroutine c_cuda_elg_finalize(a) + use elldev_mod + implicit none + type(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine c_cuda_elg_finalize + +end module psb_c_cuda_elg_mat_mod diff --git a/cuda/psb_c_cuda_hdiag_mat_mod.F90 b/cuda/psb_c_cuda_hdiag_mat_mod.F90 new file mode 100644 index 00000000..f06e501e --- /dev/null +++ b/cuda/psb_c_cuda_hdiag_mat_mod.F90 @@ -0,0 +1,268 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_hdiag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_c_hdia_mat_mod + + type, extends(psb_c_hdia_sparse_mat) :: psb_c_cuda_hdiag_sparse_mat + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_c_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_c_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_c_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_c_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_c_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hdiag_from_coo + ! 
procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => c_cuda_hdiag_free + procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hdiag_to_gpu + final :: c_cuda_hdiag_finalize + end type psb_c_cuda_hdiag_sparse_mat + + private :: c_cuda_hdiag_get_nzeros, c_cuda_hdiag_free, c_cuda_hdiag_get_fmt, & + & c_cuda_hdiag_get_size, c_cuda_hdiag_sizeof, c_cuda_hdiag_get_nz_row + + + interface + subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ 
integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_cuda_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_c_cuda_hdiag_mold(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hdiag_mold + end interface + + interface + subroutine psb_c_cuda_hdiag_to_gpu(a,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hdiag_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cuda_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), 
intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cuda_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_cuda_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cuda_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== =================================== + + function c_cuda_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function c_cuda_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + ! + ! Release the device-side matrix when deviceMat is associated, reset the + ! handle to c_null_ptr, then free the host-side storage through the parent + ! psb_c_hdia_sparse_mat type. The local info is declared but not used. + ! + subroutine c_cuda_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hdia_sparse_mat%free() + + return + + end subroutine c_cuda_hdiag_free + + ! + ! Finalizer (bound with FINAL in the type definition): performs the same + ! device release and parent free as c_cuda_hdiag_free. + ! + subroutine c_cuda_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hdia_sparse_mat%free() + + return + end subroutine c_cuda_hdiag_finalize + +end module psb_c_cuda_hdiag_mat_mod diff --git a/cuda/psb_c_cuda_hlg_mat_mod.F90 b/cuda/psb_c_cuda_hlg_mat_mod.F90 new file mode 100644 index 00000000..5b80d3f6 --- /dev/null +++ b/cuda/psb_c_cuda_hlg_mat_mod.F90 @@ -0,0 +1,377 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +!
software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_hlg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_hll_sparse_mat) :: psb_c_cuda_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hlg_allocate_mnnz + ! 
Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => c_cuda_hlg_free + procedure, pass(a) :: mold => psb_c_cuda_hlg_mold + procedure, pass(a) :: is_host => c_cuda_hlg_is_host + procedure, pass(a) :: is_dev => c_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => c_cuda_hlg_is_sync + procedure, pass(a) :: set_host => c_cuda_hlg_set_host + procedure, pass(a) :: set_dev => c_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => c_cuda_hlg_set_sync + procedure, pass(a) :: sync => c_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_hlg_to_gpu + final :: c_cuda_hlg_finalize + end type psb_c_cuda_hlg_sparse_mat + + private :: c_cuda_hlg_get_nzeros, c_cuda_hlg_free, c_cuda_hlg_get_fmt, & + & c_cuda_hlg_get_size, c_cuda_hlg_sizeof, c_cuda_hlg_get_nz_row + + + interface + subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hlg_vect_mv + end interface + + interface + subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), 
intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hlg_inner_vect_sv + end interface + + interface + subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hlg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_hlg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_hlg_mold(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hlg_mold + end interface + + interface + subroutine psb_c_cuda_hlg_from_gpu(a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hlg_from_gpu + end interface + + interface + subroutine psb_c_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_hlg_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hlg_from_coo + end interface + + interface + subroutine 
psb_c_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hlg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hlg_from_coo + end interface + + + interface + subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hlg_from_fmt + end interface + + interface + subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hlg_csmv + end interface + interface + subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hlg_csmm + end interface + + interface + subroutine psb_c_cuda_hlg_scal(d,a,info, side) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), 
intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_hlg_scal + end interface + + interface + subroutine psb_c_cuda_hlg_scals(d,a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_hlg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_hlg_sizeof + + function c_cuda_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function c_cuda_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! 
== =================================== + + ! + ! Release the device-side matrix when one is attached, then free the + ! host-side HLL storage through the parent type. + ! + subroutine c_cuda_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hll_sparse_mat%free() + ! Reset the host/device tracking state as c_cuda_elg_free does, so a + ! freed matrix does not keep a stale is_dev/is_host flag. + call a%set_sync() + + return + + end subroutine c_cuda_hlg_free + + + ! + ! Reconcile host and device copies according to devstate: a host-state + ! matrix is sent with to_gpu, a device-state one is fetched with from_gpu, + ! and the result is flagged as synchronized. The dummy is intent(in), so + ! the update goes through a local pointer alias; the info code returned + ! by the transfer routines is not checked here. + ! + subroutine c_cuda_hlg_sync(a) + implicit none + class(psb_c_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_cuda_hlg_sync + + subroutine c_cuda_hlg_set_host(a) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_cuda_hlg_set_host + + subroutine c_cuda_hlg_set_dev(a) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_cuda_hlg_set_dev + + subroutine c_cuda_hlg_set_sync(a) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_cuda_hlg_set_sync + + function c_cuda_hlg_is_dev(a) result(res) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_cuda_hlg_is_dev + + function c_cuda_hlg_is_host(a) result(res) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_cuda_hlg_is_host + + function c_cuda_hlg_is_sync(a) result(res) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_cuda_hlg_is_sync + + + subroutine c_cuda_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a +
+ if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_cuda_hlg_finalize + +end module psb_c_cuda_hlg_mat_mod diff --git a/cuda/psb_c_cuda_hybg_mat_mod.F90 b/cuda/psb_c_cuda_hybg_mat_mod.F90 new file mode 100644 index 00000000..89ec14a5 --- /dev/null +++ b/cuda/psb_c_cuda_hybg_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ +#if PSB_CUDA_SHORT_VERSION <= 10 + +module psb_c_cuda_hybg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! + type(c_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => c_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_c_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. 
+ procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => c_cuda_hybg_free + procedure, pass(a) :: mold => psb_c_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hybg_to_gpu + final :: c_cuda_hybg_finalize + end type psb_c_cuda_hybg_sparse_mat + + private :: c_cuda_hybg_get_nzeros, c_cuda_hybg_free, c_cuda_hybg_get_fmt, & + & c_cuda_hybg_get_size, c_cuda_hybg_sizeof, c_cuda_hybg_get_nz_row + + + interface + subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hybg_inner_vect_sv + end interface + + interface + subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hybg_vect_mv + end interface + + interface + subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hybg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) 
+ import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_hybg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_hybg_mold(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hybg_mold + end interface + + interface + subroutine psb_c_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_hybg_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hybg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hybg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hybg_from_coo + end interface + + interface + subroutine 
psb_c_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hybg_from_fmt + end interface + + interface + subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hybg_csmv + end interface + interface + subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hybg_csmm + end interface + + interface + subroutine psb_c_cuda_hybg_scal(d,a,info,side) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_hybg_scal + end interface + + interface + subroutine psb_c_cuda_hybg_scals(d,a,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== =================================== + + + function c_cuda_hybg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Host-side footprint only: a fixed 8 bytes plus the val, irp and ja arrays + ! (each complex entry of val counted as 2*psb_sizeof_sp). + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_hybg_sizeof + + ! + ! Format name of this storage class, 'HYBG', returned in a + ! character(len=5) result. + ! + function c_cuda_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function c_cuda_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + ! + ! Release the device-side matrix through HYBGDeviceFree, then free the + ! host-side storage through the parent psb_c_csr_sparse_mat type. The + ! status returned in info is not inspected. + ! + subroutine c_cuda_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_cuda_hybg_free + + ! + ! Finalizer (bound with FINAL in the type definition): releases only the + ! device-side matrix; the returned status is not inspected. + ! + subroutine c_cuda_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine c_cuda_hybg_finalize + +end module psb_c_cuda_hybg_mat_mod +#endif diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 new file mode 100644 index 00000000..95f6d602 --- /dev/null +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -0,0 +1,2088 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +!
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +module psb_c_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_c_vect_mod + use psb_cuda_env_mod + use psb_i_vect_mod + use psb_i_cuda_vect_mod + use psb_i_vectordev_mod + use psb_c_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_vect_type) :: psb_c_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + complex(c_float_complex), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + complex(c_float_complex), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => c_cuda_get_nrows + procedure, nopass :: get_fmt => c_cuda_get_fmt + + procedure, pass(x) :: all => c_cuda_all + procedure, pass(x) :: zero => c_cuda_zero + procedure, pass(x) :: asb_m => c_cuda_asb_m + procedure, pass(x) :: sync => c_cuda_sync + procedure, pass(x) :: sync_space => c_cuda_sync_space + procedure, pass(x) :: bld_x => c_cuda_bld_x + procedure, pass(x) :: bld_mn => c_cuda_bld_mn + procedure, pass(x) :: free => c_cuda_free + procedure, pass(x) :: ins_a => c_cuda_ins_a + procedure, pass(x) :: ins_v => c_cuda_ins_v + procedure, pass(x) :: is_host => c_cuda_is_host + procedure, pass(x) :: is_dev => c_cuda_is_dev + procedure, pass(x) :: is_sync => c_cuda_is_sync + procedure, pass(x) :: set_host => c_cuda_set_host + procedure, pass(x) :: set_dev => c_cuda_set_dev + procedure, pass(x) :: set_sync => c_cuda_set_sync + procedure, pass(x) :: set_scal => c_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => c_cuda_set_vect + procedure, pass(x) :: gthzv_x => c_cuda_gthzv_x + procedure, pass(y) :: sctb => c_cuda_sctb + procedure, pass(y) :: sctb_x => c_cuda_sctb_x + procedure, pass(x) :: gthzbuf => c_cuda_gthzbuf + 
procedure, pass(y) :: sctb_buf => c_cuda_sctb_buf + procedure, pass(x) :: new_buffer => c_cuda_new_buffer + procedure, nopass :: device_wait => c_cuda_device_wait + procedure, pass(x) :: free_buffer => c_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => c_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => c_cuda_dot_v + procedure, pass(x) :: dot_a => c_cuda_dot_a + procedure, pass(y) :: axpby_v => c_cuda_axpby_v + procedure, pass(y) :: axpby_a => c_cuda_axpby_a + procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz + procedure, pass(y) :: mlt_v => c_cuda_mlt_v + procedure, pass(y) :: mlt_a => c_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => c_cuda_mlt_v_2 + procedure, pass(x) :: scal => c_cuda_scal + procedure, pass(x) :: nrm2 => c_cuda_nrm2 + procedure, pass(x) :: amax => c_cuda_amax + procedure, pass(x) :: asum => c_cuda_asum + procedure, pass(x) :: absval1 => c_cuda_absval1 + procedure, pass(x) :: absval2 => c_cuda_absval2 + + final :: c_cuda_vect_finalize + end type psb_c_vect_cuda + + public :: psb_c_vect_cuda_ + private :: constructor + interface psb_c_vect_cuda_ + module procedure constructor + end interface psb_c_vect_cuda_ + +contains + + function constructor(x) result(this) + complex(psb_spk_) :: x(:) + type(psb_c_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine c_cuda_device_wait() + call psb_cudaSync() + end subroutine c_cuda_device_wait + + subroutine c_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! 
Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloatComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(x%dt_buf,y,n) + + endif + + class default + ! 
Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloatComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceFloatComplex(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(x%dt_buf,y,n) + + end select + + end subroutine c_cuda_gthzv_x + + subroutine c_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + class(psb_c_vect_cuda) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! 
Do not go for brute force, but move the index vector
+      ni = size(ii%v)
+      info = 0
+      if (.not.c_associated(x%i_buf)) then
+        info = allocateInt(x%i_buf,ni)
+        x%i_buf_sz=ni
+      end if
+      if (info == 0) &
+           & info = writeInt(i,x%i_buf,ii%v(i:),n,1)
+
+      if (info == 0) &
+           & info = igathMultiVecDeviceFloatComplex(x%deviceVect,&
+           & 0, n, i, x%i_buf, i,x%dt_buf, 1)
+
+      if (info == 0) &
+           & info = readFloatComplex(i,x%dt_buf,x%combuf(i:),n,1)
+
+    end select
+
+  end subroutine c_cuda_gthzbuf
+
+  !
+  ! Host-side scatter of X into Y through the host index array IDX.
+  !
+  ! Nothing is done for n == 0.  If Y is flagged as device-resident
+  ! it is synchronised first, then the base-class sctb does the work
+  ! on the host data and Y is flagged as host-resident.
+  !
+  subroutine c_cuda_sctb(n,idx,x,beta,y)
+    implicit none
+    !use psb_const_mod
+    integer(psb_mpk_) :: n
+    integer(psb_ipk_) :: idx(:)
+    complex(psb_spk_) :: beta, x(:)
+    class(psb_c_vect_cuda) :: y
+    integer(psb_ipk_) :: info
+
+    if (n == 0) return
+
+    if (y%is_dev()) call y%sync()
+
+    call y%psb_c_base_vect_type%sctb(n,idx,x,beta)
+    call y%set_host()
+
+  end subroutine c_cuda_sctb
+
+  !
+  ! Device-side scatter of the host array X into Y, using N index
+  ! entries of IDX starting at position I, with scaling factor BETA.
+  !
+  ! Three paths, selected by the dynamic type of IDX and by
+  ! psb_cuda_DeviceHasUVA():
+  !  - IDX is a CUDA index vector and the device has UVA: X is
+  !    copied into the registered pinned_buffer, which the scatter
+  !    routine reads through dt_p_buf;
+  !  - IDX is a CUDA index vector, no UVA: X is written into the
+  !    device staging buffer dt_buf;
+  !  - any other IDX: the index entries ii%v(i:i+n-1) are uploaded
+  !    to i_buf and X to dt_buf, and the scatter starts at index 1.
+  ! Staging buffers are grown on demand.  The routine ends with a
+  ! device synchronisation and flags Y as device-resident.
+  !
+  ! info is a local status only; it is not returned to the caller.
+  !
+  subroutine c_cuda_sctb_x(i,n,idx,x,beta,y)
+    use psb_cuda_env_mod
+    use psi_serial_mod
+    integer(psb_mpk_) :: n
+    integer(psb_ipk_) :: i
+    class(psb_i_base_vect_type) :: idx
+    complex(psb_spk_) :: beta, x(:)
+    class(psb_c_vect_cuda) :: y
+    integer :: info, ni
+
+    ! info is tested before the index upload in the class default
+    ! branch even when no buffer had to be (re)allocated, so it
+    ! must start from a defined value.
+    info = 0
+
+    select type(ii=> idx)
+    class is (psb_i_vect_cuda)
+      if (ii%is_host()) call ii%sync()
+      if (y%is_host())  call y%sync()
+
+      !
+      if (psb_cuda_DeviceHasUVA()) then
+        if (allocated(y%pinned_buffer)) then
+          if (size(y%pinned_buffer) < n) then
+            call inner_unregister(y%pinned_buffer)
+            deallocate(y%pinned_buffer, stat=info)
+          end if
+        end if
+
+        if (.not.allocated(y%pinned_buffer)) then
+          allocate(y%pinned_buffer(n),stat=info)
+          if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf)
+          if (info /= 0) &
+               & write(0,*) 'Error from inner_register ',info
+        endif
+        y%pinned_buffer(1:n) = x(1:n)
+        info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,&
+             & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta)
+      else
+
+        if (allocated(y%buffer)) then
+          if (size(y%buffer) < n) then
+            deallocate(y%buffer, stat=info)
+          end if
+        end if
+
+        if (.not.allocated(y%buffer)) then
+          allocate(y%buffer(n),stat=info)
+        end if
+
+        if (y%dt_buf_sz < n) then
+          if (c_associated(y%dt_buf)) then
+            call freeFloatComplex(y%dt_buf)
+            y%dt_buf = c_null_ptr
+          end if
+          info = allocateFloatComplex(y%dt_buf,n)
+          y%dt_buf_sz=n
+        end if
+        info = writeFloatComplex(y%dt_buf,x,n)
+        info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,&
+             & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta)
+
+      end if
+
+    class default
+      ni = size(ii%v)
+
+      if (y%i_buf_sz < ni) then
+        if (c_associated(y%i_buf)) then
+          call freeInt(y%i_buf)
+          y%i_buf = c_null_ptr
+        end if
+        info = allocateInt(y%i_buf,ni)
+        y%i_buf_sz=ni
+      end if
+      if (allocated(y%buffer)) then
+        if (size(y%buffer) < n) then
+          deallocate(y%buffer, stat=info)
+        end if
+      end if
+
+      if (.not.allocated(y%buffer)) then
+        allocate(y%buffer(n),stat=info)
+      end if
+
+      if (y%dt_buf_sz < n) then
+        if (c_associated(y%dt_buf)) then
+          call freeFloatComplex(y%dt_buf)
+          y%dt_buf = c_null_ptr
+        end if
+        info = allocateFloatComplex(y%dt_buf,n)
+        y%dt_buf_sz=n
+      end if
+
+      if (info == 0) &
+           & info = writeInt(y%i_buf,ii%v(i:i+n-1),n)
+      info = writeFloatComplex(y%dt_buf,x,n)
+      info = iscatMultiVecDeviceFloatComplex(y%deviceVect,&
+           & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta)
+
+
+    end select
+    !
+    !  Need a sync here to make sure we are not reallocating
+    !  the buffers before iscatMulti has finished.
+    !
+    call psb_cudaSync()
+    call y%set_dev()
+
+  end subroutine c_cuda_sctb_x
+
+  !
+  ! Device-side scatter into Y of N entries of the communication
+  ! buffer y%combuf starting at position I, with scaling factor BETA.
+  !
+  ! Pushes psb_err_alloc_dealloc_ and returns if combuf is not
+  ! allocated.  With a CUDA index vector and UVA the data are read
+  ! through dt_p_buf; otherwise combuf(i:) is first written into
+  ! dt_buf.  For any other index type the indices are uploaded to
+  ! i_buf (allocated here only when not yet associated).
+  ! NOTE(review): unlike c_cuda_sctb_x this routine does not call
+  ! y%set_dev() after the device scatter - confirm that the caller
+  ! is expected to do so.
+  !
+  subroutine c_cuda_sctb_buf(i,n,idx,beta,y)
+    use psi_serial_mod
+    use psb_cuda_env_mod
+    implicit none
+    integer(psb_mpk_) :: n
+    integer(psb_ipk_) :: i
+    class(psb_i_base_vect_type) :: idx
+    complex(psb_spk_) :: beta
+    class(psb_c_vect_cuda) :: y
+    integer(psb_ipk_) :: info, ni
+
+!!$    write(0,*) 'Starting sctb_buf'
+    if (.not.allocated(y%combuf)) then
+      call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf')
+      return
+    end if
+
+
+    select type(ii=> idx)
+    class is (psb_i_vect_cuda)
+
+      if (ii%is_host()) call ii%sync()
+      if (y%is_host())  call y%sync()
+      if (psb_cuda_DeviceHasUVA()) then
+        info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,&
+             & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta)
+      else
+        info = writeFloatComplex(i,y%dt_buf,y%combuf(i:),n,1)
+        info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,&
+             & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta)
+
+      end if
+
+    class default
+      !call y%sct(n,ii%v(i:),x,beta)
+      ni = size(ii%v)
+      info = 0
+      if (.not.c_associated(y%i_buf)) then
+        info = allocateInt(y%i_buf,ni)
+        y%i_buf_sz=ni
+      end if
+      if (info == 0) &
+           & info = writeInt(i,y%i_buf,ii%v(i:),n,1)
+      if (info == 0) &
+           & info = writeFloatComplex(i,y%dt_buf,y%combuf(i:),n,1)
+      if (info == 0) info = iscatMultiVecDeviceFloatComplex(y%deviceVect,&
+           & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta)
+    end select
+!!$    write(0,*) 'Done sctb_buf'
+
+  end subroutine c_cuda_sctb_buf
+
+
+  subroutine c_cuda_bld_x(x,this)
+    use psb_base_mod
+    complex(psb_spk_), intent(in)           :: this(:)
+    class(psb_c_vect_cuda), intent(inout) :: x
+    integer(psb_ipk_) :: info
+
+    call psb_realloc(size(this),x%v,info)
+    if (info /= 0) then
+      info=psb_err_alloc_request_
+      call psb_errpush(info,'c_cuda_bld_x',&
+           & i_err=(/size(this),izero,izero,izero,izero/))
+    end if
+    x%v(:)  = this(:)
+    call x%set_host()
+    call 
x%sync()
+
+  end subroutine c_cuda_bld_x
+
+  !
+  ! Build X as a vector of N entries by delegating to x%all.
+  ! On failure the status returned by x%all is pushed on the error
+  ! stack under this routine's own name.
+  !
+  subroutine c_cuda_bld_mn(x,n)
+    integer(psb_mpk_), intent(in) :: n
+    class(psb_c_vect_cuda), intent(inout) :: x
+    integer(psb_ipk_) :: info
+
+    call x%all(n,info)
+    if (info /= 0) then
+      ! Routine label corrected: this is bld_mn, not bld_n.
+      call psb_errpush(info,'c_cuda_bld_mn',i_err=(/n,n,n,n,n/))
+    end if
+
+  end subroutine c_cuda_bld_mn
+
+  !
+  ! State setters: record in x%state which copy of the data is
+  ! flagged as current (is_host, is_dev) or that both agree (is_sync).
+  ! They only change the flag; no data are moved.
+  !
+  subroutine c_cuda_set_host(x)
+    implicit none
+    class(psb_c_vect_cuda), intent(inout) :: x
+
+    x%state = is_host
+  end subroutine c_cuda_set_host
+
+  subroutine c_cuda_set_dev(x)
+    implicit none
+    class(psb_c_vect_cuda), intent(inout) :: x
+
+    x%state = is_dev
+  end subroutine c_cuda_set_dev
+
+  subroutine c_cuda_set_sync(x)
+    implicit none
+    class(psb_c_vect_cuda), intent(inout) :: x
+
+    x%state = is_sync
+  end subroutine c_cuda_set_sync
+
+  !
+  ! State queries: each returns .true. when x%state equals the
+  ! corresponding flag value.
+  !
+  function c_cuda_is_dev(x) result(res)
+    implicit none
+    class(psb_c_vect_cuda), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_dev)
+  end function c_cuda_is_dev
+
+  function c_cuda_is_host(x) result(res)
+    implicit none
+    class(psb_c_vect_cuda), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_host)
+  end function c_cuda_is_host
+
+  function c_cuda_is_sync(x) result(res)
+    implicit none
+    class(psb_c_vect_cuda), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_sync)
+  end function c_cuda_is_sync
+
+
+  !
+  ! Number of entries of the host array x%v, or 0 when it is not
+  ! allocated.
+  !
+  function c_cuda_get_nrows(x) result(res)
+    implicit none
+    class(psb_c_vect_cuda), intent(in) :: x
+    integer(psb_ipk_) :: res
+
+    res = 0
+    if (allocated(x%v)) res = size(x%v)
+  end function c_cuda_get_nrows
+
+  !
+  ! Format tag of this vector type ('cGPU', blank padded to the
+  ! 5-character result).
+  !
+  function c_cuda_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'cGPU'
+  end function c_cuda_get_fmt
+
+  subroutine c_cuda_all(n, x, info)
+    use psi_serial_mod
+    use psb_realloc_mod
+    implicit none
+    integer(psb_ipk_), intent(in)      :: n
+    class(psb_c_vect_cuda), intent(out)  :: x
+    integer(psb_ipk_), intent(out)     :: info
+
+    call psb_realloc(n,x%v,info)
+    if (info == 0) call x%set_host()
+    if (info == 0) call x%sync_space(info)
+    if (info /= 0) 
then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_cuda_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine c_cuda_all + + subroutine c_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(czero) + end subroutine c_cuda_zero + + subroutine c_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_c_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'c_cuda_sync') + end if + + end subroutine c_cuda_sync + + subroutine c_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine c_cuda_free + + subroutine c_cuda_set_scal(x,val,first,last) + class(psb_c_vect_cuda), intent(inout) :: x + complex(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + 
last_  = x%get_nrows()
+    if (present(first)) first_ = max(1,first)
+    if (present(last))  last_  = min(last,last_)
+
+    info = setScalDevice(val,first_,last_,1,x%deviceVect)
+    call x%set_dev()
+
+  end subroutine c_cuda_set_scal
+
+
+
+  !
+  ! Dot product of the first N entries of X with the vector Y.
+  !
+  ! When Y is exactly a psb_c_vect_cuda both operands are brought to
+  ! the device and dotMultiVecDevice computes the result; a non-zero
+  ! status is pushed as psb_err_internal_error_.  For any other type
+  ! of Y the host copy of X is refreshed and the work is delegated
+  ! to y%dot with the host array x%v.
+  !
+  function c_cuda_dot_v(n,x,y) result(res)
+    implicit none
+    class(psb_c_vect_cuda), intent(inout)       :: x
+    class(psb_c_base_vect_type), intent(inout) :: y
+    integer(psb_ipk_), intent(in)              :: n
+    complex(psb_spk_)                :: res
+    integer(psb_ipk_) :: info
+
+    res = czero
+    !
+    ! Note: this is the gpu implementation.
+    ! When we get here, we are sure that X is of
+    ! TYPE psb_c_vect_cuda
+    !
+    select type(yy => y)
+    type is (psb_c_vect_cuda)
+      if (x%is_host()) call x%sync()
+      if (yy%is_host()) call yy%sync()
+      info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect)
+      if (info /= 0) then
+        info = psb_err_internal_error_
+        call psb_errpush(info,'c_cuda_dot_v')
+      end if
+
+    class default
+      ! y%sync is done in dot_a
+      if (x%is_dev()) call x%sync()
+      res = y%dot(n,x%v)
+    end select
+
+  end function c_cuda_dot_v
+
+  !
+  ! Dot product of the first N entries of X with the host array Y,
+  ! computed on the host after refreshing the host copy of X.
+  !
+  ! BLAS provides no routine called CDOT for complex data (only
+  ! CDOTC and CDOTU), so the conjugated product CDOTC is used.
+  ! NOTE(review): the conjugate is applied to X, following the
+  ! x^H y convention of psb_gedot - confirm against the base-class
+  ! dot_a implementation.
+  !
+  function c_cuda_dot_a(n,x,y) result(res)
+    implicit none
+    class(psb_c_vect_cuda), intent(inout) :: x
+    complex(psb_spk_), intent(in)    :: y(:)
+    integer(psb_ipk_), intent(in)    :: n
+    complex(psb_spk_)                :: res
+    complex(psb_spk_), external      :: cdotc
+
+    if (x%is_dev()) call x%sync()
+    res = cdotc(n,x%v,1,y,1)
+
+  end function c_cuda_dot_a
+
+  subroutine c_cuda_axpby_v(m,alpha, x, beta, y, info)
+    use psi_serial_mod
+    implicit none
+    integer(psb_ipk_), intent(in)              :: m
+    class(psb_c_base_vect_type), intent(inout) :: x
+    class(psb_c_vect_cuda), intent(inout)       :: y
+    complex(psb_spk_), intent (in)       :: alpha, beta
+    integer(psb_ipk_), intent(out)             :: info
+    integer(psb_ipk_) :: nx, ny
+
+    info = psb_success_
+
+    select type(xx => x)
+    type is (psb_c_vect_cuda)
+      ! 
Do something different here + if ((beta /= czero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + class is (psb_c_vect_cuda) + select type(yy => y) + class is (psb_c_vect_cuda) + select type(zz => z) + class is (psb_c_vect_cuda) + ! Do something different here + if ((beta /= czero).and.yy%is_host())& + & call yy%sync() + if ((delta /= czero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) + class is (psb_c_vect_cuda) + select type(yy => y) + class is (psb_c_vect_cuda) + select type(zz => z) + class is (psb_c_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + type is (psb_c_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_c_vect_cuda) + ! 
Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,cone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine c_cuda_mlt_v + + subroutine c_cuda_mlt_a(x, y, info) + use psi_serial_mod + implicit none + complex(psb_spk_), intent(in) :: x(:) + class(psb_c_vect_cuda), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_c_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine c_cuda_mlt_a + + subroutine c_cuda_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + complex(psb_spk_), intent(in) :: alpha,beta + complex(psb_spk_), intent(in) :: x(:) + complex(psb_spk_), intent(in) :: y(:) + class(psb_c_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine c_cuda_mlt_a_2 + + subroutine c_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + complex(psb_spk_), intent(in) :: alpha,beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. 
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_c_vect_cuda) + select type (yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= czero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= czero).and.(z%is_dev())) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= czero).and.(z%is_dev())) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine c_cuda_mlt_v_2 + + subroutine c_cuda_scal(alpha, x) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + complex(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine c_cuda_scal + + + function c_cuda_nrm2(n,x) result(res) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. 
+    if (x%is_host()) call x%sync()
+    info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect)
+
+  end function c_cuda_nrm2
+
+  !
+  ! Reduction over the first N entries of X performed on the device
+  ! by amaxMultiVecDeviceComplex; X is synchronised first when it is
+  ! flagged as host-resident.  The device status is not reported.
+  !
+  function c_cuda_amax(n,x) result(res)
+    implicit none
+    class(psb_c_vect_cuda), intent(inout) :: x
+    integer(psb_ipk_), intent(in) :: n
+    real(psb_spk_)                :: res
+    integer(psb_ipk_) :: info
+
+    if (x%is_host()) call x%sync()
+    info = amaxMultiVecDeviceComplex(res,n,x%deviceVect)
+
+  end function c_cuda_amax
+
+  !
+  ! Reduction over the first N entries of X performed on the device
+  ! by asumMultiVecDeviceComplex; X is synchronised first when it is
+  ! flagged as host-resident.  The device status is not reported.
+  !
+  function c_cuda_asum(n,x) result(res)
+    implicit none
+    class(psb_c_vect_cuda), intent(inout) :: x
+    integer(psb_ipk_), intent(in) :: n
+    real(psb_spk_)                :: res
+    integer(psb_ipk_) :: info
+
+    if (x%is_host()) call x%sync()
+    info = asumMultiVecDeviceComplex(res,n,x%deviceVect)
+
+  end function c_cuda_asum
+
+  !
+  ! In-place absolute value of X, computed on the device over
+  ! get_nrows() entries.
+  !
+  ! The device copy is the one that gets modified, so X is flagged
+  ! as device-resident afterwards (as c_cuda_scal does); otherwise a
+  ! vector left in the synchronised state would later be read from
+  ! a stale host copy.
+  !
+  subroutine c_cuda_absval1(x)
+    implicit none
+    class(psb_c_vect_cuda), intent(inout) :: x
+    integer(psb_ipk_) :: n
+    integer(psb_ipk_) :: info
+
+    if (x%is_host()) call x%sync()
+    n=x%get_nrows()
+    info = absMultiVecDevice(n,cone,x%deviceVect)
+    call x%set_dev()
+
+  end subroutine c_cuda_absval1
+
+  !
+  ! Absolute value of X stored into Y, over the common length of
+  ! the two vectors.
+  !
+  ! When Y is a CUDA vector the operation runs on the device and Y
+  ! is then flagged as device-resident, since its device copy holds
+  ! the result.  For any other type of Y both operands are brought
+  ! to the host and the base-class absval is used.
+  !
+  subroutine c_cuda_absval2(x,y)
+    implicit none
+    class(psb_c_vect_cuda), intent(inout) :: x
+    class(psb_c_base_vect_type), intent(inout) :: y
+    integer(psb_ipk_) :: n
+    integer(psb_ipk_) :: info
+
+    n=min(x%get_nrows(),y%get_nrows())
+    select type (yy=> y)
+    class is (psb_c_vect_cuda)
+      if (x%is_host()) call x%sync()
+      if (yy%is_host()) call yy%sync()
+      info = absMultiVecDevice(n,cone,x%deviceVect,yy%deviceVect)
+      call yy%set_dev()
+    class default
+      if (x%is_dev()) call x%sync()
+      if (y%is_dev()) call y%sync()
+      call x%psb_c_base_vect_type%absval(y)
+    end select
+  end subroutine c_cuda_absval2
+
+
+  !
+  ! Cleanup for a psb_c_vect_cuda object: releases everything
+  ! through x%free.  The status returned by free is discarded.
+  !
+  subroutine c_cuda_vect_finalize(x)
+    use psi_serial_mod
+    use psb_realloc_mod
+    implicit none
+    type(psb_c_vect_cuda), intent(inout)  :: x
+    integer(psb_ipk_) :: info
+
+    info = 0
+    call x%free(info)
+  end subroutine c_cuda_vect_finalize
+
+  subroutine c_cuda_ins_v(n,irl,val,dupl,x,info)
+    use psi_serial_mod
+    implicit none
+    class(psb_c_vect_cuda), intent(inout)        :: x
+    
integer(psb_ipk_), intent(in)               :: n, dupl
+    class(psb_i_base_vect_type), intent(inout)  :: irl
+    class(psb_c_base_vect_type), intent(inout)  :: val
+    integer(psb_ipk_), intent(out)              :: info
+
+    integer(psb_ipk_) :: i, isz
+    logical :: done_cuda
+
+    info = 0
+    if (psb_errstatus_fatal()) return
+
+    ! Device path: taken only when both the index vector and the
+    ! value vector are CUDA vectors.  All three operands are brought
+    ! to the device, the insertion is done there and X is flagged as
+    ! device-resident.
+    done_cuda = .false.
+    select type(virl => irl)
+    class is (psb_i_vect_cuda)
+      select type(vval => val)
+      class is (psb_c_vect_cuda)
+        if (vval%is_host()) call vval%sync()
+        if (virl%is_host()) call virl%sync()
+        if (x%is_host())    call x%sync()
+        info = geinsMultiVecDeviceFloatComplex(n,virl%deviceVect,&
+             & vval%deviceVect,dupl,1,x%deviceVect)
+        call x%set_dev()
+        done_cuda=.true.
+      end select
+    end select
+
+    ! Fallback: refresh the host copies of the operands and insert
+    ! through the array-based x%ins.
+    if (.not.done_cuda) then
+      if (irl%is_dev()) call irl%sync()
+      if (val%is_dev()) call val%sync()
+      call x%ins(n,irl%v,val%v,dupl,info)
+    end if
+
+    if (info /= 0) then
+      call psb_errpush(info,'cuda_vect_ins')
+      return
+    end if
+
+  end subroutine c_cuda_ins_v
+
+  !
+  ! Insert N values VAL at the indices IRL into X, working on the
+  ! host: X is synchronised first when it is flagged as
+  ! device-resident, the base-class ins does the insertion (DUPL is
+  ! passed through unchanged) and X is flagged as host-resident.
+  ! info is set by the base-class call.
+  !
+  subroutine c_cuda_ins_a(n,irl,val,dupl,x,info)
+    use psi_serial_mod
+    implicit none
+    class(psb_c_vect_cuda), intent(inout) :: x
+    integer(psb_ipk_), intent(in)        :: n, dupl
+    integer(psb_ipk_), intent(in)        :: irl(:)
+    complex(psb_spk_), intent(in)       :: val(:)
+    integer(psb_ipk_), intent(out)       :: info
+
+    integer(psb_ipk_) :: i
+
+    info = 0
+    if (x%is_dev()) call x%sync()
+    call x%psb_c_base_vect_type%ins(n,irl,val,dupl,info)
+    call x%set_host()
+
+  end subroutine c_cuda_ins_a
+
+end module psb_c_cuda_vect_mod
+
+
+!
+! Multivectors
+! 
+ + + +module psb_c_cuda_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_c_multivect_mod + use psb_c_base_multivect_mod + use psb_cuda_env_mod + use psb_i_multivect_mod + use psb_i_cuda_multivect_mod + use psb_c_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_multivect_type) :: psb_c_multivect_cuda + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => c_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => c_cuda_multi_get_ncols + procedure, nopass :: get_fmt => c_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => c_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => c_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => c_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => c_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => c_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => c_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => c_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => c_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => c_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => c_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => c_cuda_multi_amax +!!$ procedure, pass(x) :: asum => c_cuda_multi_asum + procedure, pass(x) :: all => c_cuda_multi_all + procedure, pass(x) :: zero => c_cuda_multi_zero + procedure, pass(x) :: asb => c_cuda_multi_asb + procedure, pass(x) :: sync => c_cuda_multi_sync + procedure, pass(x) :: sync_space => c_cuda_multi_sync_space + procedure, pass(x) :: bld_x => c_cuda_multi_bld_x + procedure, pass(x) :: bld_n => c_cuda_multi_bld_n + procedure, pass(x) :: free => c_cuda_multi_free + procedure, pass(x) :: ins => c_cuda_multi_ins + 
procedure, pass(x) :: is_host => c_cuda_multi_is_host + procedure, pass(x) :: is_dev => c_cuda_multi_is_dev + procedure, pass(x) :: is_sync => c_cuda_multi_is_sync + procedure, pass(x) :: set_host => c_cuda_multi_set_host + procedure, pass(x) :: set_dev => c_cuda_multi_set_dev + procedure, pass(x) :: set_sync => c_cuda_multi_set_sync + procedure, pass(x) :: set_scal => c_cuda_multi_set_scal + procedure, pass(x) :: set_vect => c_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => c_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => c_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => c_cuda_multi_sctb_x + final :: c_cuda_multi_vect_finalize + end type psb_c_multivect_cuda + + public :: psb_c_multivect_cuda + private :: mconstructor + interface psb_c_multivect_cuda + module procedure mconstructor + end interface + +contains + + function mconstructor(x) result(this) + complex(psb_spk_) :: x(:,:) + type(psb_c_multivect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function mconstructor + + +!!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_spk_) :: y(:) +!!$ class(psb_c_multivect_cuda) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end 
! Build x from the host array this: allocate x%v with the same shape,
! copy the entries, mark the host copy as current and synchronise.
!
!   x     (inout) multivector being built.
!   this  (in)    rank-2 array whose contents are copied into x%v.
!
! On allocation failure psb_err_alloc_request_ is pushed on the error
! stack and the routine returns at once; it must not fall through to the
! copy, which would reference storage that was never allocated.
subroutine c_cuda_multi_bld_x(x,this)
  use psb_base_mod
  implicit none
  complex(psb_spk_), intent(in)              :: this(:,:)
  class(psb_c_multivect_cuda), intent(inout) :: x
  integer(psb_ipk_) :: info, m, n

  m = size(this,dim=1,kind=psb_ipk_)
  n = size(this,dim=2,kind=psb_ipk_)
  x%m_nrows = m
  x%m_ncols = n
  call psb_realloc(m,n,x%v,info)
  if (info /= 0) then
    info = psb_err_alloc_request_
    ! m and n are psb_ipk_, like izero, so every element of the array
    ! constructor has the same kind.
    call psb_errpush(info,'c_cuda_multi_bld_x',&
         & i_err=(/m,n,izero,izero,izero,izero/))
    return
  end if
  x%v(1:m,1:n) = this(1:m,1:n)
  call x%set_host()
  call x%sync()

end subroutine c_cuda_multi_bld_x

! Build x as an m-by-n multivector by delegating to x%all; a non-zero
! status from all is pushed on the error stack.
subroutine c_cuda_multi_bld_n(x,m,n)
  implicit none
  integer(psb_ipk_), intent(in)              :: m,n
  class(psb_c_multivect_cuda), intent(inout) :: x
  integer(psb_ipk_) :: info

  call x%all(m,n,info)
  if (info /= 0) then
    call psb_errpush(info,'c_cuda_multi_bld_n',i_err=(/m,n,n,n,n/))
  end if

end subroutine c_cuda_multi_bld_n


! State setters: record x%state as is_host, is_dev or is_sync.
! They only update the flag; no data is moved.
subroutine c_cuda_multi_set_host(x)
  implicit none
  class(psb_c_multivect_cuda), intent(inout) :: x

  x%state = is_host
end subroutine c_cuda_multi_set_host

subroutine c_cuda_multi_set_dev(x)
  implicit none
  class(psb_c_multivect_cuda), intent(inout) :: x

  x%state = is_dev
end subroutine c_cuda_multi_set_dev

subroutine c_cuda_multi_set_sync(x)
  implicit none
  class(psb_c_multivect_cuda), intent(inout) :: x

  x%state = is_sync
end subroutine c_cuda_multi_set_sync

! State queries: .true. when x%state equals the corresponding constant.
function c_cuda_multi_is_dev(x) result(res)
  implicit none
  class(psb_c_multivect_cuda), intent(in) :: x
  logical  :: res

  res = (x%state == is_dev)
end function c_cuda_multi_is_dev

function c_cuda_multi_is_host(x) result(res)
  implicit none
  class(psb_c_multivect_cuda), intent(in) :: x
  logical  :: res

  res = (x%state == is_host)
end function c_cuda_multi_is_host

function c_cuda_multi_is_sync(x) result(res)
  implicit none
  class(psb_c_multivect_cuda), intent(in) :: x
  logical  :: res

  res = (x%state == is_sync)
end function c_cuda_multi_is_sync


! Return the stored row count x%m_nrows.
function c_cuda_multi_get_nrows(x) result(res)
  implicit none
  class(psb_c_multivect_cuda), intent(in) :: x
  integer(psb_ipk_) :: res

  res = x%m_nrows

end function c_cuda_multi_get_nrows

! Return the stored column count x%m_ncols.
function c_cuda_multi_get_ncols(x) result(res)
  implicit none
  class(psb_c_multivect_cuda), intent(in) :: x
  integer(psb_ipk_) :: res

  res = x%m_ncols

end function c_cuda_multi_get_ncols

! Format tag of this storage class: 'cGPU', blank padded to 5 characters.
function c_cuda_multi_get_fmt() result(res)
  implicit none
  character(len=5) :: res
  res = 'cGPU'
end function c_cuda_multi_get_fmt
+ +!!$ function c_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_cuda), intent(inout) :: x +!!$ class(psb_c_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_spk_) :: res +!!$ complex(psb_spk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = czero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_c_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_c_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_c_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'c_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function c_cuda_multi_dot_v +!!$ +!!$ function c_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_cuda), intent(inout) :: x +!!$ complex(psb_spk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_spk_) :: res +!!$ complex(psb_spk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function c_cuda_multi_dot_a +!!$ +!!$ subroutine c_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_c_base_multivect_type), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: y +!!$ complex(psb_spk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_c_base_multivect_type) +!!$ if ((beta /= czero).and.(y%is_dev()))& 
+!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_c_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= czero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_c_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_c_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine c_cuda_multi_mlt_v +!!$ +!!$ subroutine c_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: x(:) +!!$ class(psb_c_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_c_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine c_cuda_multi_mlt_a +!!$ +!!$ subroutine c_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: alpha,beta +!!$ complex(psb_spk_), intent(in) :: x(:) +!!$ complex(psb_spk_), intent(in) :: y(:) +!!$ class(psb_c_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine c_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine c_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use 
psb_string_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: alpha,beta +!!$ class(psb_c_base_multivect_type), intent(inout) :: x +!!$ class(psb_c_base_multivect_type), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_c_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_c_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. 
! Set every entry of x to the scalar val.
!
! The update is done by the parent type on the host copy: if the current
! data is on the device it is synchronised first, and afterwards the
! host copy is flagged as the current one.
subroutine c_cuda_multi_set_scal(x,val)
  implicit none
  class(psb_c_multivect_cuda), intent(inout) :: x
  complex(psb_spk_), intent(in)              :: val

  if (x%is_dev()) call x%sync()
  call x%psb_c_base_multivect_type%set_scal(val)
  call x%set_host()
end subroutine c_cuda_multi_set_scal

! Set the entries of x from the rank-2 array val.
!
! Same host-side protocol as c_cuda_multi_set_scal: synchronise if the
! device holds the current data, let the parent type do the assignment,
! then flag the host copy as current.
subroutine c_cuda_multi_set_vect(x,val)
  implicit none
  class(psb_c_multivect_cuda), intent(inout) :: x
  complex(psb_spk_), intent(in)              :: val(:,:)

  if (x%is_dev()) call x%sync()
  call x%psb_c_base_multivect_type%set_vect(val)
  call x%set_host()

end subroutine c_cuda_multi_set_vect
! Allocate an m-by-n multivector.
!
!   m, n  (in)  requested number of rows and columns.
!   x     (out) multivector whose host array x%v is (re)allocated,
!               padded with czero.
!   info  (out) 0 on success; psb_err_alloc_request_ if either the host
!               reallocation or the later sync_space call reports an
!               error (the error is also pushed on the error stack).
subroutine c_cuda_multi_all(m,n, x, info)
  use psi_serial_mod
  use psb_realloc_mod
  implicit none
  integer(psb_ipk_), intent(in)            :: m,n
  class(psb_c_multivect_cuda), intent(out) :: x
  integer(psb_ipk_), intent(out)           :: info

  call psb_realloc(m,n,x%v,info,pad=czero)
  ! The dimensions are recorded whether or not the allocation succeeded.
  x%m_nrows = m
  x%m_ncols = n

  ! Only a successful host allocation is flagged and followed by
  ! sync_space.
  if (info == 0) then
    call x%set_host()
    call x%sync_space(info)
  end if

  if (info /= 0) then
    info = psb_err_alloc_request_
    call psb_errpush(info,'c_cuda_multi_all',&
         & i_err=(/m,n,n,n,n/))
  end if
end subroutine c_cuda_multi_all

! Zero the host array of x (when it is allocated) and flag the host copy
! as the current one.
subroutine c_cuda_multi_zero(x)
  use psi_serial_mod
  implicit none
  class(psb_c_multivect_cuda), intent(inout) :: x

  if (allocated(x%v)) then
    x%v = czero
  end if
  call x%set_host()
end subroutine c_cuda_multi_zero
! Wait for the device: thin wrapper around psb_cudaSync.
subroutine psb_cuda_DeviceSync()
  call psb_cudaSync()
end subroutine psb_cuda_DeviceSync

! Number of devices as reported by psb_cuda_inner_getDeviceCount.
function psb_cuda_getDeviceCount() result(res)
  integer :: res
  res = psb_cuda_inner_getDeviceCount()
end function psb_cuda_getDeviceCount

! Shut the CUDA environment down: close the cuSPARSE wrapper, then call
! the inner close and reset routines, in that order.
! NOTE(review): the status returned by closeFcusparse is not reported;
! this routine has no argument through which it could be.
subroutine psb_cuda_exit()
  integer :: res
  res = closeFcusparse()
  call psb_cuda_innerClose()
  call psb_cuda_innerReset()
end subroutine psb_cuda_exit

! .true. when psb_C_DeviceHasUVA returns 1.
function psb_cuda_DeviceHasUVA() result(res)
  logical :: res
  res = (psb_C_DeviceHasUVA() == 1)
end function psb_cuda_DeviceHasUVA

! The following queries forward the value returned by the corresponding
! psb_C_get_* routine without further processing.
function psb_cuda_MultiProcessors() result(res)
  integer(psb_ipk_) :: res
  res = psb_C_get_MultiProcessors()
end function psb_cuda_MultiProcessors

function psb_cuda_MaxRegistersPerBlock() result(res)
  integer(psb_ipk_) :: res
  res = psb_C_get_MaxRegistersPerBlock()
end function psb_cuda_MaxRegistersPerBlock

function psb_cuda_MaxThreadsPerMP() result(res)
  integer(psb_ipk_) :: res
  res = psb_C_get_MaxThreadsPerMP()
end function psb_cuda_MaxThreadsPerMP

function psb_cuda_WarpSize() result(res)
  integer(psb_ipk_) :: res
  res = psb_C_get_WarpSize()
end function psb_cuda_WarpSize

function psb_cuda_MemoryClockRate() result(res)
  integer(psb_ipk_) :: res
  res = psb_C_get_MemoryClockRate()
end function psb_cuda_MemoryClockRate

function psb_cuda_MemoryBusWidth() result(res)
  integer(psb_ipk_) :: res
  res = psb_C_get_MemoryBusWidth()
end function psb_cuda_MemoryBusWidth

! Peak memory bandwidth estimate.
function psb_cuda_MemoryPeakBandwidth() result(res)
  real(psb_dpk_) :: res
  ! Formula here: 2*ClockRate(KHz)*BusWidth(bit)
  ! normalization: bit/byte, KHz/MHz
  ! output: MBytes/s
  res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate()
end function psb_cuda_MemoryPeakBandwidth

! Device name as a blank-padded Fortran string of length 256.
function psb_cuda_DeviceName() result(res)
  character(len=256) :: res
  ! Declared with kind c_char so the buffer matches the dummy argument
  ! of stringc2f.
  character(c_char)  :: cstring(256)
  call psb_C_cpy_NameString(cstring)
  call stringc2f(cstring,res)
end function psb_cuda_DeviceName


! Copy a NUL-terminated C character array into a Fortran string.
!
!   cstring  (in)  C characters; copying stops at the first c_null_char.
!   fstring  (out) receives the characters, blank padded on the right.
!
! At most len(fstring) elements of cstring are examined.  The bound is
! tested before cstring(i) is referenced, so an array holding exactly
! len(fstring) characters and no terminator is never read past its end.
subroutine stringc2f(cstring,fstring)
  character(c_char), intent(in) :: cstring(*)
  character(len=*), intent(out) :: fstring
  integer :: i, n

  n = len(fstring)
  i = 1
  do while (i <= n)
    if (cstring(i) == c_null_char) exit
    fstring(i:i) = cstring(i)
    i = i + 1
  end do
  ! Blank-fill whatever was not written (nothing when i == n+1).
  if (i <= n) fstring(i:n) = ' '

end subroutine stringc2f
!
! psb_cuda_mod: umbrella module of the CUDA plugin.  It declares nothing
! of its own; it only re-exports the modules listed below so that client
! code needs a single USE statement.
!
module psb_cuda_mod

  ! Constants and the CUDA environment routines
  use psb_const_mod
  use psb_cuda_env_mod

  ! Vectors
  use psb_i_cuda_vect_mod
  use psb_s_cuda_vect_mod
  use psb_d_cuda_vect_mod
  use psb_c_cuda_vect_mod
  use psb_z_cuda_vect_mod

  ! Multivectors
  use psb_i_cuda_multivect_mod
  use psb_s_cuda_multivect_mod
  use psb_d_cuda_multivect_mod
  use psb_c_cuda_multivect_mod
  use psb_z_cuda_multivect_mod

  ! ELL host formats and their ELG device counterparts
  use psb_s_ell_mat_mod
  use psb_d_ell_mat_mod
  use psb_c_ell_mat_mod
  use psb_z_ell_mat_mod
  use psb_s_cuda_elg_mat_mod
  use psb_d_cuda_elg_mat_mod
  use psb_c_cuda_elg_mat_mod
  use psb_z_cuda_elg_mat_mod

  ! HLL host formats and their HLG device counterparts
  use psb_s_hll_mat_mod
  use psb_d_hll_mat_mod
  use psb_c_hll_mat_mod
  use psb_z_hll_mat_mod
  use psb_s_cuda_hlg_mat_mod
  use psb_d_cuda_hlg_mat_mod
  use psb_c_cuda_hlg_mat_mod
  use psb_z_cuda_hlg_mat_mod

  ! CSRG
  use psb_s_cuda_csrg_mat_mod
  use psb_d_cuda_csrg_mat_mod
  use psb_c_cuda_csrg_mat_mod
  use psb_z_cuda_csrg_mat_mod

  ! HYBG, only when PSB_CUDA_SHORT_VERSION is at most 10
#if PSB_CUDA_SHORT_VERSION <= 10
  use psb_s_cuda_hybg_mat_mod
  use psb_d_cuda_hybg_mat_mod
  use psb_c_cuda_hybg_mat_mod
  use psb_z_cuda_hybg_mat_mod
#endif

  ! DIAG / HDIAG
  ! (psb_s_cuda_diag_mat_mod is not imported: its USE line is commented
  !  out in the original list.)
  use psb_d_cuda_diag_mat_mod
  use psb_s_cuda_hdiag_mat_mod
  use psb_d_cuda_hdiag_mat_mod

  ! DNSG
  use psb_s_cuda_dnsg_mat_mod
  use psb_d_cuda_dnsg_mat_mod
  use psb_c_cuda_dnsg_mat_mod
  use psb_z_cuda_dnsg_mat_mod

end module psb_cuda_mod
!
! psb_d_cuda_csrg_mat_mod: double precision CSR sparse matrix with a
! device-side copy (deviceMat, of type d_Cmat from cusparse_mod).
!
! The type extends psb_d_csr_sparse_mat and adds a state flag, devstate,
! recording which copy is current: is_host, is_dev or is_sync.  The
! computational and conversion methods are only declared here through
! interfaces; their bodies live elsewhere.  This module implements the
! state handling, sizeof, get_fmt, free and the finalizer.
!
module psb_d_cuda_csrg_mat_mod

  use iso_c_binding
  use psb_d_mat_mod
  use cusparse_mod

  ! Values taken by devstate.
  integer(psb_ipk_), parameter, private :: is_host = -1
  integer(psb_ipk_), parameter, private :: is_sync =  0
  integer(psb_ipk_), parameter, private :: is_dev  =  1

  type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_csrg_sparse_mat
    !
    ! cuSPARSE 4.0 CSR format.
    !
    type(d_Cmat)      :: deviceMat
    integer(psb_ipk_) :: devstate = is_host

  contains
    procedure, nopass  :: get_fmt       => d_cuda_csrg_get_fmt
    procedure, pass(a) :: sizeof        => d_cuda_csrg_sizeof
    procedure, pass(a) :: vect_mv       => psb_d_cuda_csrg_vect_mv
    procedure, pass(a) :: in_vect_sv    => psb_d_cuda_csrg_inner_vect_sv
    procedure, pass(a) :: csmm          => psb_d_cuda_csrg_csmm
    procedure, pass(a) :: csmv          => psb_d_cuda_csrg_csmv
    procedure, pass(a) :: scals         => psb_d_cuda_csrg_scals
    procedure, pass(a) :: scalv         => psb_d_cuda_csrg_scal
    procedure, pass(a) :: reallocate_nz => psb_d_cuda_csrg_reallocate_nz
    procedure, pass(a) :: allocate_mnnz => psb_d_cuda_csrg_allocate_mnnz
    ! Note: we do *not* need the TO methods, because the parent type
    ! methods will work.
    procedure, pass(a) :: cp_from_coo   => psb_d_cuda_cp_csrg_from_coo
    procedure, pass(a) :: cp_from_fmt   => psb_d_cuda_cp_csrg_from_fmt
    procedure, pass(a) :: mv_from_coo   => psb_d_cuda_mv_csrg_from_coo
    procedure, pass(a) :: mv_from_fmt   => psb_d_cuda_mv_csrg_from_fmt
    procedure, pass(a) :: free          => d_cuda_csrg_free
    procedure, pass(a) :: mold          => psb_d_cuda_csrg_mold
    procedure, pass(a) :: is_host       => d_cuda_csrg_is_host
    procedure, pass(a) :: is_dev        => d_cuda_csrg_is_dev
    procedure, pass(a) :: is_sync       => d_cuda_csrg_is_sync
    procedure, pass(a) :: set_host      => d_cuda_csrg_set_host
    procedure, pass(a) :: set_dev       => d_cuda_csrg_set_dev
    procedure, pass(a) :: set_sync      => d_cuda_csrg_set_sync
    procedure, pass(a) :: sync          => d_cuda_csrg_sync
    procedure, pass(a) :: to_gpu        => psb_d_cuda_csrg_to_gpu
    procedure, pass(a) :: from_gpu      => psb_d_cuda_csrg_from_gpu
    final              :: d_cuda_csrg_finalize
  end type psb_d_cuda_csrg_sparse_mat

  ! Only procedures that actually exist in this module are listed; they
  ! stay reachable through the type-bound names above.
  private :: d_cuda_csrg_free, d_cuda_csrg_get_fmt, d_cuda_csrg_sizeof


  ! ----------------------------------------------------------------
  ! Interfaces of the methods implemented outside this module.
  ! ----------------------------------------------------------------

  interface
    subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
      import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
      real(psb_dpk_), intent(in)       :: alpha, beta
      class(psb_d_base_vect_type), intent(inout) :: x
      class(psb_d_base_vect_type), intent(inout) :: y
      integer(psb_ipk_), intent(out)             :: info
      character, optional, intent(in)  :: trans
    end subroutine psb_d_cuda_csrg_inner_vect_sv
  end interface

  interface
    subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans)
      import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
      real(psb_dpk_), intent(in)       :: alpha, beta
      class(psb_d_base_vect_type), intent(inout) :: x
      class(psb_d_base_vect_type), intent(inout) :: y
      integer(psb_ipk_), intent(out)             :: info
      character, optional, intent(in)  :: trans
    end subroutine psb_d_cuda_csrg_vect_mv
  end interface

  interface
    subroutine psb_d_cuda_csrg_reallocate_nz(nz,a)
      import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in) :: nz
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
    end subroutine psb_d_cuda_csrg_reallocate_nz
  end interface

  interface
    subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz)
      import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in) :: m,n
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(in), optional :: nz
    end subroutine psb_d_cuda_csrg_allocate_mnnz
  end interface

  interface
    subroutine psb_d_cuda_csrg_mold(a,b,info)
      import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(in)            :: a
      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
      integer(psb_ipk_), intent(out)                           :: info
    end subroutine psb_d_cuda_csrg_mold
  end interface

  interface
    subroutine psb_d_cuda_csrg_to_gpu(a,info, nzrm)
      import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                   :: info
      integer(psb_ipk_), intent(in), optional          :: nzrm
    end subroutine psb_d_cuda_csrg_to_gpu
  end interface

  interface
    subroutine psb_d_cuda_csrg_from_gpu(a,info)
      import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_d_cuda_csrg_from_gpu
  end interface

  interface
    subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info)
      import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
      class(psb_d_coo_sparse_mat), intent(in)          :: b
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_d_cuda_cp_csrg_from_coo
  end interface

  interface
    subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info)
      import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
      class(psb_d_base_sparse_mat), intent(in)         :: b
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_d_cuda_cp_csrg_from_fmt
  end interface

  interface
    subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info)
      import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
      class(psb_d_coo_sparse_mat), intent(inout)       :: b
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_d_cuda_mv_csrg_from_coo
  end interface

  interface
    subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info)
      import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
      class(psb_d_base_sparse_mat), intent(inout)      :: b
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_d_cuda_mv_csrg_from_fmt
  end interface

  interface
    subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans)
      import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
      real(psb_dpk_), intent(in)          :: alpha, beta, x(:)
      real(psb_dpk_), intent(inout)       :: y(:)
      integer(psb_ipk_), intent(out)      :: info
      character, optional, intent(in)     :: trans
    end subroutine psb_d_cuda_csrg_csmv
  end interface

  interface
    subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans)
      import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
      real(psb_dpk_), intent(in)          :: alpha, beta, x(:,:)
      real(psb_dpk_), intent(inout)       :: y(:,:)
      integer(psb_ipk_), intent(out)      :: info
      character, optional, intent(in)     :: trans
    end subroutine psb_d_cuda_csrg_csmm
  end interface

  interface
    subroutine psb_d_cuda_csrg_scal(d,a,info,side)
      import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
      real(psb_dpk_), intent(in)          :: d(:)
      integer(psb_ipk_), intent(out)      :: info
      character, intent(in), optional     :: side
    end subroutine psb_d_cuda_csrg_scal
  end interface

  interface
    subroutine psb_d_cuda_csrg_scals(d,a,info)
      import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
      real(psb_dpk_), intent(in)          :: d
      integer(psb_ipk_), intent(out)      :: info
    end subroutine psb_d_cuda_csrg_scals
  end interface


contains

  ! == ===================================
  !
  ! Getters
  !
  ! == ===================================

  ! Size estimate in bytes of the host-side storage: a fixed 8 plus the
  ! val, irp and ja arrays.  If the device copy is the current one the
  ! matrix is synchronised first.
  !
  ! SIZE must not be applied to an unallocated array, so each component
  ! is counted only when it is allocated; a matrix that has not been
  ! allocated yet therefore reports 8.
  function d_cuda_csrg_sizeof(a) result(res)
    implicit none
    class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: res

    if (a%is_dev()) call a%sync()
    res = 8
    if (allocated(a%val)) res = res + psb_sizeof_dp * size(a%val)
    if (allocated(a%irp)) res = res + psb_sizeof_ip * size(a%irp)
    if (allocated(a%ja))  res = res + psb_sizeof_ip * size(a%ja)
    ! Should we account for the shadow data structure
    ! on the GPU device side?
    ! res = 2*res

  end function d_cuda_csrg_sizeof

  ! Format tag of this storage class: 'CSRG', blank padded to 5
  ! characters.
  function d_cuda_csrg_get_fmt() result(res)
    implicit none
    character(len=5) :: res
    res = 'CSRG'
  end function d_cuda_csrg_get_fmt


  ! == ===================================
  !
  ! Data management
  !
  ! == ===================================

  ! State setters: record devstate as is_host, is_dev or is_sync.
  ! They only update the flag; no data is moved.
  subroutine d_cuda_csrg_set_host(a)
    implicit none
    class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a

    a%devstate = is_host
  end subroutine d_cuda_csrg_set_host

  subroutine d_cuda_csrg_set_dev(a)
    implicit none
    class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a

    a%devstate = is_dev
  end subroutine d_cuda_csrg_set_dev

  subroutine d_cuda_csrg_set_sync(a)
    implicit none
    class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a

    a%devstate = is_sync
  end subroutine d_cuda_csrg_set_sync

  ! State queries: .true. when devstate equals the corresponding value.
  function d_cuda_csrg_is_dev(a) result(res)
    implicit none
    class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
    logical  :: res

    res = (a%devstate == is_dev)
  end function d_cuda_csrg_is_dev

  function d_cuda_csrg_is_host(a) result(res)
    implicit none
    class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
    logical  :: res

    res = (a%devstate == is_host)
  end function d_cuda_csrg_is_host

  function d_cuda_csrg_is_sync(a) result(res)
    implicit none
    class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a
    logical  :: res

    res = (a%devstate == is_sync)
  end function d_cuda_csrg_is_sync


  ! Bring the two copies in line: to_gpu when the host copy is current,
  ! from_gpu when the device copy is current, nothing when already in
  ! sync; the state is then set to is_sync.
  !
  ! a is declared intent(in) so that sync can be called on a read-only
  ! reference (sizeof above does so); the updates go through the pointer
  ! alias tmpa.
  ! NOTE(review): the status returned by to_gpu/from_gpu is not checked,
  ! and the state is set to is_sync regardless.
  subroutine d_cuda_csrg_sync(a)
    implicit none
    class(psb_d_cuda_csrg_sparse_mat), target, intent(in) :: a
    class(psb_d_cuda_csrg_sparse_mat), pointer :: tmpa
    integer(psb_ipk_) :: info

    tmpa => a
    if (tmpa%is_host()) then
      call tmpa%to_gpu(info)
    else if (tmpa%is_dev()) then
      call tmpa%from_gpu(info)
    end if
    call tmpa%set_sync()
    return

  end subroutine d_cuda_csrg_sync

  ! Release the device-side matrix, then the host-side storage through
  ! the parent type.
  ! NOTE(review): the status of CSRGDeviceFree is not reported; the
  ! routine has no argument through which it could be.
  subroutine d_cuda_csrg_free(a)
    use cusparse_mod
    implicit none
    integer(psb_ipk_) :: info

    class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a

    info = CSRGDeviceFree(a%deviceMat)
    call a%psb_d_csr_sparse_mat%free()

    return

  end subroutine d_cuda_csrg_free

  ! Finalizer: releases the device-side matrix only.
  subroutine d_cuda_csrg_finalize(a)
    use cusparse_mod
    implicit none
    integer(psb_ipk_) :: info

    type(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a

    info = CSRGDeviceFree(a%deviceMat)

    return

  end subroutine d_cuda_csrg_finalize

end module psb_d_cuda_csrg_mat_mod
+
+
+!
+! Module psb_d_cuda_diag_mat_mod
+!
+! Declares psb_d_cuda_diag_sparse_mat, an extension of the double precision
+! DIA sparse matrix type (psb_d_dia_sparse_mat) that additionally carries an
+! opaque C handle (deviceMat) referring to a copy of the matrix on the GPU.
+! Only the small helpers in the CONTAINS section are implemented in this
+! module; every psb_d_cuda_* routine is declared here through an explicit
+! interface and has its body defined outside this module.
+!
+module psb_d_cuda_diag_mat_mod
+
+  use iso_c_binding
+  use psb_base_mod
+  use psb_d_dia_mat_mod
+
+  type, extends(psb_d_dia_sparse_mat) :: psb_d_cuda_diag_sparse_mat
+    !
+    ! DIA format, extended.
+    ! We are adding here the routines to create a copy of the data
+    ! into the GPU.
+    !
+    ! Opaque handle to the device-side matrix; c_null_ptr means "no copy".
+    type(c_ptr) :: deviceMat = c_null_ptr
+
+  contains
+    procedure, nopass  :: get_fmt       => d_cuda_diag_get_fmt
+    procedure, pass(a) :: sizeof        => d_cuda_diag_sizeof
+    procedure, pass(a) :: vect_mv       => psb_d_cuda_diag_vect_mv
+!    procedure, pass(a) :: csmm          => psb_d_cuda_diag_csmm
+    procedure, pass(a) :: csmv          => psb_d_cuda_diag_csmv
+!    procedure, pass(a) :: in_vect_sv    => psb_d_cuda_diag_inner_vect_sv
+!    procedure, pass(a) :: scals         => psb_d_cuda_diag_scals
+!    procedure, pass(a) :: scalv         => psb_d_cuda_diag_scal
+!    procedure, pass(a) :: reallocate_nz => psb_d_cuda_diag_reallocate_nz
+!    procedure, pass(a) :: allocate_mnnz => psb_d_cuda_diag_allocate_mnnz
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo   => psb_d_cuda_cp_diag_from_coo
+!    procedure, pass(a) :: cp_from_fmt   => psb_d_cuda_cp_diag_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_d_cuda_mv_diag_from_coo
+!    procedure, pass(a) :: mv_from_fmt   => psb_d_cuda_mv_diag_from_fmt
+    procedure, pass(a) :: free          => d_cuda_diag_free
+    procedure, pass(a) :: mold          => psb_d_cuda_diag_mold
+    procedure, pass(a) :: to_gpu        => psb_d_cuda_diag_to_gpu
+    final              :: d_cuda_diag_finalize
+  end type psb_d_cuda_diag_sparse_mat
+
+  ! Only procedures that are actually defined in this module are listed.
+  ! The module has no IMPLICIT NONE, so naming a non-existent procedure in
+  ! an accessibility statement would at best declare an unused, implicitly
+  ! typed private entity.
+  private :: d_cuda_diag_free, d_cuda_diag_get_fmt, d_cuda_diag_sizeof
+
+
+  ! Bound to the vect_mv method; operates on PSBLAS vector objects.
+  interface
+    subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)       :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x
+      class(psb_d_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_d_cuda_diag_vect_mv
+  end interface
+
+  ! Declared only: the in_vect_sv binding in the type is commented out.
+  interface
+    subroutine psb_d_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_ipk_, psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type
+      class(psb_d_cuda_diag_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)       :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x, y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_d_cuda_diag_inner_vect_sv
+  end interface
+
+  ! Declared only: the reallocate_nz binding in the type is commented out.
+  interface
+    subroutine  psb_d_cuda_diag_reallocate_nz(nz,a)
+      import :: psb_d_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+    end subroutine psb_d_cuda_diag_reallocate_nz
+  end interface
+
+  ! Declared only: the allocate_mnnz binding in the type is commented out.
+  interface
+    subroutine  psb_d_cuda_diag_allocate_mnnz(m,n,a,nz)
+      import :: psb_d_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_d_cuda_diag_allocate_mnnz
+  end interface
+
+  ! Bound to the mold method; b is an allocatable polymorphic matrix.
+  interface
+    subroutine psb_d_cuda_diag_mold(a,b,info)
+      import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(in)                :: a
+      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_d_cuda_diag_mold
+  end interface
+
+  ! Bound to the to_gpu method.
+  interface
+    subroutine psb_d_cuda_diag_to_gpu(a,info, nzrm)
+      import :: psb_d_cuda_diag_sparse_mat, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)             :: info
+      integer(psb_ipk_), intent(in), optional    :: nzrm
+    end subroutine psb_d_cuda_diag_to_gpu
+  end interface
+
+  ! Bound to the cp_from_coo method; the COO source b is left untouched.
+  interface
+    subroutine psb_d_cuda_cp_diag_from_coo(a,b,info)
+      import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(in)    :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_cp_diag_from_coo
+  end interface
+
+  ! Declared only: the cp_from_fmt binding in the type is commented out.
+  interface
+    subroutine psb_d_cuda_cp_diag_from_fmt(a,b,info)
+      import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(in)   :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_cp_diag_from_fmt
+  end interface
+
+  ! Bound to the mv_from_coo method; the COO source b may be modified.
+  interface
+    subroutine psb_d_cuda_mv_diag_from_coo(a,b,info)
+      import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_mv_diag_from_coo
+  end interface
+
+
+  ! Declared only: the mv_from_fmt binding in the type is commented out.
+  interface
+    subroutine psb_d_cuda_mv_diag_from_fmt(a,b,info)
+      import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(inout)  :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_d_cuda_mv_diag_from_fmt
+  end interface
+
+  ! Bound to the csmv method; x and y are rank-1 arrays.
+  interface
+    subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:)
+      real(psb_dpk_), intent(inout)       :: y(:)
+      integer(psb_ipk_), intent(out)      :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_d_cuda_diag_csmv
+  end interface
+  ! Declared only (csmm binding commented out); x and y are rank-2 arrays.
+  interface
+    subroutine psb_d_cuda_diag_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:,:)
+      real(psb_dpk_), intent(inout)       :: y(:,:)
+      integer(psb_ipk_), intent(out)      :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_d_cuda_diag_csmm
+  end interface
+
+  ! Declared only: the scalv binding in the type is commented out.
+  interface
+    subroutine psb_d_cuda_diag_scal(d,a,info, side)
+      import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)      :: d(:)
+      integer(psb_ipk_), intent(out)  :: info
+      character, intent(in), optional :: side
+    end subroutine psb_d_cuda_diag_scal
+  end interface
+
+  ! Declared only: the scals binding in the type is commented out.
+  interface
+    subroutine psb_d_cuda_diag_scals(d,a,info)
+      import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)      :: d
+      integer(psb_ipk_), intent(out)  :: info
+    end subroutine psb_d_cuda_diag_scals
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+
+  !
+  ! Storage estimate for the matrix: a fixed 8, plus psb_sizeof_dp for
+  ! every entry of a%data and psb_sizeof_ip for every entry of a%offset.
+  ! The device-side copy is not counted (see the note at the end).
+  !
+  function d_cuda_diag_sizeof(a) result(res)
+    implicit none
+    class(psb_d_cuda_diag_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    res = 8
+    res = res + psb_sizeof_dp  * size(a%data)
+    res = res + psb_sizeof_ip * size(a%offset)
+
+    ! Should we account for the shadow data structure
+    ! on the GPU device side?
+    ! res = 2*res
+
+  end function d_cuda_diag_sizeof
+
+  !
+  ! Returns the format tag 'DIAG' (blank padded to the 5-character result).
+  !
+  function d_cuda_diag_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'DIAG'
+  end function d_cuda_diag_get_fmt
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+  !
+  ! Bound to the free method.  Hands a non-null device handle to
+  ! freeDiagDevice (from diagdev_mod), resets the handle to c_null_ptr and
+  ! then invokes the parent DIA type's free.
+  !
+  subroutine  d_cuda_diag_free(a)
+    use diagdev_mod
+    implicit none
+    class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeDiagDevice(a%deviceMat)
+    ! Null the handle so that a later free/finalize cannot pass it on twice.
+    a%deviceMat = c_null_ptr
+    call a%psb_d_dia_sparse_mat%free()
+
+    return
+
+  end subroutine d_cuda_diag_free
+
+  !
+  ! FINAL procedure of the type: same device-handle handling as
+  ! d_cuda_diag_free, but the parent's free is not called here.
+  !
+  subroutine  d_cuda_diag_finalize(a)
+    use diagdev_mod
+    implicit none
+    type(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeDiagDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+
+    return
+  end subroutine d_cuda_diag_finalize
+
+end module psb_d_cuda_diag_mat_mod
diff --git a/cuda/psb_d_cuda_dnsg_mat_mod.F90 b/cuda/psb_d_cuda_dnsg_mat_mod.F90
new file mode 100644
index 00000000..ffa17eeb
--- /dev/null
+++ b/cuda/psb_d_cuda_dnsg_mat_mod.F90
@@ -0,0 +1,273 @@
+!    Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!  Redistribution and use in source and binary forms, with or without
+!  modification, are permitted provided that the following conditions
+!  are met:
+!    1. Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions, and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!    3. The name of the PSBLAS group or the names of its contributors may
+!       not be used to endorse or promote products derived from this
+!
software without specific written permission.
+!
+!  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!  POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Module psb_d_cuda_dnsg_mat_mod
+!
+! Declares psb_d_cuda_dnsg_sparse_mat, an extension of the double precision
+! DNS matrix type (psb_d_dns_sparse_mat) that additionally carries an opaque
+! C handle (deviceMat) referring to a copy of the matrix on the GPU.  Only
+! get_fmt, free and the finalizer are implemented in this module; every
+! psb_d_cuda_* routine is declared through an explicit interface and has its
+! body defined outside this module.
+!
+module psb_d_cuda_dnsg_mat_mod
+
+  use iso_c_binding
+  use psb_d_mat_mod
+  use psb_d_dns_mat_mod
+  use dnsdev_mod
+
+  type, extends(psb_d_dns_sparse_mat) :: psb_d_cuda_dnsg_sparse_mat
+    !
+    ! DNS format, extended.
+    ! We are adding here the routines to create a copy of the data
+    ! into the GPU.
+    !
+    ! Opaque handle to the device-side matrix; c_null_ptr means "no copy".
+    type(c_ptr) :: deviceMat = c_null_ptr
+
+  contains
+    procedure, nopass  :: get_fmt       => d_cuda_dnsg_get_fmt
+    ! procedure, pass(a) :: sizeof        => d_cuda_dnsg_sizeof
+    procedure, pass(a) :: vect_mv       => psb_d_cuda_dnsg_vect_mv
+!!$    procedure, pass(a) :: csmm          => psb_d_cuda_dnsg_csmm
+!!$    procedure, pass(a) :: csmv          => psb_d_cuda_dnsg_csmv
+!!$    procedure, pass(a) :: in_vect_sv    => psb_d_cuda_dnsg_inner_vect_sv
+!!$    procedure, pass(a) :: scals         => psb_d_cuda_dnsg_scals
+!!$    procedure, pass(a) :: scalv         => psb_d_cuda_dnsg_scal
+!!$    procedure, pass(a) :: reallocate_nz => psb_d_cuda_dnsg_reallocate_nz
+!!$    procedure, pass(a) :: allocate_mnnz => psb_d_cuda_dnsg_allocate_mnnz
+    ! Note: we *do* need the TO methods, because of the need to invoke SYNC
+    !
+
+    procedure, pass(a) :: cp_from_coo   => psb_d_cuda_cp_dnsg_from_coo
+    procedure, pass(a) :: cp_from_fmt   => psb_d_cuda_cp_dnsg_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_d_cuda_mv_dnsg_from_coo
+    procedure, pass(a) :: mv_from_fmt   => psb_d_cuda_mv_dnsg_from_fmt
+    procedure, pass(a) :: free          => d_cuda_dnsg_free
+    procedure, pass(a) :: mold          => psb_d_cuda_dnsg_mold
+    procedure, pass(a) :: to_gpu        => psb_d_cuda_dnsg_to_gpu
+    final              :: d_cuda_dnsg_finalize
+  end type psb_d_cuda_dnsg_sparse_mat
+
+  ! Only procedures that are actually defined in this module are listed.
+  ! The module has no IMPLICIT NONE, so naming a non-existent procedure in
+  ! an accessibility statement would at best declare an unused, implicitly
+  ! typed private entity.
+  private :: d_cuda_dnsg_free, d_cuda_dnsg_get_fmt
+
+
+  ! Bound to the vect_mv method; operates on PSBLAS vector objects.
+  interface
+    subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
+      class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)       :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x
+      class(psb_d_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_d_cuda_dnsg_vect_mv
+  end interface
+!!$
+!!$  interface
+!!$    subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+!!$      import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a
+!!$      real(psb_dpk_), intent(in)       :: alpha, beta
+!!$      class(psb_d_base_vect_type), intent(inout) :: x, y
+!!$      integer(psb_ipk_), intent(out)             :: info
+!!$      character, optional, intent(in)  :: trans
+!!$    end subroutine psb_d_cuda_dnsg_inner_vect_sv
+!!$  end interface
+
+!!$  interface
+!!$    subroutine  psb_d_cuda_dnsg_reallocate_nz(nz,a)
+!!$      import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_
+!!$      integer(psb_ipk_), intent(in) :: nz
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$    end subroutine psb_d_cuda_dnsg_reallocate_nz
+!!$  end interface
+!!$
+!!$  interface
+!!$    subroutine  psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz)
+!!$      import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_
+!!$      integer(psb_ipk_), intent(in) :: m,n
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$      integer(psb_ipk_), intent(in), optional :: nz
+!!$    end subroutine psb_d_cuda_dnsg_allocate_mnnz
+!!$  end interface
+
+  ! Bound to the mold method; b is an allocatable polymorphic matrix.
+  interface
+    subroutine psb_d_cuda_dnsg_mold(a,b,info)
+      import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_dnsg_sparse_mat), intent(in)                :: a
+      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_d_cuda_dnsg_mold
+  end interface
+
+  ! Bound to the to_gpu method.
+  interface
+    subroutine psb_d_cuda_dnsg_to_gpu(a,info)
+      import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_
+      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_dnsg_to_gpu
+  end interface
+
+  ! Bound to the cp_from_coo method; the COO source b is left untouched.
+  interface
+    subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info)
+      import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(in)    :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_cp_dnsg_from_coo
+  end interface
+
+  ! Bound to the cp_from_fmt method; the source b is left untouched.
+  interface
+    subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info)
+      import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(in)   :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_cp_dnsg_from_fmt
+  end interface
+
+  ! Bound to the mv_from_coo method; the COO source b may be modified.
+  interface
+    subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info)
+      import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_mv_dnsg_from_coo
+  end interface
+
+
+  ! Bound to the mv_from_fmt method; the source b may be modified.
+  interface
+    subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info)
+      import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_dnsg_sparse_mat), intent(inout)  :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_d_cuda_mv_dnsg_from_fmt
+  end interface
+
+!!$  interface
+!!$    subroutine psb_d_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans)
+!!$      import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a
+!!$      real(psb_dpk_), intent(in)          :: alpha, beta, x(:)
+!!$      real(psb_dpk_), intent(inout)       :: y(:)
+!!$      integer(psb_ipk_), intent(out)      :: info
+!!$      character, optional, intent(in)     :: trans
+!!$    end subroutine psb_d_cuda_dnsg_csmv
+!!$  end interface
+!!$  interface
+!!$    subroutine psb_d_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans)
+!!$      import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a
+!!$      real(psb_dpk_), intent(in)          :: alpha, beta, x(:,:)
+!!$      real(psb_dpk_), intent(inout)       :: y(:,:)
+!!$      integer(psb_ipk_), intent(out)      :: info
+!!$      character, optional, intent(in)     :: trans
+!!$    end subroutine psb_d_cuda_dnsg_csmm
+!!$  end interface
+!!$
+!!$  interface
+!!$    subroutine psb_d_cuda_dnsg_scal(d,a,info, side)
+!!$      import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$      real(psb_dpk_), intent(in)      :: d(:)
+!!$      integer(psb_ipk_), intent(out)  :: info
+!!$      character, intent(in), optional :: side
+!!$    end subroutine psb_d_cuda_dnsg_scal
+!!$  end interface
+!!$
+!!$  interface
+!!$    subroutine psb_d_cuda_dnsg_scals(d,a,info)
+!!$      import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_
+!!$      class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+!!$      real(psb_dpk_), intent(in)      :: d
+!!$      integer(psb_ipk_), intent(out)  :: info
+!!$    end subroutine psb_d_cuda_dnsg_scals
+!!$  end interface
+!!$
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+
+
+  !
+  ! Returns the format tag 'DNSG' (blank padded to the 5-character result).
+  !
+  function d_cuda_dnsg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'DNSG'
+  end function d_cuda_dnsg_get_fmt
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+  !
+  ! Bound to the free method.  Hands a non-null device handle to
+  ! freeDnsDevice (from dnsdev_mod), resets the handle to c_null_ptr and
+  ! then invokes the parent DNS type's free.
+  !
+  subroutine  d_cuda_dnsg_free(a)
+    use dnsdev_mod
+    implicit none
+    class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeDnsDevice(a%deviceMat)
+    ! Null the handle so that a later free/finalize cannot pass it on twice.
+    a%deviceMat = c_null_ptr
+    call a%psb_d_dns_sparse_mat%free()
+
+    return
+
+  end subroutine d_cuda_dnsg_free
+
+  !
+  ! FINAL procedure of the type: same device-handle handling as
+  ! d_cuda_dnsg_free, but the parent's free is not called here.
+  !
+  subroutine  d_cuda_dnsg_finalize(a)
+    use dnsdev_mod
+    implicit none
+    type(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeDnsDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+
+    return
+  end subroutine d_cuda_dnsg_finalize
+
+end module psb_d_cuda_dnsg_mat_mod
diff --git a/cuda/psb_d_cuda_elg_mat_mod.F90 b/cuda/psb_d_cuda_elg_mat_mod.F90
new file mode 100644
index 00000000..3fde2075
--- /dev/null
+++ b/cuda/psb_d_cuda_elg_mat_mod.F90
@@ -0,0 +1,454 @@
+!    Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!  Redistribution and use in source and binary forms, with or without
+!  modification, are permitted provided that the following conditions
+!  are met:
+!    1. Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions, and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!    3. The name of the PSBLAS group or the names of its contributors may
+!       not be used to endorse or promote products derived from this
+!       software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!  POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Module psb_d_cuda_elg_mat_mod
+!
+! Declares psb_d_cuda_elg_sparse_mat, an extension of the double precision
+! ELL sparse matrix type (psb_d_ell_sparse_mat) that additionally carries an
+! opaque C handle (deviceMat) to a copy of the matrix on the GPU and a
+! devstate flag recording which copy is current (host, device, or both in
+! sync).  The state helpers, sizeof, get_fmt, reinit, free and the finalizer
+! are implemented here; every psb_d_cuda_* routine is declared through an
+! explicit interface and has its body defined outside this module.
+!
+module psb_d_cuda_elg_mat_mod
+
+  use iso_c_binding
+  use psb_d_mat_mod
+  use psb_d_ell_mat_mod
+  use psb_i_cuda_vect_mod
+
+  ! Values of the devstate component.
+  integer(psb_ipk_), parameter, private :: is_host = -1
+  integer(psb_ipk_), parameter, private :: is_sync = 0
+  integer(psb_ipk_), parameter, private :: is_dev  = 1
+
+  type, extends(psb_d_ell_sparse_mat) :: psb_d_cuda_elg_sparse_mat
+    !
+    ! ITPACK/ELL format, extended.
+    ! We are adding here the routines to create a copy of the data
+    ! into the GPU.
+    !
+    ! Opaque handle to the device-side matrix; c_null_ptr means "no copy".
+    type(c_ptr) :: deviceMat = c_null_ptr
+    ! One of is_host / is_sync / is_dev; a new object starts as is_host.
+    integer(psb_ipk_) :: devstate  = is_host
+
+  contains
+    procedure, nopass  :: get_fmt       => d_cuda_elg_get_fmt
+    procedure, pass(a) :: sizeof        => d_cuda_elg_sizeof
+    procedure, pass(a) :: vect_mv       => psb_d_cuda_elg_vect_mv
+    procedure, pass(a) :: csmm          => psb_d_cuda_elg_csmm
+    procedure, pass(a) :: csmv          => psb_d_cuda_elg_csmv
+    procedure, pass(a) :: in_vect_sv    => psb_d_cuda_elg_inner_vect_sv
+    procedure, pass(a) :: scals         => psb_d_cuda_elg_scals
+    procedure, pass(a) :: scalv         => psb_d_cuda_elg_scal
+    procedure, pass(a) :: reallocate_nz => psb_d_cuda_elg_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_d_cuda_elg_allocate_mnnz
+    procedure, pass(a) :: reinit        => d_cuda_elg_reinit
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo   => psb_d_cuda_cp_elg_from_coo
+    procedure, pass(a) :: cp_from_fmt   => psb_d_cuda_cp_elg_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_d_cuda_mv_elg_from_coo
+    procedure, pass(a) :: mv_from_fmt   => psb_d_cuda_mv_elg_from_fmt
+    procedure, pass(a) :: free          => d_cuda_elg_free
+    procedure, pass(a) :: mold          => psb_d_cuda_elg_mold
+    procedure, pass(a) :: csput_a       => psb_d_cuda_elg_csput_a
+    procedure, pass(a) :: csput_v       => psb_d_cuda_elg_csput_v
+    procedure, pass(a) :: is_host       => d_cuda_elg_is_host
+    procedure, pass(a) :: is_dev        => d_cuda_elg_is_dev
+    procedure, pass(a) :: is_sync       => d_cuda_elg_is_sync
+    procedure, pass(a) :: set_host      => d_cuda_elg_set_host
+    procedure, pass(a) :: set_dev       => d_cuda_elg_set_dev
+    procedure, pass(a) :: set_sync      => d_cuda_elg_set_sync
+    procedure, pass(a) :: sync          => d_cuda_elg_sync
+    procedure, pass(a) :: from_gpu      => psb_d_cuda_elg_from_gpu
+    procedure, pass(a) :: to_gpu        => psb_d_cuda_elg_to_gpu
+    procedure, pass(a) :: asb           => psb_d_cuda_elg_asb
+    final              :: d_cuda_elg_finalize
+  end type psb_d_cuda_elg_sparse_mat
+
+  ! Only procedures that are actually defined in this module are listed.
+  ! The module has no IMPLICIT NONE, so naming a non-existent procedure in
+  ! an accessibility statement would at best declare an unused, implicitly
+  ! typed private entity.
+  private :: d_cuda_elg_free, d_cuda_elg_get_fmt, &
+       & d_cuda_elg_sizeof, d_cuda_elg_sync
+
+
+  ! Bound to the vect_mv method; operates on PSBLAS vector objects.
+  interface
+    subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)       :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x
+      class(psb_d_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_d_cuda_elg_vect_mv
+  end interface
+
+  ! Bound to the in_vect_sv method.
+  interface
+    subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_ipk_, psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type
+      class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)       :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x, y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_d_cuda_elg_inner_vect_sv
+  end interface
+
+  ! Bound to the reallocate_nz method.
+  interface
+    subroutine  psb_d_cuda_elg_reallocate_nz(nz,a)
+      import :: psb_d_cuda_elg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+    end subroutine psb_d_cuda_elg_reallocate_nz
+  end interface
+
+  ! Bound to the allocate_mnnz method.
+  interface
+    subroutine  psb_d_cuda_elg_allocate_mnnz(m,n,a,nz)
+      import :: psb_d_cuda_elg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_d_cuda_elg_allocate_mnnz
+  end interface
+
+  ! Bound to the mold method; b is an allocatable polymorphic matrix.
+  interface
+    subroutine psb_d_cuda_elg_mold(a,b,info)
+      import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(in)                 :: a
+      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_d_cuda_elg_mold
+  end interface
+
+  ! Bound to the csput_a method; indices and values are plain arrays.
+  interface
+    subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+      import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)      :: val(:)
+      integer(psb_ipk_), intent(in)   :: nz,ia(:), ja(:),&
+           &  imin,imax,jmin,jmax
+      integer(psb_ipk_), intent(out)  :: info
+    end subroutine psb_d_cuda_elg_csput_a
+  end interface
+
+  ! Bound to the csput_v method; indices and values are PSBLAS vector objects.
+  interface
+    subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+      import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,&
+           & psb_i_base_vect_type
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_d_base_vect_type), intent(inout) :: val
+      class(psb_i_base_vect_type), intent(inout) :: ia, ja
+      integer(psb_ipk_), intent(in)              :: nz, imin,imax,jmin,jmax
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_elg_csput_v
+  end interface
+
+  ! Bound to the from_gpu method; invoked by sync when the device copy is current.
+  interface
+    subroutine psb_d_cuda_elg_from_gpu(a,info)
+      import :: psb_d_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_elg_from_gpu
+  end interface
+
+  ! Bound to the to_gpu method; invoked by sync when the host copy is current.
+  interface
+    subroutine psb_d_cuda_elg_to_gpu(a,info, nzrm)
+      import :: psb_d_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)             :: info
+      integer(psb_ipk_), intent(in), optional    :: nzrm
+    end subroutine psb_d_cuda_elg_to_gpu
+  end interface
+
+  ! Bound to the cp_from_coo method; the COO source b is left untouched.
+  interface
+    subroutine psb_d_cuda_cp_elg_from_coo(a,b,info)
+      import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(in)    :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_cp_elg_from_coo
+  end interface
+
+  ! Bound to the cp_from_fmt method; the source b is left untouched.
+  interface
+    subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info)
+      import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(in)   :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_cp_elg_from_fmt
+  end interface
+
+  ! Bound to the mv_from_coo method; the COO source b may be modified.
+  interface
+    subroutine psb_d_cuda_mv_elg_from_coo(a,b,info)
+      import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_d_cuda_mv_elg_from_coo
+  end interface
+
+
+  ! Bound to the mv_from_fmt method; the source b may be modified.
+  interface
+    subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info)
+      import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(inout)  :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_d_cuda_mv_elg_from_fmt
+  end interface
+
+  ! Bound to the csmv method; x and y are rank-1 arrays.
+  interface
+    subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:)
+      real(psb_dpk_), intent(inout)       :: y(:)
+      integer(psb_ipk_), intent(out)      :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_d_cuda_elg_csmv
+  end interface
+  ! Bound to the csmm method; x and y are rank-2 arrays.
+  interface
+    subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:,:)
+      real(psb_dpk_), intent(inout)       :: y(:,:)
+      integer(psb_ipk_), intent(out)      :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_d_cuda_elg_csmm
+  end interface
+
+  ! Bound to the scalv method.
+  interface
+    subroutine psb_d_cuda_elg_scal(d,a,info, side)
+      import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)      :: d(:)
+      integer(psb_ipk_), intent(out)  :: info
+      character, intent(in), optional :: side
+    end subroutine psb_d_cuda_elg_scal
+  end interface
+
+  ! Bound to the scals method.
+  interface
+    subroutine psb_d_cuda_elg_scals(d,a,info)
+      import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)      :: d
+      integer(psb_ipk_), intent(out)  :: info
+    end subroutine psb_d_cuda_elg_scals
+  end interface
+
+  ! Bound to the asb method.
+  interface
+    subroutine psb_d_cuda_elg_asb(a)
+      import :: psb_d_cuda_elg_sparse_mat
+      class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+    end subroutine psb_d_cuda_elg_asb
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+
+  !
+  ! Storage estimate for the matrix: a fixed 8, plus psb_sizeof_dp for
+  ! every entry of a%val and psb_sizeof_ip for every entry of a%irn,
+  ! a%idiag and a%ja.  If the device copy is the current one the matrix is
+  ! synchronised first.  The device-side copy itself is not counted.
+  !
+  function d_cuda_elg_sizeof(a) result(res)
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+    res = 8
+    res = res + psb_sizeof_dp  * size(a%val)
+    res = res + psb_sizeof_ip * size(a%irn)
+    res = res + psb_sizeof_ip * size(a%idiag)
+    res = res + psb_sizeof_ip * size(a%ja)
+    ! Should we account for the shadow data structure
+    ! on the GPU device side?
+    ! res = 2*res
+
+  end function d_cuda_elg_sizeof
+
+  !
+  ! Returns the format tag 'ELG' (blank padded to the 5-character result).
+  !
+  function d_cuda_elg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'ELG'
+  end function d_cuda_elg_get_fmt
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+  !
+  ! Bound to the reinit method: puts an assembled matrix into update state.
+  !
+  !   a      matrix to reinitialise
+  !   clear  optional, default .true.; when .true. the coefficients of the
+  !          current copy are zeroed, when .false. they are kept
+  !
+  ! A matrix already in build or update state is left untouched.  Any state
+  ! other than build/update/assembled pushes psb_err_invalid_mat_state_ and
+  ! goes through psb_error_handler.
+  !
+  subroutine d_cuda_elg_reinit(a,clear)
+    use elldev_mod
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+    logical, intent(in), optional :: clear
+    integer(psb_ipk_)  :: info, err_act
+    character(len=20)  :: name='reinit'
+    logical            :: clear_
+
+    call psb_erractionsave(err_act)
+    info = psb_success_
+
+    if (present(clear)) then
+      clear_ = clear
+    else
+      clear_ = .true.
+    end if
+
+    if (a%is_bld() .or. a%is_upd()) then
+      ! Nothing to do, but still undo psb_erractionsave so that every exit
+      ! from this routine keeps the save/restore calls paired.
+      call psb_erractionrestore(err_act)
+      return
+    else if (a%is_asb()) then
+      if (a%is_dev().or.a%is_sync()) then
+        if (clear_) call zeroEllDevice(a%deviceMat)
+        ! From here on the device copy is the current one.
+        call a%set_dev()
+      else if (a%is_host()) then
+        ! Honour clear on the host copy exactly as on the device copy.
+        if (clear_) a%val(:,:) = dzero
+      end if
+      call a%set_upd()
+    else
+      info = psb_err_invalid_mat_state_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine d_cuda_elg_reinit
+
+  !
+  ! Bound to the free method.  Hands a non-null device handle to
+  ! freeEllDevice (from elldev_mod), resets the handle to c_null_ptr,
+  ! invokes the parent ELL type's free and finally marks the object as
+  ! "in sync".
+  !
+  subroutine  d_cuda_elg_free(a)
+    use elldev_mod
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeEllDevice(a%deviceMat)
+    ! Null the handle so that a later free/finalize cannot pass it on twice.
+    a%deviceMat = c_null_ptr
+    call a%psb_d_ell_sparse_mat%free()
+    call a%set_sync()
+
+    return
+
+  end subroutine d_cuda_elg_free
+
+  !
+  ! Bound to the sync method: brings host and device copies in line.
+  ! Calls to_gpu when the host copy is current, from_gpu when the device
+  ! copy is current, and then marks the object as "in sync".
+  !
+  ! a is declared intent(in) so that routines holding the matrix as
+  ! intent(in) (e.g. d_cuda_elg_sizeof) can call it; the update is done
+  ! through the local pointer alias tmpa.  The info value returned by
+  ! to_gpu/from_gpu is not examined.
+  !
+  subroutine d_cuda_elg_sync(a)
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), target, intent(in) :: a
+    class(psb_d_cuda_elg_sparse_mat), pointer :: tmpa
+    integer(psb_ipk_) :: info
+
+    tmpa => a
+    if (tmpa%is_host()) then
+      call tmpa%to_gpu(info)
+    else if (tmpa%is_dev()) then
+      call tmpa%from_gpu(info)
+    end if
+    call tmpa%set_sync()
+    return
+
+  end subroutine d_cuda_elg_sync
+
+  ! Records that the host copy is the current one.
+  subroutine d_cuda_elg_set_host(a)
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine d_cuda_elg_set_host
+
+  ! Records that the device copy is the current one.
+  subroutine d_cuda_elg_set_dev(a)
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_dev
+  end subroutine d_cuda_elg_set_dev
+
+  ! Records that host and device copies are in sync.
+  subroutine d_cuda_elg_set_sync(a)
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_sync
+  end subroutine d_cuda_elg_set_sync
+
+  ! .true. when devstate says the device copy is the current one.
+  function d_cuda_elg_is_dev(a) result(res)
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+    logical  :: res
+
+    res = (a%devstate == is_dev)
+  end function d_cuda_elg_is_dev
+
+  ! .true. when devstate says the host copy is the current one.
+  function d_cuda_elg_is_host(a) result(res)
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+    logical  :: res
+
+    res = (a%devstate == is_host)
+  end function d_cuda_elg_is_host
+
+  ! .true. when devstate says host and device copies are in sync.
+  function d_cuda_elg_is_sync(a) result(res)
+    implicit none
+    class(psb_d_cuda_elg_sparse_mat), intent(in) :: a
+    logical  :: res
+
+    res = (a%devstate == is_sync)
+  end function d_cuda_elg_is_sync
+
+  !
+  ! FINAL procedure of the type: same device-handle handling as
+  ! d_cuda_elg_free, but neither the parent's free nor set_sync is called.
+  !
+  subroutine  d_cuda_elg_finalize(a)
+    use elldev_mod
+    implicit none
+    type(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeEllDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    return
+
+  end subroutine d_cuda_elg_finalize
+
+end module psb_d_cuda_elg_mat_mod
diff --git a/cuda/psb_d_cuda_hdiag_mat_mod.F90 b/cuda/psb_d_cuda_hdiag_mat_mod.F90
new file mode 100644
index 00000000..46b63b43
--- /dev/null
+++ b/cuda/psb_d_cuda_hdiag_mat_mod.F90
@@ -0,0 +1,268 @@
+!    Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!  Redistribution and use in source and binary forms, with or without
+!  modification, are permitted provided that the following conditions
+!  are met:
+!    1. Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions, and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!    3. The name of the PSBLAS group or the names of its contributors may
+!       not be used to endorse or promote products derived from this
+!       software without specific written permission.
+!
+!  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +! HDIA sparse matrix extended with a device matrix handle: type definition, explicit interfaces, and release of the device matrix. +module psb_d_cuda_hdiag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_d_hdia_mat_mod + + type, extends(psb_d_hdia_sparse_mat) :: psb_d_cuda_hdiag_sparse_mat + ! Parent HDIA storage plus deviceMat, a C handle that is released with freeHdiagDevice. + type(c_ptr) :: deviceMat = c_null_ptr ! starts unassociated + + contains + procedure, nopass :: get_fmt => d_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => d_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_d_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_d_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_d_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_d_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_d_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hdiag_from_coo + ! 
procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => d_cuda_hdiag_free + procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_hdiag_to_gpu + final :: d_cuda_hdiag_finalize + end type psb_d_cuda_hdiag_sparse_mat + ! Only procedures that this module defines are listed; both stay reachable through the bindings free and get_fmt. + private :: d_cuda_hdiag_free, & + & d_cuda_hdiag_get_fmt + + ! The interface blocks below describe routines whose bodies are not part of this module. + interface + subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_cuda_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ 
integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_cuda_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_d_cuda_hdiag_mold(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hdiag_mold + end interface + + interface + subroutine psb_d_cuda_hdiag_to_gpu(a,info) ! bound to the type as to_gpu + import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hdiag_to_gpu + end interface + + interface + subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) ! copy variant: the COO source b is intent(in) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cuda_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) ! move variant: the COO source b is intent(inout) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), 
intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cuda_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_cuda_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_cuda_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cuda_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! get_fmt: returns the five-character format tag 'HDIAG'. + ! + ! + ! + ! 
== =================================== + + function d_cuda_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function d_cuda_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! free: releases the device matrix when associated, then the parent HDIA storage. + ! finalize: FINAL procedure of the type; performs the same two steps. + ! + ! + ! == =================================== + + subroutine d_cuda_hdiag_free(a) + use hdiagdev_mod + implicit none + ! a: matrix whose device handle and parent storage are released. + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr ! a null handle makes the c_associated guard skip any later release + call a%psb_d_hdia_sparse_mat%free() + + return + + end subroutine d_cuda_hdiag_free + + subroutine d_cuda_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hdia_sparse_mat%free() ! NOTE(review): the ELG and HLG finalizers in this patch release only the device matrix - confirm the parent free is wanted here + + return + end subroutine d_cuda_hdiag_finalize + +end module psb_d_cuda_hdiag_mat_mod diff --git a/cuda/psb_d_cuda_hlg_mat_mod.F90 b/cuda/psb_d_cuda_hlg_mat_mod.F90 new file mode 100644 index 00000000..a1a2539d --- /dev/null +++ b/cuda/psb_d_cuda_hlg_mat_mod.F90 @@ -0,0 +1,377 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! 
software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +! HLL sparse matrix with a GPU copy and a devstate flag that tells sync which transfer routine to call. +module psb_d_cuda_hlg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 ! sync calls to_gpu in this state + integer(psb_ipk_), parameter, private :: is_sync = 0 ! sync makes no transfer call in this state + integer(psb_ipk_), parameter, private :: is_dev = 1 ! sync calls from_gpu in this state + + type, extends(psb_d_hll_sparse_mat) :: psb_d_cuda_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! + type(c_ptr) :: deviceMat = c_null_ptr ! C handle released with freeHllDevice; starts unassociated + integer :: devstate = is_host ! one of is_host, is_sync, is_dev + + contains + procedure, nopass :: get_fmt => d_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_d_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_d_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hlg_allocate_mnnz + ! 
Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => d_cuda_hlg_free + procedure, pass(a) :: mold => psb_d_cuda_hlg_mold + procedure, pass(a) :: is_host => d_cuda_hlg_is_host + procedure, pass(a) :: is_dev => d_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => d_cuda_hlg_is_sync + procedure, pass(a) :: set_host => d_cuda_hlg_set_host + procedure, pass(a) :: set_dev => d_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => d_cuda_hlg_set_sync + procedure, pass(a) :: sync => d_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_d_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_cuda_hlg_to_gpu + final :: d_cuda_hlg_finalize + end type psb_d_cuda_hlg_sparse_mat + ! Only procedures that this module defines are listed; they stay reachable through the bindings free, get_fmt and sizeof. + private :: d_cuda_hlg_free, d_cuda_hlg_get_fmt, & + & d_cuda_hlg_sizeof + + ! The interface blocks below describe routines whose bodies are not part of this module. + interface + subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hlg_vect_mv + end interface + + interface + subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: 
info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hlg_inner_vect_sv + end interface + + interface + subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_hlg_reallocate_nz + end interface + + interface + subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_cuda_hlg_allocate_mnnz + end interface + + interface + subroutine psb_d_cuda_hlg_mold(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hlg_mold + end interface + + interface + subroutine psb_d_cuda_hlg_from_gpu(a,info) ! bound as from_gpu; sync calls it when the state is is_dev + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hlg_from_gpu + end interface + + interface + subroutine psb_d_cuda_hlg_to_gpu(a,info, nzrm) ! bound as to_gpu; sync calls it without nzrm when the state is is_host + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_cuda_hlg_to_gpu + end interface + + interface + subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_hlg_from_coo + end interface + + interface + subroutine 
psb_d_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_hlg_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_hlg_from_coo + end interface + + + interface + subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_hlg_from_fmt + end interface + + interface + subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hlg_csmv + end interface + interface + subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hlg_csmm + end interface + + interface + subroutine psb_d_cuda_hlg_scal(d,a,info, side) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a 
+ real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_cuda_hlg_scal + end interface + + interface + subroutine psb_d_cuda_hlg_scals(d,a,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! sizeof: 8 plus the byte sizes of the host arrays val, irn, idiag, hkoffs and ja. + ! get_fmt: returns 'HLG', blank-padded to five characters. + ! + ! + ! == =================================== + + + function d_cuda_hlg_sizeof(a) result(res) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() ! when the state is is_dev, sync is called before the host arrays are measured + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_cuda_hlg_sizeof + + function d_cuda_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function d_cuda_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! free: releases device and host storage and resets the state flag to is_sync. + ! sync: calls to_gpu or from_gpu according to the state flag, then sets is_sync. + ! set_* and is_*: write and test the state flag; finalize: FINAL procedure. + ! + ! 
== =================================== + + subroutine d_cuda_hlg_free(a) + use hlldev_mod + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr ! a null handle makes the c_associated guard skip any later release + call a%psb_d_hll_sparse_mat%free() + ! No copy is left on either side: reset the flag as d_cuda_elg_free does, so a stale is_dev cannot trigger from_gpu on a null handle. + call a%set_sync() + return + + end subroutine d_cuda_hlg_free + + + subroutine d_cuda_hlg_sync(a) + implicit none + class(psb_d_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), pointer :: tmpa ! alias used to invoke the updating methods on the intent(in) argument + integer(psb_ipk_) :: info ! status returned by to_gpu or from_gpu; not inspected here + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() ! NOTE(review): the flag is set even when info reports a failure - confirm this is acceptable + return + + end subroutine d_cuda_hlg_sync + + subroutine d_cuda_hlg_set_host(a) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_cuda_hlg_set_host + + subroutine d_cuda_hlg_set_dev(a) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_cuda_hlg_set_dev + + subroutine d_cuda_hlg_set_sync(a) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_cuda_hlg_set_sync + + function d_cuda_hlg_is_dev(a) result(res) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_cuda_hlg_is_dev + + function d_cuda_hlg_is_host(a) result(res) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_cuda_hlg_is_host + + function d_cuda_hlg_is_sync(a) result(res) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_cuda_hlg_is_sync + + + subroutine d_cuda_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + 
+ if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr ! only the device matrix is released here; the parent free is not called + + return + end subroutine d_cuda_hlg_finalize + +end module psb_d_cuda_hlg_mat_mod diff --git a/cuda/psb_d_cuda_hybg_mat_mod.F90 b/cuda/psb_d_cuda_hybg_mat_mod.F90 new file mode 100644 index 00000000..4ff72f51 --- /dev/null +++ b/cuda/psb_d_cuda_hybg_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+#if PSB_CUDA_SHORT_VERSION <= 10
+
+!
+! psb_d_cuda_hybg_mat_mod
+!
+! Double precision sparse matrix type backed by the cuSPARSE HYB format.
+! The whole module is compiled only when PSB_CUDA_SHORT_VERSION <= 10.
+!
+module psb_d_cuda_hybg_mat_mod
+
+  use iso_c_binding
+  use psb_d_mat_mod
+  use cusparse_mod
+
+  type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_hybg_sparse_mat
+    !
+    ! HYBG. An interface to the cuSPARSE HYB
+    ! On the CPU side we keep a CSR storage.
+    !
+    ! deviceMat is the handle passed to the cusparse_mod routines
+    ! (HYBGDeviceFree in this module) for the device-side copy.
+    !
+    type(d_Hmat) :: deviceMat
+
+  contains
+    ! Bodies of get_fmt, sizeof and free are in the CONTAINS section of
+    ! this module; every other binding refers to an external procedure
+    ! whose explicit interface is declared below.
+    procedure, nopass  :: get_fmt       => d_cuda_hybg_get_fmt
+    procedure, pass(a) :: sizeof        => d_cuda_hybg_sizeof
+    procedure, pass(a) :: vect_mv       => psb_d_cuda_hybg_vect_mv
+    procedure, pass(a) :: in_vect_sv    => psb_d_cuda_hybg_inner_vect_sv
+    procedure, pass(a) :: csmm          => psb_d_cuda_hybg_csmm
+    procedure, pass(a) :: csmv          => psb_d_cuda_hybg_csmv
+    procedure, pass(a) :: scals         => psb_d_cuda_hybg_scals
+    procedure, pass(a) :: scalv         => psb_d_cuda_hybg_scal
+    procedure, pass(a) :: reallocate_nz => psb_d_cuda_hybg_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hybg_allocate_mnnz
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo   => psb_d_cuda_cp_hybg_from_coo
+    procedure, pass(a) :: cp_from_fmt   => psb_d_cuda_cp_hybg_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_d_cuda_mv_hybg_from_coo
+    procedure, pass(a) :: mv_from_fmt   => psb_d_cuda_mv_hybg_from_fmt
+    procedure, pass(a) :: free          => d_cuda_hybg_free
+    procedure, pass(a) :: mold          => psb_d_cuda_hybg_mold
+    procedure, pass(a) :: to_gpu        => psb_d_cuda_hybg_to_gpu
+    final              :: d_cuda_hybg_finalize
+  end type psb_d_cuda_hybg_sparse_mat
+
+  ! NOTE(review): d_cuda_hybg_get_nzeros, d_cuda_hybg_get_size and
+  ! d_cuda_hybg_get_nz_row are listed here but are not defined in the
+  ! CONTAINS section of this module - confirm whether they can be dropped.
+  private :: d_cuda_hybg_get_nzeros, d_cuda_hybg_free,  d_cuda_hybg_get_fmt, &
+       & d_cuda_hybg_get_size, d_cuda_hybg_sizeof, d_cuda_hybg_get_nz_row
+
+
+  ! Explicit interfaces of the externally implemented type-bound
+  ! procedures; argument names, order and intents are what callers see.
+
+  interface
+    subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)       :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x
+      class(psb_d_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_d_cuda_hybg_inner_vect_sv
+  end interface
+
+  interface
+    subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)       :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x
+      class(psb_d_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_d_cuda_hybg_vect_mv
+  end interface
+
+  interface
+    subroutine psb_d_cuda_hybg_reallocate_nz(nz,a)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+    end subroutine psb_d_cuda_hybg_reallocate_nz
+  end interface
+
+  interface
+    subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_d_cuda_hybg_allocate_mnnz
+  end interface
+
+  interface
+    subroutine psb_d_cuda_hybg_mold(a,b,info)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(in)            :: a
+      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_d_cuda_hybg_mold
+  end interface
+
+  interface
+    subroutine psb_d_cuda_hybg_to_gpu(a,info, nzrm)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                   :: info
+      integer(psb_ipk_), intent(in), optional          :: nzrm
+    end subroutine psb_d_cuda_hybg_to_gpu
+  end interface
+
+  interface
+    subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(in)          :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_d_cuda_cp_hybg_from_coo
+  end interface
+
+  interface
+    subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(in)         :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_d_cuda_cp_hybg_from_fmt
+  end interface
+
+  interface
+    subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout)       :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_d_cuda_mv_hybg_from_coo
+  end interface
+
+  interface
+    subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(inout)      :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_d_cuda_mv_hybg_from_fmt
+  end interface
+
+  interface
+    subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:)
+      real(psb_dpk_), intent(inout)       :: y(:)
+      integer(psb_ipk_), intent(out)      :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_d_cuda_hybg_csmv
+  end interface
+  interface
+    subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in)          :: alpha, beta, x(:,:)
+      real(psb_dpk_), intent(inout)       :: y(:,:)
+      integer(psb_ipk_), intent(out)      :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_d_cuda_hybg_csmm
+  end interface
+
+  interface
+    subroutine psb_d_cuda_hybg_scal(d,a,info,side)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)          :: d(:)
+      integer(psb_ipk_), intent(out)      :: info
+      character, intent(in), optional     :: side
+    end subroutine psb_d_cuda_hybg_scal
+  end interface
+
+  interface
+    subroutine psb_d_cuda_hybg_scals(d,a,info)
+      import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in)          :: d
+      integer(psb_ipk_), intent(out)      :: info
+    end subroutine psb_d_cuda_hybg_scals
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  ! Getters
+  !
+  ! == ===================================
+
+
+  !
+  ! d_cuda_hybg_sizeof
+  !   Returns 8 plus the host-side CSR arrays' sizes: the element count of
+  !   a%val times psb_sizeof_dp and of a%irp, a%ja times psb_sizeof_ip.
+  !   Components that are not allocated contribute nothing; querying
+  !   SIZE on an unallocated allocatable array is not permitted, and the
+  !   matrix can legitimately be in that state (e.g. after free).
+  !
+  function d_cuda_hybg_sizeof(a) result(res)
+    implicit none
+    class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    res = 8
+    if (allocated(a%val)) res = res + psb_sizeof_dp * size(a%val)
+    if (allocated(a%irp)) res = res + psb_sizeof_ip * size(a%irp)
+    if (allocated(a%ja))  res = res + psb_sizeof_ip * size(a%ja)
+    ! Should we account for the shadow data structure
+    ! on the GPU device side?
+    ! res = 2*res
+
+  end function d_cuda_hybg_sizeof
+
+  !
+  ! d_cuda_hybg_get_fmt
+  !   Returns the format tag 'HYBG' in a character(len=5) result
+  !   (blank padded on the right by the assignment).
+  !
+  function d_cuda_hybg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HYBG'
+  end function d_cuda_hybg_get_fmt
+
+
+
+  ! == ===================================
+  !
+  ! Data management
+  !
+  ! == ===================================
+
+  !
+  ! d_cuda_hybg_free
+  !   Calls HYBGDeviceFree on a%deviceMat, then invokes the parent CSR
+  !   free. The status returned by HYBGDeviceFree is stored in a local
+  !   and not reported, since this interface has no info argument.
+  !
+  subroutine d_cuda_hybg_free(a)
+    use cusparse_mod
+    implicit none
+    integer(psb_ipk_) :: info
+    class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+
+    info = HYBGDeviceFree(a%deviceMat)
+    call a%psb_d_csr_sparse_mat%free()
+
+    return
+
+  end subroutine d_cuda_hybg_free
+
+  !
+  ! d_cuda_hybg_finalize
+  !   FINAL procedure of the type: calls HYBGDeviceFree on a%deviceMat.
+  !   NOTE(review): free() above calls HYBGDeviceFree on the same handle,
+  !   so an object that was freed explicitly reaches it twice - confirm
+  !   that HYBGDeviceFree tolerates an already released handle.
+  !
+  subroutine d_cuda_hybg_finalize(a)
+    use cusparse_mod
+    implicit none
+    integer(psb_ipk_) :: info
+    type(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
+
+    info = HYBGDeviceFree(a%deviceMat)
+
+    return
+  end subroutine d_cuda_hybg_finalize
+
+end module psb_d_cuda_hybg_mat_mod
+#endif
diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90
new file mode 100644
index 00000000..080c8686
--- /dev/null
+++ b/cuda/psb_d_cuda_vect_mod.F90
@@ -0,0 +1,2088 @@
+!    Parallel Sparse BLAS   GPU plugin
+!    (C) Copyright 2013
+!
+!       Salvatore Filippone
+!       Alessandro Fanfarillo
+!
+!  Redistribution and use in source and binary forms, with or without
+!  modification, are permitted provided that the following conditions
+!  are met:
+!    1. Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!    2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +module psb_d_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_d_vect_mod + use psb_cuda_env_mod + use psb_i_vect_mod + use psb_i_cuda_vect_mod + use psb_i_vectordev_mod + use psb_d_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_base_vect_type) :: psb_d_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + real(c_double), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => d_cuda_get_nrows + procedure, nopass :: get_fmt => d_cuda_get_fmt + + procedure, pass(x) :: all => d_cuda_all + procedure, pass(x) :: zero => d_cuda_zero + procedure, pass(x) :: asb_m => d_cuda_asb_m + procedure, pass(x) :: sync => d_cuda_sync + procedure, pass(x) :: sync_space => d_cuda_sync_space + procedure, pass(x) :: bld_x => d_cuda_bld_x + procedure, pass(x) :: bld_mn => d_cuda_bld_mn + procedure, pass(x) :: free => d_cuda_free + procedure, pass(x) :: ins_a => d_cuda_ins_a + procedure, pass(x) :: ins_v => d_cuda_ins_v + procedure, pass(x) :: is_host => d_cuda_is_host + procedure, pass(x) :: is_dev => d_cuda_is_dev + procedure, pass(x) :: is_sync => d_cuda_is_sync + procedure, pass(x) :: set_host => d_cuda_set_host + procedure, pass(x) :: set_dev => d_cuda_set_dev + procedure, pass(x) :: set_sync => d_cuda_set_sync + procedure, pass(x) :: set_scal => d_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => d_cuda_set_vect + procedure, pass(x) :: gthzv_x => d_cuda_gthzv_x + procedure, pass(y) :: sctb => d_cuda_sctb + procedure, pass(y) :: sctb_x => d_cuda_sctb_x + procedure, pass(x) :: gthzbuf => d_cuda_gthzbuf + procedure, pass(y) :: 
sctb_buf => d_cuda_sctb_buf + procedure, pass(x) :: new_buffer => d_cuda_new_buffer + procedure, nopass :: device_wait => d_cuda_device_wait + procedure, pass(x) :: free_buffer => d_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => d_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => d_cuda_dot_v + procedure, pass(x) :: dot_a => d_cuda_dot_a + procedure, pass(y) :: axpby_v => d_cuda_axpby_v + procedure, pass(y) :: axpby_a => d_cuda_axpby_a + procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz + procedure, pass(y) :: mlt_v => d_cuda_mlt_v + procedure, pass(y) :: mlt_a => d_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => d_cuda_mlt_v_2 + procedure, pass(x) :: scal => d_cuda_scal + procedure, pass(x) :: nrm2 => d_cuda_nrm2 + procedure, pass(x) :: amax => d_cuda_amax + procedure, pass(x) :: asum => d_cuda_asum + procedure, pass(x) :: absval1 => d_cuda_absval1 + procedure, pass(x) :: absval2 => d_cuda_absval2 + + final :: d_cuda_vect_finalize + end type psb_d_vect_cuda + + public :: psb_d_vect_cuda_ + private :: constructor + interface psb_d_vect_cuda_ + module procedure constructor + end interface psb_d_vect_cuda_ + +contains + + function constructor(x) result(this) + real(psb_dpk_) :: x(:) + type(psb_d_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine d_cuda_device_wait() + call psb_cudaSync() + end subroutine d_cuda_device_wait + + subroutine d_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! 
Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDouble(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDouble(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDouble(x%dt_buf,y,n) + + endif + + class default + ! 
Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDouble(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDouble(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceDouble(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDouble(x%dt_buf,y,n) + + end select + + end subroutine d_cuda_gthzv_x + + subroutine d_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + class(psb_d_vect_cuda) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readDouble(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! 
Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceDouble(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readDouble(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine d_cuda_gthzbuf + + subroutine d_cuda_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_cuda) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_d_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine d_cuda_sctb + + subroutine d_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_cuda) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! 
+ if (psb_cuda_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDouble(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDouble(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeDouble(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDouble(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDouble(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeDouble(y%dt_buf,x,n) + info = iscatMultiVecDeviceDouble(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! 
Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine d_cuda_sctb_x + + subroutine d_cuda_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_cuda_env_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta + class(psb_d_vect_cuda) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_cuda) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_cuda_DeviceHasUVA()) then + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeDouble(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeDouble(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceDouble(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine d_cuda_sctb_buf + + + subroutine d_cuda_bld_x(x,this) + use psb_base_mod + real(psb_dpk_), intent(in) :: this(:) + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_cuda_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine 
d_cuda_bld_x + + subroutine d_cuda_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'d_cuda_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine d_cuda_bld_mn + + subroutine d_cuda_set_host(x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine d_cuda_set_host + + subroutine d_cuda_set_dev(x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine d_cuda_set_dev + + subroutine d_cuda_set_sync(x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine d_cuda_set_sync + + function d_cuda_is_dev(x) result(res) + implicit none + class(psb_d_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function d_cuda_is_dev + + function d_cuda_is_host(x) result(res) + implicit none + class(psb_d_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function d_cuda_is_host + + function d_cuda_is_sync(x) result(res) + implicit none + class(psb_d_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function d_cuda_is_sync + + + function d_cuda_get_nrows(x) result(res) + implicit none + class(psb_d_vect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function d_cuda_get_nrows + + function d_cuda_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'dGPU' + end function d_cuda_get_fmt + + subroutine d_cuda_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_d_vect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + 
info=psb_err_alloc_request_ + call psb_errpush(info,'d_cuda_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine d_cuda_all + + subroutine d_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(dzero) + end subroutine d_cuda_zero + + subroutine d_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_d_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'d_cuda_sync') + end if + + end subroutine d_cuda_sync + + subroutine d_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine d_cuda_free + + subroutine d_cuda_set_scal(x,val,first,last) + class(psb_d_vect_cuda), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = 
x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine d_cuda_set_scal + + + + function d_cuda_dot_v(n,x,y) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + res = dzero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_d_vect + ! + select type(yy => y) + type is (psb_d_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'d_cuda_dot_v') + end if + + class default + ! y%sync is done in dot_a + if (x%is_dev()) call x%sync() + res = y%dot(n,x%v) + end select + + end function d_cuda_dot_v + + function d_cuda_dot_a(n,x,y) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function d_cuda_dot_a + + subroutine d_cuda_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_d_vect_cuda) + ! 
Do something different here + if ((beta /= dzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + class is (psb_d_vect_cuda) + select type(yy => y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= dzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) + class is (psb_d_vect_cuda) + select type(yy => y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + type is (psb_d_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_d_vect_cuda) + ! 
Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine d_cuda_mlt_v + + subroutine d_cuda_mlt_a(x, y, info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_cuda), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_d_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine d_cuda_mlt_a + + subroutine d_cuda_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: alpha,beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(in) :: y(:) + class(psb_d_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine d_cuda_mlt_a_2 + + subroutine d_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + real(psb_dpk_), intent(in) :: alpha,beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. 
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_d_vect_cuda) + select type (yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= dzero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= dzero).and.(z%is_dev())) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= dzero).and.(z%is_dev())) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine d_cuda_mlt_v_2 + + subroutine d_cuda_scal(alpha, x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + real(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine d_cuda_scal + + + function d_cuda_nrm2(n,x) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. 
+ if (x%is_host()) call x%sync() + info = nrm2MultiVecDevice(res,n,x%deviceVect) + + end function d_cuda_nrm2 + + function d_cuda_amax(n,x) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDevice(res,n,x%deviceVect) + + end function d_cuda_amax + + function d_cuda_asum(n,x) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDevice(res,n,x%deviceVect) + + end function d_cuda_asum + + subroutine d_cuda_absval1(x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,done,x%deviceVect) + + end subroutine d_cuda_absval1 + + subroutine d_cuda_absval2(x,y) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_d_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,done,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_d_base_vect_type%absval(y) + end select + end subroutine d_cuda_absval2 + + + subroutine d_cuda_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine d_cuda_vect_finalize + + subroutine d_cuda_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, 
dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_d_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_cuda + + info = 0 + if (psb_errstatus_fatal()) return + + done_cuda = .false. + select type(virl => irl) + class is (psb_i_vect_cuda) + select type(vval => val) + class is (psb_d_vect_cuda) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceDouble(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_cuda=.true. + end select + end select + + if (.not.done_cuda) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'cuda_vect_ins') + return + end if + + end subroutine d_cuda_ins_v + + subroutine d_cuda_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_d_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine d_cuda_ins_a + +end module psb_d_cuda_vect_mod + + +! +! Multivectors +! 
+
+
+
+!
+! Double precision multivector (rank-2) with an optional device copy.
+! Only the data-management methods are active; the computational
+! overrides are present but disabled with the !!$ prefix.
+!
+module psb_d_cuda_multivect_mod
+  use iso_c_binding
+  use psb_const_mod
+  use psb_error_mod
+  use psb_d_multivect_mod
+  use psb_d_base_multivect_mod
+  use psb_cuda_env_mod
+  use psb_i_multivect_mod
+  use psb_i_cuda_multivect_mod
+  use psb_d_vectordev_mod
+
+  ! Values taken by the state component: which copy holds the newest data.
+  integer(psb_ipk_), parameter, private :: is_host = -1
+  integer(psb_ipk_), parameter, private :: is_sync = 0
+  integer(psb_ipk_), parameter, private :: is_dev = 1
+
+  type, extends(psb_d_base_multivect_type) :: psb_d_multivect_cuda
+    ! state   : one of is_host / is_sync / is_dev (starts as is_host)
+    ! m_nrows : row count reported by get_nrows
+    ! m_ncols : column count reported by get_ncols
+    integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0
+    ! Opaque handle passed to the *MultiVecDevice* routines.
+    type(c_ptr) :: deviceVect = c_null_ptr
+    ! buffer / dt_buf are referenced only by the disabled gather/scatter
+    ! code below.
+    real(c_double), allocatable :: buffer(:,:)
+    type(c_ptr) :: dt_buf = c_null_ptr
+  contains
+    procedure, pass(x) :: get_nrows => d_cuda_multi_get_nrows
+    procedure, pass(x) :: get_ncols => d_cuda_multi_get_ncols
+    procedure, nopass :: get_fmt => d_cuda_multi_get_fmt
+!!$    procedure, pass(x) :: dot_v => d_cuda_multi_dot_v
+!!$    procedure, pass(x) :: dot_a => d_cuda_multi_dot_a
+!!$    procedure, pass(y) :: axpby_v => d_cuda_multi_axpby_v
+!!$    procedure, pass(y) :: axpby_a => d_cuda_multi_axpby_a
+!!$    procedure, pass(y) :: mlt_v => d_cuda_multi_mlt_v
+!!$    procedure, pass(y) :: mlt_a => d_cuda_multi_mlt_a
+!!$    procedure, pass(z) :: mlt_a_2 => d_cuda_multi_mlt_a_2
+!!$    procedure, pass(z) :: mlt_v_2 => d_cuda_multi_mlt_v_2
+!!$    procedure, pass(x) :: scal => d_cuda_multi_scal
+!!$    procedure, pass(x) :: nrm2 => d_cuda_multi_nrm2
+!!$    procedure, pass(x) :: amax => d_cuda_multi_amax
+!!$    procedure, pass(x) :: asum => d_cuda_multi_asum
+    procedure, pass(x) :: all => d_cuda_multi_all
+    procedure, pass(x) :: zero => d_cuda_multi_zero
+    procedure, pass(x) :: asb => d_cuda_multi_asb
+    procedure, pass(x) :: sync => d_cuda_multi_sync
+    procedure, pass(x) :: sync_space => d_cuda_multi_sync_space
+    procedure, pass(x) :: bld_x => d_cuda_multi_bld_x
+    procedure, pass(x) :: bld_n => d_cuda_multi_bld_n
+    procedure, pass(x) :: free => d_cuda_multi_free
+    procedure, pass(x) :: ins => d_cuda_multi_ins
+    procedure, pass(x) :: is_host => d_cuda_multi_is_host
+    procedure, pass(x) :: is_dev => d_cuda_multi_is_dev
+    procedure, pass(x) :: is_sync => d_cuda_multi_is_sync
+    procedure, pass(x) :: set_host => d_cuda_multi_set_host
+    procedure, pass(x) :: set_dev => d_cuda_multi_set_dev
+    procedure, pass(x) :: set_sync => d_cuda_multi_set_sync
+    procedure, pass(x) :: set_scal => d_cuda_multi_set_scal
+    procedure, pass(x) :: set_vect => d_cuda_multi_set_vect
+!!$    procedure, pass(x) :: gthzv_x => d_cuda_multi_gthzv_x
+!!$    procedure, pass(y) :: sctb => d_cuda_multi_sctb
+!!$    procedure, pass(y) :: sctb_x => d_cuda_multi_sctb_x
+    final :: d_cuda_multi_vect_finalize
+  end type psb_d_multivect_cuda
+
+  ! The generic name of the type doubles as a structure constructor
+  ! taking a rank-2 array.
+  public :: psb_d_multivect_cuda
+  private :: mconstructor
+  interface psb_d_multivect_cuda
+    module procedure mconstructor
+  end interface
+
+contains
+
+  !
+  ! Build a multivector from the rank-2 array x: the data is copied into
+  ! this%v and asb is called with the two extents of x.
+  ! NOTE(review): the status returned by asb in info is not checked.
+  !
+  function mconstructor(x) result(this)
+    real(psb_dpk_) :: x(:,:)
+    type(psb_d_multivect_cuda) :: this
+    integer(psb_ipk_) :: info
+
+    this%v = x
+    call this%asb(size(x,1),size(x,2),info)
+
+  end function mconstructor
+
+
+!!$  subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y)
+!!$    use psi_serial_mod
+!!$    integer(psb_ipk_) :: i,n
+!!$    class(psb_i_base_multivect_type) :: idx
+!!$    real(psb_dpk_) :: y(:)
+!!$    class(psb_d_multivect_cuda) :: x
+!!$
+!!$    select type(ii=> idx)
+!!$    class is (psb_i_vect_cuda)
+!!$      if (ii%is_host()) call ii%sync()
+!!$      if (x%is_host()) call x%sync()
+!!$
+!!$      if (allocated(x%buffer)) then
+!!$        if (size(x%buffer) < n) then
+!!$          call inner_unregister(x%buffer)
+!!$          deallocate(x%buffer, stat=info)
+!!$        end if
+!!$      end if
+!!$
+!!$      if (.not.allocated(x%buffer)) then
+!!$        allocate(x%buffer(n),stat=info)
+!!$        if (info == 0) info = inner_register(x%buffer,x%dt_buf)
+!!$      endif
+!!$      info = igathMultiVecDeviceDouble(x%deviceVect,&
+!!$           & 0, i, n, ii%deviceVect, x%dt_buf, 1)
+!!$      call psb_cudaSync()
+!!$      y(1:n) = x%buffer(1:n)
+!!$
+!!$    class default
+!!$      call x%gth(n,ii%v(i:),y)
+!!$    end select
+!!$
+!!$
+!!$  end subroutine 
d_cuda_multi_gthzv_x
+!!$
+!!$
+!!$
+!!$  subroutine d_cuda_multi_sctb(n,idx,x,beta,y)
+!!$    implicit none
+!!$    !use psb_const_mod
+!!$    integer(psb_ipk_) :: n, idx(:)
+!!$    real(psb_dpk_) :: beta, x(:)
+!!$    class(psb_d_multivect_cuda) :: y
+!!$    integer(psb_ipk_) :: info
+!!$
+!!$    if (n == 0) return
+!!$
+!!$    if (y%is_dev()) call y%sync()
+!!$
+!!$    call y%psb_d_base_multivect_type%sctb(n,idx,x,beta)
+!!$    call y%set_host()
+!!$
+!!$  end subroutine d_cuda_multi_sctb
+!!$
+!!$  subroutine d_cuda_multi_sctb_x(i,n,idx,x,beta,y)
+!!$    use psi_serial_mod
+!!$    integer(psb_ipk_) :: i, n
+!!$    class(psb_i_base_multivect_type) :: idx
+!!$    real(psb_dpk_) :: beta, x(:)
+!!$    class(psb_d_multivect_cuda) :: y
+!!$
+!!$    select type(ii=> idx)
+!!$    class is (psb_i_vect_cuda)
+!!$      if (ii%is_host()) call ii%sync()
+!!$      if (y%is_host()) call y%sync()
+!!$
+!!$      if (allocated(y%buffer)) then
+!!$        if (size(y%buffer) < n) then
+!!$          call inner_unregister(y%buffer)
+!!$          deallocate(y%buffer, stat=info)
+!!$        end if
+!!$      end if
+!!$
+!!$      if (.not.allocated(y%buffer)) then
+!!$        allocate(y%buffer(n),stat=info)
+!!$        if (info == 0) info = inner_register(y%buffer,y%dt_buf)
+!!$      endif
+!!$      y%buffer(1:n) = x(1:n)
+!!$      info = iscatMultiVecDeviceDouble(y%deviceVect,&
+!!$           & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta)
+!!$
+!!$      call y%set_dev()
+!!$      call psb_cudaSync()
+!!$
+!!$    class default
+!!$      call y%sct(n,ii%v(i:),x,beta)
+!!$    end select
+!!$
+!!$  end subroutine d_cuda_multi_sctb_x
+
+
+  !
+  ! Build x as a copy of the rank-2 array "this".
+  ! The host storage x%v is (re)allocated to the shape of "this", filled,
+  ! flagged "host" and then synchronised.
+  ! On allocation failure an error is pushed on the error stack and the
+  ! routine returns at once: x%v must not be touched in that case.
+  !
+  subroutine d_cuda_multi_bld_x(x,this)
+    use psb_base_mod
+    implicit none
+    real(psb_dpk_), intent(in) :: this(:,:)
+    class(psb_d_multivect_cuda), intent(inout) :: x
+    integer(psb_ipk_) :: info, m, n
+
+    m=size(this,1)
+    n=size(this,2)
+    x%m_nrows = m
+    x%m_ncols = n
+    call psb_realloc(m,n,x%v,info)
+    if (info /= 0) then
+      info=psb_err_alloc_request_
+      call psb_errpush(info,'d_cuda_multi_bld_x',&
+           & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/))
+      ! x%v could not be allocated: copying into it would be invalid.
+      return
+    end if
+    x%v(1:m,1:n) = this(1:m,1:n)
+    call x%set_host()
+    call x%sync()
+
+  end subroutine d_cuda_multi_bld_x
+
+  !
+  ! Build x as an m-by-n multivector through the "all" method; a failure
+  ! is recorded on the error stack.
+  !
+  subroutine d_cuda_multi_bld_n(x,m,n)
+    integer(psb_ipk_), intent(in) :: m,n
+    class(psb_d_multivect_cuda), intent(inout) :: x
+    integer(psb_ipk_) :: info
+
+    call x%all(m,n,info)
+    if (info /= 0) then
+      call psb_errpush(info,'d_cuda_multi_bld_n',i_err=(/m,n,n,n,n/))
+    end if
+
+  end subroutine d_cuda_multi_bld_n
+
+
+  ! Flag the host copy as the newest one.
+  subroutine d_cuda_multi_set_host(x)
+    implicit none
+    class(psb_d_multivect_cuda), intent(inout) :: x
+
+    x%state = is_host
+  end subroutine d_cuda_multi_set_host
+
+  ! Flag the device copy as the newest one.
+  subroutine d_cuda_multi_set_dev(x)
+    implicit none
+    class(psb_d_multivect_cuda), intent(inout) :: x
+
+    x%state = is_dev
+  end subroutine d_cuda_multi_set_dev
+
+  ! Flag host and device copies as identical.
+  subroutine d_cuda_multi_set_sync(x)
+    implicit none
+    class(psb_d_multivect_cuda), intent(inout) :: x
+
+    x%state = is_sync
+  end subroutine d_cuda_multi_set_sync
+
+  ! .true. when the state is is_dev.
+  function d_cuda_multi_is_dev(x) result(res)
+    implicit none
+    class(psb_d_multivect_cuda), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_dev)
+  end function d_cuda_multi_is_dev
+
+  ! .true. when the state is is_host.
+  function d_cuda_multi_is_host(x) result(res)
+    implicit none
+    class(psb_d_multivect_cuda), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_host)
+  end function d_cuda_multi_is_host
+
+  ! .true. when the state is is_sync.
+  function d_cuda_multi_is_sync(x) result(res)
+    implicit none
+    class(psb_d_multivect_cuda), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_sync)
+  end function d_cuda_multi_is_sync
+
+
+  ! Row count stored in m_nrows.
+  function d_cuda_multi_get_nrows(x) result(res)
+    implicit none
+    class(psb_d_multivect_cuda), intent(in) :: x
+    integer(psb_ipk_) :: res
+
+    res = x%m_nrows
+
+  end function d_cuda_multi_get_nrows
+
+  ! Column count stored in m_ncols.
+  function d_cuda_multi_get_ncols(x) result(res)
+    implicit none
+    class(psb_d_multivect_cuda), intent(in) :: x
+    integer(psb_ipk_) :: res
+
+    res = x%m_ncols
+
+  end function d_cuda_multi_get_ncols
+
+  ! Format tag of this storage class: 'dGPU' (blank padded to length 5).
+  function d_cuda_multi_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'dGPU'
+  end function d_cuda_multi_get_fmt
+
+!!$  function 
d_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_cuda), intent(inout) :: x +!!$ class(psb_d_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_d_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_d_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_d_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'d_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function d_cuda_multi_dot_v +!!$ +!!$ function d_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_cuda), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function d_cuda_multi_dot_a +!!$ +!!$ subroutine d_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_d_base_multivect_type), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: y +!!$ real(psb_dpk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_d_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call 
psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_d_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_d_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_d_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine d_cuda_multi_mlt_v +!!$ +!!$ subroutine d_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_d_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_d_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine d_cuda_multi_mlt_a +!!$ +!!$ subroutine d_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha,beta +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_d_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine d_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine d_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ 
real(psb_dpk_), intent(in) :: alpha,beta +!!$ class(psb_d_base_multivect_type), intent(inout) :: x +!!$ class(psb_d_base_multivect_type), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_d_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_d_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. 
+!!$        info = axybzMultiVecDevice(n,alpha,xx%deviceVect,&
+!!$             & yy%deviceVect,beta,z%deviceVect)
+!!$        call z%set_dev()
+!!$      class default
+!!$        call xx%sync()
+!!$        call yy%sync()
+!!$        call z%psb_d_base_multivect_type%mlt(alpha,xx,yy,beta,info)
+!!$        call z%set_host()
+!!$      end select
+!!$
+!!$    class default
+!!$      call x%sync()
+!!$      call y%sync()
+!!$      call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info)
+!!$      call z%set_host()
+!!$    end select
+!!$  end subroutine d_cuda_multi_mlt_v_2
+
+
+  !
+  ! Set every entry of x to the scalar val.
+  ! Done on the host through the base-class method: x is brought back
+  ! from the device first when the device holds the newest data, and is
+  ! flagged "host" afterwards.
+  !
+  subroutine d_cuda_multi_set_scal(x,val)
+    implicit none
+    class(psb_d_multivect_cuda), intent(inout) :: x
+    real(psb_dpk_), intent(in) :: val
+
+    if (x%is_dev()) call x%sync()
+    call x%psb_d_base_multivect_type%set_scal(val)
+    call x%set_host()
+  end subroutine d_cuda_multi_set_scal
+
+  !
+  ! Set x from the rank-2 array val.
+  ! Same host-side strategy as d_cuda_multi_set_scal.
+  !
+  subroutine d_cuda_multi_set_vect(x,val)
+    implicit none
+    class(psb_d_multivect_cuda), intent(inout) :: x
+    real(psb_dpk_), intent(in) :: val(:,:)
+
+    if (x%is_dev()) call x%sync()
+    call x%psb_d_base_multivect_type%set_vect(val)
+    call x%set_host()
+
+  end subroutine d_cuda_multi_set_vect
+
+
+
+!!$  subroutine d_cuda_multi_scal(alpha, x)
+!!$    implicit none
+!!$    class(psb_d_multivect_cuda), intent(inout) :: x
+!!$    real(psb_dpk_), intent (in) :: alpha
+!!$
+!!$    if (x%is_dev()) call x%sync()
+!!$    call x%psb_d_base_multivect_type%scal(alpha)
+!!$    call x%set_host()
+!!$  end subroutine d_cuda_multi_scal
+!!$
+!!$
+!!$  function d_cuda_multi_nrm2(n,x) result(res)
+!!$    implicit none
+!!$    class(psb_d_multivect_cuda), intent(inout) :: x
+!!$    integer(psb_ipk_), intent(in) :: n
+!!$    real(psb_dpk_) :: res
+!!$    integer(psb_ipk_) :: info
+!!$    ! WARNING: this should be changed.
+!!$    if (x%is_host()) call x%sync()
+!!$    info = nrm2MultiVecDevice(res,n,x%deviceVect)
+!!$
+!!$  end function d_cuda_multi_nrm2
+!!$
+!!$  function d_cuda_multi_amax(n,x) result(res)
+!!$    implicit none
+!!$    class(psb_d_multivect_cuda), intent(inout) :: x
+!!$    integer(psb_ipk_), intent(in) :: n
+!!$    real(psb_dpk_) :: res
+!!$
+!!$    if (x%is_dev()) call x%sync()
+!!$    res = maxval(abs(x%v(1:n)))
+!!$
+!!$  end function d_cuda_multi_amax
+!!$
+!!$  function d_cuda_multi_asum(n,x) result(res)
+!!$    implicit none
+!!$    class(psb_d_multivect_cuda), intent(inout) :: x
+!!$    integer(psb_ipk_), intent(in) :: n
+!!$    real(psb_dpk_) :: res
+!!$
+!!$    if (x%is_dev()) call x%sync()
+!!$    res = sum(abs(x%v(1:n)))
+!!$
+!!$  end function d_cuda_multi_asum
+
+  !
+  ! Allocate x as an m-by-n multivector.
+  !   m, n  requested extents
+  !   x     intent(out): the object is reset on entry
+  !   info  0 on success, psb_err_alloc_request_ otherwise (also pushed
+  !         on the error stack)
+  ! The host array is (re)allocated with zero padding, the extents are
+  ! recorded, and on success the vector is flagged "host" and
+  ! sync_space is invoked.
+  !
+  subroutine d_cuda_multi_all(m,n, x, info)
+    use psi_serial_mod
+    use psb_realloc_mod
+    implicit none
+    integer(psb_ipk_), intent(in) :: m,n
+    class(psb_d_multivect_cuda), intent(out) :: x
+    integer(psb_ipk_), intent(out) :: info
+
+    call psb_realloc(m,n,x%v,info,pad=dzero)
+    ! Extents are stored even when the allocation above failed.
+    x%m_nrows = m
+    x%m_ncols = n
+    if (info == 0) call x%set_host()
+    if (info == 0) call x%sync_space(info)
+    ! Any failure (host allocation or sync_space) is reported with the
+    ! same allocation error code.
+    if (info /= 0) then
+      info=psb_err_alloc_request_
+      call psb_errpush(info,'d_cuda_multi_all',&
+           & i_err=(/m,n,n,n,n/))
+    end if
+  end subroutine d_cuda_multi_all
+
+  !
+  ! Zero the host array (when allocated) and flag the host copy as the
+  ! newest one; the device copy is not touched here.
+  !
+  subroutine d_cuda_multi_zero(x)
+    use psi_serial_mod
+    implicit none
+    class(psb_d_multivect_cuda), intent(inout) :: x
+
+    if (allocated(x%v)) x%v=dzero
+    call x%set_host()
+  end subroutine d_cuda_multi_zero
+
+  !
+  ! Assemble x with final extents m-by-n.
+  ! Host state: the base-class asb is used and, on success, sync_space
+  ! is called.  Device state: the device pitch and count are queried
+  ! first.
+  ! NOTE(review): the remainder of this routine is not visible in this
+  ! part of the patch.
+  !
+  subroutine d_cuda_multi_asb(m,n, x, info)
+    use psi_serial_mod
+    use psb_realloc_mod
+    implicit none
+    integer(psb_ipk_), intent(in) :: m,n
+    class(psb_d_multivect_cuda), intent(inout) :: x
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: nd, nc
+
+
+    x%m_nrows = m
+    x%m_ncols = n
+    if (x%is_host()) then
+      call x%psb_d_base_multivect_type%asb(m,n,info)
+      if (info == psb_success_) call x%sync_space(info)
+    else if (x%is_dev()) then
+      nd = getMultiVecDevicePitch(x%deviceVect)
+ nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc i_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_i_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_i_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => i_cuda_csrg_free + procedure, pass(a) :: mold => psb_i_cuda_csrg_mold + procedure, pass(a) :: is_host => i_cuda_csrg_is_host + procedure, pass(a) :: is_dev => i_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => i_cuda_csrg_is_sync + procedure, pass(a) :: set_host => i_cuda_csrg_set_host + procedure, pass(a) :: set_dev => i_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => i_cuda_csrg_set_sync + procedure, pass(a) :: sync => i_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_i_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_i_cuda_csrg_from_gpu + final :: i_cuda_csrg_finalize + end type psb_i_cuda_csrg_sparse_mat + + private :: i_cuda_csrg_get_nzeros, i_cuda_csrg_free, i_cuda_csrg_get_fmt, & + & i_cuda_csrg_get_size, i_cuda_csrg_sizeof, i_cuda_csrg_get_nz_row + + + interface + subroutine psb_i_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + 
class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_i_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_i_cuda_csrg_reallocate_nz(nz,a) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_i_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_i_cuda_csrg_mold(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_mold + end interface + + interface + subroutine psb_i_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + 
integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_i_cuda_csrg_from_gpu(a,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + 
integer(psb_ipk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_i_cuda_csrg_csmv
+  end interface
+  ! Rank-2 counterpart of csmv; implemented outside this module.
+  interface
+    subroutine psb_i_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:)
+      integer(psb_ipk_), intent(inout) :: y(:,:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_i_cuda_csrg_csmm
+  end interface
+
+  ! Scaling by a vector d; implemented outside this module.
+  interface
+    subroutine psb_i_cuda_csrg_scal(d,a,info,side)
+      import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in) :: d(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, intent(in), optional :: side
+    end subroutine psb_i_cuda_csrg_scal
+  end interface
+
+  ! Scaling by a scalar d; implemented outside this module.
+  interface
+    subroutine psb_i_cuda_csrg_scals(d,a,info)
+      import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in) :: d
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_csrg_scals
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+
+  !
+  ! Storage estimate: a constant 8 plus psb_sizeof_ip times the number
+  ! of elements of val, irp and ja.  The matrix is synchronised first
+  ! when the device holds the newest data.  The device-side copy is not
+  ! counted (see the open question below).
+  !
+  function i_cuda_csrg_sizeof(a) result(res)
+    implicit none
+    class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    if (a%is_dev()) call a%sync()
+    res = 8
+    res = res + psb_sizeof_ip * size(a%val)
+    res = res + psb_sizeof_ip * size(a%irp)
+    res = res + psb_sizeof_ip * size(a%ja)
+    ! Should we account for the shadow data structure
+    ! on the GPU device side?
+    ! res = 2*res
+
+  end function i_cuda_csrg_sizeof
+
+  ! Format tag of this storage class: 'CSRG' (blank padded to length 5).
+  function i_cuda_csrg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'CSRG'
+  end function i_cuda_csrg_get_fmt
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+
+  ! The setters/queries below only write or compare a%devstate against
+  ! is_host / is_dev / is_sync, which are declared outside this part of
+  ! the patch.
+  subroutine i_cuda_csrg_set_host(a)
+    implicit none
+    class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine i_cuda_csrg_set_host
+
+  subroutine i_cuda_csrg_set_dev(a)
+    implicit none
+    class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_dev
+  end subroutine i_cuda_csrg_set_dev
+
+  subroutine i_cuda_csrg_set_sync(a)
+    implicit none
+    class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_sync
+  end subroutine i_cuda_csrg_set_sync
+
+  function i_cuda_csrg_is_dev(a) result(res)
+    implicit none
+    class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_dev)
+  end function i_cuda_csrg_is_dev
+
+  function i_cuda_csrg_is_host(a) result(res)
+    implicit none
+    class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_host)
+  end function i_cuda_csrg_is_host
+
+  function i_cuda_csrg_is_sync(a) result(res)
+    implicit none
+    class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_sync)
+  end function i_cuda_csrg_is_sync
+
+
+  !
+  ! Make host and device copies agree: to_gpu when the host is newer,
+  ! from_gpu when the device is newer, then mark the matrix "sync".
+  ! The dummy is intent(in); the update goes through the local pointer
+  ! tmpa so that the intent(inout) methods can be called on it.
+  ! NOTE(review): info from to_gpu/from_gpu is not inspected and
+  ! set_sync is called unconditionally.
+  !
+  subroutine i_cuda_csrg_sync(a)
+    implicit none
+    class(psb_i_cuda_csrg_sparse_mat), target, intent(in) :: a
+    class(psb_i_cuda_csrg_sparse_mat), pointer :: tmpa
+    integer(psb_ipk_) :: info
+
+    tmpa => a
+    if (tmpa%is_host()) then
+      call tmpa%to_gpu(info)
+    else if (tmpa%is_dev()) then
+      call tmpa%from_gpu(info)
+    end if
+    call tmpa%set_sync()
+    return
+
+  end subroutine i_cuda_csrg_sync
+
+  !
+  ! Release the device structure with CSRGDeviceFree, then the host
+  ! storage through the parent CSR free.  The status of the device call
+  ! is discarded.
+  ! NOTE(review): the finalizer below calls CSRGDeviceFree on the same
+  ! component -- confirm that routine tolerates an already released
+  ! handle.
+  !
+  subroutine i_cuda_csrg_free(a)
+    use cusparse_mod
+    implicit none
+    integer(psb_ipk_) :: info
+
+    class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    info = CSRGDeviceFree(a%deviceMat)
+    call a%psb_i_csr_sparse_mat%free()
+
+    return
+
+  end subroutine i_cuda_csrg_free
+
+  !
+  ! Finalizer: releases only the device structure; the status is
+  ! discarded.
+  !
+  subroutine i_cuda_csrg_finalize(a)
+    use cusparse_mod
+    implicit none
+    integer(psb_ipk_) :: info
+
+    type(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    info = CSRGDeviceFree(a%deviceMat)
+
+    return
+
+  end subroutine i_cuda_csrg_finalize
+
+end module psb_i_cuda_csrg_mat_mod
diff --git a/cuda/psb_i_cuda_diag_mat_mod.F90 b/cuda/psb_i_cuda_diag_mat_mod.F90
new file mode 100644
index 00000000..2862d2eb
--- /dev/null
+++ b/cuda/psb_i_cuda_diag_mat_mod.F90
@@ -0,0 +1,287 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+module psb_i_cuda_diag_mat_mod
+
+  use iso_c_binding
+  use psb_base_mod
+  use psb_i_dia_mat_mod
+
+  !
+  ! psb_i_cuda_diag_sparse_mat
+  !   psb_i_dia_sparse_mat plus one extra component, deviceMat: an opaque
+  !   C pointer that FREE and the finalizer hand to freeDiagDevice whenever
+  !   it is associated.
+  !
+  type, extends(psb_i_dia_sparse_mat) :: psb_i_cuda_diag_sparse_mat
+    type(c_ptr) :: deviceMat = c_null_ptr
+  contains
+    ! Bound to procedures defined after CONTAINS in this module
+    procedure, nopass  :: get_fmt     => i_cuda_diag_get_fmt
+    procedure, pass(a) :: sizeof      => i_cuda_diag_sizeof
+    procedure, pass(a) :: free        => i_cuda_diag_free
+    ! Bound to external procedures described by the interface block below
+    procedure, pass(a) :: vect_mv     => psb_i_cuda_diag_vect_mv
+    procedure, pass(a) :: csmv        => psb_i_cuda_diag_csmv
+    procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_diag_from_coo
+    procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_diag_from_coo
+    procedure, pass(a) :: mold        => psb_i_cuda_diag_mold
+    procedure, pass(a) :: to_gpu      => psb_i_cuda_diag_to_gpu
+    ! Bindings that were left commented out, hence not overridden here:
+    !   csmm, in_vect_sv, scals, scalv, reallocate_nz, allocate_mnnz,
+    !   cp_from_fmt, mv_from_fmt
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    final              :: i_cuda_diag_finalize
+  end type psb_i_cuda_diag_sparse_mat
+
+  private :: i_cuda_diag_get_nzeros, &
+       &     i_cuda_diag_free,       &
+       &     i_cuda_diag_get_fmt,    &
+       &     i_cuda_diag_get_size,   &
+       &     i_cuda_diag_sizeof,     &
+       &     i_cuda_diag_get_nz_row
+
+  !
+  ! Explicit interfaces of the procedures whose bodies are not in this
+  ! module. Dummy arguments are declared in argument-list order.
+  !
+  interface
+
+    subroutine psb_i_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_diag_sparse_mat, psb_i_base_vect_type, psb_ipk_
+      integer(psb_ipk_), intent(in)                 :: alpha
+      class(psb_i_cuda_diag_sparse_mat), intent(in) :: a
+      class(psb_i_base_vect_type), intent(inout)    :: x
+      integer(psb_ipk_), intent(in)                 :: beta
+      class(psb_i_base_vect_type), intent(inout)    :: y
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)               :: trans
+    end subroutine psb_i_cuda_diag_vect_mv
+
+    subroutine psb_i_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_diag_sparse_mat, psb_i_base_vect_type, psb_ipk_
+      integer(psb_ipk_), intent(in)                 :: alpha
+      class(psb_i_cuda_diag_sparse_mat), intent(in) :: a
+      class(psb_i_base_vect_type), intent(inout)    :: x
+      integer(psb_ipk_), intent(in)                 :: beta
+      class(psb_i_base_vect_type), intent(inout)    :: y
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)               :: trans
+    end subroutine psb_i_cuda_diag_inner_vect_sv
+
+    subroutine psb_i_cuda_diag_reallocate_nz(nz,a)
+      import :: psb_i_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                    :: nz
+      class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+    end subroutine psb_i_cuda_diag_reallocate_nz
+
+    subroutine psb_i_cuda_diag_allocate_mnnz(m,n,a,nz)
+      import :: psb_i_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                    :: m, n
+      class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional          :: nz
+    end subroutine psb_i_cuda_diag_allocate_mnnz
+
+    subroutine psb_i_cuda_diag_mold(a,b,info)
+      import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_diag_sparse_mat), intent(in)            :: a
+      class(psb_i_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_i_cuda_diag_mold
+
+    subroutine psb_i_cuda_diag_to_gpu(a,info, nzrm)
+      import :: psb_i_cuda_diag_sparse_mat, psb_ipk_
+      class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                   :: info
+      integer(psb_ipk_), intent(in), optional          :: nzrm
+    end subroutine psb_i_cuda_diag_to_gpu
+
+    subroutine psb_i_cuda_cp_diag_from_coo(a,b,info)
+      import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(in)          :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_cp_diag_from_coo
+
+    subroutine psb_i_cuda_cp_diag_from_fmt(a,b,info)
+      import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(in)         :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_cp_diag_from_fmt
+
+    subroutine psb_i_cuda_mv_diag_from_coo(a,b,info)
+      import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(inout)       :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_mv_diag_from_coo
+
+    subroutine psb_i_cuda_mv_diag_from_fmt(a,b,info)
+      import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(inout)      :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_mv_diag_from_fmt
+
+    subroutine psb_i_cuda_diag_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                 :: alpha
+      class(psb_i_cuda_diag_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                 :: x(:)
+      integer(psb_ipk_), intent(in)                 :: beta
+      integer(psb_ipk_), intent(inout)              :: y(:)
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)               :: trans
+    end subroutine psb_i_cuda_diag_csmv
+
+    subroutine psb_i_cuda_diag_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                 :: alpha
+      class(psb_i_cuda_diag_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                 :: x(:,:)
+      integer(psb_ipk_), intent(in)                 :: beta
+      integer(psb_ipk_), intent(inout)              :: y(:,:)
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)               :: trans
+    end subroutine psb_i_cuda_diag_csmm
+
+    subroutine psb_i_cuda_diag_scal(d,a,info, side)
+      import :: psb_i_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                    :: d(:)
+      class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                   :: info
+      character, intent(in), optional                  :: side
+    end subroutine psb_i_cuda_diag_scal
+
+    subroutine psb_i_cuda_diag_scals(d,a,info)
+      import :: psb_i_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                    :: d
+      class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_diag_scals
+
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  ! Getters
+  !
+  ! == ===================================
+
+  !
+  ! Size estimate: 8, plus psb_sizeof_ip for every entry of a%data and
+  ! of a%offset. Nothing is added for whatever deviceMat refers to
+  ! (the original left "res = 2*res" commented out as an open question).
+  !
+  function i_cuda_diag_sizeof(a) result(res)
+    implicit none
+    class(psb_i_cuda_diag_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    res = 8
+    res = psb_sizeof_ip * size(a%offset) + res
+    res = psb_sizeof_ip * size(a%data)   + res
+
+  end function i_cuda_diag_sizeof
+
+  !
+  ! Format tag of this storage class: the literal 'DIAG' in a
+  ! character(len=5) result.
+  !
+  function i_cuda_diag_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+
+    res = 'DIAG'
+
+  end function i_cuda_diag_get_fmt
+
+
+  ! == ===================================
+  !
+  ! Data management
+  !
+  ! == ===================================
+
+  !
+  ! Hand deviceMat to freeDiagDevice if it is associated, reset it to
+  ! c_null_ptr, then run the parent type's FREE.
+  !
+  subroutine i_cuda_diag_free(a)
+    use diagdev_mod
+    implicit none
+    class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) then
+      call freeDiagDevice(a%deviceMat)
+    end if
+    a%deviceMat = c_null_ptr
+    call a%psb_i_dia_sparse_mat%free()
+
+  end subroutine i_cuda_diag_free
+
+  !
+  ! Finalizer: same device-handle handling as FREE, without the call to
+  ! the parent FREE.
+  !
+  subroutine i_cuda_diag_finalize(a)
+    use diagdev_mod
+    implicit none
+    type(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) then
+      call freeDiagDevice(a%deviceMat)
+    end if
+    a%deviceMat = c_null_ptr
+
+  end subroutine i_cuda_diag_finalize
+
+end module psb_i_cuda_diag_mat_mod
diff --git a/cuda/psb_i_cuda_dnsg_mat_mod.F90 b/cuda/psb_i_cuda_dnsg_mat_mod.F90
new file mode 100644
index 00000000..f3079d9e
--- /dev/null
+++ b/cuda/psb_i_cuda_dnsg_mat_mod.F90
@@ -0,0 +1,273 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+module psb_i_cuda_dnsg_mat_mod
+
+  use iso_c_binding
+  use psb_i_mat_mod
+  use psb_i_dns_mat_mod
+  use dnsdev_mod
+
+  !
+  ! psb_i_cuda_dnsg_sparse_mat
+  !   psb_i_dns_sparse_mat plus one extra component, deviceMat: an opaque
+  !   C pointer that FREE and the finalizer hand to freeDnsDevice whenever
+  !   it is associated.
+  !
+  type, extends(psb_i_dns_sparse_mat) :: psb_i_cuda_dnsg_sparse_mat
+    type(c_ptr) :: deviceMat = c_null_ptr
+  contains
+    ! Bound to procedures defined after CONTAINS in this module
+    procedure, nopass  :: get_fmt     => i_cuda_dnsg_get_fmt
+    procedure, pass(a) :: free        => i_cuda_dnsg_free
+    ! Bound to external procedures described by the interface block below
+    procedure, pass(a) :: vect_mv     => psb_i_cuda_dnsg_vect_mv
+    procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_dnsg_from_coo
+    procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_dnsg_from_fmt
+    procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_dnsg_from_coo
+    procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_dnsg_from_fmt
+    procedure, pass(a) :: mold        => psb_i_cuda_dnsg_mold
+    procedure, pass(a) :: to_gpu      => psb_i_cuda_dnsg_to_gpu
+    ! Bindings (and matching interfaces) that were left commented out,
+    ! hence not overridden here:
+    !   sizeof, csmm, csmv, in_vect_sv, scals, scalv,
+    !   reallocate_nz, allocate_mnnz
+    ! NOTE(review): the original carried the remark "we *do* need the TO
+    ! methods, because of the need to invoke SYNC", but no TO method is
+    ! bound in this type; confirm whether that is still intended.
+    final              :: i_cuda_dnsg_finalize
+  end type psb_i_cuda_dnsg_sparse_mat
+
+  private :: i_cuda_dnsg_get_nzeros, &
+       &     i_cuda_dnsg_free,       &
+       &     i_cuda_dnsg_get_fmt,    &
+       &     i_cuda_dnsg_get_size,   &
+       &     i_cuda_dnsg_get_nz_row
+
+  !
+  ! Explicit interfaces of the procedures whose bodies are not in this
+  ! module. Dummy arguments are declared in argument-list order.
+  !
+  interface
+
+    subroutine psb_i_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_vect_type, psb_ipk_
+      integer(psb_ipk_), intent(in)                 :: alpha
+      class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a
+      class(psb_i_base_vect_type), intent(inout)    :: x
+      integer(psb_ipk_), intent(in)                 :: beta
+      class(psb_i_base_vect_type), intent(inout)    :: y
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)               :: trans
+    end subroutine psb_i_cuda_dnsg_vect_mv
+
+    subroutine psb_i_cuda_dnsg_mold(a,b,info)
+      import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_dnsg_sparse_mat), intent(in)            :: a
+      class(psb_i_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_i_cuda_dnsg_mold
+
+    subroutine psb_i_cuda_dnsg_to_gpu(a,info)
+      import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_dnsg_to_gpu
+
+    subroutine psb_i_cuda_cp_dnsg_from_coo(a,b,info)
+      import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(in)          :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_cp_dnsg_from_coo
+
+    subroutine psb_i_cuda_cp_dnsg_from_fmt(a,b,info)
+      import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(in)         :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_cp_dnsg_from_fmt
+
+    subroutine psb_i_cuda_mv_dnsg_from_coo(a,b,info)
+      import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(inout)       :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_mv_dnsg_from_coo
+
+    subroutine psb_i_cuda_mv_dnsg_from_fmt(a,b,info)
+      import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(inout)      :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_mv_dnsg_from_fmt
+
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  ! Getters
+  !
+  ! == ===================================
+
+  !
+  ! Format tag of this storage class: the literal 'DNSG' in a
+  ! character(len=5) result.
+  !
+  function i_cuda_dnsg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+
+    res = 'DNSG'
+
+  end function i_cuda_dnsg_get_fmt
+
+
+  ! == ===================================
+  !
+  ! Data management
+  !
+  ! == ===================================
+
+  !
+  ! Hand deviceMat to freeDnsDevice if it is associated, reset it to
+  ! c_null_ptr, then run the parent type's FREE.
+  !
+  subroutine i_cuda_dnsg_free(a)
+    use dnsdev_mod
+    implicit none
+    class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) then
+      call freeDnsDevice(a%deviceMat)
+    end if
+    a%deviceMat = c_null_ptr
+    call a%psb_i_dns_sparse_mat%free()
+
+  end subroutine i_cuda_dnsg_free
+
+  !
+  ! Finalizer: same device-handle handling as FREE, without the call to
+  ! the parent FREE.
+  !
+  subroutine i_cuda_dnsg_finalize(a)
+    use dnsdev_mod
+    implicit none
+    type(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) then
+      call freeDnsDevice(a%deviceMat)
+    end if
+    a%deviceMat = c_null_ptr
+
+  end subroutine i_cuda_dnsg_finalize
+
+end module psb_i_cuda_dnsg_mat_mod
diff --git a/cuda/psb_i_cuda_elg_mat_mod.F90 b/cuda/psb_i_cuda_elg_mat_mod.F90
new file mode 100644
index 00000000..bd975d51
--- /dev/null
+++ b/cuda/psb_i_cuda_elg_mat_mod.F90
@@ -0,0 +1,454 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+module psb_i_cuda_elg_mat_mod
+
+  use iso_c_binding
+  use psb_i_mat_mod
+  use psb_i_ell_mat_mod
+  use psb_i_cuda_vect_mod
+
+  !
+  ! Values stored in the devstate component; see i_cuda_elg_sync for how
+  ! each one is acted upon.
+  !
+  integer(psb_ipk_), parameter, private :: is_host = -1
+  integer(psb_ipk_), parameter, private :: is_sync = 0
+  integer(psb_ipk_), parameter, private :: is_dev  = 1
+
+  !
+  ! psb_i_cuda_elg_sparse_mat
+  !   ITPACK/ELL format, extended: psb_i_ell_sparse_mat plus
+  !     deviceMat  opaque C pointer, released through freeEllDevice
+  !     devstate   one of is_host / is_sync / is_dev, initially is_host
+  !
+  type, extends(psb_i_ell_sparse_mat) :: psb_i_cuda_elg_sparse_mat
+    type(c_ptr)       :: deviceMat = c_null_ptr
+    integer(psb_ipk_) :: devstate  = is_host
+
+  contains
+    procedure, nopass  :: get_fmt       => i_cuda_elg_get_fmt
+    procedure, pass(a) :: sizeof        => i_cuda_elg_sizeof
+    procedure, pass(a) :: vect_mv       => psb_i_cuda_elg_vect_mv
+    procedure, pass(a) :: csmm          => psb_i_cuda_elg_csmm
+    procedure, pass(a) :: csmv          => psb_i_cuda_elg_csmv
+    procedure, pass(a) :: in_vect_sv    => psb_i_cuda_elg_inner_vect_sv
+    procedure, pass(a) :: scals         => psb_i_cuda_elg_scals
+    procedure, pass(a) :: scalv         => psb_i_cuda_elg_scal
+    procedure, pass(a) :: reallocate_nz => psb_i_cuda_elg_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_i_cuda_elg_allocate_mnnz
+    procedure, pass(a) :: reinit        => i_cuda_elg_reinit
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo   => psb_i_cuda_cp_elg_from_coo
+    procedure, pass(a) :: cp_from_fmt   => psb_i_cuda_cp_elg_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_i_cuda_mv_elg_from_coo
+    procedure, pass(a) :: mv_from_fmt   => psb_i_cuda_mv_elg_from_fmt
+    procedure, pass(a) :: free          => i_cuda_elg_free
+    procedure, pass(a) :: mold          => psb_i_cuda_elg_mold
+    procedure, pass(a) :: csput_a       => psb_i_cuda_elg_csput_a
+    procedure, pass(a) :: csput_v       => psb_i_cuda_elg_csput_v
+    procedure, pass(a) :: is_host       => i_cuda_elg_is_host
+    procedure, pass(a) :: is_dev        => i_cuda_elg_is_dev
+    procedure, pass(a) :: is_sync       => i_cuda_elg_is_sync
+    procedure, pass(a) :: set_host      => i_cuda_elg_set_host
+    procedure, pass(a) :: set_dev       => i_cuda_elg_set_dev
+    procedure, pass(a) :: set_sync      => i_cuda_elg_set_sync
+    procedure, pass(a) :: sync          => i_cuda_elg_sync
+    procedure, pass(a) :: from_gpu      => psb_i_cuda_elg_from_gpu
+    procedure, pass(a) :: to_gpu        => psb_i_cuda_elg_to_gpu
+    procedure, pass(a) :: asb           => psb_i_cuda_elg_asb
+    final              :: i_cuda_elg_finalize
+  end type psb_i_cuda_elg_sparse_mat
+
+  ! NOTE(review): i_cuda_elg_get_nzeros, i_cuda_elg_get_size and
+  ! i_cuda_elg_get_nz_row have no definition in this module; the list is
+  ! kept unchanged, confirm whether those three names can be dropped.
+  private :: i_cuda_elg_get_nzeros, i_cuda_elg_free, i_cuda_elg_get_fmt, &
+       & i_cuda_elg_get_size, i_cuda_elg_sizeof, i_cuda_elg_get_nz_row, i_cuda_elg_sync
+
+
+  !
+  ! Explicit interfaces of the procedures whose bodies are not in this
+  ! module. Each IMPORT lists only the names its interface body uses,
+  ! once each.
+  !
+  interface
+
+    subroutine psb_i_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type
+      class(psb_i_cuda_elg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: alpha, beta
+      class(psb_i_base_vect_type), intent(inout) :: x
+      class(psb_i_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_i_cuda_elg_vect_mv
+
+    subroutine psb_i_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type
+      class(psb_i_cuda_elg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: alpha, beta
+      class(psb_i_base_vect_type), intent(inout) :: x, y
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_i_cuda_elg_inner_vect_sv
+
+    subroutine psb_i_cuda_elg_reallocate_nz(nz,a)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+    end subroutine psb_i_cuda_elg_reallocate_nz
+
+    subroutine psb_i_cuda_elg_allocate_mnnz(m,n,a,nz)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_i_cuda_elg_allocate_mnnz
+
+    subroutine psb_i_cuda_elg_mold(a,b,info)
+      import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(in) :: a
+      class(psb_i_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_elg_mold
+
+    subroutine psb_i_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in) :: val(:)
+      integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),&
+           & imin,imax,jmin,jmax
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_elg_csput_a
+
+    subroutine psb_i_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_i_base_vect_type), intent(inout) :: val
+      class(psb_i_base_vect_type), intent(inout) :: ia, ja
+      integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_elg_csput_v
+
+    subroutine psb_i_cuda_elg_from_gpu(a,info)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_elg_from_gpu
+
+    subroutine psb_i_cuda_elg_to_gpu(a,info, nzrm)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_ipk_), intent(in), optional :: nzrm
+    end subroutine psb_i_cuda_elg_to_gpu
+
+    subroutine psb_i_cuda_cp_elg_from_coo(a,b,info)
+      import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_cp_elg_from_coo
+
+    subroutine psb_i_cuda_cp_elg_from_fmt(a,b,info)
+      import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_cp_elg_from_fmt
+
+    subroutine psb_i_cuda_mv_elg_from_coo(a,b,info)
+      import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_mv_elg_from_coo
+
+    subroutine psb_i_cuda_mv_elg_from_fmt(a,b,info)
+      import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_mv_elg_from_fmt
+
+    subroutine psb_i_cuda_elg_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: alpha, beta, x(:)
+      integer(psb_ipk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_i_cuda_elg_csmv
+
+    subroutine psb_i_cuda_elg_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:)
+      integer(psb_ipk_), intent(inout) :: y(:,:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_i_cuda_elg_csmm
+
+    subroutine psb_i_cuda_elg_scal(d,a,info, side)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in) :: d(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, intent(in), optional :: side
+    end subroutine psb_i_cuda_elg_scal
+
+    subroutine psb_i_cuda_elg_scals(d,a,info)
+      import :: psb_i_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in) :: d
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_i_cuda_elg_scals
+
+    subroutine psb_i_cuda_elg_asb(a)
+      import :: psb_i_cuda_elg_sparse_mat
+      class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+    end subroutine psb_i_cuda_elg_asb
+
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  ! Getters
+  !
+  ! == ===================================
+
+  !
+  ! Size estimate: 8, plus psb_sizeof_ip for every entry of a%val,
+  ! a%irn, a%idiag and a%ja. When the state is is_dev the matrix is
+  ! synchronised first (see i_cuda_elg_sync). Nothing is added for
+  ! whatever deviceMat refers to (the original left "res = 2*res"
+  ! commented out as an open question).
+  !
+  function i_cuda_elg_sizeof(a) result(res)
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+    res = 8
+    res = res + psb_sizeof_ip * size(a%val)
+    res = res + psb_sizeof_ip * size(a%irn)
+    res = res + psb_sizeof_ip * size(a%idiag)
+    res = res + psb_sizeof_ip * size(a%ja)
+
+  end function i_cuda_elg_sizeof
+
+  !
+  ! Format tag of this storage class: the literal 'ELG' in a
+  ! character(len=5) result.
+  !
+  function i_cuda_elg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'ELG'
+  end function i_cuda_elg_get_fmt
+
+
+  ! == ===================================
+  !
+  ! Data management
+  !
+  ! == ===================================
+
+  !
+  ! Move an assembled matrix into the update state.
+  !
+  !   a      matrix to reinitialise
+  !   clear  optional, default .true.: when true the coefficients are
+  !          zeroed (zeroEllDevice when the state is is_dev or is_sync,
+  !          a%val = izero when it is is_host); when false they are kept.
+  !
+  ! Build or update state: nothing is done.
+  ! Any other non-assembled state: psb_err_invalid_mat_state_ is pushed
+  ! and control goes to psb_error_handler.
+  !
+  ! Changes with respect to the previous version:
+  !  * the "nothing to do" branch used to RETURN directly, skipping the
+  !    psb_erractionrestore that pairs with psb_erractionsave; it now
+  !    falls through to the common exit;
+  !  * the host branch used to zero a%val even with clear=.false.; it
+  !    now honours clear exactly like the device branch.
+  !
+  subroutine i_cuda_elg_reinit(a,clear)
+    use elldev_mod
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+    logical, intent(in), optional :: clear
+    integer(psb_ipk_) :: info, err_act
+    character(len=20) :: name='reinit'
+    logical :: clear_
+
+    call psb_erractionsave(err_act)
+    info = psb_success_
+
+    if (present(clear)) then
+      clear_ = clear
+    else
+      clear_ = .true.
+    end if
+
+    if (a%is_bld() .or. a%is_upd()) then
+      ! Nothing to do; do NOT return from here, the saved error action
+      ! must still be restored below.
+      continue
+    else if (a%is_asb()) then
+      if (a%is_dev().or.a%is_sync()) then
+        if (clear_) call zeroEllDevice(a%deviceMat)
+        call a%set_dev()
+      else if (a%is_host()) then
+        if (clear_) a%val(:,:) = izero
+      end if
+      call a%set_upd()
+    else
+      info = psb_err_invalid_mat_state_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine i_cuda_elg_reinit
+
+  !
+  ! Hand deviceMat to freeEllDevice if it is associated, reset it to
+  ! c_null_ptr, run the parent type's FREE and mark the state is_sync.
+  !
+  subroutine i_cuda_elg_free(a)
+    use elldev_mod
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeEllDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    call a%psb_i_ell_sparse_mat%free()
+    call a%set_sync()
+
+    return
+
+  end subroutine i_cuda_elg_free
+
+  !
+  ! Bring the two sides in line: state is_host calls to_gpu, state
+  ! is_dev calls from_gpu; in every case the state ends up is_sync.
+  !
+  ! NOTE(review): a is intent(in) but is modified through the pointer
+  ! tmpa; this is what lets sizeof (whose a is intent(in)) call sync.
+  ! NOTE(review): the info returned by to_gpu / from_gpu is not checked
+  ! before the state is set to is_sync; confirm this is intended.
+  !
+  subroutine i_cuda_elg_sync(a)
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), target, intent(in) :: a
+    class(psb_i_cuda_elg_sparse_mat), pointer :: tmpa
+    integer(psb_ipk_) :: info
+
+    tmpa => a
+    if (tmpa%is_host()) then
+      call tmpa%to_gpu(info)
+    else if (tmpa%is_dev()) then
+      call tmpa%from_gpu(info)
+    end if
+    call tmpa%set_sync()
+    return
+
+  end subroutine i_cuda_elg_sync
+
+  ! Set devstate to is_host.
+  subroutine i_cuda_elg_set_host(a)
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine i_cuda_elg_set_host
+
+  ! Set devstate to is_dev.
+  subroutine i_cuda_elg_set_dev(a)
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_dev
+  end subroutine i_cuda_elg_set_dev
+
+  ! Set devstate to is_sync.
+  subroutine i_cuda_elg_set_sync(a)
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_sync
+  end subroutine i_cuda_elg_set_sync
+
+  ! .true. when devstate equals is_dev.
+  function i_cuda_elg_is_dev(a) result(res)
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_dev)
+  end function i_cuda_elg_is_dev
+
+  ! .true. when devstate equals is_host.
+  function i_cuda_elg_is_host(a) result(res)
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_host)
+  end function i_cuda_elg_is_host
+
+  ! .true. when devstate equals is_sync.
+  function i_cuda_elg_is_sync(a) result(res)
+    implicit none
+    class(psb_i_cuda_elg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_sync)
+  end function i_cuda_elg_is_sync
+
+  !
+  ! Finalizer: hand deviceMat to freeEllDevice if it is associated and
+  ! reset it to c_null_ptr. The parent FREE is not called and devstate
+  ! is left untouched.
+  !
+  subroutine i_cuda_elg_finalize(a)
+    use elldev_mod
+    implicit none
+    type(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeEllDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    return
+
+  end subroutine i_cuda_elg_finalize
+
+end module psb_i_cuda_elg_mat_mod
diff --git a/cuda/psb_i_cuda_hdiag_mat_mod.F90 b/cuda/psb_i_cuda_hdiag_mat_mod.F90
new file mode 100644
index 00000000..22277f3a
--- /dev/null
+++ b/cuda/psb_i_cuda_hdiag_mat_mod.F90
@@ -0,0 +1,268 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! psb_i_cuda_hdiag_mat_mod: integer-valued 'HDIAG' sparse matrix type that
+! extends psb_i_hdia_sparse_mat with a handle to a device-side matrix.
+module psb_i_cuda_hdiag_mat_mod
+
+  use iso_c_binding
+  use psb_base_mod
+  use psb_i_hdia_mat_mod
+
+  type, extends(psb_i_hdia_sparse_mat) :: psb_i_cuda_hdiag_sparse_mat
+    ! Handle to the device-side matrix; c_null_ptr while none is held.
+    type(c_ptr) :: deviceMat = c_null_ptr
+
+  contains
+    procedure, nopass  :: get_fmt       => i_cuda_hdiag_get_fmt
+    ! procedure, pass(a) :: sizeof        => i_cuda_hdiag_sizeof
+    procedure, pass(a) :: vect_mv       => psb_i_cuda_hdiag_vect_mv
+    ! procedure, pass(a) :: csmm          => psb_i_cuda_hdiag_csmm
+    procedure, pass(a) :: csmv          => psb_i_cuda_hdiag_csmv
+    ! procedure, pass(a) :: in_vect_sv    => psb_i_cuda_hdiag_inner_vect_sv
+    ! procedure, pass(a) :: scals         => psb_i_cuda_hdiag_scals
+    ! procedure, pass(a) :: scalv         => psb_i_cuda_hdiag_scal
+    ! procedure, pass(a) :: reallocate_nz => psb_i_cuda_hdiag_reallocate_nz
+    ! procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hdiag_allocate_mnnz
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo   => psb_i_cuda_cp_hdiag_from_coo
+    ! procedure, pass(a) :: cp_from_fmt   => psb_i_cuda_cp_hdiag_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_i_cuda_mv_hdiag_from_coo
+    ! procedure, pass(a) :: mv_from_fmt   => psb_i_cuda_mv_hdiag_from_fmt
+    procedure, pass(a) :: free          => i_cuda_hdiag_free
+    procedure, pass(a) :: mold          => psb_i_cuda_hdiag_mold
+    procedure, pass(a) :: to_gpu        => psb_i_cuda_hdiag_to_gpu
+    final              :: i_cuda_hdiag_finalize   ! releases deviceMat when an object is finalized
+  end type psb_i_cuda_hdiag_sparse_mat
+
+  private :: i_cuda_hdiag_get_nzeros, i_cuda_hdiag_free, i_cuda_hdiag_get_fmt, &
+       & i_cuda_hdiag_get_size, i_cuda_hdiag_sizeof, i_cuda_hdiag_get_nz_row
+  ! NOTE(review): this module only defines get_fmt, free and finalize; the
+  ! other names listed above have no definition here -- confirm and prune.
+  interface
+    ! presumably y = alpha*op(a)*x + beta*y on encapsulated vectors -- confirm in the implementation
+    subroutine psb_i_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_
+      class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                  :: alpha, beta
+      class(psb_i_base_vect_type), intent(inout)     :: x
+      class(psb_i_base_vect_type), intent(inout)     :: y
+      integer(psb_ipk_), intent(out)                 :: info
+      character, optional, intent(in)                :: trans
+    end subroutine psb_i_cuda_hdiag_vect_mv
+  end interface
+
+!!$  interface
+!!$    subroutine psb_i_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+!!$      import :: psb_ipk_, psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type
+!!$      class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a
+!!$      integer(psb_ipk_), intent(in) :: alpha, beta
+!!$      class(psb_i_base_vect_type), intent(inout) :: x, y
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$      character, optional, intent(in) :: trans
+!!$    end subroutine psb_i_cuda_hdiag_inner_vect_sv
+!!$  end interface
+!!$
+!!$  interface
+!!$    subroutine psb_i_cuda_hdiag_reallocate_nz(nz,a)
+!!$      import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_
+!!$      integer(psb_ipk_), intent(in) :: nz
+!!$      class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+!!$    end subroutine psb_i_cuda_hdiag_reallocate_nz
+!!$  end interface
+!!$
+!!$  interface
+!!$    subroutine psb_i_cuda_hdiag_allocate_mnnz(m,n,a,nz)
+!!$      import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_
+!!$      integer(psb_ipk_), intent(in) :: m,n
+!!$      class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+!!$      integer(psb_ipk_), intent(in), optional :: nz
+!!$    end subroutine psb_i_cuda_hdiag_allocate_mnnz
+!!$  end interface
+  ! mold: a is read-only, the allocatable b is the output; info is a status code.
+  interface
+    subroutine psb_i_cuda_hdiag_mold(a,b,info)
+      import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hdiag_sparse_mat), intent(in)          :: a
+      class(psb_i_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_i_cuda_hdiag_mold
+  end interface
+  ! to_gpu: presumably (re)builds the device-side matrix from the host data -- confirm.
+  interface
+    subroutine psb_i_cuda_hdiag_to_gpu(a,info)
+      import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                    :: info
+    end subroutine psb_i_cuda_hdiag_to_gpu
+  end interface
+  ! cp_from_coo: fills a from the COO matrix b, which is left untouched (intent(in)).
+  interface
+    subroutine psb_i_cuda_cp_hdiag_from_coo(a,b,info)
+      import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(in)           :: b
+      integer(psb_ipk_), intent(out)                    :: info
+    end subroutine psb_i_cuda_cp_hdiag_from_coo
+  end interface
+
+!!$  interface
+!!$    subroutine psb_i_cuda_cp_hdiag_from_fmt(a,b,info)
+!!$      import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+!!$      class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+!!$      class(psb_i_base_sparse_mat), intent(in) :: b
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$    end subroutine psb_i_cuda_cp_hdiag_from_fmt
+!!$  end interface
+!!$
+  interface
+    ! mv_from_coo: like cp_from_coo, but b is intent(inout) and may be modified.
+    subroutine psb_i_cuda_mv_hdiag_from_coo(a,b,info)
+      import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(inout)        :: b
+      integer(psb_ipk_), intent(out)                    :: info
+    end subroutine psb_i_cuda_mv_hdiag_from_coo
+  end interface
+
+!!$
+!!$  interface
+!!$    subroutine psb_i_cuda_mv_hdiag_from_fmt(a,b,info)
+!!$      import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+!!$      class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+!!$      class(psb_i_base_sparse_mat), intent(inout) :: b
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$    end subroutine psb_i_cuda_mv_hdiag_from_fmt
+!!$  end interface
+!!$
+  interface
+    ! csmv: plain-array counterpart of vect_mv (x and y are rank-1 integer arrays).
+    subroutine psb_i_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                  :: alpha, beta, x(:)
+      integer(psb_ipk_), intent(inout)               :: y(:)
+      integer(psb_ipk_), intent(out)                 :: info
+      character, optional, intent(in)                :: trans
+    end subroutine psb_i_cuda_hdiag_csmv
+  end interface
+
+!!$  interface
+!!$    subroutine psb_i_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
+!!$      import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_
+!!$      class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a
+!!$      integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:)
+!!$      integer(psb_ipk_), intent(inout) :: y(:,:)
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$      character, optional, intent(in) :: trans
+!!$    end subroutine psb_i_cuda_hdiag_csmm
+!!$  end interface
+!!$
+!!$  interface
+!!$    subroutine psb_i_cuda_hdiag_scal(d,a,info, side)
+!!$      import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_
+!!$      class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+!!$      integer(psb_ipk_), intent(in) :: d(:)
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$      character, intent(in), optional :: side
+!!$    end subroutine psb_i_cuda_hdiag_scal
+!!$  end interface
+!!$
+!!$  interface
+!!$    subroutine psb_i_cuda_hdiag_scals(d,a,info)
+!!$      import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_
+!!$      class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+!!$      integer(psb_ipk_), intent(in) :: d
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$    end subroutine psb_i_cuda_hdiag_scals
+!!$  end interface
+!!$
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+  ! Return the format tag of this class: the 5-character string 'HDIAG'.
+  function i_cuda_hdiag_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HDIAG'
+  end function i_cuda_hdiag_get_fmt
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  ! free:     release the device matrix (if any), clear the handle and
+  !           call the parent type's free.
+  ! finalize: the same sequence, bound as the type's final procedure.
+  !
+  ! == ===================================
+
+  subroutine i_cuda_hdiag_free(a)
+    use hdiagdev_mod
+    implicit none
+    ! a: matrix to release; a%deviceMat is c_null_ptr on return.
+    class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+    ! The device library is only called when a handle is actually held.
+    if (c_associated(a%deviceMat)) &
+         & call freeHdiagDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    call a%psb_i_hdia_sparse_mat%free()
+
+    return
+
+  end subroutine i_cuda_hdiag_free
+
+  subroutine i_cuda_hdiag_finalize(a)
+    use hdiagdev_mod
+    implicit none
+    type(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
+    ! Non-polymorphic dummy, as required for a final subroutine.
+    if (c_associated(a%deviceMat)) &
+         & call freeHdiagDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    call a%psb_i_hdia_sparse_mat%free()
+
+    return
+  end subroutine i_cuda_hdiag_finalize
+
+end module psb_i_cuda_hdiag_mat_mod
diff --git a/cuda/psb_i_cuda_hlg_mat_mod.F90 b/cuda/psb_i_cuda_hlg_mat_mod.F90
new file mode 100644
index 00000000..8b9dc654
--- /dev/null
+++ b/cuda/psb_i_cuda_hlg_mat_mod.F90
@@ -0,0 +1,377 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! 
software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! psb_i_cuda_hlg_mat_mod: integer-valued 'HLG' sparse matrix type that extends
+! psb_i_hll_sparse_mat with a device-side copy and a host/device state flag.
+module psb_i_cuda_hlg_mat_mod
+
+  use iso_c_binding
+  use psb_i_mat_mod
+  use psb_i_hll_mat_mod
+
+  ! Values stored in devstate by the set_host / set_sync / set_dev methods.
+  integer(psb_ipk_), parameter, private :: is_host = -1
+  integer(psb_ipk_), parameter, private :: is_sync = 0
+  integer(psb_ipk_), parameter, private :: is_dev  = 1
+
+  type, extends(psb_i_hll_sparse_mat) :: psb_i_cuda_hlg_sparse_mat
+    !
+    ! ITPACK/HLL format, extended.
+    ! We are adding here the routines to create a copy of the data
+    ! into the GPU.
+    !
+    type(c_ptr) :: deviceMat = c_null_ptr   ! device-side matrix handle; c_null_ptr when none is held
+    integer     :: devstate  = is_host      ! one of is_host / is_sync / is_dev
+
+  contains
+    procedure, nopass  :: get_fmt       => i_cuda_hlg_get_fmt
+    procedure, pass(a) :: sizeof        => i_cuda_hlg_sizeof
+    procedure, pass(a) :: vect_mv       => psb_i_cuda_hlg_vect_mv
+    procedure, pass(a) :: csmm          => psb_i_cuda_hlg_csmm
+    procedure, pass(a) :: csmv          => psb_i_cuda_hlg_csmv
+    procedure, pass(a) :: in_vect_sv    => psb_i_cuda_hlg_inner_vect_sv
+    procedure, pass(a) :: scals         => psb_i_cuda_hlg_scals
+    procedure, pass(a) :: scalv         => psb_i_cuda_hlg_scal
+    procedure, pass(a) :: reallocate_nz => psb_i_cuda_hlg_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hlg_allocate_mnnz
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo   => psb_i_cuda_cp_hlg_from_coo
+    procedure, pass(a) :: cp_from_fmt   => psb_i_cuda_cp_hlg_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_i_cuda_mv_hlg_from_coo
+    procedure, pass(a) :: mv_from_fmt   => psb_i_cuda_mv_hlg_from_fmt
+    procedure, pass(a) :: free          => i_cuda_hlg_free
+    procedure, pass(a) :: mold          => psb_i_cuda_hlg_mold
+    procedure, pass(a) :: is_host       => i_cuda_hlg_is_host
+    procedure, pass(a) :: is_dev        => i_cuda_hlg_is_dev
+    procedure, pass(a) :: is_sync       => i_cuda_hlg_is_sync
+    procedure, pass(a) :: set_host      => i_cuda_hlg_set_host
+    procedure, pass(a) :: set_dev       => i_cuda_hlg_set_dev
+    procedure, pass(a) :: set_sync      => i_cuda_hlg_set_sync
+    procedure, pass(a) :: sync          => i_cuda_hlg_sync
+    procedure, pass(a) :: from_gpu      => psb_i_cuda_hlg_from_gpu
+    procedure, pass(a) :: to_gpu        => psb_i_cuda_hlg_to_gpu
+    final              :: i_cuda_hlg_finalize
+  end type psb_i_cuda_hlg_sparse_mat
+
+  private :: i_cuda_hlg_get_nzeros, i_cuda_hlg_free, i_cuda_hlg_get_fmt, &
+       & i_cuda_hlg_get_size, i_cuda_hlg_sizeof, i_cuda_hlg_get_nz_row
+  ! NOTE(review): get_nzeros, get_size and get_nz_row have no definition in
+  ! this module -- confirm and prune.  External interfaces follow.
+  interface
+    subroutine psb_i_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                :: alpha, beta
+      class(psb_i_base_vect_type), intent(inout)   :: x
+      class(psb_i_base_vect_type), intent(inout)   :: y
+      integer(psb_ipk_), intent(out)               :: info
+      character, optional, intent(in)              :: trans
+    end subroutine psb_i_cuda_hlg_vect_mv
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_ipk_, psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type
+      class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                :: alpha, beta
+      class(psb_i_base_vect_type), intent(inout)   :: x, y
+      integer(psb_ipk_), intent(out)               :: info
+      character, optional, intent(in)              :: trans
+    end subroutine psb_i_cuda_hlg_inner_vect_sv
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hlg_reallocate_nz(nz,a)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                   :: nz
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+    end subroutine psb_i_cuda_hlg_reallocate_nz
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hlg_allocate_mnnz(m,n,a,nz)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                   :: m,n
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional         :: nz
+    end subroutine psb_i_cuda_hlg_allocate_mnnz
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hlg_mold(a,b,info)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(in)             :: a
+      class(psb_i_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_i_cuda_hlg_mold
+  end interface
+  ! from_gpu / to_gpu are the two transfer routines invoked by sync().
+  interface
+    subroutine psb_i_cuda_hlg_from_gpu(a,info)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_i_cuda_hlg_from_gpu
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hlg_to_gpu(a,info, nzrm)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                  :: info
+      integer(psb_ipk_), intent(in), optional         :: nzrm
+    end subroutine psb_i_cuda_hlg_to_gpu
+  end interface
+  ! cp_* leave the source b untouched (intent(in)); mv_* may modify it (intent(inout)).
+  interface
+    subroutine psb_i_cuda_cp_hlg_from_coo(a,b,info)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(in)         :: b
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_i_cuda_cp_hlg_from_coo
+  end interface
+
+  interface
+    subroutine psb_i_cuda_cp_hlg_from_fmt(a,b,info)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(in)        :: b
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_i_cuda_cp_hlg_from_fmt
+  end interface
+
+  interface
+    subroutine psb_i_cuda_mv_hlg_from_coo(a,b,info)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(inout)      :: b
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_i_cuda_mv_hlg_from_coo
+  end interface
+
+
+  interface
+    subroutine psb_i_cuda_mv_hlg_from_fmt(a,b,info)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(inout)     :: b
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_i_cuda_mv_hlg_from_fmt
+  end interface
+  ! csmv / csmm take plain integer arrays (rank 1 / rank 2) instead of vector objects.
+  interface
+    subroutine psb_i_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                :: alpha, beta, x(:)
+      integer(psb_ipk_), intent(inout)             :: y(:)
+      integer(psb_ipk_), intent(out)               :: info
+      character, optional, intent(in)              :: trans
+    end subroutine psb_i_cuda_hlg_csmv
+  end interface
+  interface
+    subroutine psb_i_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                :: alpha, beta, x(:,:)
+      integer(psb_ipk_), intent(inout)             :: y(:,:)
+      integer(psb_ipk_), intent(out)               :: info
+      character, optional, intent(in)              :: trans
+    end subroutine psb_i_cuda_hlg_csmm
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hlg_scal(d,a,info, side)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in)                   :: d(:)
+      integer(psb_ipk_), intent(out)                  :: info
+      character, intent(in), optional                 :: side
+    end subroutine psb_i_cuda_hlg_scal
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hlg_scals(d,a,info)
+      import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in)                   :: d
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_i_cuda_hlg_scals
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  ! sizeof:  storage estimate for the host-side arrays of the matrix.
+  ! get_fmt: format tag of this class.
+  !
+  !
+  ! == ===================================
+
+  ! Result: 8 + psb_sizeof_ip * (number of elements held in val, irn, idiag, hkoffs, ja).
+  function i_cuda_hlg_sizeof(a) result(res)
+    implicit none
+    class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    ! Unallocated components contribute 0 (SIZE must not be applied to them);
+    ! element counts are taken in kind psb_epk_ so the products cannot wrap in a narrower kind.
+    if (a%is_dev()) call a%sync()
+    res = 8
+    if (allocated(a%val))    res = res + psb_sizeof_ip * size(a%val,    kind=psb_epk_)
+    if (allocated(a%irn))    res = res + psb_sizeof_ip * size(a%irn,    kind=psb_epk_)
+    if (allocated(a%idiag))  res = res + psb_sizeof_ip * size(a%idiag,  kind=psb_epk_)
+    if (allocated(a%hkoffs)) res = res + psb_sizeof_ip * size(a%hkoffs, kind=psb_epk_)
+    if (allocated(a%ja))     res = res + psb_sizeof_ip * size(a%ja,     kind=psb_epk_)
+    ! Should we account for the shadow data structure
+    ! on the GPU device side?
+    ! res = 2*res
+
+  end function i_cuda_hlg_sizeof
+  ! Return the format tag 'HLG' (blank-padded to the declared length 5).
+  function i_cuda_hlg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HLG'
+  end function i_cuda_hlg_get_fmt
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+  ! Release the device matrix and the host data; the object goes back to its declared defaults.
+  subroutine i_cuda_hlg_free(a)
+    use hlldev_mod
+    implicit none
+    ! a: matrix to release.
+    class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+    ! The device library is only called when a handle is actually held.
+    if (c_associated(a%deviceMat)) &
+         & call freeHllDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    call a%psb_i_hll_sparse_mat%free()
+    a%devstate = is_host   ! no device copy is left: do not keep a stale is_dev / is_sync flag
+    return
+
+  end subroutine i_cuda_hlg_free
+
+  ! Bring host and device copies in line: to_gpu when the state is host, from_gpu when it is dev.
+  subroutine i_cuda_hlg_sync(a)
+    implicit none
+    class(psb_i_cuda_hlg_sparse_mat), target, intent(in) :: a
+    class(psb_i_cuda_hlg_sparse_mat), pointer :: tmpa
+    integer(psb_ipk_) :: info
+    ! a is intent(in); the pointer alias is what lets the intent(inout) methods below be called.
+    tmpa => a
+    if (tmpa%is_host()) then
+      call tmpa%to_gpu(info)
+    else if (tmpa%is_dev()) then
+      call tmpa%from_gpu(info)
+    end if
+    call tmpa%set_sync()   ! NOTE(review): info is not examined; the state becomes sync even if the transfer failed
+    return
+
+  end subroutine i_cuda_hlg_sync
+  ! State setters: store is_host / is_dev / is_sync in a%devstate.
+  subroutine i_cuda_hlg_set_host(a)
+    implicit none
+    class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine i_cuda_hlg_set_host
+
+  subroutine i_cuda_hlg_set_dev(a)
+    implicit none
+    class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_dev
+  end subroutine i_cuda_hlg_set_dev
+
+  subroutine i_cuda_hlg_set_sync(a)
+    implicit none
+    class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_sync
+  end subroutine i_cuda_hlg_set_sync
+  ! State predicates: compare a%devstate with is_dev / is_host / is_sync.
+  function i_cuda_hlg_is_dev(a) result(res)
+    implicit none
+    class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_dev)
+  end function i_cuda_hlg_is_dev
+
+  function i_cuda_hlg_is_host(a) result(res)
+    implicit none
+    class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_host)
+  end function i_cuda_hlg_is_host
+
+  function i_cuda_hlg_is_sync(a) result(res)
+    implicit none
+    class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_sync)
+  end function i_cuda_hlg_is_sync
+
+  ! Final procedure: releases the device matrix only; the parent's free is not called here.
+  subroutine i_cuda_hlg_finalize(a)
+    use hlldev_mod
+    implicit none
+    type(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeHllDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+
+    return
+  end subroutine i_cuda_hlg_finalize
+
+end module psb_i_cuda_hlg_mat_mod
diff --git a/cuda/psb_i_cuda_hybg_mat_mod.F90 b/cuda/psb_i_cuda_hybg_mat_mod.F90
new file mode 100644
index 00000000..eb38e498
--- /dev/null
+++ b/cuda/psb_i_cuda_hybg_mat_mod.F90
@@ -0,0 +1,287 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+#if PSB_CUDA_SHORT_VERSION <= 10
+! The whole module is compiled only when PSB_CUDA_SHORT_VERSION is at most 10.
+module psb_i_cuda_hybg_mat_mod
+
+  use iso_c_binding
+  use psb_i_mat_mod
+  use cusparse_mod
+
+  type, extends(psb_i_csr_sparse_mat) :: psb_i_cuda_hybg_sparse_mat
+    !
+    ! HYBG. An interface to the cuSPARSE HYB
+    ! On the CPU side we keep a CSR storage.
+    !
+    ! deviceMat is the device-side object handed to HYBGDeviceFree by the
+    ! free and finalize procedures of this type.
+    !
+    type(i_Hmat) :: deviceMat
+
+  contains
+    procedure, nopass  :: get_fmt       => i_cuda_hybg_get_fmt
+    procedure, pass(a) :: sizeof        => i_cuda_hybg_sizeof
+    procedure, pass(a) :: vect_mv       => psb_i_cuda_hybg_vect_mv
+    procedure, pass(a) :: in_vect_sv    => psb_i_cuda_hybg_inner_vect_sv
+    procedure, pass(a) :: csmm          => psb_i_cuda_hybg_csmm
+    procedure, pass(a) :: csmv          => psb_i_cuda_hybg_csmv
+    procedure, pass(a) :: scals         => psb_i_cuda_hybg_scals
+    procedure, pass(a) :: scalv         => psb_i_cuda_hybg_scal
+    procedure, pass(a) :: reallocate_nz => psb_i_cuda_hybg_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hybg_allocate_mnnz
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo   => psb_i_cuda_cp_hybg_from_coo
+    procedure, pass(a) :: cp_from_fmt   => psb_i_cuda_cp_hybg_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_i_cuda_mv_hybg_from_coo
+    procedure, pass(a) :: mv_from_fmt   => psb_i_cuda_mv_hybg_from_fmt
+    procedure, pass(a) :: free          => i_cuda_hybg_free
+    procedure, pass(a) :: mold          => psb_i_cuda_hybg_mold
+    procedure, pass(a) :: to_gpu        => psb_i_cuda_hybg_to_gpu
+    final              :: i_cuda_hybg_finalize
+  end type psb_i_cuda_hybg_sparse_mat
+
+  private :: i_cuda_hybg_get_nzeros, i_cuda_hybg_free, i_cuda_hybg_get_fmt, &
+       & i_cuda_hybg_get_size, i_cuda_hybg_sizeof, i_cuda_hybg_get_nz_row
+  ! NOTE(review): get_nzeros, get_size and get_nz_row have no definition in
+  ! this module -- confirm and prune.  External interfaces follow.
+  interface
+    subroutine psb_i_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                 :: alpha, beta
+      class(psb_i_base_vect_type), intent(inout)    :: x
+      class(psb_i_base_vect_type), intent(inout)    :: y
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)               :: trans
+    end subroutine psb_i_cuda_hybg_inner_vect_sv
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                 :: alpha, beta
+      class(psb_i_base_vect_type), intent(inout)    :: x
+      class(psb_i_base_vect_type), intent(inout)    :: y
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)               :: trans
+    end subroutine psb_i_cuda_hybg_vect_mv
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hybg_reallocate_nz(nz,a)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                    :: nz
+      class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+    end subroutine psb_i_cuda_hybg_reallocate_nz
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hybg_allocate_mnnz(m,n,a,nz)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                    :: m,n
+      class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional          :: nz
+    end subroutine psb_i_cuda_hybg_allocate_mnnz
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hybg_mold(a,b,info)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(in)            :: a
+      class(psb_i_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_i_cuda_hybg_mold
+  end interface
+  ! to_gpu: presumably (re)builds the device-side matrix from the host CSR data -- confirm.
+  interface
+    subroutine psb_i_cuda_hybg_to_gpu(a,info, nzrm)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                   :: info
+      integer(psb_ipk_), intent(in), optional          :: nzrm
+    end subroutine psb_i_cuda_hybg_to_gpu
+  end interface
+  ! cp_* leave the source b untouched (intent(in)); mv_* may modify it (intent(inout)).
+  interface
+    subroutine psb_i_cuda_cp_hybg_from_coo(a,b,info)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(in)          :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_cp_hybg_from_coo
+  end interface
+
+  interface
+    subroutine psb_i_cuda_cp_hybg_from_fmt(a,b,info)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(in)         :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_cp_hybg_from_fmt
+  end interface
+
+  interface
+    subroutine psb_i_cuda_mv_hybg_from_coo(a,b,info)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+      class(psb_i_coo_sparse_mat), intent(inout)       :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_mv_hybg_from_coo
+  end interface
+
+  interface
+    subroutine psb_i_cuda_mv_hybg_from_fmt(a,b,info)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+      class(psb_i_base_sparse_mat), intent(inout)      :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_mv_hybg_from_fmt
+  end interface
+  ! csmv / csmm take plain integer arrays (rank 1 / rank 2) instead of vector objects.
+  interface
+    subroutine psb_i_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                 :: alpha, beta, x(:)
+      integer(psb_ipk_), intent(inout)              :: y(:)
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)               :: trans
+    end subroutine psb_i_cuda_hybg_csmv
+  end interface
+  interface
+    subroutine psb_i_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in)                 :: alpha, beta, x(:,:)
+      integer(psb_ipk_), intent(inout)              :: y(:,:)
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)               :: trans
+    end subroutine psb_i_cuda_hybg_csmm
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hybg_scal(d,a,info,side)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in)                    :: d(:)
+      integer(psb_ipk_), intent(out)                   :: info
+      character, intent(in), optional                  :: side
+    end subroutine psb_i_cuda_hybg_scal
+  end interface
+
+  interface
+    subroutine psb_i_cuda_hybg_scals(d,a,info)
+      import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_
+      class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in)                    :: d
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_i_cuda_hybg_scals
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+  ! Result: 8 + psb_sizeof_ip * (number of elements held in val, irp, ja).  Unallocated
+  ! components count as 0; sizes are taken in kind psb_epk_ so the products cannot wrap.
+  function i_cuda_hybg_sizeof(a) result(res)
+    implicit none
+    class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    res = 8
+    if (allocated(a%val)) res = res + psb_sizeof_ip * size(a%val, kind=psb_epk_)
+    if (allocated(a%irp)) res = res + psb_sizeof_ip * size(a%irp, kind=psb_epk_)
+    if (allocated(a%ja))  res = res + psb_sizeof_ip * size(a%ja,  kind=psb_epk_)
+    ! Should we account for the shadow data structure
+    ! on the GPU device side?
+    ! res = 2*res
+
+  end function i_cuda_hybg_sizeof
+  ! Return the format tag 'HYBG' (blank-padded to the declared length 5).
+  function i_cuda_hybg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HYBG'
+  end function i_cuda_hybg_get_fmt
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  ! free:     release the device object, then call the parent CSR free.
+  ! finalize: release the device object only (final procedure of the type).
+  !
+  !
+  ! == ===================================
+
+  subroutine i_cuda_hybg_free(a)
+    use cusparse_mod
+    implicit none
+    integer(psb_ipk_) :: info
+    class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+    ! NOTE(review): the status returned by HYBGDeviceFree is stored but never examined.
+    info = HYBGDeviceFree(a%deviceMat)
+    call a%psb_i_csr_sparse_mat%free()
+
+    return
+
+  end subroutine i_cuda_hybg_free
+
+  subroutine i_cuda_hybg_finalize(a)
+    use cusparse_mod
+    implicit none
+    integer(psb_ipk_) :: info
+    type(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
+    ! NOTE(review): the status returned by HYBGDeviceFree is stored but never examined.
+    info = HYBGDeviceFree(a%deviceMat)
+
+    return
+  end subroutine i_cuda_hybg_finalize
+
+end module psb_i_cuda_hybg_mat_mod
+#endif
diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90
new file mode 100644
index 00000000..04e69d24
--- /dev/null
+++ b/cuda/psb_i_cuda_vect_mod.F90
@@ -0,0 +1,1648 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +module psb_i_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_vect_mod + use psb_cuda_env_mod + use psb_i_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_vect_type) :: psb_i_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + integer(c_int), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + integer(c_int), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => i_cuda_get_nrows + procedure, nopass :: get_fmt => i_cuda_get_fmt + + procedure, pass(x) :: all => i_cuda_all + procedure, pass(x) :: zero => i_cuda_zero + procedure, pass(x) :: asb_m => i_cuda_asb_m + procedure, pass(x) :: sync => i_cuda_sync + procedure, pass(x) :: sync_space => i_cuda_sync_space + procedure, pass(x) :: bld_x => i_cuda_bld_x + procedure, pass(x) :: bld_mn => i_cuda_bld_mn + procedure, pass(x) :: free => i_cuda_free + procedure, pass(x) :: ins_a => i_cuda_ins_a + procedure, pass(x) :: ins_v => i_cuda_ins_v + procedure, pass(x) :: is_host => i_cuda_is_host + procedure, pass(x) :: is_dev => i_cuda_is_dev + procedure, pass(x) :: is_sync => i_cuda_is_sync + procedure, pass(x) :: set_host => i_cuda_set_host + procedure, pass(x) :: set_dev => i_cuda_set_dev + procedure, pass(x) :: set_sync => i_cuda_set_sync + procedure, pass(x) :: set_scal => i_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => i_cuda_set_vect + procedure, pass(x) :: gthzv_x => i_cuda_gthzv_x + procedure, pass(y) :: sctb => i_cuda_sctb + procedure, pass(y) :: sctb_x => i_cuda_sctb_x + procedure, pass(x) :: gthzbuf => i_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => i_cuda_sctb_buf + procedure, pass(x) :: new_buffer => 
i_cuda_new_buffer + procedure, nopass :: device_wait => i_cuda_device_wait + procedure, pass(x) :: free_buffer => i_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => i_cuda_maybe_free_buffer + + final :: i_cuda_vect_finalize + end type psb_i_vect_cuda + + public :: psb_i_vect_cuda_ + private :: constructor + interface psb_i_vect_cuda_ + module procedure constructor + end interface psb_i_vect_cuda_ + +contains + + function constructor(x) result(this) + integer(psb_ipk_) :: x(:) + type(psb_i_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine i_cuda_device_wait() + call psb_cudaSync() + end subroutine i_cuda_device_wait + + subroutine i_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! 
+ if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeInt(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateInt(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readInt(x%dt_buf,y,n) + + endif + + class default + ! 
Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeInt(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateInt(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceInt(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readInt(x%dt_buf,y,n) + + end select + + end subroutine i_cuda_gthzv_x + + subroutine i_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + class(psb_i_vect_cuda) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readInt(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! 
Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceInt(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readInt(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine i_cuda_gthzbuf + + subroutine i_cuda_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_cuda) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_i_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine i_cuda_sctb + + subroutine i_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_cuda) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! 
+ if (psb_cuda_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeInt(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateInt(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeInt(y%dt_buf,x,n) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeInt(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateInt(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeInt(y%dt_buf,x,n) + info = iscatMultiVecDeviceInt(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! 
the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine i_cuda_sctb_x + + subroutine i_cuda_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_cuda_env_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta + class(psb_i_vect_cuda) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_cuda) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_cuda_DeviceHasUVA()) then + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeInt(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeInt(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceInt(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine i_cuda_sctb_buf + + + subroutine i_cuda_bld_x(x,this) + use psb_base_mod + integer(psb_ipk_), intent(in) :: this(:) + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_cuda_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine i_cuda_bld_x + + subroutine i_cuda_bld_mn(x,n) + integer(psb_mpk_), intent(in) 
:: n + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'i_cuda_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine i_cuda_bld_mn + + subroutine i_cuda_set_host(x) + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine i_cuda_set_host + + subroutine i_cuda_set_dev(x) + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine i_cuda_set_dev + + subroutine i_cuda_set_sync(x) + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine i_cuda_set_sync + + function i_cuda_is_dev(x) result(res) + implicit none + class(psb_i_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_cuda_is_dev + + function i_cuda_is_host(x) result(res) + implicit none + class(psb_i_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_cuda_is_host + + function i_cuda_is_sync(x) result(res) + implicit none + class(psb_i_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_cuda_is_sync + + + function i_cuda_get_nrows(x) result(res) + implicit none + class(psb_i_vect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function i_cuda_get_nrows + + function i_cuda_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'iGPU' + end function i_cuda_get_fmt + + subroutine i_cuda_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i_vect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_cuda_all',& + & i_err=(/n,n,n,n,n/)) + end 
if + end subroutine i_cuda_all + + subroutine i_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(izero) + end subroutine i_cuda_zero + + subroutine i_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_i_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'i_cuda_sync') + end if + + end subroutine i_cuda_sync + + subroutine i_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine i_cuda_free + + subroutine i_cuda_set_scal(x,val,first,last) + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = 
min(last,last_) + + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine i_cuda_set_scal + + + + subroutine i_cuda_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine i_cuda_vect_finalize + + subroutine i_cuda_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_i_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_cuda + + info = 0 + if (psb_errstatus_fatal()) return + + done_cuda = .false. + select type(virl => irl) + class is (psb_i_vect_cuda) + select type(vval => val) + class is (psb_i_vect_cuda) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceInt(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_cuda=.true. + end select + end select + + if (.not.done_cuda) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'cuda_vect_ins') + return + end if + + end subroutine i_cuda_ins_v + + subroutine i_cuda_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_i_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine i_cuda_ins_a + +end module psb_i_cuda_vect_mod + + +! +! 
Multivectors +! + + + +module psb_i_cuda_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_multivect_mod + use psb_i_base_multivect_mod + use psb_cuda_env_mod + use psb_i_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_multivect_type) :: psb_i_multivect_cuda + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => i_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => i_cuda_multi_get_ncols + procedure, nopass :: get_fmt => i_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => i_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => i_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => i_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => i_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => i_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => i_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => i_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => i_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => i_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => i_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => i_cuda_multi_amax +!!$ procedure, pass(x) :: asum => i_cuda_multi_asum + procedure, pass(x) :: all => i_cuda_multi_all + procedure, pass(x) :: zero => i_cuda_multi_zero + procedure, pass(x) :: asb => i_cuda_multi_asb + procedure, pass(x) :: sync => i_cuda_multi_sync + procedure, pass(x) :: sync_space => i_cuda_multi_sync_space + procedure, pass(x) :: bld_x => i_cuda_multi_bld_x + procedure, pass(x) :: bld_n => i_cuda_multi_bld_n + procedure, pass(x) :: free => i_cuda_multi_free + procedure, pass(x) :: ins => i_cuda_multi_ins + procedure, pass(x) :: is_host => 
i_cuda_multi_is_host + procedure, pass(x) :: is_dev => i_cuda_multi_is_dev + procedure, pass(x) :: is_sync => i_cuda_multi_is_sync + procedure, pass(x) :: set_host => i_cuda_multi_set_host + procedure, pass(x) :: set_dev => i_cuda_multi_set_dev + procedure, pass(x) :: set_sync => i_cuda_multi_set_sync + procedure, pass(x) :: set_scal => i_cuda_multi_set_scal + procedure, pass(x) :: set_vect => i_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => i_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => i_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => i_cuda_multi_sctb_x + final :: i_cuda_multi_vect_finalize + end type psb_i_multivect_cuda + + public :: psb_i_multivect_cuda + private :: mconstructor + interface psb_i_multivect_cuda + module procedure mconstructor + end interface + +contains + + function mconstructor(x) result(this) + integer(psb_ipk_) :: x(:,:) + type(psb_i_multivect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function mconstructor + + +!!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ integer(psb_ipk_) :: y(:) +!!$ class(psb_i_multivect_cuda) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine i_cuda_multi_gthzv_x +!!$ 
+!!$ +!!$ +!!$ subroutine i_cuda_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ integer(psb_ipk_) :: beta, x(:) +!!$ class(psb_i_multivect_cuda) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_i_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine i_cuda_multi_sctb +!!$ +!!$ subroutine i_cuda_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ integer(psb_ipk_) :: beta, x(:) +!!$ class(psb_i_multivect_cuda) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine i_cuda_multi_sctb_x + + + subroutine i_cuda_multi_bld_x(x,this) + use psb_base_mod + integer(psb_ipk_), intent(in) :: this(:,:) + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_cuda_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine 
i_cuda_multi_bld_x + + subroutine i_cuda_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'i_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine i_cuda_multi_bld_n + + + subroutine i_cuda_multi_set_host(x) + implicit none + class(psb_i_multivect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine i_cuda_multi_set_host + + subroutine i_cuda_multi_set_dev(x) + implicit none + class(psb_i_multivect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine i_cuda_multi_set_dev + + subroutine i_cuda_multi_set_sync(x) + implicit none + class(psb_i_multivect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine i_cuda_multi_set_sync + + function i_cuda_multi_is_dev(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_cuda_multi_is_dev + + function i_cuda_multi_is_host(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_cuda_multi_is_host + + function i_cuda_multi_is_sync(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_cuda_multi_is_sync + + + function i_cuda_multi_get_nrows(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function i_cuda_multi_get_nrows + + function i_cuda_multi_get_ncols(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function i_cuda_multi_get_ncols + + function i_cuda_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'iGPU' + end function i_cuda_multi_get_fmt + +!!$ function 
i_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ class(psb_i_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = izero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_i_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_i_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_i_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'i_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function i_cuda_multi_dot_v +!!$ +!!$ function i_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function i_cuda_multi_dot_a +!!$ +!!$ subroutine i_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_i_base_multivect_type), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_i_base_multivect_type) +!!$ if ((beta /= izero).and.(y%is_dev()))& +!!$ & call 
y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_i_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= izero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_i_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_i_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine i_cuda_multi_mlt_v +!!$ +!!$ subroutine i_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: x(:) +!!$ class(psb_i_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_i_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine i_cuda_multi_mlt_a +!!$ +!!$ subroutine i_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: alpha,beta +!!$ integer(psb_ipk_), intent(in) :: x(:) +!!$ integer(psb_ipk_), intent(in) :: y(:) +!!$ class(psb_i_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine i_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine i_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod 
+!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: alpha,beta +!!$ class(psb_i_base_multivect_type), intent(inout) :: x +!!$ class(psb_i_base_multivect_type), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_i_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_i_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. 
+!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine i_cuda_multi_mlt_v_2 + + + subroutine i_cuda_multi_set_scal(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_i_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine i_cuda_multi_set_scal + + subroutine i_cuda_multi_set_vect(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_i_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine i_cuda_multi_set_vect + + + +!!$ subroutine i_cuda_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_i_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine i_cuda_multi_scal +!!$ +!!$ +!!$ function i_cuda_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. 
+!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function i_cuda_multi_nrm2 +!!$ +!!$ function i_cuda_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function i_cuda_multi_amax +!!$ +!!$ function i_cuda_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function i_cuda_multi_asum + + subroutine i_cuda_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=izero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_cuda_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine i_cuda_multi_all + + subroutine i_cuda_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_i_multivect_cuda), intent(inout) :: x + + if (allocated(x%v)) x%v=izero + call x%set_host() + end subroutine i_cuda_multi_zero + + subroutine i_cuda_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_i_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = 
getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc s_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_s_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_s_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => s_cuda_csrg_free + procedure, pass(a) :: mold => psb_s_cuda_csrg_mold + procedure, pass(a) :: is_host => s_cuda_csrg_is_host + procedure, pass(a) :: is_dev => s_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => s_cuda_csrg_is_sync + procedure, pass(a) :: set_host => s_cuda_csrg_set_host + procedure, pass(a) :: set_dev => s_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => s_cuda_csrg_set_sync + procedure, pass(a) :: sync => s_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_s_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_s_cuda_csrg_from_gpu + final :: s_cuda_csrg_finalize + end type psb_s_cuda_csrg_sparse_mat + + private :: s_cuda_csrg_get_nzeros, s_cuda_csrg_free, s_cuda_csrg_get_fmt, & + & s_cuda_csrg_get_size, s_cuda_csrg_sizeof, s_cuda_csrg_get_nz_row + + + interface + subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, 
psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)       :: alpha, beta
+      class(psb_s_base_vect_type), intent(inout) :: x
+      class(psb_s_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_s_cuda_csrg_inner_vect_sv
+  end interface
+
+
+  ! Explicit interface for the routine bound as vect_mv.
+  ! The body is implemented outside this module.
+  ! NOTE(review): presumably y := alpha*op(A)*x + beta*y on
+  ! encapsulated vectors -- confirm against the implementation.
+  interface
+    subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)       :: alpha, beta
+      class(psb_s_base_vect_type), intent(inout) :: x
+      class(psb_s_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_s_cuda_csrg_vect_mv
+  end interface
+
+  ! Explicit interface for the routine bound as reallocate_nz;
+  ! nz is the only size argument.
+  interface
+    subroutine  psb_s_cuda_csrg_reallocate_nz(nz,a)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+    end subroutine psb_s_cuda_csrg_reallocate_nz
+  end interface
+
+  ! Explicit interface for the routine bound as allocate_mnnz;
+  ! nz is optional.
+  interface
+    subroutine  psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_s_cuda_csrg_allocate_mnnz
+  end interface
+
+  ! Explicit interface for the routine bound as mold; b is an
+  ! allocatable polymorphic matrix passed intent(inout).
+  interface
+    subroutine psb_s_cuda_csrg_mold(a,b,info)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(in)                  :: a
+      class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_s_cuda_csrg_mold
+  end interface
+
+  ! Explicit interface for the routine bound as to_gpu; it is
+  ! called by s_cuda_csrg_sync when the matrix state is "host".
+  ! NOTE(review): meaning of the optional nzrm is not visible
+  ! here -- confirm against the implementation.
+  interface
+    subroutine psb_s_cuda_csrg_to_gpu(a,info, nzrm)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)             :: info
+      integer(psb_ipk_), intent(in), optional    :: nzrm
+    end subroutine psb_s_cuda_csrg_to_gpu
+  end interface
+
+  ! Explicit interface for the routine bound as from_gpu; it is
+  ! called by s_cuda_csrg_sync when the matrix state is "dev".
+  interface
+    subroutine psb_s_cuda_csrg_from_gpu(a,info)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_s_cuda_csrg_from_gpu
+  end interface
+
+  ! Explicit interface for the routine bound as cp_from_coo;
+  ! the COO source b is intent(in).
+  interface
+    subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(in)    :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_s_cuda_cp_csrg_from_coo
+  end interface
+
+  ! Explicit interface for the routine bound as cp_from_fmt;
+  ! the source b (any base sparse format) is intent(in).
+  interface
+    subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+      class(psb_s_base_sparse_mat), intent(in)   :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_s_cuda_cp_csrg_from_fmt
+  end interface
+
+  ! Explicit interface for the routine bound as mv_from_coo;
+  ! unlike cp_from_coo the source b is intent(inout).
+  interface
+    subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_s_cuda_mv_csrg_from_coo
+  end interface
+
+  ! Explicit interface for the routine bound as mv_from_fmt;
+  ! unlike cp_from_fmt the source b is intent(inout).
+  interface
+    subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout)  :: a
+      class(psb_s_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_s_cuda_mv_csrg_from_fmt
+  end interface
+
+  ! Explicit interface for the routine bound as csmv; x and y
+  ! are plain rank-1 arrays rather than encapsulated vectors.
+  interface
+    subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)          :: alpha, beta, x(:)
+      real(psb_spk_), 
intent(inout)       :: y(:)
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_s_cuda_csrg_csmv
+  end interface
+  interface
+    subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)          :: alpha, beta, x(:,:)
+      real(psb_spk_), intent(inout)       :: y(:,:)
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_s_cuda_csrg_csmm
+  end interface
+
+  interface
+    subroutine psb_s_cuda_csrg_scal(d,a,info,side)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in)      :: d(:)
+      integer(psb_ipk_), intent(out)            :: info
+      character, intent(in), optional :: side
+    end subroutine psb_s_cuda_csrg_scal
+  end interface
+
+  interface
+    subroutine psb_s_cuda_csrg_scals(d,a,info)
+      import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in)      :: d
+      integer(psb_ipk_), intent(out)            :: info
+    end subroutine psb_s_cuda_csrg_scals
+  end interface
+
+
+contains
+
+  ! ======================================================
+  !
+  !   Getters
+  !
+  ! ======================================================
+
+  !
+  ! Size estimate for the matrix: a fixed 8 plus the element
+  ! counts of a%val, a%irp and a%ja weighted by psb_sizeof_sp /
+  ! psb_sizeof_ip.  a%sync() is called first when a%is_dev() is
+  ! true.  Only the host arrays are counted; whether the device
+  ! copy should double the figure is an open question inherited
+  ! from the original author.
+  !
+  function s_cuda_csrg_sizeof(a) result(res)
+    implicit none
+    class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    integer(psb_epk_) :: nbytes
+
+    if (a%is_dev()) call a%sync()
+
+    nbytes = 8
+    nbytes = nbytes + psb_sizeof_sp * size(a%val)
+    nbytes = nbytes + psb_sizeof_ip * size(a%irp)
+    nbytes = nbytes + psb_sizeof_ip * size(a%ja)
+    ! Device-side shadow copy deliberately not counted:
+    ! nbytes = 2*nbytes
+    res = nbytes
+
+  end function s_cuda_csrg_sizeof
+
+  !
+  ! Format tag of this storage class.
+  !
+  function s_cuda_csrg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+
+    res = 'CSRG'
+  end function s_cuda_csrg_get_fmt
+
+
+
+  ! ======================================================
+  !
+  !   Data management
+  !
+  ! ======================================================
+
+  !
+  ! State setters: each one simply stores the corresponding
+  ! state constant (is_host / is_dev / is_sync) in a%devstate.
+  !
+  subroutine s_cuda_csrg_set_host(a)
+    implicit none
+    class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine s_cuda_csrg_set_host
+
+  subroutine s_cuda_csrg_set_dev(a)
+    implicit none
+    class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_dev
+  end subroutine s_cuda_csrg_set_dev
+
+  subroutine s_cuda_csrg_set_sync(a)
+    implicit none
+    class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_sync
+  end subroutine s_cuda_csrg_set_sync
+
+  !
+  ! State queries: each one compares a%devstate with the
+  ! corresponding state constant.
+  !
+  function s_cuda_csrg_is_dev(a) result(res)
+    implicit none
+    class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = a%devstate == is_dev
+  end function s_cuda_csrg_is_dev
+
+  function s_cuda_csrg_is_host(a) result(res)
+    implicit none
+    class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = a%devstate == is_host
+  end function s_cuda_csrg_is_host
+
+  function s_cuda_csrg_is_sync(a) result(res)
+    implicit none
+    class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = a%devstate == is_sync
+  end function s_cuda_csrg_is_sync
+
+
+  !
+  ! Bring host and device copies in line.
+  !
+  ! The dummy is intent(in), so the updates go through a pointer
+  ! associated with it.  to_gpu is called when the state is
+  ! "host", from_gpu when it is "dev"; the state is then set to
+  ! "sync" unconditionally.  The status returned by the transfer
+  ! is kept in a local and not inspected.
+  !
+  subroutine s_cuda_csrg_sync(a)
+    implicit none
+    class(psb_s_cuda_csrg_sparse_mat), target, intent(in) :: a
+    class(psb_s_cuda_csrg_sparse_mat), pointer :: writable
+    integer(psb_ipk_) :: ierr
+
+    writable => a
+    if (writable%is_host()) then
+      call writable%to_gpu(ierr)
+    else if (writable%is_dev()) then
+      call writable%from_gpu(ierr)
+    end if
+    call writable%set_sync()
+
+  end subroutine s_cuda_csrg_sync
+
+  !
+  ! Release the matrix: CSRGDeviceFree is applied to
+  ! a%deviceMat, then the parent CSR free is invoked.  The
+  ! status returned by CSRGDeviceFree is not inspected.
+  !
+  subroutine s_cuda_csrg_free(a)
+    use cusparse_mod
+    implicit none
+    class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: ierr
+
+    ierr = CSRGDeviceFree(a%deviceMat)
+    call a%psb_s_csr_sparse_mat%free()
+
+  end subroutine s_cuda_csrg_free
+
+  !
+  ! Finalizer: applies CSRGDeviceFree to a%deviceMat; the
+  ! returned status is not inspected.
+  !
+  subroutine s_cuda_csrg_finalize(a)
+    use cusparse_mod
+    implicit none
+    type(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: ierr
+
+    ierr = CSRGDeviceFree(a%deviceMat)
+
+  end subroutine s_cuda_csrg_finalize
+
+end module psb_s_cuda_csrg_mat_mod
diff --git a/cuda/psb_s_cuda_diag_mat_mod.F90 b/cuda/psb_s_cuda_diag_mat_mod.F90
new file mode 100644
index 00000000..30386618
--- /dev/null
+++ b/cuda/psb_s_cuda_diag_mat_mod.F90
@@ -0,0 +1,287 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! 
POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cuda_diag_mat_mod
+!
+! Single-precision sparse matrix type that extends the host DIA
+! storage class (psb_s_dia_sparse_mat) with an opaque handle to a
+! device-side copy.  Conversion and product routines are declared
+! here through explicit interfaces and implemented elsewhere; the
+! small getters and the free/finalize pair are contained below.
+!
+module psb_s_cuda_diag_mat_mod
+
+  use iso_c_binding
+  use psb_base_mod
+  use psb_s_dia_mat_mod
+
+  implicit none
+
+  type, extends(psb_s_dia_sparse_mat) :: psb_s_cuda_diag_sparse_mat
+    !
+    ! DIA (diagonal) storage format, extended.
+    ! We are adding here the routines to create a copy of the data
+    ! into the GPU.
+    !
+    ! deviceMat: opaque C handle of the device copy; c_null_ptr
+    !            means no device copy is currently held.
+    !
+    type(c_ptr) :: deviceMat = c_null_ptr
+
+  contains
+    procedure, nopass  :: get_fmt       => s_cuda_diag_get_fmt
+    procedure, pass(a) :: sizeof        => s_cuda_diag_sizeof
+    procedure, pass(a) :: vect_mv       => psb_s_cuda_diag_vect_mv
+!    procedure, pass(a) :: csmm          => psb_s_cuda_diag_csmm
+    procedure, pass(a) :: csmv          => psb_s_cuda_diag_csmv
+!    procedure, pass(a) :: in_vect_sv    => psb_s_cuda_diag_inner_vect_sv
+!    procedure, pass(a) :: scals         => psb_s_cuda_diag_scals
+!    procedure, pass(a) :: scalv         => psb_s_cuda_diag_scal
+!    procedure, pass(a) :: reallocate_nz => psb_s_cuda_diag_reallocate_nz
+!    procedure, pass(a) :: allocate_mnnz => psb_s_cuda_diag_allocate_mnnz
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo   => psb_s_cuda_cp_diag_from_coo
+!    procedure, pass(a) :: cp_from_fmt   => psb_s_cuda_cp_diag_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_s_cuda_mv_diag_from_coo
+!    procedure, pass(a) :: mv_from_fmt   => psb_s_cuda_mv_diag_from_fmt
+    procedure, pass(a) :: free          => s_cuda_diag_free
+    procedure, pass(a) :: mold          => psb_s_cuda_diag_mold
+    procedure, pass(a) :: to_gpu        => psb_s_cuda_diag_to_gpu
+    final              :: s_cuda_diag_finalize
+  end type psb_s_cuda_diag_sparse_mat
+
+  ! Only names of procedures actually contained in this module are
+  ! listed; they stay reachable through the type-bound names above.
+  private :: s_cuda_diag_free, s_cuda_diag_get_fmt, s_cuda_diag_sizeof
+
+
+  !
+  ! Explicit interfaces for routines implemented outside this
+  ! module.  Several of them (inner_vect_sv, reallocate_nz,
+  ! allocate_mnnz, cp_diag_from_fmt, mv_diag_from_fmt, csmm, scal,
+  ! scals) are declared but not currently bound to the type: the
+  ! matching bindings are commented out in the type definition.
+  !
+  interface
+    ! Bound as vect_mv.
+    subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(in)   :: a
+      real(psb_spk_), intent(in)       :: alpha, beta
+      class(psb_s_base_vect_type), intent(inout) :: x
+      class(psb_s_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_s_cuda_diag_vect_mv
+  end interface
+
+  interface
+    ! Not bound (in_vect_sv binding is commented out).
+    subroutine psb_s_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_ipk_, psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type
+      class(psb_s_cuda_diag_sparse_mat), intent(in)   :: a
+      real(psb_spk_), intent(in)       :: alpha, beta
+      class(psb_s_base_vect_type), intent(inout) :: x, y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)  :: trans
+    end subroutine psb_s_cuda_diag_inner_vect_sv
+  end interface
+
+  interface
+    ! Not bound (reallocate_nz binding is commented out).
+    subroutine  psb_s_cuda_diag_reallocate_nz(nz,a)
+      import :: psb_s_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+    end subroutine psb_s_cuda_diag_reallocate_nz
+  end interface
+
+  interface
+    ! Not bound (allocate_mnnz binding is commented out).
+    subroutine  psb_s_cuda_diag_allocate_mnnz(m,n,a,nz)
+      import :: psb_s_cuda_diag_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_s_cuda_diag_allocate_mnnz
+  end interface
+
+  interface
+    ! Bound as mold.
+    subroutine psb_s_cuda_diag_mold(a,b,info)
+      import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(in)                  :: a
+      class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_s_cuda_diag_mold
+  end interface
+
+  interface
+    ! Bound as to_gpu.
+    ! NOTE(review): meaning of the optional nzrm is not visible
+    ! here -- confirm against the implementation.
+    subroutine psb_s_cuda_diag_to_gpu(a,info, nzrm)
+      import :: psb_s_cuda_diag_sparse_mat, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)             :: info
+      integer(psb_ipk_), intent(in), optional    :: nzrm
+    end subroutine psb_s_cuda_diag_to_gpu
+  end interface
+
+  interface
+    ! Bound as cp_from_coo; the COO source is intent(in).
+    subroutine psb_s_cuda_cp_diag_from_coo(a,b,info)
+      import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(in)    :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_s_cuda_cp_diag_from_coo
+  end interface
+
+  interface
+    ! Not bound (cp_from_fmt binding is commented out).
+    subroutine psb_s_cuda_cp_diag_from_fmt(a,b,info)
+      import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_s_base_sparse_mat), intent(in)   :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_s_cuda_cp_diag_from_fmt
+  end interface
+
+  interface
+    ! Bound as mv_from_coo; the COO source is intent(inout).
+    subroutine psb_s_cuda_mv_diag_from_coo(a,b,info)
+      import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)             :: info
+    end subroutine psb_s_cuda_mv_diag_from_coo
+  end interface
+
+
+  interface
+    ! Not bound (mv_from_fmt binding is commented out).
+    subroutine psb_s_cuda_mv_diag_from_fmt(a,b,info)
+      import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(inout)  :: a
+      class(psb_s_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_s_cuda_mv_diag_from_fmt
+  end interface
+
+  interface
+    ! Bound as csmv; x and y are plain rank-1 arrays.
+    subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)          :: alpha, beta, x(:)
+      real(psb_spk_), intent(inout)       :: y(:)
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_s_cuda_diag_csmv
+  end interface
+  interface
+    ! Not bound (csmm binding is commented out).
+    subroutine psb_s_cuda_diag_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)          :: alpha, beta, x(:,:)
+      real(psb_spk_), intent(inout)       :: y(:,:)
+      integer(psb_ipk_), intent(out)                :: info
+      character, optional, intent(in)     :: trans
+    end subroutine psb_s_cuda_diag_csmm
+  end interface
+
+  interface
+    ! Not bound (scalv binding is commented out).
+    subroutine psb_s_cuda_diag_scal(d,a,info, side)
+      import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in)      :: d(:)
+      integer(psb_ipk_), intent(out)            :: info
+      character, intent(in), optional :: side
+    end subroutine psb_s_cuda_diag_scal
+  end interface
+
+  interface
+    ! Not bound (scals binding is commented out).
+    subroutine psb_s_cuda_diag_scals(d,a,info)
+      import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in)      :: d
+      integer(psb_ipk_), intent(out)            :: info
+    end subroutine psb_s_cuda_diag_scals
+  end interface
+
+
+contains
+
+  ! ======================================================
+  !
+  !   Getters
+  !
+  ! ======================================================
+
+  !
+  ! Size estimate for the matrix: a fixed 8 plus the element
+  ! counts of a%data and a%offset weighted by psb_sizeof_sp and
+  ! psb_sizeof_ip respectively.  Only the host arrays are counted.
+  !
+  function s_cuda_diag_sizeof(a) result(res)
+    implicit none
+    class(psb_s_cuda_diag_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    res = 8
+    res = res + psb_sizeof_sp  * size(a%data)
+    res = res + psb_sizeof_ip * size(a%offset)
+
+    ! Should we account for the shadow data structure
+    ! on the GPU device side?
+    ! res = 2*res
+
+  end function s_cuda_diag_sizeof
+
+  !
+  ! Format tag of this storage class.
+  !
+  function s_cuda_diag_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'DIAG'
+  end function s_cuda_diag_get_fmt
+
+
+
+  ! ======================================================
+  !
+  !   Data management
+  !
+  ! ======================================================
+
+  !
+  ! Release the matrix.  The device copy is freed through
+  ! freeDiagDevice only when the handle is associated, the handle
+  ! is reset to c_null_ptr so a later free/finalize cannot release
+  ! it twice, and the parent DIA free is then invoked.
+  !
+  subroutine  s_cuda_diag_free(a)
+    use diagdev_mod
+    implicit none
+    class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeDiagDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    call a%psb_s_dia_sparse_mat%free()
+
+    return
+
+  end subroutine s_cuda_diag_free
+
+  !
+  ! Finalizer: frees the device copy (if any) and resets the
+  ! handle.  Host components are not touched here.
+  !
+  subroutine  s_cuda_diag_finalize(a)
+    use diagdev_mod
+    implicit none
+    type(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeDiagDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+
+    return
+  end subroutine s_cuda_diag_finalize
+
+end module psb_s_cuda_diag_mat_mod
diff --git a/cuda/psb_s_cuda_dnsg_mat_mod.F90 b/cuda/psb_s_cuda_dnsg_mat_mod.F90
new file mode 100644
index 00000000..07b49f03
--- /dev/null
+++ b/cuda/psb_s_cuda_dnsg_mat_mod.F90
@@ -0,0 +1,273 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! 
software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Module: psb_s_cuda_dnsg_mat_mod
+!
+! Single-precision dense ("DNSG") matrix storage with a GPU-side copy.
+! The type extends psb_s_dns_sparse_mat and adds an opaque C handle,
+! deviceMat, which is released through freeDnsDevice (from dnsdev_mod).
+! The conversion and product routines are external procedures; only
+! their explicit interfaces are declared here.
+!
+module psb_s_cuda_dnsg_mat_mod
+
+  use iso_c_binding
+  use psb_s_mat_mod
+  use psb_s_dns_mat_mod
+  use dnsdev_mod
+
+  implicit none
+
+  type, extends(psb_s_dns_sparse_mat) :: psb_s_cuda_dnsg_sparse_mat
+    !
+    ! ITPACK/DNS format, extended.
+    ! We are adding here the routines to create a copy of the data
+    ! into the GPU.
+    !
+    ! Opaque handle to the device-side matrix; c_null_ptr when no
+    ! device copy is attached.
+    type(c_ptr) :: deviceMat = c_null_ptr
+
+  contains
+    procedure, nopass  :: get_fmt     => s_cuda_dnsg_get_fmt
+    procedure, pass(a) :: vect_mv     => psb_s_cuda_dnsg_vect_mv
+    ! Not overridden in this type (the inherited bindings are used):
+    !   sizeof, csmm, csmv, in_vect_sv, scals, scalv,
+    !   reallocate_nz, allocate_mnnz
+    !
+    ! Note: we *do* need the TO methods, because of the need to invoke SYNC
+    !
+    procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_dnsg_from_coo
+    procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_dnsg_from_fmt
+    procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_dnsg_from_coo
+    procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_dnsg_from_fmt
+    procedure, pass(a) :: free        => s_cuda_dnsg_free
+    procedure, pass(a) :: mold        => psb_s_cuda_dnsg_mold
+    procedure, pass(a) :: to_gpu      => psb_s_cuda_dnsg_to_gpu
+    final              :: s_cuda_dnsg_finalize
+  end type psb_s_cuda_dnsg_sparse_mat
+
+  ! Only procedures that actually exist in this module are listed; they
+  ! are reachable through the type-bound names above.
+  private :: s_cuda_dnsg_free, s_cuda_dnsg_get_fmt
+
+
+  ! y = alpha*op(A)*x + beta*y on encapsulated vectors
+  ! (NOTE(review): operation inferred from the argument list; the
+  ! implementation is external to this module).
+  interface
+    subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_
+      class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)                 :: alpha, beta
+      class(psb_s_base_vect_type), intent(inout) :: x
+      class(psb_s_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)            :: trans
+    end subroutine psb_s_cuda_dnsg_vect_mv
+  end interface
+
+  ! Allocate b with the same dynamic type as a.
+  interface
+    subroutine psb_s_cuda_dnsg_mold(a,b,info)
+      import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_dnsg_sparse_mat), intent(in)             :: a
+      class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_s_cuda_dnsg_mold
+  end interface
+
+  ! Build or refresh the device-side copy from the host data.
+  interface
+    subroutine psb_s_cuda_dnsg_to_gpu(a,info)
+      import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_
+      class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_s_cuda_dnsg_to_gpu
+  end interface
+
+  ! Conversions into DNSG: cp_* leave the source b untouched
+  ! (intent(in)), mv_* receive it as intent(inout).
+  interface
+    subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info)
+      import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(in)          :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_s_cuda_cp_dnsg_from_coo
+  end interface
+
+  interface
+    subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info)
+      import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_s_base_sparse_mat), intent(in)         :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_s_cuda_cp_dnsg_from_fmt
+  end interface
+
+  interface
+    subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info)
+      import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(inout)       :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_s_cuda_mv_dnsg_from_coo
+  end interface
+
+  interface
+    subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info)
+      import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+      class(psb_s_base_sparse_mat), intent(inout)      :: b
+      integer(psb_ipk_), intent(out)                   :: info
+    end subroutine psb_s_cuda_mv_dnsg_from_fmt
+  end interface
+
+contains
+
+  ! == ===================================
+  !
+  ! Getters
+  !
+  ! == ===================================
+
+  ! Storage format tag: 'DNSG', blank-padded to the 5-character result.
+  function s_cuda_dnsg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'DNSG'
+  end function s_cuda_dnsg_get_fmt
+
+
+  ! == ===================================
+  !
+  ! Data management
+  !
+  ! == ===================================
+
+  ! Release the device handle (if one is attached), reset it to
+  ! c_null_ptr, then let the parent DNS type free the host-side data.
+  subroutine s_cuda_dnsg_free(a)
+    use dnsdev_mod
+    implicit none
+    class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeDnsDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    call a%psb_s_dns_sparse_mat%free()
+
+    return
+
+  end subroutine s_cuda_dnsg_free
+
+  ! Finalizer: only the device handle is released here; the host-side
+  ! components are not touched by this routine.
+  subroutine s_cuda_dnsg_finalize(a)
+    use dnsdev_mod
+    implicit none
+    type(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeDnsDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+
+    return
+  end subroutine s_cuda_dnsg_finalize
+
+end module psb_s_cuda_dnsg_mat_mod
diff --git a/cuda/psb_s_cuda_elg_mat_mod.F90 b/cuda/psb_s_cuda_elg_mat_mod.F90
new file mode 100644
index 00000000..0626fd24
--- /dev/null
+++ b/cuda/psb_s_cuda_elg_mat_mod.F90
@@ -0,0 +1,454 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Module: psb_s_cuda_elg_mat_mod
+!
+! Single-precision ELLPACK ("ELG") matrix storage with a GPU-side copy.
+! The type extends psb_s_ell_sparse_mat with an opaque device handle and
+! a three-valued state flag (devstate) recording which side holds the
+! most recent data: host, device, or both in sync.
+! Computational and conversion routines are external procedures; only
+! their explicit interfaces are declared here.
+!
+module psb_s_cuda_elg_mat_mod
+
+  use iso_c_binding
+  use psb_s_mat_mod
+  use psb_s_ell_mat_mod
+  use psb_i_cuda_vect_mod
+
+  implicit none
+
+  ! Values stored in devstate.
+  integer(psb_ipk_), parameter, private :: is_host = -1
+  integer(psb_ipk_), parameter, private :: is_sync = 0
+  integer(psb_ipk_), parameter, private :: is_dev  = 1
+
+  type, extends(psb_s_ell_sparse_mat) :: psb_s_cuda_elg_sparse_mat
+    !
+    ! ITPACK/ELL format, extended.
+    ! We are adding here the routines to create a copy of the data
+    ! into the GPU.
+    !
+    ! Opaque handle to the device-side matrix; c_null_ptr when no
+    ! device copy is attached.
+    type(c_ptr)       :: deviceMat = c_null_ptr
+    ! One of is_host / is_sync / is_dev.
+    integer(psb_ipk_) :: devstate  = is_host
+
+  contains
+    procedure, nopass  :: get_fmt       => s_cuda_elg_get_fmt
+    procedure, pass(a) :: sizeof        => s_cuda_elg_sizeof
+    procedure, pass(a) :: vect_mv       => psb_s_cuda_elg_vect_mv
+    procedure, pass(a) :: csmm          => psb_s_cuda_elg_csmm
+    procedure, pass(a) :: csmv          => psb_s_cuda_elg_csmv
+    procedure, pass(a) :: in_vect_sv    => psb_s_cuda_elg_inner_vect_sv
+    procedure, pass(a) :: scals         => psb_s_cuda_elg_scals
+    procedure, pass(a) :: scalv         => psb_s_cuda_elg_scal
+    procedure, pass(a) :: reallocate_nz => psb_s_cuda_elg_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_s_cuda_elg_allocate_mnnz
+    procedure, pass(a) :: reinit        => s_cuda_elg_reinit
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo   => psb_s_cuda_cp_elg_from_coo
+    procedure, pass(a) :: cp_from_fmt   => psb_s_cuda_cp_elg_from_fmt
+    procedure, pass(a) :: mv_from_coo   => psb_s_cuda_mv_elg_from_coo
+    procedure, pass(a) :: mv_from_fmt   => psb_s_cuda_mv_elg_from_fmt
+    procedure, pass(a) :: free          => s_cuda_elg_free
+    procedure, pass(a) :: mold          => psb_s_cuda_elg_mold
+    procedure, pass(a) :: csput_a       => psb_s_cuda_elg_csput_a
+    procedure, pass(a) :: csput_v       => psb_s_cuda_elg_csput_v
+    procedure, pass(a) :: is_host       => s_cuda_elg_is_host
+    procedure, pass(a) :: is_dev        => s_cuda_elg_is_dev
+    procedure, pass(a) :: is_sync       => s_cuda_elg_is_sync
+    procedure, pass(a) :: set_host      => s_cuda_elg_set_host
+    procedure, pass(a) :: set_dev       => s_cuda_elg_set_dev
+    procedure, pass(a) :: set_sync      => s_cuda_elg_set_sync
+    procedure, pass(a) :: sync          => s_cuda_elg_sync
+    procedure, pass(a) :: from_gpu      => psb_s_cuda_elg_from_gpu
+    procedure, pass(a) :: to_gpu        => psb_s_cuda_elg_to_gpu
+    procedure, pass(a) :: asb           => psb_s_cuda_elg_asb
+    final              :: s_cuda_elg_finalize
+  end type psb_s_cuda_elg_sparse_mat
+
+  ! Only procedures that actually exist in this module are listed; they
+  ! are reachable through the type-bound names above.
+  private :: s_cuda_elg_free, s_cuda_elg_get_fmt, &
+       & s_cuda_elg_sizeof, s_cuda_elg_sync
+
+
+  ! ---- Products and triangular solve (external implementations) ----
+
+  interface
+    subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)                 :: alpha, beta
+      class(psb_s_base_vect_type), intent(inout) :: x
+      class(psb_s_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)            :: trans
+    end subroutine psb_s_cuda_elg_vect_mv
+  end interface
+
+  interface
+    subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
+      import :: psb_ipk_, psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type
+      class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)                 :: alpha, beta
+      class(psb_s_base_vect_type), intent(inout) :: x, y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)            :: trans
+    end subroutine psb_s_cuda_elg_inner_vect_sv
+  end interface
+
+  interface
+    subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)      :: alpha, beta, x(:)
+      real(psb_spk_), intent(inout)   :: y(:)
+      integer(psb_ipk_), intent(out)  :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_s_cuda_elg_csmv
+  end interface
+
+  interface
+    subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)      :: alpha, beta, x(:,:)
+      real(psb_spk_), intent(inout)   :: y(:,:)
+      integer(psb_ipk_), intent(out)  :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_s_cuda_elg_csmm
+  end interface
+
+  ! ---- Scaling ----
+
+  interface
+    subroutine psb_s_cuda_elg_scal(d,a,info, side)
+      import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in)      :: d(:)
+      integer(psb_ipk_), intent(out)  :: info
+      character, intent(in), optional :: side
+    end subroutine psb_s_cuda_elg_scal
+  end interface
+
+  interface
+    subroutine psb_s_cuda_elg_scals(d,a,info)
+      import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in)     :: d
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cuda_elg_scals
+  end interface
+
+  ! ---- Allocation, assembly and insertion ----
+
+  interface
+    subroutine psb_s_cuda_elg_reallocate_nz(nz,a)
+      import :: psb_s_cuda_elg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                   :: nz
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+    end subroutine psb_s_cuda_elg_reallocate_nz
+  end interface
+
+  interface
+    subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz)
+      import :: psb_s_cuda_elg_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in)                   :: m,n
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional         :: nz
+    end subroutine psb_s_cuda_elg_allocate_mnnz
+  end interface
+
+  interface
+    subroutine psb_s_cuda_elg_mold(a,b,info)
+      import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(in)             :: a
+      class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_s_cuda_elg_mold
+  end interface
+
+  interface
+    subroutine psb_s_cuda_elg_asb(a)
+      import :: psb_s_cuda_elg_sparse_mat
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+    end subroutine psb_s_cuda_elg_asb
+  end interface
+
+  ! Insert nz coefficients given as plain arrays.
+  interface
+    subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+      import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in)     :: val(:)
+      integer(psb_ipk_), intent(in)  :: nz,ia(:), ja(:),&
+           & imin,imax,jmin,jmax
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cuda_elg_csput_a
+  end interface
+
+  ! Insert nz coefficients given as encapsulated vectors.
+  interface
+    subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+      import :: psb_s_cuda_elg_sparse_mat, psb_ipk_, psb_s_base_vect_type,&
+           & psb_i_base_vect_type
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_s_base_vect_type), intent(inout) :: val
+      class(psb_i_base_vect_type), intent(inout) :: ia, ja
+      integer(psb_ipk_), intent(in)  :: nz, imin,imax,jmin,jmax
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cuda_elg_csput_v
+  end interface
+
+  ! ---- Host <-> device transfers ----
+
+  interface
+    subroutine psb_s_cuda_elg_from_gpu(a,info)
+      import :: psb_s_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_s_cuda_elg_from_gpu
+  end interface
+
+  interface
+    subroutine psb_s_cuda_elg_to_gpu(a,info, nzrm)
+      import :: psb_s_cuda_elg_sparse_mat, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                  :: info
+      integer(psb_ipk_), intent(in), optional         :: nzrm
+    end subroutine psb_s_cuda_elg_to_gpu
+  end interface
+
+  ! ---- Conversions into ELG: cp_* take the source as intent(in),
+  !      mv_* as intent(inout) ----
+
+  interface
+    subroutine psb_s_cuda_cp_elg_from_coo(a,b,info)
+      import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(in)         :: b
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_s_cuda_cp_elg_from_coo
+  end interface
+
+  interface
+    subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info)
+      import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_s_base_sparse_mat), intent(in)        :: b
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_s_cuda_cp_elg_from_fmt
+  end interface
+
+  interface
+    subroutine psb_s_cuda_mv_elg_from_coo(a,b,info)
+      import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(inout)      :: b
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_s_cuda_mv_elg_from_coo
+  end interface
+
+  interface
+    subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info)
+      import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+      class(psb_s_base_sparse_mat), intent(inout)     :: b
+      integer(psb_ipk_), intent(out)                  :: info
+    end subroutine psb_s_cuda_mv_elg_from_fmt
+  end interface
+
+
+contains
+
+  ! == ===================================
+  !
+  ! Getters
+  !
+  ! == ===================================
+
+  ! Host-side size estimate: 8 plus the element counts of val, irn,
+  ! idiag and ja weighted by psb_sizeof_sp / psb_sizeof_ip.
+  ! If the device holds newer data the matrix is synchronized first.
+  function s_cuda_elg_sizeof(a) result(res)
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+    res = 8
+    res = res + psb_sizeof_sp * size(a%val)
+    res = res + psb_sizeof_ip * size(a%irn)
+    res = res + psb_sizeof_ip * size(a%idiag)
+    res = res + psb_sizeof_ip * size(a%ja)
+    ! Should we account for the shadow data structure
+    ! on the GPU device side?
+    ! res = 2*res
+
+  end function s_cuda_elg_sizeof
+
+  ! Storage format tag: 'ELG', blank-padded to the 5-character result.
+  function s_cuda_elg_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'ELG'
+  end function s_cuda_elg_get_fmt
+
+
+  ! == ===================================
+  !
+  ! Data management
+  !
+  ! == ===================================
+
+  ! Move an assembled matrix back to the update state.
+  !   clear (optional, default .true.): zero the coefficients.
+  ! In the build/update state this is a no-op; any state other than
+  ! build/update/assembled raises psb_err_invalid_mat_state_.
+  subroutine s_cuda_elg_reinit(a,clear)
+    use elldev_mod
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+    logical, intent(in), optional :: clear
+    integer(psb_ipk_) :: info, err_act
+    character(len=20) :: name='reinit'
+    logical           :: clear_
+
+    call psb_erractionsave(err_act)
+    info = psb_success_
+
+    if (present(clear)) then
+      clear_ = clear
+    else
+      clear_ = .true.
+    end if
+
+    if (a%is_bld() .or. a%is_upd()) then
+      ! Nothing to do. Fall through (instead of returning here) so the
+      ! psb_erractionsave above is always paired with a restore.
+    else if (a%is_asb()) then
+      if (a%is_dev().or.a%is_sync()) then
+        ! The device copy is current: clear it there and mark the
+        ! device as the side holding the latest data.
+        if (clear_) call zeroEllDevice(a%deviceMat)
+        call a%set_dev()
+      else if (a%is_host()) then
+        ! Honour clear_ on the host as well, matching the device branch.
+        if (clear_) a%val(:,:) = szero
+      end if
+      call a%set_upd()
+    else
+      info = psb_err_invalid_mat_state_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine s_cuda_elg_reinit
+
+  ! Release the device handle (if one is attached), reset it to
+  ! c_null_ptr, free the host-side data through the parent ELL type and
+  ! mark the (now empty) matrix as synchronized.
+  subroutine s_cuda_elg_free(a)
+    use elldev_mod
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeEllDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    call a%psb_s_ell_sparse_mat%free()
+    call a%set_sync()
+
+    return
+
+  end subroutine s_cuda_elg_free
+
+  ! Bring host and device copies in line: host-newer data goes to the
+  ! GPU, device-newer data comes back. The dummy is intent(in) so that
+  ! sync can be called from read-only contexts (e.g. sizeof); the
+  ! update is performed through a pointer to the same object.
+  ! NOTE(review): the info code of the transfer is not checked, and the
+  ! state is set to "sync" unconditionally.
+  subroutine s_cuda_elg_sync(a)
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), target, intent(in) :: a
+    class(psb_s_cuda_elg_sparse_mat), pointer :: tmpa
+    integer(psb_ipk_) :: info
+
+    tmpa => a
+    if (tmpa%is_host()) then
+      call tmpa%to_gpu(info)
+    else if (tmpa%is_dev()) then
+      call tmpa%from_gpu(info)
+    end if
+    call tmpa%set_sync()
+    return
+
+  end subroutine s_cuda_elg_sync
+
+  ! ---- devstate setters ----
+
+  subroutine s_cuda_elg_set_host(a)
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine s_cuda_elg_set_host
+
+  subroutine s_cuda_elg_set_dev(a)
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_dev
+  end subroutine s_cuda_elg_set_dev
+
+  subroutine s_cuda_elg_set_sync(a)
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_sync
+  end subroutine s_cuda_elg_set_sync
+
+  ! ---- devstate queries ----
+
+  function s_cuda_elg_is_dev(a) result(res)
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_dev)
+  end function s_cuda_elg_is_dev
+
+  function s_cuda_elg_is_host(a) result(res)
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_host)
+  end function s_cuda_elg_is_host
+
+  function s_cuda_elg_is_sync(a) result(res)
+    implicit none
+    class(psb_s_cuda_elg_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_sync)
+  end function s_cuda_elg_is_sync
+
+  ! Finalizer: only the device handle is released here.
+  subroutine s_cuda_elg_finalize(a)
+    use elldev_mod
+    implicit none
+    type(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeEllDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    return
+
+  end subroutine s_cuda_elg_finalize
+
+end module psb_s_cuda_elg_mat_mod
diff --git a/cuda/psb_s_cuda_hdiag_mat_mod.F90 b/cuda/psb_s_cuda_hdiag_mat_mod.F90
new file mode 100644
index 00000000..cac72c86
--- /dev/null
+++ b/cuda/psb_s_cuda_hdiag_mat_mod.F90
@@ -0,0 +1,268 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Module: psb_s_cuda_hdiag_mat_mod
+!
+! Single-precision "HDIAG" matrix storage with a GPU-side copy.
+! The type extends psb_s_hdia_sparse_mat and adds an opaque C handle,
+! deviceMat, which is released through freeHdiagDevice (from
+! hdiagdev_mod). Conversion and product routines are external
+! procedures; only their explicit interfaces are declared here.
+!
+module psb_s_cuda_hdiag_mat_mod
+
+  use iso_c_binding
+  use psb_base_mod
+  use psb_s_hdia_mat_mod
+
+  implicit none
+
+  type, extends(psb_s_hdia_sparse_mat) :: psb_s_cuda_hdiag_sparse_mat
+    !
+    ! Opaque handle to the device-side matrix; c_null_ptr when no
+    ! device copy is attached.
+    type(c_ptr) :: deviceMat = c_null_ptr
+
+  contains
+    procedure, nopass  :: get_fmt     => s_cuda_hdiag_get_fmt
+    procedure, pass(a) :: vect_mv     => psb_s_cuda_hdiag_vect_mv
+    procedure, pass(a) :: csmv        => psb_s_cuda_hdiag_csmv
+    ! Not overridden in this type (the inherited bindings are used):
+    !   sizeof, csmm, in_vect_sv, scals, scalv, reallocate_nz,
+    !   allocate_mnnz, cp_from_fmt, mv_from_fmt
+    !
+    ! Note: we do *not* need the TO methods, because the parent type
+    ! methods will work.
+    procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hdiag_from_coo
+    procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hdiag_from_coo
+    procedure, pass(a) :: free        => s_cuda_hdiag_free
+    procedure, pass(a) :: mold        => psb_s_cuda_hdiag_mold
+    procedure, pass(a) :: to_gpu      => psb_s_cuda_hdiag_to_gpu
+    final              :: s_cuda_hdiag_finalize
+  end type psb_s_cuda_hdiag_sparse_mat
+
+  ! Only procedures that actually exist in this module are listed; they
+  ! are reachable through the type-bound names above.
+  private :: s_cuda_hdiag_free, s_cuda_hdiag_get_fmt
+
+
+  ! ---- Products (external implementations) ----
+
+  interface
+    subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_
+      class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)                 :: alpha, beta
+      class(psb_s_base_vect_type), intent(inout) :: x
+      class(psb_s_base_vect_type), intent(inout) :: y
+      integer(psb_ipk_), intent(out)             :: info
+      character, optional, intent(in)            :: trans
+    end subroutine psb_s_cuda_hdiag_vect_mv
+  end interface
+
+  interface
+    subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in)      :: alpha, beta, x(:)
+      real(psb_spk_), intent(inout)   :: y(:)
+      integer(psb_ipk_), intent(out)  :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_s_cuda_hdiag_csmv
+  end interface
+
+  ! ---- Allocation and transfer ----
+
+  ! Allocate b with the same dynamic type as a.
+  interface
+    subroutine psb_s_cuda_hdiag_mold(a,b,info)
+      import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_cuda_hdiag_sparse_mat), intent(in)           :: a
+      class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out)                           :: info
+    end subroutine psb_s_cuda_hdiag_mold
+  end interface
+
+  ! Build or refresh the device-side copy from the host data.
+  interface
+    subroutine psb_s_cuda_hdiag_to_gpu(a,info)
+      import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_
+      class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(out)                    :: info
+    end subroutine psb_s_cuda_hdiag_to_gpu
+  end interface
+
+  ! ---- Conversions from COO: cp_* takes the source as intent(in),
+  !      mv_* as intent(inout) ----
+
+  interface
+    subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info)
+      import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(in)           :: b
+      integer(psb_ipk_), intent(out)                    :: info
+    end subroutine psb_s_cuda_cp_hdiag_from_coo
+  end interface
+
+  interface
+    subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info)
+      import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(inout)        :: b
+      integer(psb_ipk_), intent(out)                    :: info
+    end subroutine psb_s_cuda_mv_hdiag_from_coo
+  end interface
+
+contains
+
+  ! == ===================================
+  !
+  ! Getters
+  !
+  ! == ===================================
+
+  ! Storage format tag: 'HDIAG' (exactly fills the 5-character result).
+  function s_cuda_hdiag_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HDIAG'
+  end function s_cuda_hdiag_get_fmt
+
+
+  ! == ===================================
+  !
+  ! Data management
+  !
+  ! == ===================================
+
+  ! Release the device handle (if one is attached), reset it to
+  ! c_null_ptr, then let the parent HDIA type free the host-side data.
+  subroutine s_cuda_hdiag_free(a)
+    use hdiagdev_mod
+    implicit none
+    class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeHdiagDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    call a%psb_s_hdia_sparse_mat%free()
+
+    return
+
+  end subroutine s_cuda_hdiag_free
+
+  ! Finalizer: releases the device handle and also calls the parent
+  ! free().
+  ! NOTE(review): the explicit parent free() looks redundant in a
+  ! finalizer; confirm before removing it.
+  subroutine s_cuda_hdiag_finalize(a)
+    use hdiagdev_mod
+    implicit none
+    type(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%deviceMat)) &
+         & call freeHdiagDevice(a%deviceMat)
+    a%deviceMat = c_null_ptr
+    call a%psb_s_hdia_sparse_mat%free()
+
+    return
+  end subroutine s_cuda_hdiag_finalize
+
+end module psb_s_cuda_hdiag_mat_mod
diff --git a/cuda/psb_s_cuda_hlg_mat_mod.F90 b/cuda/psb_s_cuda_hlg_mat_mod.F90
new file mode 100644
index 00000000..7b8c49b5
--- /dev/null
+++ b/cuda/psb_s_cuda_hlg_mat_mod.F90
@@ -0,0 +1,377 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! 
software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +
!
! HLL sparse-matrix class carrying an additional device-side handle.
!
! The module declares the type, the explicit interfaces of the bound
! procedures whose bodies live outside this module, and the small
! state-tracking / clean-up procedures implemented below.
!
module psb_s_cuda_hlg_mat_mod

  use iso_c_binding
  use psb_s_mat_mod
  use psb_s_hll_mat_mod

  ! Values taken by the devstate component.  s_cuda_hlg_sync uploads
  ! when the state is is_host, downloads when it is is_dev, and then
  ! records is_sync.
  integer(psb_ipk_), parameter, private :: is_host = -1
  integer(psb_ipk_), parameter, private :: is_sync = 0
  integer(psb_ipk_), parameter, private :: is_dev  = 1

  type, extends(psb_s_hll_sparse_mat) :: psb_s_cuda_hlg_sparse_mat
    !
    ! ITPACK/HLL format, extended with what is needed to keep a
    ! copy of the data on the GPU.
    !
    type(c_ptr) :: deviceMat = c_null_ptr   ! opaque device handle
    integer     :: devstate  = is_host      ! one of is_host/is_sync/is_dev

  contains
    ! Identification and size
    procedure, nopass  :: get_fmt       => s_cuda_hlg_get_fmt
    procedure, pass(a) :: sizeof        => s_cuda_hlg_sizeof
    ! Products and solves
    procedure, pass(a) :: vect_mv       => psb_s_cuda_hlg_vect_mv
    procedure, pass(a) :: csmm          => psb_s_cuda_hlg_csmm
    procedure, pass(a) :: csmv          => psb_s_cuda_hlg_csmv
    procedure, pass(a) :: in_vect_sv    => psb_s_cuda_hlg_inner_vect_sv
    ! Scaling
    procedure, pass(a) :: scals         => psb_s_cuda_hlg_scals
    procedure, pass(a) :: scalv         => psb_s_cuda_hlg_scal
    ! Allocation
    procedure, pass(a) :: reallocate_nz => psb_s_cuda_hlg_reallocate_nz
    procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hlg_allocate_mnnz
    ! Conversions.  The TO methods are deliberately not overridden:
    ! the parent type's versions are used.
    procedure, pass(a) :: cp_from_coo   => psb_s_cuda_cp_hlg_from_coo
    procedure, pass(a) :: cp_from_fmt   => psb_s_cuda_cp_hlg_from_fmt
    procedure, pass(a) :: mv_from_coo   => psb_s_cuda_mv_hlg_from_coo
    procedure, pass(a) :: mv_from_fmt   => psb_s_cuda_mv_hlg_from_fmt
    ! Life cycle
    procedure, pass(a) :: free          => s_cuda_hlg_free
    procedure, pass(a) :: mold          => psb_s_cuda_hlg_mold
    ! Host/device state tracking
    procedure, pass(a) :: is_host       => s_cuda_hlg_is_host
    procedure, pass(a) :: is_dev        => s_cuda_hlg_is_dev
    procedure, pass(a) :: is_sync       => s_cuda_hlg_is_sync
    procedure, pass(a) :: set_host      => s_cuda_hlg_set_host
    procedure, pass(a) :: set_dev       => s_cuda_hlg_set_dev
    procedure, pass(a) :: set_sync      => s_cuda_hlg_set_sync
    procedure, pass(a) :: sync          => s_cuda_hlg_sync
    ! Host <-> device transfers
    procedure, pass(a) :: from_gpu      => psb_s_cuda_hlg_from_gpu
    procedure, pass(a) :: to_gpu        => psb_s_cuda_hlg_to_gpu
    final              :: s_cuda_hlg_finalize
  end type psb_s_cuda_hlg_sparse_mat

  private :: s_cuda_hlg_get_nzeros, s_cuda_hlg_free,  s_cuda_hlg_get_fmt, &
       & s_cuda_hlg_get_size, s_cuda_hlg_sizeof, s_cuda_hlg_get_nz_row


  !
  ! Explicit interfaces for the bound procedures that are not
  ! implemented in this module.
  !
  interface

    subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans)
      import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_
      real(psb_spk_), intent(in)                  :: alpha, beta
      class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
      class(psb_s_base_vect_type), intent(inout)  :: x
      class(psb_s_base_vect_type), intent(inout)  :: y
      integer(psb_ipk_), intent(out)              :: info
      character, optional, intent(in)             :: trans
    end subroutine psb_s_cuda_hlg_vect_mv

    subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
      import :: psb_ipk_, psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type
      real(psb_spk_), intent(in)                  :: alpha, beta
      class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
      class(psb_s_base_vect_type), intent(inout)  :: x, y
      integer(psb_ipk_), intent(out)              :: info
      character, optional, intent(in)             :: trans
    end subroutine psb_s_cuda_hlg_inner_vect_sv

    subroutine psb_s_cuda_hlg_reallocate_nz(nz,a)
      import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in)                   :: nz
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
    end subroutine psb_s_cuda_hlg_reallocate_nz

    subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz)
      import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in)                   :: m,n
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(in), optional         :: nz
    end subroutine psb_s_cuda_hlg_allocate_mnnz

    subroutine psb_s_cuda_hlg_mold(a,b,info)
      import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
      class(psb_s_cuda_hlg_sparse_mat), intent(in)               :: a
      class(psb_s_base_sparse_mat), intent(inout), allocatable   :: b
      integer(psb_ipk_), intent(out)                             :: info
    end subroutine psb_s_cuda_hlg_mold

    subroutine psb_s_cuda_hlg_from_gpu(a,info)
      import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_s_cuda_hlg_from_gpu

    subroutine psb_s_cuda_hlg_to_gpu(a,info, nzrm)
      import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                  :: info
      integer(psb_ipk_), intent(in), optional         :: nzrm
    end subroutine psb_s_cuda_hlg_to_gpu

    subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info)
      import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
      class(psb_s_coo_sparse_mat), intent(in)         :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_s_cuda_cp_hlg_from_coo

    subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info)
      import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
      class(psb_s_base_sparse_mat), intent(in)        :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_s_cuda_cp_hlg_from_fmt

    subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info)
      import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
      class(psb_s_coo_sparse_mat), intent(inout)      :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_s_cuda_mv_hlg_from_coo

    subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info)
      import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
      class(psb_s_base_sparse_mat), intent(inout)     :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_s_cuda_mv_hlg_from_fmt

    subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans)
      import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_
      real(psb_spk_), intent(in)                   :: alpha, beta, x(:)
      class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
      real(psb_spk_), intent(inout)                :: y(:)
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_s_cuda_hlg_csmv

    subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans)
      import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_
      real(psb_spk_), intent(in)                   :: alpha, beta, x(:,:)
      class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
      real(psb_spk_), intent(inout)                :: y(:,:)
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_s_cuda_hlg_csmm

    subroutine psb_s_cuda_hlg_scal(d,a,info, side)
      import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_
      real(psb_spk_), intent(in)                      :: d(:)
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                  :: info
      character, intent(in), optional                 :: side
    end subroutine psb_s_cuda_hlg_scal

    subroutine psb_s_cuda_hlg_scals(d,a,info)
      import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_
      real(psb_spk_), intent(in)                      :: d
      class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_s_cuda_hlg_scals

  end interface


contains

  ! == ===================================
  !
  ! Getters
  !
  ! == ===================================

  ! Size estimate built from the host arrays val, irn, idiag, hkoffs
  ! and ja plus a constant 8.  If the device holds the current data
  ! the object is synchronised first.
  function s_cuda_hlg_sizeof(a) result(res)
    implicit none
    class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: res

    if (a%is_dev()) then
      call a%sync()
    end if

    res = 8
    res = res + psb_sizeof_sp * size(a%val)
    res = res + psb_sizeof_ip * size(a%irn)
    res = res + psb_sizeof_ip * size(a%idiag)
    res = res + psb_sizeof_ip * size(a%hkoffs)
    res = res + psb_sizeof_ip * size(a%ja)
    ! Open question kept from the original author: should the shadow
    ! copy on the device be counted too (res = 2*res)?

  end function s_cuda_hlg_sizeof

  ! Format tag for this storage class: 'HLG', blank padded to the
  ! declared length of 5.
  function s_cuda_hlg_get_fmt() result(res)
    implicit none
    character(len=5) :: res

    res = 'HLG'
  end function s_cuda_hlg_get_fmt


  ! == ===================================
  !
  ! Data management
  !
  ! == ===================================

  ! Release the device handle when one is associated, null it, then
  ! free the host storage through the parent HLL type.
  subroutine s_cuda_hlg_free(a)
    use hlldev_mod
    implicit none
    class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
    integer(psb_ipk_) :: info

    if (c_associated(a%deviceMat)) then
      call freeHllDevice(a%deviceMat)
    end if
    a%deviceMat = c_null_ptr
    call a%psb_s_hll_sparse_mat%free()

  end subroutine s_cuda_hlg_free


  ! Bring host and device copies in line: upload when the host copy is
  ! current, download when the device copy is current, then mark the
  ! object as synchronised.  The dummy is intent(in); the update goes
  ! through a pointer associated with it.
  subroutine s_cuda_hlg_sync(a)
    implicit none
    class(psb_s_cuda_hlg_sparse_mat), target, intent(in) :: a
    class(psb_s_cuda_hlg_sparse_mat), pointer :: tmpa
    integer(psb_ipk_) :: info

    tmpa => a
    if (tmpa%is_host()) then
      call tmpa%to_gpu(info)
    else if (tmpa%is_dev()) then
      call tmpa%from_gpu(info)
    end if
    call tmpa%set_sync()

  end subroutine s_cuda_hlg_sync

  ! Record that the host copy is the current one.
  subroutine s_cuda_hlg_set_host(a)
    implicit none
    class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a

    a%devstate = is_host
  end subroutine s_cuda_hlg_set_host

  ! Record that the device copy is the current one.
  subroutine s_cuda_hlg_set_dev(a)
    implicit none
    class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a

    a%devstate = is_dev
  end subroutine s_cuda_hlg_set_dev

  ! Record that host and device copies agree.
  subroutine s_cuda_hlg_set_sync(a)
    implicit none
    class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a

    a%devstate = is_sync
  end subroutine s_cuda_hlg_set_sync

  ! True when devstate equals is_dev.
  function s_cuda_hlg_is_dev(a) result(res)
    implicit none
    class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
    logical :: res

    res = (is_dev == a%devstate)
  end function s_cuda_hlg_is_dev

  ! True when devstate equals is_host.
  function s_cuda_hlg_is_host(a) result(res)
    implicit none
    class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
    logical :: res

    res = (is_host == a%devstate)
  end function s_cuda_hlg_is_host

  ! True when devstate equals is_sync.
  function s_cuda_hlg_is_sync(a) result(res)
    implicit none
    class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a
    logical :: res

    res = (is_sync == a%devstate)
  end function s_cuda_hlg_is_sync


  ! Finalizer: releases the device handle only; the host arrays are
  ! not touched here.
  subroutine s_cuda_hlg_finalize(a)
    use hlldev_mod
    implicit none
    type(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a

    if (c_associated(a%deviceMat)) then
      call freeHllDevice(a%deviceMat)
    end if
    a%deviceMat = c_null_ptr

  end subroutine s_cuda_hlg_finalize

end module psb_s_cuda_hlg_mat_mod
diff --git a/cuda/psb_s_cuda_hybg_mat_mod.F90 b/cuda/psb_s_cuda_hybg_mat_mod.F90 new file mode 100644 index 00000000..b4a7cd75 --- /dev/null +++ b/cuda/psb_s_cuda_hybg_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
#if PSB_CUDA_SHORT_VERSION <= 10

!
! CSR sparse-matrix class carrying a cuSPARSE HYB device object.
! The whole module is compiled only when the preprocessor condition
! above holds.
!
module psb_s_cuda_hybg_mat_mod

  use iso_c_binding
  use psb_s_mat_mod
  use cusparse_mod

  type, extends(psb_s_csr_sparse_mat) :: psb_s_cuda_hybg_sparse_mat
    !
    ! HYBG: an interface to the cuSPARSE HYB format.
    ! On the CPU side the data is kept in CSR storage.
    !
    type(s_Hmat) :: deviceMat

  contains
    ! Identification and size
    procedure, nopass  :: get_fmt       => s_cuda_hybg_get_fmt
    procedure, pass(a) :: sizeof        => s_cuda_hybg_sizeof
    ! Products and solves
    procedure, pass(a) :: vect_mv       => psb_s_cuda_hybg_vect_mv
    procedure, pass(a) :: in_vect_sv    => psb_s_cuda_hybg_inner_vect_sv
    procedure, pass(a) :: csmm          => psb_s_cuda_hybg_csmm
    procedure, pass(a) :: csmv          => psb_s_cuda_hybg_csmv
    ! Scaling
    procedure, pass(a) :: scals         => psb_s_cuda_hybg_scals
    procedure, pass(a) :: scalv         => psb_s_cuda_hybg_scal
    ! Allocation
    procedure, pass(a) :: reallocate_nz => psb_s_cuda_hybg_reallocate_nz
    procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hybg_allocate_mnnz
    ! Conversions.  The TO methods are deliberately not overridden:
    ! the parent type's versions are used.
    procedure, pass(a) :: cp_from_coo   => psb_s_cuda_cp_hybg_from_coo
    procedure, pass(a) :: cp_from_fmt   => psb_s_cuda_cp_hybg_from_fmt
    procedure, pass(a) :: mv_from_coo   => psb_s_cuda_mv_hybg_from_coo
    procedure, pass(a) :: mv_from_fmt   => psb_s_cuda_mv_hybg_from_fmt
    ! Life cycle and transfer
    procedure, pass(a) :: free          => s_cuda_hybg_free
    procedure, pass(a) :: mold          => psb_s_cuda_hybg_mold
    procedure, pass(a) :: to_gpu        => psb_s_cuda_hybg_to_gpu
    final              :: s_cuda_hybg_finalize
  end type psb_s_cuda_hybg_sparse_mat

  private :: s_cuda_hybg_get_nzeros, s_cuda_hybg_free,  s_cuda_hybg_get_fmt, &
       & s_cuda_hybg_get_size, s_cuda_hybg_sizeof, s_cuda_hybg_get_nz_row


  !
  ! Explicit interfaces for the bound procedures that are not
  ! implemented in this module.
  !
  interface

    subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
      import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_
      real(psb_spk_), intent(in)                    :: alpha, beta
      class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a
      class(psb_s_base_vect_type), intent(inout)    :: x
      class(psb_s_base_vect_type), intent(inout)    :: y
      integer(psb_ipk_), intent(out)                :: info
      character, optional, intent(in)               :: trans
    end subroutine psb_s_cuda_hybg_inner_vect_sv

    subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans)
      import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_
      real(psb_spk_), intent(in)                    :: alpha, beta
      class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a
      class(psb_s_base_vect_type), intent(inout)    :: x
      class(psb_s_base_vect_type), intent(inout)    :: y
      integer(psb_ipk_), intent(out)                :: info
      character, optional, intent(in)               :: trans
    end subroutine psb_s_cuda_hybg_vect_mv

    subroutine psb_s_cuda_hybg_reallocate_nz(nz,a)
      import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in)                    :: nz
      class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
    end subroutine psb_s_cuda_hybg_reallocate_nz

    subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz)
      import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in)                    :: m,n
      class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(in), optional          :: nz
    end subroutine psb_s_cuda_hybg_allocate_mnnz

    subroutine psb_s_cuda_hybg_mold(a,b,info)
      import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
      class(psb_s_cuda_hybg_sparse_mat), intent(in)             :: a
      class(psb_s_base_sparse_mat), intent(inout), allocatable  :: b
      integer(psb_ipk_), intent(out)                            :: info
    end subroutine psb_s_cuda_hybg_mold

    subroutine psb_s_cuda_hybg_to_gpu(a,info, nzrm)
      import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_
      class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                   :: info
      integer(psb_ipk_), intent(in), optional          :: nzrm
    end subroutine psb_s_cuda_hybg_to_gpu

    subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info)
      import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
      class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
      class(psb_s_coo_sparse_mat), intent(in)          :: b
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_s_cuda_cp_hybg_from_coo

    subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info)
      import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
      class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
      class(psb_s_base_sparse_mat), intent(in)         :: b
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_s_cuda_cp_hybg_from_fmt

    subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info)
      import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
      class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
      class(psb_s_coo_sparse_mat), intent(inout)       :: b
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_s_cuda_mv_hybg_from_coo

    subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info)
      import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
      class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
      class(psb_s_base_sparse_mat), intent(inout)      :: b
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_s_cuda_mv_hybg_from_fmt

    subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans)
      import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_
      real(psb_spk_), intent(in)                    :: alpha, beta, x(:)
      class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a
      real(psb_spk_), intent(inout)                 :: y(:)
      integer(psb_ipk_), intent(out)                :: info
      character, optional, intent(in)               :: trans
    end subroutine psb_s_cuda_hybg_csmv

    subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans)
      import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_
      real(psb_spk_), intent(in)                    :: alpha, beta, x(:,:)
      class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a
      real(psb_spk_), intent(inout)                 :: y(:,:)
      integer(psb_ipk_), intent(out)                :: info
      character, optional, intent(in)               :: trans
    end subroutine psb_s_cuda_hybg_csmm

    subroutine psb_s_cuda_hybg_scal(d,a,info,side)
      import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_
      real(psb_spk_), intent(in)                       :: d(:)
      class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                   :: info
      character, intent(in), optional                  :: side
    end subroutine psb_s_cuda_hybg_scal

    subroutine psb_s_cuda_hybg_scals(d,a,info)
      import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_
      real(psb_spk_), intent(in)                       :: d
      class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                   :: info
    end subroutine psb_s_cuda_hybg_scals

  end interface


contains

  ! == ===================================
  !
  ! Getters
  !
  ! == ===================================

  ! Size estimate built from the host CSR arrays val, irp and ja plus
  ! a constant 8.
  function s_cuda_hybg_sizeof(a) result(res)
    implicit none
    class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: res

    res = 8
    res = res + psb_sizeof_sp * size(a%val)
    res = res + psb_sizeof_ip * size(a%irp)
    res = res + psb_sizeof_ip * size(a%ja)
    ! Open question kept from the original author: should the shadow
    ! copy on the device be counted too (res = 2*res)?

  end function s_cuda_hybg_sizeof

  ! Format tag for this storage class: 'HYBG', blank padded to the
  ! declared length of 5.
  function s_cuda_hybg_get_fmt() result(res)
    implicit none
    character(len=5) :: res

    res = 'HYBG'
  end function s_cuda_hybg_get_fmt


  ! == ===================================
  !
  ! Data management
  !
  ! == ===================================

  ! Release the device object, then free the host CSR storage through
  ! the parent type.  The status returned by HYBGDeviceFree is stored
  ! in a local and not examined.
  subroutine s_cuda_hybg_free(a)
    use cusparse_mod
    implicit none
    class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
    integer(psb_ipk_) :: info

    info = HYBGDeviceFree(a%deviceMat)
    call a%psb_s_csr_sparse_mat%free()

  end subroutine s_cuda_hybg_free

  ! Finalizer: releases the device object only.
  subroutine s_cuda_hybg_finalize(a)
    use cusparse_mod
    implicit none
    type(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
    integer(psb_ipk_) :: info

    info = HYBGDeviceFree(a%deviceMat)

  end subroutine s_cuda_hybg_finalize

end module psb_s_cuda_hybg_mat_mod
#endif
diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 new file mode 100644 index 00000000..3651d6e6 --- /dev/null +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -0,0 +1,2088 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +module psb_s_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_s_vect_mod + use psb_cuda_env_mod + use psb_i_vect_mod + use psb_i_cuda_vect_mod + use psb_i_vectordev_mod + use psb_s_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_vect_type) :: psb_s_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + real(c_float), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + real(c_float), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => s_cuda_get_nrows + procedure, nopass :: get_fmt => s_cuda_get_fmt + + procedure, pass(x) :: all => s_cuda_all + procedure, pass(x) :: zero => s_cuda_zero + procedure, pass(x) :: asb_m => s_cuda_asb_m + procedure, pass(x) :: sync => s_cuda_sync + procedure, pass(x) :: sync_space => s_cuda_sync_space + procedure, pass(x) :: bld_x => s_cuda_bld_x + procedure, pass(x) :: bld_mn => s_cuda_bld_mn + procedure, pass(x) :: free => s_cuda_free + procedure, pass(x) :: ins_a => s_cuda_ins_a + procedure, pass(x) :: ins_v => s_cuda_ins_v + procedure, pass(x) :: is_host => s_cuda_is_host + procedure, pass(x) :: is_dev => s_cuda_is_dev + procedure, pass(x) :: is_sync => s_cuda_is_sync + procedure, pass(x) :: set_host => s_cuda_set_host + procedure, pass(x) :: set_dev => s_cuda_set_dev + procedure, pass(x) :: set_sync => s_cuda_set_sync + procedure, pass(x) :: set_scal => s_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => s_cuda_set_vect + procedure, pass(x) :: gthzv_x => s_cuda_gthzv_x + procedure, pass(y) :: sctb => s_cuda_sctb + procedure, pass(y) :: sctb_x => s_cuda_sctb_x + procedure, pass(x) :: gthzbuf => s_cuda_gthzbuf + procedure, pass(y) :: sctb_buf 
=> s_cuda_sctb_buf + procedure, pass(x) :: new_buffer => s_cuda_new_buffer + procedure, nopass :: device_wait => s_cuda_device_wait + procedure, pass(x) :: free_buffer => s_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => s_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => s_cuda_dot_v + procedure, pass(x) :: dot_a => s_cuda_dot_a + procedure, pass(y) :: axpby_v => s_cuda_axpby_v + procedure, pass(y) :: axpby_a => s_cuda_axpby_a + procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz + procedure, pass(y) :: mlt_v => s_cuda_mlt_v + procedure, pass(y) :: mlt_a => s_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => s_cuda_mlt_v_2 + procedure, pass(x) :: scal => s_cuda_scal + procedure, pass(x) :: nrm2 => s_cuda_nrm2 + procedure, pass(x) :: amax => s_cuda_amax + procedure, pass(x) :: asum => s_cuda_asum + procedure, pass(x) :: absval1 => s_cuda_absval1 + procedure, pass(x) :: absval2 => s_cuda_absval2 + + final :: s_cuda_vect_finalize + end type psb_s_vect_cuda + + public :: psb_s_vect_cuda_ + private :: constructor + interface psb_s_vect_cuda_ + module procedure constructor + end interface psb_s_vect_cuda_ + +contains + + function constructor(x) result(this) + real(psb_spk_) :: x(:) + type(psb_s_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine s_cuda_device_wait() + call psb_cudaSync() + end subroutine s_cuda_device_wait + + subroutine s_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! 
Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloat(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloat(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloat(x%dt_buf,y,n) + + endif + + class default + ! 
Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloat(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloat(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceFloat(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloat(x%dt_buf,y,n) + + end select + + end subroutine s_cuda_gthzv_x + +

  !
  ! Gather through the communication buffer.
  !
  ! Runs the device gather for n entries, with i passed as the offset
  ! into both the index vector and the destination buffer.  Requires
  ! x%combuf to be allocated; otherwise an allocation error is pushed
  ! and the routine returns.
  !   - idx is a CUDA index vector: both idx and x are synchronised if
  !     their host copy is the current one, then the gather targets
  !     dt_p_buf (when psb_cuda_DeviceHasUVA() is true) or dt_buf
  !     followed by a readFloat into combuf(i:).
  !   - any other index type: the indices are first written to the
  !     device scratch buffer i_buf.
  ! NOTE(review): dt_p_buf is presumably a device alias of combuf set
  ! up by new_buffer -- confirm there.
  !
  subroutine s_cuda_gthzbuf(i,n,idx,x)
    use psb_cuda_env_mod
    use psi_serial_mod
    integer(psb_mpk_) :: n
    integer(psb_ipk_) :: i
    class(psb_i_base_vect_type) :: idx
    class(psb_s_vect_cuda) :: x
    integer :: info, ni

    info = 0
    if (.not.allocated(x%combuf)) then
      call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf')
      return
    end if

    select type(ii=> idx)
    class is (psb_i_vect_cuda)
      if (ii%is_host()) call ii%sync()
      if (x%is_host())  call x%sync()

      if (psb_cuda_DeviceHasUVA()) then
        info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,&
             & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1)

      else
        info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,&
             & 0, n, i, ii%deviceVect, i,x%dt_buf, 1)
        if (info == 0) &
             &  info = readFloat(i,x%dt_buf,x%combuf(i:),n,1)
      endif

    class default
      ! Do not go for brute force, but move the index vector
      ni = size(ii%v)
      info = 0
      if (.not.c_associated(x%i_buf)) then
        info = allocateInt(x%i_buf,ni)
        x%i_buf_sz=ni
      end if
      if (info == 0) &
           & info = writeInt(i,x%i_buf,ii%v(i:),n,1)

      if (info == 0) &
           & info = igathMultiVecDeviceFloat(x%deviceVect,&
           & 0, n, i, x%i_buf, i,x%dt_buf, 1)

      if (info == 0) &
           &  info = readFloat(i,x%dt_buf,x%combuf(i:),n,1)

    end select

  end subroutine s_cuda_gthzbuf

  !
  ! Host-side scatter with a plain index array.
  !
  ! Does nothing for n == 0.  Otherwise it synchronises y if the device
  ! copy is the current one, delegates to the parent type's sctb, and
  ! marks the host copy as current.
  !
  subroutine s_cuda_sctb(n,idx,x,beta,y)
    implicit none
    integer(psb_mpk_) :: n
    integer(psb_ipk_) :: idx(:)
    real(psb_spk_) :: beta, x(:)
    class(psb_s_vect_cuda) :: y

    if (n == 0) return

    if (y%is_dev()) call y%sync()

    call y%psb_s_base_vect_type%sctb(n,idx,x,beta)
    call y%set_host()

  end subroutine s_cuda_sctb

  !
  ! Device-side scatter of x(1:n) into y through an index vector.
  !
  !   i    : offset passed to the device scatter (CUDA index vector) or
  !          first element of the idx%v slice that is uploaded (other
  !          index types)
  !   n    : number of entries
  !   idx  : index vector
  !   x    : host data to scatter
  !   beta : scalar forwarded to the device scatter routine
  !   y    : destination vector; marked device-current on exit
  !
  ! The staging buffers (pinned_buffer, buffer, dt_buf, i_buf) are
  ! grown on demand.  info starts at zero so that the status tests
  ! below are well defined even when nothing has to be (re)allocated.
  ! In the staged paths each step runs only if the previous one
  ! succeeded, matching the gather routines of this module.
  !
  subroutine s_cuda_sctb_x(i,n,idx,x,beta,y)
    use psb_cuda_env_mod
    use psi_serial_mod
    integer(psb_mpk_) :: n
    integer(psb_ipk_) :: i
    class(psb_i_base_vect_type) :: idx
    real(psb_spk_) :: beta, x(:)
    class(psb_s_vect_cuda) :: y
    integer :: info, ni

    info = 0

    select type(ii=> idx)
    class is (psb_i_vect_cuda)
      if (ii%is_host()) call ii%sync()
      if (y%is_host())  call y%sync()

      if (psb_cuda_DeviceHasUVA()) then
        ! Stage through a host buffer registered with the device;
        ! a buffer that is too small is unregistered and replaced.
        if (allocated(y%pinned_buffer)) then
          if (size(y%pinned_buffer) < n) then
            call inner_unregister(y%pinned_buffer)
            deallocate(y%pinned_buffer, stat=info)
          end if
        end if

        if (.not.allocated(y%pinned_buffer)) then
          allocate(y%pinned_buffer(n),stat=info)
          if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf)
          if (info /= 0) &
               & write(0,*) 'Error from inner_register ',info
        endif
        y%pinned_buffer(1:n) = x(1:n)
        info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,&
             & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta)
      else

        if (allocated(y%buffer)) then
          if (size(y%buffer) < n) then
            deallocate(y%buffer, stat=info)
          end if
        end if

        if (.not.allocated(y%buffer)) then
          allocate(y%buffer(n),stat=info)
        end if

        if (y%dt_buf_sz < n) then
          if (c_associated(y%dt_buf)) then
            call freeFloat(y%dt_buf)
            y%dt_buf = c_null_ptr
          end if
          info = allocateFloat(y%dt_buf,n)
          y%dt_buf_sz=n
        end if
        if (info == 0) &
             & info = writeFloat(y%dt_buf,x,n)
        if (info == 0) &
             & info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,&
             & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta)

      end if

    class default
      ni = size(ii%v)

      if (y%i_buf_sz < ni) then
        if (c_associated(y%i_buf)) then
          call freeInt(y%i_buf)
          y%i_buf = c_null_ptr
        end if
        info = allocateInt(y%i_buf,ni)
        y%i_buf_sz=ni
      end if
      if (allocated(y%buffer)) then
        if (size(y%buffer) < n) then
          deallocate(y%buffer, stat=info)
        end if
      end if

      if (.not.allocated(y%buffer)) then
        allocate(y%buffer(n),stat=info)
      end if

      if (y%dt_buf_sz < n) then
        if (c_associated(y%dt_buf)) then
          call freeFloat(y%dt_buf)
          y%dt_buf = c_null_ptr
        end if
        info = allocateFloat(y%dt_buf,n)
        y%dt_buf_sz=n
      end if

      ! Only the n indices actually used are uploaded, so the device
      ! scatter starts from position 1 of i_buf.
      if (info == 0) &
           & info = writeInt(y%i_buf,ii%v(i:i+n-1),n)
      if (info == 0) &
           & info = writeFloat(y%dt_buf,x,n)
      if (info == 0) &
           & info = iscatMultiVecDeviceFloat(y%deviceVect,&
           & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta)


    end select
    !
    !  Need a sync here to make sure we are not reallocating
    !  the buffers before iscatMulti has finished.
    !
    call psb_cudaSync()
    call y%set_dev()

  end subroutine s_cuda_sctb_x

  !
  ! Scatter from the communication buffer into y.
  !
  ! Requires y%combuf to be allocated; otherwise an allocation error
  ! is pushed and the routine returns.  i is passed as the offset into
  ! both the index vector and the source buffer.  Unlike s_cuda_sctb_x
  ! this routine does not change the host/device state of y.
  ! NOTE(review): in the UVA path the data is read from dt_p_buf
  ! directly, presumably a device alias of combuf -- confirm against
  ! new_buffer.
  !
  subroutine s_cuda_sctb_buf(i,n,idx,beta,y)
    use psi_serial_mod
    use psb_cuda_env_mod
    implicit none
    integer(psb_mpk_) :: n
    integer(psb_ipk_) :: i
    class(psb_i_base_vect_type) :: idx
    real(psb_spk_) :: beta
    class(psb_s_vect_cuda) :: y
    integer(psb_ipk_) :: info, ni

    if (.not.allocated(y%combuf)) then
      call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf')
      return
    end if


    select type(ii=> idx)
    class is (psb_i_vect_cuda)

      if (ii%is_host()) call ii%sync()
      if (y%is_host())  call y%sync()
      if (psb_cuda_DeviceHasUVA()) then
        info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,&
             & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta)
      else
        ! Do not scatter from a staging buffer whose upload failed.
        info = writeFloat(i,y%dt_buf,y%combuf(i:),n,1)
        if (info == 0) &
             & info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,&
             & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta)

      end if

    class default
      ni = size(ii%v)
      info = 0
      if (.not.c_associated(y%i_buf)) then
        info = allocateInt(y%i_buf,ni)
        y%i_buf_sz=ni
      end if
      if (info == 0) &
           & info = writeInt(i,y%i_buf,ii%v(i:),n,1)
      if (info == 0) &
           & info = writeFloat(i,y%dt_buf,y%combuf(i:),n,1)
      if (info == 0) info = iscatMultiVecDeviceFloat(y%deviceVect,&
           & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta)
    end select

  end subroutine s_cuda_sctb_buf


  !
  ! Build x from the host array "this": resize x%v, copy the data,
  ! mark the host copy as current and synchronise.
  ! If the reallocation fails an allocation-request error is pushed
  ! and the routine returns without touching x%v.
  !
  subroutine s_cuda_bld_x(x,this)
    use psb_base_mod
    real(psb_spk_), intent(in)           :: this(:)
    class(psb_s_vect_cuda), intent(inout) :: x
    integer(psb_ipk_) :: info

    call psb_realloc(size(this),x%v,info)
    if (info /= 0) then
      info=psb_err_alloc_request_
      call psb_errpush(info,'s_cuda_bld_x',&
           & i_err=(/size(this),izero,izero,izero,izero/))
      return
    end if
    x%v(:)  = this(:)
    call x%set_host()
    call x%sync()

  end subroutine s_cuda_bld_x

  !
  ! Build x with n entries by delegating to the "all" binding; a
  ! non-zero status is pushed on the error stack.
  !
  subroutine s_cuda_bld_mn(x,n)
    integer(psb_mpk_), intent(in) :: n
    class(psb_s_vect_cuda), intent(inout) :: x
    integer(psb_ipk_) :: info

    call x%all(n,info)
    if (info /= 0) then
      call psb_errpush(info,'s_cuda_bld_n',i_err=(/n,n,n,n,n/))
    end if

  end subroutine s_cuda_bld_mn

  ! Record that the host copy is the current one.
  subroutine s_cuda_set_host(x)
    implicit none
    class(psb_s_vect_cuda), intent(inout) :: x

    x%state = is_host
  end subroutine s_cuda_set_host

  ! Record that the device copy is the current one.
  subroutine s_cuda_set_dev(x)
    implicit none
    class(psb_s_vect_cuda), intent(inout) :: x

    x%state = is_dev
  end subroutine s_cuda_set_dev

  ! Record that host and device copies agree.
  subroutine s_cuda_set_sync(x)
    implicit none
    class(psb_s_vect_cuda), intent(inout) :: x

    x%state = is_sync
  end subroutine s_cuda_set_sync

  ! True when state equals is_dev.
  function s_cuda_is_dev(x) result(res)
    implicit none
    class(psb_s_vect_cuda), intent(in) :: x
    logical  :: res

    res = (x%state == is_dev)
  end function s_cuda_is_dev

  ! True when state equals is_host.
  function s_cuda_is_host(x) result(res)
    implicit none
    class(psb_s_vect_cuda), intent(in) :: x
    logical  :: res

    res = (x%state == is_host)
  end function s_cuda_is_host

  ! True when state equals is_sync.
  function s_cuda_is_sync(x) result(res)
    implicit none
    class(psb_s_vect_cuda), intent(in) :: x
    logical  :: res

    res = (x%state == is_sync)
  end function s_cuda_is_sync


  ! Number of entries of the host array x%v, or 0 when it is not
  ! allocated.
  function s_cuda_get_nrows(x) result(res)
    implicit none
    class(psb_s_vect_cuda), intent(in) :: x
    integer(psb_ipk_) :: res

    res = 0
    if (allocated(x%v)) res = size(x%v)
  end function s_cuda_get_nrows

  ! Format tag for this vector class: 'sGPU', blank padded to the
  ! declared length of 5.
  function s_cuda_get_fmt() result(res)
    implicit none
    character(len=5) :: res
    res = 'sGPU'
  end function s_cuda_get_fmt

subroutine s_cuda_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_s_vect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_cuda_all',& + & 
i_err=(/n,n,n,n,n/)) + end if + end subroutine s_cuda_all + + subroutine s_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(szero) + end subroutine s_cuda_zero + + subroutine s_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_s_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'s_cuda_sync') + end if + + end subroutine s_cuda_sync + + subroutine s_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine s_cuda_free + + subroutine s_cuda_set_scal(x,val,first,last) + class(psb_s_vect_cuda), intent(inout) :: x + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if 
(present(last)) last_ = min(last,last_) + + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine s_cuda_set_scal + + + + function s_cuda_dot_v(n,x,y) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + res = szero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_s_vect + ! + select type(yy => y) + type is (psb_s_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'s_cuda_dot_v') + end if + + class default + ! y%sync is done in dot_a + if (x%is_dev()) call x%sync() + res = y%dot(n,x%v) + end select + + end function s_cuda_dot_v + + function s_cuda_dot_a(n,x,y) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + real(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: sdot + + if (x%is_dev()) call x%sync() + res = sdot(n,y,1,x%v,1) + + end function s_cuda_dot_a + + subroutine s_cuda_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: y + real(psb_spk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_s_vect_cuda) + ! 
Do something different here + if ((beta /= szero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + class is (psb_s_vect_cuda) + select type(yy => y) + class is (psb_s_vect_cuda) + select type(zz => z) + class is (psb_s_vect_cuda) + ! Do something different here + if ((beta /= szero).and.yy%is_host())& + & call yy%sync() + if ((delta /= szero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) + class is (psb_s_vect_cuda) + select type(yy => y) + class is (psb_s_vect_cuda) + select type(zz => z) + class is (psb_s_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + type is (psb_s_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_s_vect_cuda) + ! 
Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,sone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine s_cuda_mlt_v + + subroutine s_cuda_mlt_a(x, y, info) + use psi_serial_mod + implicit none + real(psb_spk_), intent(in) :: x(:) + class(psb_s_vect_cuda), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_s_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine s_cuda_mlt_a + + subroutine s_cuda_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + real(psb_spk_), intent(in) :: alpha,beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(in) :: y(:) + class(psb_s_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine s_cuda_mlt_a_2 + + subroutine s_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + real(psb_spk_), intent(in) :: alpha,beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. 
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_s_vect_cuda) + select type (yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= szero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= szero).and.(z%is_dev())) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= szero).and.(z%is_dev())) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine s_cuda_mlt_v_2 + + subroutine s_cuda_scal(alpha, x) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + real(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine s_cuda_scal + + + function s_cuda_nrm2(n,x) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. 
+ if (x%is_host()) call x%sync() + info = nrm2MultiVecDevice(res,n,x%deviceVect) + + end function s_cuda_nrm2 + + function s_cuda_amax(n,x) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDevice(res,n,x%deviceVect) + + end function s_cuda_amax + + function s_cuda_asum(n,x) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDevice(res,n,x%deviceVect) + + end function s_cuda_asum + + subroutine s_cuda_absval1(x) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,sone,x%deviceVect) + + end subroutine s_cuda_absval1 + + subroutine s_cuda_absval2(x,y) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_s_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,sone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_s_base_vect_type%absval(y) + end select + end subroutine s_cuda_absval2 + + + subroutine s_cuda_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine s_cuda_vect_finalize + + subroutine s_cuda_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, 
dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_s_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_cuda + + info = 0 + if (psb_errstatus_fatal()) return + + done_cuda = .false. + select type(virl => irl) + class is (psb_i_vect_cuda) + select type(vval => val) + class is (psb_s_vect_cuda) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceFloat(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_cuda=.true. + end select + end select + + if (.not.done_cuda) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'cuda_vect_ins') + return + end if + + end subroutine s_cuda_ins_v + + subroutine s_cuda_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_s_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine s_cuda_ins_a + +end module psb_s_cuda_vect_mod + + +! +! Multivectors +! 
+ + + +module psb_s_cuda_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_s_multivect_mod + use psb_s_base_multivect_mod + use psb_cuda_env_mod + use psb_i_multivect_mod + use psb_i_cuda_multivect_mod + use psb_s_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_multivect_type) :: psb_s_multivect_cuda + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => s_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => s_cuda_multi_get_ncols + procedure, nopass :: get_fmt => s_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => s_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => s_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => s_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => s_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => s_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => s_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => s_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => s_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => s_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => s_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => s_cuda_multi_amax +!!$ procedure, pass(x) :: asum => s_cuda_multi_asum + procedure, pass(x) :: all => s_cuda_multi_all + procedure, pass(x) :: zero => s_cuda_multi_zero + procedure, pass(x) :: asb => s_cuda_multi_asb + procedure, pass(x) :: sync => s_cuda_multi_sync + procedure, pass(x) :: sync_space => s_cuda_multi_sync_space + procedure, pass(x) :: bld_x => s_cuda_multi_bld_x + procedure, pass(x) :: bld_n => s_cuda_multi_bld_n + procedure, pass(x) :: free => s_cuda_multi_free + procedure, pass(x) :: ins => s_cuda_multi_ins + 
procedure, pass(x) :: is_host => s_cuda_multi_is_host + procedure, pass(x) :: is_dev => s_cuda_multi_is_dev + procedure, pass(x) :: is_sync => s_cuda_multi_is_sync + procedure, pass(x) :: set_host => s_cuda_multi_set_host + procedure, pass(x) :: set_dev => s_cuda_multi_set_dev + procedure, pass(x) :: set_sync => s_cuda_multi_set_sync + procedure, pass(x) :: set_scal => s_cuda_multi_set_scal + procedure, pass(x) :: set_vect => s_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => s_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => s_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => s_cuda_multi_sctb_x + final :: s_cuda_multi_vect_finalize + end type psb_s_multivect_cuda + + public :: psb_s_multivect_cuda + private :: mconstructor + interface psb_s_multivect_cuda + module procedure mconstructor + end interface + +contains + + function mconstructor(x) result(this) + real(psb_spk_) :: x(:,:) + type(psb_s_multivect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function mconstructor + + +!!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_spk_) :: y(:) +!!$ class(psb_s_multivect_cuda) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine 
s_cuda_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine s_cuda_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ real(psb_spk_) :: beta, x(:) +!!$ class(psb_s_multivect_cuda) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_s_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine s_cuda_multi_sctb +!!$ +!!$ subroutine s_cuda_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_spk_) :: beta, x(:) +!!$ class(psb_s_multivect_cuda) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine s_cuda_multi_sctb_x + + + subroutine s_cuda_multi_bld_x(x,this) + use psb_base_mod + real(psb_spk_), intent(in) :: this(:,:) + class(psb_s_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_cuda_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end 
subroutine s_cuda_multi_bld_x + + subroutine s_cuda_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'s_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine s_cuda_multi_bld_n + + + subroutine s_cuda_multi_set_host(x) + implicit none + class(psb_s_multivect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine s_cuda_multi_set_host + + subroutine s_cuda_multi_set_dev(x) + implicit none + class(psb_s_multivect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine s_cuda_multi_set_dev + + subroutine s_cuda_multi_set_sync(x) + implicit none + class(psb_s_multivect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine s_cuda_multi_set_sync + + function s_cuda_multi_is_dev(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function s_cuda_multi_is_dev + + function s_cuda_multi_is_host(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function s_cuda_multi_is_host + + function s_cuda_multi_is_sync(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function s_cuda_multi_is_sync + + + function s_cuda_multi_get_nrows(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function s_cuda_multi_get_nrows + + function s_cuda_multi_get_ncols(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function s_cuda_multi_get_ncols + + function s_cuda_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'sGPU' + end function s_cuda_multi_get_fmt + +!!$ function 
s_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ class(psb_s_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ real(psb_spk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = szero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_s_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_s_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_s_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'s_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function s_cuda_multi_dot_v +!!$ +!!$ function s_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ real(psb_spk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ real(psb_spk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function s_cuda_multi_dot_a +!!$ +!!$ subroutine s_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_s_base_multivect_type), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: y +!!$ real(psb_spk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_s_base_multivect_type) +!!$ if ((beta /= szero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call 
psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_s_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= szero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_s_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_s_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine s_cuda_multi_mlt_v +!!$ +!!$ subroutine s_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: x(:) +!!$ class(psb_s_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_s_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine s_cuda_multi_mlt_a +!!$ +!!$ subroutine s_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: alpha,beta +!!$ real(psb_spk_), intent(in) :: x(:) +!!$ real(psb_spk_), intent(in) :: y(:) +!!$ class(psb_s_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine s_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine s_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ 
real(psb_spk_), intent(in) :: alpha,beta +!!$ class(psb_s_base_multivect_type), intent(inout) :: x +!!$ class(psb_s_base_multivect_type), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_s_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_s_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. 
+!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine s_cuda_multi_mlt_v_2 + + + subroutine s_cuda_multi_set_scal(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x + real(psb_spk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_s_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine s_cuda_multi_set_scal + + subroutine s_cuda_multi_set_vect(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x + real(psb_spk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_s_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine s_cuda_multi_set_vect + + + +!!$ subroutine s_cuda_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ real(psb_spk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_s_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine s_cuda_multi_scal +!!$ +!!$ +!!$ function s_cuda_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. 
+!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function s_cuda_multi_nrm2 +!!$ +!!$ function s_cuda_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function s_cuda_multi_amax +!!$ +!!$ function s_cuda_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function s_cuda_multi_asum + + subroutine s_cuda_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=szero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_cuda_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine s_cuda_multi_all + + subroutine s_cuda_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_s_multivect_cuda), intent(inout) :: x + + if (allocated(x%v)) x%v=szero + call x%set_host() + end subroutine s_cuda_multi_zero + + subroutine s_cuda_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_s_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) 
+ nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc z_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_z_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_z_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => z_cuda_csrg_free + procedure, pass(a) :: mold => psb_z_cuda_csrg_mold + procedure, pass(a) :: is_host => z_cuda_csrg_is_host + procedure, pass(a) :: is_dev => z_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => z_cuda_csrg_is_sync + procedure, pass(a) :: set_host => z_cuda_csrg_set_host + procedure, pass(a) :: set_dev => z_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => z_cuda_csrg_set_sync + procedure, pass(a) :: sync => z_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_z_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_z_cuda_csrg_from_gpu + final :: z_cuda_csrg_finalize + end type psb_z_cuda_csrg_sparse_mat + + private :: z_cuda_csrg_get_nzeros, z_cuda_csrg_free, z_cuda_csrg_get_fmt, & + & z_cuda_csrg_get_size, z_cuda_csrg_sizeof, z_cuda_csrg_get_nz_row + + + interface + subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + 
class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_csrg_mold(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_mold + end interface + + interface + subroutine psb_z_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + 
integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_z_cuda_csrg_from_gpu(a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + 
complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmv + end interface + interface + subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmm + end interface + + interface + subroutine psb_z_cuda_csrg_scal(d,a,info,side) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_csrg_scal + end interface + + interface + subroutine psb_z_cuda_csrg_scals(d,a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! 
res = 2*res + + end function z_cuda_csrg_sizeof + + function z_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function z_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine z_cuda_csrg_set_host(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_cuda_csrg_set_host + + subroutine z_cuda_csrg_set_dev(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_cuda_csrg_set_dev + + subroutine z_cuda_csrg_set_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_cuda_csrg_set_sync + + function z_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_cuda_csrg_is_dev + + function z_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_cuda_csrg_is_host + + function z_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_cuda_csrg_is_sync + + + ! Reconcile the host and device copies of the matrix: call to_gpu + ! when the state is is_host, from_gpu when it is is_dev, nothing + ! when it is already is_sync. The state is flagged as synchronized + ! only if the transfer reported no error (info == 0), so a failed + ! copy is not silently treated as valid and is retried next time. + subroutine z_cuda_csrg_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + ! a is intent(in): the update goes through the pointer alias tmpa. + tmpa => a + ! Neither branch runs when already in sync; keep info defined. + info = 0 + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + if (info == 0) call tmpa%set_sync() + return + + end subroutine z_cuda_csrg_sync + + subroutine z_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_cuda_csrg_sparse_mat), 
intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_cuda_csrg_free + + subroutine z_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine z_cuda_csrg_finalize + +end module psb_z_cuda_csrg_mat_mod diff --git a/cuda/psb_z_cuda_diag_mat_mod.F90 b/cuda/psb_z_cuda_diag_mat_mod.F90 new file mode 100644 index 00000000..8b0d2168 --- /dev/null +++ b/cuda/psb_z_cuda_diag_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_z_dia_mat_mod + + type, extends(psb_z_dia_sparse_mat) :: psb_z_cuda_diag_sparse_mat + ! + ! DIA format (psb_z_dia_sparse_mat), extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! + ! Opaque C handle for the device-side copy; c_null_ptr when none is held. + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => z_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => z_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_z_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_z_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_z_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_z_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_z_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_z_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_diag_from_coo +! 
procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => z_cuda_diag_free + procedure, pass(a) :: mold => psb_z_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_diag_to_gpu + final :: z_cuda_diag_finalize + end type psb_z_cuda_diag_sparse_mat + + private :: z_cuda_diag_get_nzeros, z_cuda_diag_free, z_cuda_diag_get_fmt, & + & z_cuda_diag_get_size, z_cuda_diag_sizeof, z_cuda_diag_get_nz_row + + + interface + subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_diag_vect_mv + end interface + + interface + subroutine psb_z_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_diag_inner_vect_sv + end interface + + interface + subroutine psb_z_cuda_diag_reallocate_nz(nz,a) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_diag_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_diag_allocate_mnnz + end 
interface + + interface + subroutine psb_z_cuda_diag_mold(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_diag_mold + end interface + + interface + subroutine psb_z_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_diag_to_gpu + end interface + + interface + subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_diag_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_diag_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_diag_from_coo + end interface + + + interface + subroutine psb_z_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine 
psb_z_cuda_mv_diag_from_fmt + end interface + + interface + subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_diag_csmv + end interface + interface + subroutine psb_z_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_diag_csmm + end interface + + interface + subroutine psb_z_cuda_diag_scal(d,a,info, side) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_diag_scal + end interface + + interface + subroutine psb_z_cuda_diag_scals(d,a,info) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_diag_sizeof(a) result(res) + implicit none + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! 
Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_diag_sizeof + + function z_cuda_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function z_cuda_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + ! Release the device-side copy, if one is held, then delegate to + ! the parent type's free(). + subroutine z_cuda_diag_free(a) + use diagdev_mod + implicit none + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + ! Reset the handle so a later free/finalize does not release it again. + a%deviceMat = c_null_ptr + call a%psb_z_dia_sparse_mat%free() + + return + + end subroutine z_cuda_diag_free + + ! Finalizer: releases only the device-side copy, if one is held. + subroutine z_cuda_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_cuda_diag_finalize + +end module psb_z_cuda_diag_mat_mod diff --git a/cuda/psb_z_cuda_dnsg_mat_mod.F90 b/cuda/psb_z_cuda_dnsg_mat_mod.F90 new file mode 100644 index 00000000..6f687a75 --- /dev/null +++ b/cuda/psb_z_cuda_dnsg_mat_mod.F90 @@ -0,0 +1,273 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! 
not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_dnsg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_dns_mat_mod + use dnsdev_mod + + type, extends(psb_z_dns_sparse_mat) :: psb_z_cuda_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => z_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => z_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_z_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_z_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_z_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_z_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_z_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_z_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_z_cuda_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! 
+ procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => z_cuda_dnsg_free + procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_dnsg_to_gpu + final :: z_cuda_dnsg_finalize + end type psb_z_cuda_dnsg_sparse_mat + + private :: z_cuda_dnsg_get_nzeros, z_cuda_dnsg_free, z_cuda_dnsg_get_fmt, & + & z_cuda_dnsg_get_size, z_cuda_dnsg_get_nz_row + + + interface + subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz) 
+!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_z_cuda_dnsg_mold(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_dnsg_mold + end interface + + interface + subroutine psb_z_cuda_dnsg_to_gpu(a,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_dnsg_to_gpu + end interface + + interface + subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_dnsg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) + import :: 
psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_cuda_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_cuda_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_cuda_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cuda_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! 
+ ! + ! == =================================== + + + + function z_cuda_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function z_cuda_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + ! Release the device-side copy, if one is held, then delegate to + ! the parent type's free(). + subroutine z_cuda_dnsg_free(a) + use dnsdev_mod + implicit none + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + ! Reset the handle so a later free/finalize does not release it again. + a%deviceMat = c_null_ptr + call a%psb_z_dns_sparse_mat%free() + + return + + end subroutine z_cuda_dnsg_free + + ! Finalizer: releases only the device-side copy, if one is held. + subroutine z_cuda_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_cuda_dnsg_finalize + +end module psb_z_cuda_dnsg_mat_mod diff --git a/cuda/psb_z_cuda_elg_mat_mod.F90 b/cuda/psb_z_cuda_elg_mat_mod.F90 new file mode 100644 index 00000000..1348a7cb --- /dev/null +++ b/cuda/psb_z_cuda_elg_mat_mod.F90 @@ -0,0 +1,454 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! 
!  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
!  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
!  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
!  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
!  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
!  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
!  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
!  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
!  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
!  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
!  POSSIBILITY OF SUCH DAMAGE.
!


!
! Module psb_z_cuda_elg_mat_mod
!
! Double-complex ELLPACK sparse matrix with a device-side mirror.
! The type extends psb_z_ell_sparse_mat with an opaque device handle
! (deviceMat) and a three-valued flag (devstate) recording which copy
! of the data is current: host only, device only, or both in sync.
!
module psb_z_cuda_elg_mat_mod

  use iso_c_binding
  use psb_z_mat_mod
  use psb_z_ell_mat_mod
  use psb_i_cuda_vect_mod

  ! Values stored in devstate.
  integer(psb_ipk_), parameter, private :: is_host = -1
  integer(psb_ipk_), parameter, private :: is_sync = 0
  integer(psb_ipk_), parameter, private :: is_dev  = 1

  type, extends(psb_z_ell_sparse_mat) :: psb_z_cuda_elg_sparse_mat
    !
    ! ITPACK/ELL format, extended.
    ! We are adding here the routines to create a copy of the data
    ! into the GPU.
    !
    ! deviceMat : opaque handle to the device copy (c_null_ptr if none).
    ! devstate  : one of is_host / is_sync / is_dev.
    !
    type(c_ptr)       :: deviceMat = c_null_ptr
    integer(psb_ipk_) :: devstate  = is_host

  contains
    procedure, nopass  :: get_fmt       => z_cuda_elg_get_fmt
    procedure, pass(a) :: sizeof        => z_cuda_elg_sizeof
    procedure, pass(a) :: vect_mv       => psb_z_cuda_elg_vect_mv
    procedure, pass(a) :: csmm          => psb_z_cuda_elg_csmm
    procedure, pass(a) :: csmv          => psb_z_cuda_elg_csmv
    procedure, pass(a) :: in_vect_sv    => psb_z_cuda_elg_inner_vect_sv
    procedure, pass(a) :: scals         => psb_z_cuda_elg_scals
    procedure, pass(a) :: scalv         => psb_z_cuda_elg_scal
    procedure, pass(a) :: reallocate_nz => psb_z_cuda_elg_reallocate_nz
    procedure, pass(a) :: allocate_mnnz => psb_z_cuda_elg_allocate_mnnz
    procedure, pass(a) :: reinit        => z_cuda_elg_reinit
    ! Note: we do *not* need the TO methods, because the parent type
    ! methods will work.
    procedure, pass(a) :: cp_from_coo   => psb_z_cuda_cp_elg_from_coo
    procedure, pass(a) :: cp_from_fmt   => psb_z_cuda_cp_elg_from_fmt
    procedure, pass(a) :: mv_from_coo   => psb_z_cuda_mv_elg_from_coo
    procedure, pass(a) :: mv_from_fmt   => psb_z_cuda_mv_elg_from_fmt
    procedure, pass(a) :: free          => z_cuda_elg_free
    procedure, pass(a) :: mold          => psb_z_cuda_elg_mold
    procedure, pass(a) :: csput_a       => psb_z_cuda_elg_csput_a
    procedure, pass(a) :: csput_v       => psb_z_cuda_elg_csput_v
    procedure, pass(a) :: is_host       => z_cuda_elg_is_host
    procedure, pass(a) :: is_dev        => z_cuda_elg_is_dev
    procedure, pass(a) :: is_sync       => z_cuda_elg_is_sync
    procedure, pass(a) :: set_host      => z_cuda_elg_set_host
    procedure, pass(a) :: set_dev       => z_cuda_elg_set_dev
    procedure, pass(a) :: set_sync      => z_cuda_elg_set_sync
    procedure, pass(a) :: sync          => z_cuda_elg_sync
    procedure, pass(a) :: from_gpu      => psb_z_cuda_elg_from_gpu
    procedure, pass(a) :: to_gpu        => psb_z_cuda_elg_to_gpu
    procedure, pass(a) :: asb           => psb_z_cuda_elg_asb
    final              :: z_cuda_elg_finalize
  end type psb_z_cuda_elg_sparse_mat

  private :: z_cuda_elg_get_nzeros, z_cuda_elg_free,  z_cuda_elg_get_fmt, &
       & z_cuda_elg_get_size, z_cuda_elg_sizeof, z_cuda_elg_get_nz_row, z_cuda_elg_sync


  ! Explicit interfaces for the type-bound procedures whose bodies are
  ! not part of this module.

  interface
    subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans)
      import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                :: alpha, beta
      class(psb_z_base_vect_type), intent(inout)   :: x
      class(psb_z_base_vect_type), intent(inout)   :: y
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_z_cuda_elg_vect_mv
  end interface

  interface
    subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
      import :: psb_ipk_, psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type
      class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                :: alpha, beta
      class(psb_z_base_vect_type), intent(inout)   :: x, y
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_z_cuda_elg_inner_vect_sv
  end interface

  interface
    subroutine psb_z_cuda_elg_reallocate_nz(nz,a)
      import :: psb_z_cuda_elg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in)                   :: nz
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
    end subroutine psb_z_cuda_elg_reallocate_nz
  end interface

  interface
    subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz)
      import :: psb_z_cuda_elg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in)                   :: m,n
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(in), optional         :: nz
    end subroutine psb_z_cuda_elg_allocate_mnnz
  end interface

  interface
    subroutine psb_z_cuda_elg_mold(a,b,info)
      import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(in)              :: a
      class(psb_z_base_sparse_mat), intent(inout), allocatable  :: b
      integer(psb_ipk_), intent(out)                            :: info
    end subroutine psb_z_cuda_elg_mold
  end interface

  interface
    subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
      import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      complex(psb_dpk_), intent(in)                   :: val(:)
      integer(psb_ipk_), intent(in)                   :: nz,ia(:), ja(:),&
           &  imin,imax,jmin,jmax
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_elg_csput_a
  end interface

  interface
    subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
      import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,&
           & psb_i_base_vect_type
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      class(psb_z_base_vect_type), intent(inout)      :: val
      class(psb_i_base_vect_type), intent(inout)      :: ia, ja
      integer(psb_ipk_), intent(in)                   :: nz, imin,imax,jmin,jmax
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_elg_csput_v
  end interface

  interface
    subroutine psb_z_cuda_elg_from_gpu(a,info)
      import :: psb_z_cuda_elg_sparse_mat, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_elg_from_gpu
  end interface

  interface
    subroutine psb_z_cuda_elg_to_gpu(a,info, nzrm)
      import :: psb_z_cuda_elg_sparse_mat, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                  :: info
      integer(psb_ipk_), intent(in), optional         :: nzrm
    end subroutine psb_z_cuda_elg_to_gpu
  end interface

  interface
    subroutine psb_z_cuda_cp_elg_from_coo(a,b,info)
      import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      class(psb_z_coo_sparse_mat), intent(in)         :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_cp_elg_from_coo
  end interface

  interface
    subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info)
      import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      class(psb_z_base_sparse_mat), intent(in)        :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_cp_elg_from_fmt
  end interface

  interface
    subroutine psb_z_cuda_mv_elg_from_coo(a,b,info)
      import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      class(psb_z_coo_sparse_mat), intent(inout)      :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_mv_elg_from_coo
  end interface


  interface
    subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info)
      import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      class(psb_z_base_sparse_mat), intent(inout)     :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_mv_elg_from_fmt
  end interface

  interface
    subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans)
      import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                :: alpha, beta, x(:)
      complex(psb_dpk_), intent(inout)             :: y(:)
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_z_cuda_elg_csmv
  end interface
  interface
    subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans)
      import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                :: alpha, beta, x(:,:)
      complex(psb_dpk_), intent(inout)             :: y(:,:)
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_z_cuda_elg_csmm
  end interface

  interface
    subroutine psb_z_cuda_elg_scal(d,a,info, side)
      import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      complex(psb_dpk_), intent(in)                   :: d(:)
      integer(psb_ipk_), intent(out)                  :: info
      character, intent(in), optional                 :: side
    end subroutine psb_z_cuda_elg_scal
  end interface

  interface
    subroutine psb_z_cuda_elg_scals(d,a,info)
      import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
      complex(psb_dpk_), intent(in)                   :: d
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_elg_scals
  end interface

  interface
    subroutine psb_z_cuda_elg_asb(a)
      import :: psb_z_cuda_elg_sparse_mat
      class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
    end subroutine psb_z_cuda_elg_asb
  end interface


contains

  ! == ===================================
  !
  !
  !
  ! Getters
  !
  !
  !
  !
  !
  ! == ===================================


  !
  ! Size in bytes of the host-side storage (val, irn, idiag, ja) plus a
  ! fixed 8-byte overhead. If the device copy is the current one the
  ! matrix is synchronised first. The device-side mirror itself is not
  ! counted.
  !
  function z_cuda_elg_sizeof(a) result(res)
    implicit none
    class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: res

    if (a%is_dev()) call a%sync()
    res = 8
    res = res + (2*psb_sizeof_dp) * size(a%val)
    res = res + psb_sizeof_ip * size(a%irn)
    res = res + psb_sizeof_ip * size(a%idiag)
    res = res + psb_sizeof_ip * size(a%ja)
    ! Should we account for the shadow data structure
    ! on the GPU device side?
    ! res = 2*res

  end function z_cuda_elg_sizeof

  !
  ! Format tag of this storage class: 'ELG' (blank padded to 5 chars).
  !
  function z_cuda_elg_get_fmt() result(res)
    implicit none
    character(len=5) :: res
    res = 'ELG'
  end function z_cuda_elg_get_fmt



  ! == ===================================
  !
  !
  !
  ! Data management
  !
  !
  !
  !
  !
  ! == ===================================

  !
  ! Put an assembled matrix back into the update state.
  !
  !   clear : optional, default .true.; when .true. the coefficients
  !           are zeroed (on the device if the device copy is valid,
  !           on the host otherwise); when .false. they are kept.
  !
  ! A matrix already in the build or update state is left untouched.
  ! Any other state is reported as psb_err_invalid_mat_state_.
  !
  subroutine z_cuda_elg_reinit(a,clear)
    use elldev_mod
    implicit none
    class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
    logical, intent(in), optional :: clear

    integer(psb_ipk_) :: info, err_act
    character(len=20) :: name='reinit'
    logical           :: clear_

    call psb_erractionsave(err_act)
    info = psb_success_

    clear_ = .true.
    if (present(clear)) clear_ = clear

    if (a%is_bld() .or. a%is_upd()) then
      ! Nothing to do; fall through so that the saved error action is
      ! restored on this path as well.
      ! NOTE(review): presumably psb_erractionsave alters the global
      ! error action -- confirm in psb_error_mod.
    else if (a%is_asb()) then
      if (a%is_dev().or.a%is_sync()) then
        if (clear_) call zeroEllDevice(a%deviceMat)
        call a%set_dev()
      else if (a%is_host()) then
        ! Honour the caller's request exactly as the device branch does.
        if (clear_) a%val(:,:) = zzero
      end if
      call a%set_upd()
    else
      info = psb_err_invalid_mat_state_
      call psb_errpush(info,name)
      goto 9999
    end if

    call psb_erractionrestore(err_act)
    return

9999 call psb_error_handler(err_act)
    return

  end subroutine z_cuda_elg_reinit

  !
  ! Release the device copy (if any), then the host storage through the
  ! parent type, and mark the two sides as in sync.
  !
  subroutine z_cuda_elg_free(a)
    use elldev_mod
    implicit none
    class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a

    if (c_associated(a%deviceMat)) &
         & call freeEllDevice(a%deviceMat)
    a%deviceMat = c_null_ptr
    call a%psb_z_ell_sparse_mat%free()
    call a%set_sync()

    return

  end subroutine z_cuda_elg_free

  !
  ! Bring host and device copies in line: host -> device when the host
  ! copy is current, device -> host when the device copy is current.
  ! The dummy is intent(in) so that the routine can be called from
  ! intent(in) contexts; the update goes through a local pointer.
  ! The status returned by the copy routines is not inspected.
  !
  subroutine z_cuda_elg_sync(a)
    implicit none
    class(psb_z_cuda_elg_sparse_mat), target, intent(in) :: a
    class(psb_z_cuda_elg_sparse_mat), pointer :: tmpa
    integer(psb_ipk_) :: info

    tmpa => a
    if (tmpa%is_host()) then
      call tmpa%to_gpu(info)
    else if (tmpa%is_dev()) then
      call tmpa%from_gpu(info)
    end if
    call tmpa%set_sync()
    return

  end subroutine z_cuda_elg_sync

  ! Mark the host copy as the current one.
  subroutine z_cuda_elg_set_host(a)
    implicit none
    class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a

    a%devstate = is_host
  end subroutine z_cuda_elg_set_host

  ! Mark the device copy as the current one.
  subroutine z_cuda_elg_set_dev(a)
    implicit none
    class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a

    a%devstate = is_dev
  end subroutine z_cuda_elg_set_dev

  ! Mark host and device copies as identical.
  subroutine z_cuda_elg_set_sync(a)
    implicit none
    class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a

    a%devstate = is_sync
  end subroutine z_cuda_elg_set_sync

  ! .true. when the device copy is the current one.
  function z_cuda_elg_is_dev(a) result(res)
    implicit none
    class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
    logical :: res

    res = (a%devstate == is_dev)
  end function z_cuda_elg_is_dev

  ! .true. when the host copy is the current one.
  function z_cuda_elg_is_host(a) result(res)
    implicit none
    class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
    logical :: res

    res = (a%devstate == is_host)
  end function z_cuda_elg_is_host

  ! .true. when host and device copies are identical.
  function z_cuda_elg_is_sync(a) result(res)
    implicit none
    class(psb_z_cuda_elg_sparse_mat), intent(in) :: a
    logical :: res

    res = (a%devstate == is_sync)
  end function z_cuda_elg_is_sync

  !
  ! Finalizer: releases only the device handle.
  !
  subroutine z_cuda_elg_finalize(a)
    use elldev_mod
    implicit none
    type(psb_z_cuda_elg_sparse_mat), intent(inout) :: a

    if (c_associated(a%deviceMat)) &
         & call freeEllDevice(a%deviceMat)
    a%deviceMat = c_null_ptr
    return

  end subroutine z_cuda_elg_finalize

end module psb_z_cuda_elg_mat_mod
diff --git a/cuda/psb_z_cuda_hdiag_mat_mod.F90 b/cuda/psb_z_cuda_hdiag_mat_mod.F90 new file mode 100644 index 00000000..70897664 --- /dev/null +++ b/cuda/psb_z_cuda_hdiag_mat_mod.F90 @@ -0,0 +1,268 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
!  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
!  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
!  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
!  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
!  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
!  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
!  POSSIBILITY OF SUCH DAMAGE.
!


!
! Module psb_z_cuda_hdiag_mat_mod
!
! Double-complex hacked-diagonal (HDIA) sparse matrix with a
! device-side mirror. The type extends psb_z_hdia_sparse_mat with an
! opaque device handle. Only a subset of the parent methods is
! overridden; the interfaces kept below as !!$ comments are the ones
! not (yet) provided for this format.
!
module psb_z_cuda_hdiag_mat_mod

  use iso_c_binding
  use psb_base_mod
  use psb_z_hdia_mat_mod

  type, extends(psb_z_hdia_sparse_mat) :: psb_z_cuda_hdiag_sparse_mat
    !
    ! deviceMat : opaque handle to the device copy (c_null_ptr if none).
    !
    type(c_ptr) :: deviceMat = c_null_ptr

  contains
    procedure, nopass  :: get_fmt       => z_cuda_hdiag_get_fmt
    ! procedure, pass(a) :: sizeof        => z_cuda_hdiag_sizeof
    procedure, pass(a) :: vect_mv       => psb_z_cuda_hdiag_vect_mv
    ! procedure, pass(a) :: csmm          => psb_z_cuda_hdiag_csmm
    procedure, pass(a) :: csmv          => psb_z_cuda_hdiag_csmv
    ! procedure, pass(a) :: in_vect_sv    => psb_z_cuda_hdiag_inner_vect_sv
    ! procedure, pass(a) :: scals         => psb_z_cuda_hdiag_scals
    ! procedure, pass(a) :: scalv         => psb_z_cuda_hdiag_scal
    ! procedure, pass(a) :: reallocate_nz => psb_z_cuda_hdiag_reallocate_nz
    ! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hdiag_allocate_mnnz
    ! Note: we do *not* need the TO methods, because the parent type
    ! methods will work.
    procedure, pass(a) :: cp_from_coo   => psb_z_cuda_cp_hdiag_from_coo
    ! procedure, pass(a) :: cp_from_fmt   => psb_z_cuda_cp_hdiag_from_fmt
    procedure, pass(a) :: mv_from_coo   => psb_z_cuda_mv_hdiag_from_coo
    ! procedure, pass(a) :: mv_from_fmt   => psb_z_cuda_mv_hdiag_from_fmt
    procedure, pass(a) :: free          => z_cuda_hdiag_free
    procedure, pass(a) :: mold          => psb_z_cuda_hdiag_mold
    procedure, pass(a) :: to_gpu        => psb_z_cuda_hdiag_to_gpu
    final              :: z_cuda_hdiag_finalize
  end type psb_z_cuda_hdiag_sparse_mat

  private :: z_cuda_hdiag_get_nzeros, z_cuda_hdiag_free,  z_cuda_hdiag_get_fmt, &
       & z_cuda_hdiag_get_size, z_cuda_hdiag_sizeof, z_cuda_hdiag_get_nz_row


  interface
    subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
      import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_
      class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                  :: alpha, beta
      class(psb_z_base_vect_type), intent(inout)     :: x
      class(psb_z_base_vect_type), intent(inout)     :: y
      integer(psb_ipk_), intent(out)                 :: info
      character, optional, intent(in)                :: trans
    end subroutine psb_z_cuda_hdiag_vect_mv
  end interface

!!$  interface
!!$    subroutine psb_z_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans)
!!$      import :: psb_ipk_, psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type
!!$      class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
!!$      complex(psb_dpk_), intent(in) :: alpha, beta
!!$      class(psb_z_base_vect_type), intent(inout) :: x, y
!!$      integer(psb_ipk_), intent(out) :: info
!!$      character, optional, intent(in) :: trans
!!$    end subroutine psb_z_cuda_hdiag_inner_vect_sv
!!$  end interface
!!$
!!$  interface
!!$    subroutine psb_z_cuda_hdiag_reallocate_nz(nz,a)
!!$      import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_
!!$      integer(psb_ipk_), intent(in) :: nz
!!$      class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
!!$    end subroutine psb_z_cuda_hdiag_reallocate_nz
!!$  end interface
!!$
!!$  interface
!!$    subroutine psb_z_cuda_hdiag_allocate_mnnz(m,n,a,nz)
!!$      import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_
!!$      integer(psb_ipk_), intent(in) :: m,n
!!$      class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
!!$      integer(psb_ipk_), intent(in), optional :: nz
!!$    end subroutine psb_z_cuda_hdiag_allocate_mnnz
!!$  end interface

  interface
    subroutine psb_z_cuda_hdiag_mold(a,b,info)
      import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_
      class(psb_z_cuda_hdiag_sparse_mat), intent(in)            :: a
      class(psb_z_base_sparse_mat), intent(inout), allocatable  :: b
      integer(psb_ipk_), intent(out)                            :: info
    end subroutine psb_z_cuda_hdiag_mold
  end interface

  interface
    subroutine psb_z_cuda_hdiag_to_gpu(a,info)
      import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_
      class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                    :: info
    end subroutine psb_z_cuda_hdiag_to_gpu
  end interface

  interface
    subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info)
      import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_
      class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
      class(psb_z_coo_sparse_mat), intent(in)           :: b
      integer(psb_ipk_), intent(out)                    :: info
    end subroutine psb_z_cuda_cp_hdiag_from_coo
  end interface

!!$  interface
!!$    subroutine psb_z_cuda_cp_hdiag_from_fmt(a,b,info)
!!$      import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_
!!$      class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
!!$      class(psb_z_base_sparse_mat), intent(in) :: b
!!$      integer(psb_ipk_), intent(out) :: info
!!$    end subroutine psb_z_cuda_cp_hdiag_from_fmt
!!$  end interface
!!$
  interface
    subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info)
      import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_
      class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
      class(psb_z_coo_sparse_mat), intent(inout)        :: b
      integer(psb_ipk_), intent(out)                    :: info
    end subroutine psb_z_cuda_mv_hdiag_from_coo
  end interface

!!$
!!$  interface
!!$    subroutine psb_z_cuda_mv_hdiag_from_fmt(a,b,info)
!!$      import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_
!!$      class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
!!$      class(psb_z_base_sparse_mat), intent(inout) :: b
!!$      integer(psb_ipk_), intent(out) :: info
!!$    end subroutine psb_z_cuda_mv_hdiag_from_fmt
!!$  end interface
!!$
  interface
    subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans)
      import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                  :: alpha, beta, x(:)
      complex(psb_dpk_), intent(inout)               :: y(:)
      integer(psb_ipk_), intent(out)                 :: info
      character, optional, intent(in)                :: trans
    end subroutine psb_z_cuda_hdiag_csmv
  end interface

!!$  interface
!!$    subroutine psb_z_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
!!$      import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_
!!$      class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
!!$      complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
!!$      complex(psb_dpk_), intent(inout) :: y(:,:)
!!$      integer(psb_ipk_), intent(out) :: info
!!$      character, optional, intent(in) :: trans
!!$    end subroutine psb_z_cuda_hdiag_csmm
!!$  end interface
!!$
!!$  interface
!!$    subroutine psb_z_cuda_hdiag_scal(d,a,info, side)
!!$      import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_
!!$      class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
!!$      complex(psb_dpk_), intent(in) :: d(:)
!!$      integer(psb_ipk_), intent(out) :: info
!!$      character, intent(in), optional :: side
!!$    end subroutine psb_z_cuda_hdiag_scal
!!$  end interface
!!$
!!$  interface
!!$    subroutine psb_z_cuda_hdiag_scals(d,a,info)
!!$      import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_
!!$      class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
!!$      complex(psb_dpk_), intent(in) :: d
!!$      integer(psb_ipk_), intent(out) :: info
!!$    end subroutine psb_z_cuda_hdiag_scals
!!$  end interface
!!$

contains

  ! == ===================================
  !
  !
  !
  ! Getters
  !
  !
  !
  !
  !
  ! == ===================================

  !
  ! Format tag of this storage class: 'HDIAG'.
  !
  function z_cuda_hdiag_get_fmt() result(res)
    implicit none
    character(len=5) :: res
    res = 'HDIAG'
  end function z_cuda_hdiag_get_fmt



  ! == ===================================
  !
  !
  !
  ! Data management
  !
  !
  !
  !
  !
  ! == ===================================

  !
  ! Release the device copy (if any), then the host storage through the
  ! parent type.
  !
  subroutine z_cuda_hdiag_free(a)
    use hdiagdev_mod
    implicit none
    class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a

    if (c_associated(a%deviceMat)) &
         & call freeHdiagDevice(a%deviceMat)
    a%deviceMat = c_null_ptr
    call a%psb_z_hdia_sparse_mat%free()

    return

  end subroutine z_cuda_hdiag_free

  !
  ! Finalizer: releases only the device handle, in line with the ELG
  ! and HLG device formats. Host-side allocatable components of the
  ! parent are released by the language once finalization completes,
  ! so no explicit call to the parent free() is made here.
  !
  subroutine z_cuda_hdiag_finalize(a)
    use hdiagdev_mod
    implicit none
    type(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a

    if (c_associated(a%deviceMat)) &
         & call freeHdiagDevice(a%deviceMat)
    a%deviceMat = c_null_ptr

    return
  end subroutine z_cuda_hdiag_finalize

end module psb_z_cuda_hdiag_mat_mod
diff --git a/cuda/psb_z_cuda_hlg_mat_mod.F90 b/cuda/psb_z_cuda_hlg_mat_mod.F90 new file mode 100644 index 00000000..f7150250 --- /dev/null +++ b/cuda/psb_z_cuda_hlg_mat_mod.F90 @@ -0,0 +1,377 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! 
!  software without specific written permission.
!
!  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
!  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
!  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
!  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
!  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
!  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
!  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
!  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
!  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
!  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
!  POSSIBILITY OF SUCH DAMAGE.
!


!
! Module psb_z_cuda_hlg_mat_mod
!
! Double-complex hacked-ELLPACK (HLL) sparse matrix with a device-side
! mirror. The type extends psb_z_hll_sparse_mat with an opaque device
! handle (deviceMat) and a three-valued flag (devstate) recording which
! copy of the data is current: host only, device only, or both in sync.
!
module psb_z_cuda_hlg_mat_mod

  use iso_c_binding
  use psb_z_mat_mod
  use psb_z_hll_mat_mod


  ! Values stored in devstate.
  integer(psb_ipk_), parameter, private :: is_host = -1
  integer(psb_ipk_), parameter, private :: is_sync = 0
  integer(psb_ipk_), parameter, private :: is_dev  = 1

  type, extends(psb_z_hll_sparse_mat) :: psb_z_cuda_hlg_sparse_mat
    !
    ! ITPACK/HLL format, extended.
    ! We are adding here the routines to create a copy of the data
    ! into the GPU.
    !
    ! deviceMat : opaque handle to the device copy (c_null_ptr if none).
    ! devstate  : one of is_host / is_sync / is_dev; declared with the
    !             same kind as those constants.
    !
    type(c_ptr)       :: deviceMat = c_null_ptr
    integer(psb_ipk_) :: devstate  = is_host

  contains
    procedure, nopass  :: get_fmt       => z_cuda_hlg_get_fmt
    procedure, pass(a) :: sizeof        => z_cuda_hlg_sizeof
    procedure, pass(a) :: vect_mv       => psb_z_cuda_hlg_vect_mv
    procedure, pass(a) :: csmm          => psb_z_cuda_hlg_csmm
    procedure, pass(a) :: csmv          => psb_z_cuda_hlg_csmv
    procedure, pass(a) :: in_vect_sv    => psb_z_cuda_hlg_inner_vect_sv
    procedure, pass(a) :: scals         => psb_z_cuda_hlg_scals
    procedure, pass(a) :: scalv         => psb_z_cuda_hlg_scal
    procedure, pass(a) :: reallocate_nz => psb_z_cuda_hlg_reallocate_nz
    procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hlg_allocate_mnnz
    ! Note: we do *not* need the TO methods, because the parent type
    ! methods will work.
    procedure, pass(a) :: cp_from_coo   => psb_z_cuda_cp_hlg_from_coo
    procedure, pass(a) :: cp_from_fmt   => psb_z_cuda_cp_hlg_from_fmt
    procedure, pass(a) :: mv_from_coo   => psb_z_cuda_mv_hlg_from_coo
    procedure, pass(a) :: mv_from_fmt   => psb_z_cuda_mv_hlg_from_fmt
    procedure, pass(a) :: free          => z_cuda_hlg_free
    procedure, pass(a) :: mold          => psb_z_cuda_hlg_mold
    procedure, pass(a) :: is_host       => z_cuda_hlg_is_host
    procedure, pass(a) :: is_dev        => z_cuda_hlg_is_dev
    procedure, pass(a) :: is_sync       => z_cuda_hlg_is_sync
    procedure, pass(a) :: set_host      => z_cuda_hlg_set_host
    procedure, pass(a) :: set_dev       => z_cuda_hlg_set_dev
    procedure, pass(a) :: set_sync      => z_cuda_hlg_set_sync
    procedure, pass(a) :: sync          => z_cuda_hlg_sync
    procedure, pass(a) :: from_gpu      => psb_z_cuda_hlg_from_gpu
    procedure, pass(a) :: to_gpu        => psb_z_cuda_hlg_to_gpu
    final              :: z_cuda_hlg_finalize
  end type psb_z_cuda_hlg_sparse_mat

  private :: z_cuda_hlg_get_nzeros, z_cuda_hlg_free,  z_cuda_hlg_get_fmt, &
       & z_cuda_hlg_get_size, z_cuda_hlg_sizeof, z_cuda_hlg_get_nz_row


  ! Explicit interfaces for the type-bound procedures whose bodies are
  ! not part of this module.

  interface
    subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans)
      import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                :: alpha, beta
      class(psb_z_base_vect_type), intent(inout)   :: x
      class(psb_z_base_vect_type), intent(inout)   :: y
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_z_cuda_hlg_vect_mv
  end interface

  interface
    subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans)
      import :: psb_ipk_, psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type
      class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                :: alpha, beta
      class(psb_z_base_vect_type), intent(inout)   :: x, y
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_z_cuda_hlg_inner_vect_sv
  end interface

  interface
    subroutine psb_z_cuda_hlg_reallocate_nz(nz,a)
      import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in)                   :: nz
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
    end subroutine psb_z_cuda_hlg_reallocate_nz
  end interface

  interface
    subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz)
      import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in)                   :: m,n
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(in), optional         :: nz
    end subroutine psb_z_cuda_hlg_allocate_mnnz
  end interface

  interface
    subroutine psb_z_cuda_hlg_mold(a,b,info)
      import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(in)              :: a
      class(psb_z_base_sparse_mat), intent(inout), allocatable  :: b
      integer(psb_ipk_), intent(out)                            :: info
    end subroutine psb_z_cuda_hlg_mold
  end interface

  interface
    subroutine psb_z_cuda_hlg_from_gpu(a,info)
      import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_hlg_from_gpu
  end interface

  interface
    subroutine psb_z_cuda_hlg_to_gpu(a,info, nzrm)
      import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(out)                  :: info
      integer(psb_ipk_), intent(in), optional         :: nzrm
    end subroutine psb_z_cuda_hlg_to_gpu
  end interface

  interface
    subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info)
      import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
      class(psb_z_coo_sparse_mat), intent(in)         :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_cp_hlg_from_coo
  end interface

  interface
    subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info)
      import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
      class(psb_z_base_sparse_mat), intent(in)        :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_cp_hlg_from_fmt
  end interface

  interface
    subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info)
      import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
      class(psb_z_coo_sparse_mat), intent(inout)      :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_mv_hlg_from_coo
  end interface


  interface
    subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info)
      import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
      class(psb_z_base_sparse_mat), intent(inout)     :: b
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_mv_hlg_from_fmt
  end interface

  interface
    subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans)
      import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                :: alpha, beta, x(:)
      complex(psb_dpk_), intent(inout)             :: y(:)
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_z_cuda_hlg_csmv
  end interface
  interface
    subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans)
      import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a
      complex(psb_dpk_), intent(in)                :: alpha, beta, x(:,:)
      complex(psb_dpk_), intent(inout)             :: y(:,:)
      integer(psb_ipk_), intent(out)               :: info
      character, optional, intent(in)              :: trans
    end subroutine psb_z_cuda_hlg_csmm
  end interface

  interface
    subroutine psb_z_cuda_hlg_scal(d,a,info, side)
      import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
      complex(psb_dpk_), intent(in)                   :: d(:)
      integer(psb_ipk_), intent(out)                  :: info
      character, intent(in), optional                 :: side
    end subroutine psb_z_cuda_hlg_scal
  end interface

  interface
    subroutine psb_z_cuda_hlg_scals(d,a,info)
      import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_
      class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
      complex(psb_dpk_), intent(in)                   :: d
      integer(psb_ipk_), intent(out)                  :: info
    end subroutine psb_z_cuda_hlg_scals
  end interface


contains

  ! == ===================================
  !
  !
  !
  ! Getters
  !
  !
  !
  !
  !
  ! == ===================================


  !
  ! Size in bytes of the host-side storage (val, irn, idiag, hkoffs,
  ! ja) plus a fixed 8-byte overhead. If the device copy is the current
  ! one the matrix is synchronised first. The device-side mirror itself
  ! is not counted.
  !
  function z_cuda_hlg_sizeof(a) result(res)
    implicit none
    class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: res


    if (a%is_dev()) call a%sync()
    res = 8
    res = res + (2*psb_sizeof_dp) * size(a%val)
    res = res + psb_sizeof_ip * size(a%irn)
    res = res + psb_sizeof_ip * size(a%idiag)
    res = res + psb_sizeof_ip * size(a%hkoffs)
    res = res + psb_sizeof_ip * size(a%ja)
    ! Should we account for the shadow data structure
    ! on the GPU device side?
    ! res = 2*res

  end function z_cuda_hlg_sizeof

  !
  ! Format tag of this storage class: 'HLG' (blank padded to 5 chars).
  !
  function z_cuda_hlg_get_fmt() result(res)
    implicit none
    character(len=5) :: res
    res = 'HLG'
  end function z_cuda_hlg_get_fmt



  ! == ===================================
  !
  !
  !
  ! Data management
  !
  !
  !
  !
  !
  ! == ===================================

  !
  ! Release the device copy (if any), then the host storage through the
  ! parent type, and mark the two sides as in sync so that a stale
  ! "device is current" flag cannot trigger a device-to-host copy
  ! through a null handle later on (same convention as the ELG format).
  !
  subroutine z_cuda_hlg_free(a)
    use hlldev_mod
    implicit none
    class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a

    if (c_associated(a%deviceMat)) &
         & call freeHllDevice(a%deviceMat)
    a%deviceMat = c_null_ptr
    call a%psb_z_hll_sparse_mat%free()
    call a%set_sync()

    return

  end subroutine z_cuda_hlg_free


  !
  ! Bring host and device copies in line: host -> device when the host
  ! copy is current, device -> host when the device copy is current.
  ! The dummy is intent(in) so that the routine can be called from
  ! intent(in) contexts; the update goes through a local pointer.
  ! The status returned by the copy routines is not inspected.
  !
  subroutine z_cuda_hlg_sync(a)
    implicit none
    class(psb_z_cuda_hlg_sparse_mat), target, intent(in) :: a
    class(psb_z_cuda_hlg_sparse_mat), pointer :: tmpa
    integer(psb_ipk_) :: info

    tmpa => a
    if (tmpa%is_host()) then
      call tmpa%to_gpu(info)
    else if (tmpa%is_dev()) then
      call tmpa%from_gpu(info)
    end if
    call tmpa%set_sync()
    return

  end subroutine z_cuda_hlg_sync

  ! Mark the host copy as the current one.
  subroutine z_cuda_hlg_set_host(a)
    implicit none
    class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a

    a%devstate = is_host
  end subroutine z_cuda_hlg_set_host

  ! Mark the device copy as the current one.
  subroutine z_cuda_hlg_set_dev(a)
    implicit none
    class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a

    a%devstate = is_dev
  end subroutine z_cuda_hlg_set_dev

  ! Mark host and device copies as identical.
  subroutine z_cuda_hlg_set_sync(a)
    implicit none
    class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a

    a%devstate = is_sync
  end subroutine z_cuda_hlg_set_sync

  ! .true. when the device copy is the current one.
  function z_cuda_hlg_is_dev(a) result(res)
    implicit none
    class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a
    logical :: res

    res = (a%devstate == is_dev)
  end function z_cuda_hlg_is_dev

  ! .true. when the host copy is the current one.
  function z_cuda_hlg_is_host(a) result(res)
    implicit none
    class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a
    logical :: res

    res = (a%devstate == is_host)
  end function z_cuda_hlg_is_host

  ! .true. when host and device copies are identical.
  function z_cuda_hlg_is_sync(a) result(res)
    implicit none
    class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a
    logical :: res

    res = (a%devstate == is_sync)
  end function z_cuda_hlg_is_sync


  !
  ! Finalizer: releases only the device handle.
  !
  subroutine z_cuda_hlg_finalize(a)
    use hlldev_mod
    implicit none
    type(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a

    if (c_associated(a%deviceMat)) &
         & call freeHllDevice(a%deviceMat)
    a%deviceMat = c_null_ptr

    return
  end subroutine z_cuda_hlg_finalize

end module psb_z_cuda_hlg_mat_mod
diff --git a/cuda/psb_z_cuda_hybg_mat_mod.F90 b/cuda/psb_z_cuda_hybg_mat_mod.F90 new file mode 100644 index 00000000..efcd73f7 --- /dev/null +++ b/cuda/psb_z_cuda_hybg_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ +#if PSB_CUDA_SHORT_VERSION <= 10 + +module psb_z_cuda_hybg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use cusparse_mod + + type, extends(psb_z_csr_sparse_mat) :: psb_z_cuda_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! + type(z_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => z_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_z_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_z_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. 
+ procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => z_cuda_hybg_free + procedure, pass(a) :: mold => psb_z_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_hybg_to_gpu + final :: z_cuda_hybg_finalize + end type psb_z_cuda_hybg_sparse_mat + + private :: z_cuda_hybg_get_nzeros, z_cuda_hybg_free, z_cuda_hybg_get_fmt, & + & z_cuda_hybg_get_size, z_cuda_hybg_sizeof, z_cuda_hybg_get_nz_row + + + interface + subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hybg_inner_vect_sv + end interface + + interface + subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hybg_vect_mv + end interface + + interface + subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_hybg_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) 
+ import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_hybg_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_hybg_mold(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_hybg_mold + end interface + + interface + subroutine psb_z_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_hybg_to_gpu + end interface + + interface + subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_hybg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_hybg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_hybg_from_coo + end interface + + interface + subroutine 
psb_z_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_hybg_from_fmt + end interface + + interface + subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hybg_csmv + end interface + interface + subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hybg_csmm + end interface + + interface + subroutine psb_z_cuda_hybg_scal(d,a,info,side) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_hybg_scal + end interface + + interface + subroutine psb_z_cuda_hybg_scals(d,a,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== =================================== + + + function z_cuda_hybg_sizeof(a) result(res) + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_hybg_sizeof + + function z_cuda_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function z_cuda_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_cuda_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_cuda_hybg_free + + subroutine z_cuda_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine z_cuda_hybg_finalize + +end module psb_z_cuda_hybg_mat_mod +#endif diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 new file mode 100644 index 00000000..4e490df6 --- /dev/null +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -0,0 +1,2088 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +module psb_z_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_z_vect_mod + use psb_cuda_env_mod + use psb_i_vect_mod + use psb_i_cuda_vect_mod + use psb_i_vectordev_mod + use psb_z_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_vect_type) :: psb_z_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + complex(c_double_complex), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + complex(c_double_complex), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => z_cuda_get_nrows + procedure, nopass :: get_fmt => z_cuda_get_fmt + + procedure, pass(x) :: all => z_cuda_all + procedure, pass(x) :: zero => z_cuda_zero + procedure, pass(x) :: asb_m => z_cuda_asb_m + procedure, pass(x) :: sync => z_cuda_sync + procedure, pass(x) :: sync_space => z_cuda_sync_space + procedure, pass(x) :: bld_x => z_cuda_bld_x + procedure, pass(x) :: bld_mn => z_cuda_bld_mn + procedure, pass(x) :: free => z_cuda_free + procedure, pass(x) :: ins_a => z_cuda_ins_a + procedure, pass(x) :: ins_v => z_cuda_ins_v + procedure, pass(x) :: is_host => z_cuda_is_host + procedure, pass(x) :: is_dev => z_cuda_is_dev + procedure, pass(x) :: is_sync => z_cuda_is_sync + procedure, pass(x) :: set_host => z_cuda_set_host + procedure, pass(x) :: set_dev => z_cuda_set_dev + procedure, pass(x) :: set_sync => z_cuda_set_sync + procedure, pass(x) :: set_scal => z_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => z_cuda_set_vect + procedure, pass(x) :: gthzv_x => z_cuda_gthzv_x + procedure, pass(y) :: sctb => z_cuda_sctb + procedure, pass(y) :: sctb_x => z_cuda_sctb_x + procedure, pass(x) :: gthzbuf => z_cuda_gthzbuf + 
procedure, pass(y) :: sctb_buf => z_cuda_sctb_buf + procedure, pass(x) :: new_buffer => z_cuda_new_buffer + procedure, nopass :: device_wait => z_cuda_device_wait + procedure, pass(x) :: free_buffer => z_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => z_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => z_cuda_dot_v + procedure, pass(x) :: dot_a => z_cuda_dot_a + procedure, pass(y) :: axpby_v => z_cuda_axpby_v + procedure, pass(y) :: axpby_a => z_cuda_axpby_a + procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz + procedure, pass(y) :: mlt_v => z_cuda_mlt_v + procedure, pass(y) :: mlt_a => z_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => z_cuda_mlt_v_2 + procedure, pass(x) :: scal => z_cuda_scal + procedure, pass(x) :: nrm2 => z_cuda_nrm2 + procedure, pass(x) :: amax => z_cuda_amax + procedure, pass(x) :: asum => z_cuda_asum + procedure, pass(x) :: absval1 => z_cuda_absval1 + procedure, pass(x) :: absval2 => z_cuda_absval2 + + final :: z_cuda_vect_finalize + end type psb_z_vect_cuda + + public :: psb_z_vect_cuda_ + private :: constructor + interface psb_z_vect_cuda_ + module procedure constructor + end interface psb_z_vect_cuda_ + +contains + + function constructor(x) result(this) + complex(psb_dpk_) :: x(:) + type(psb_z_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine z_cuda_device_wait() + call psb_cudaSync() + end subroutine z_cuda_device_wait + + subroutine z_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! 
Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDoubleComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(x%dt_buf,y,n) + + endif + + class default + ! 
Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDoubleComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplex(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(x%dt_buf,y,n) + + end select + + end subroutine z_cuda_gthzv_x + + subroutine z_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + class(psb_z_vect_cuda) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! 
Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplex(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readDoubleComplex(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine z_cuda_gthzbuf + + subroutine z_cuda_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_cuda) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_z_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine z_cuda_sctb + + subroutine z_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_cuda) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! 
+ if (psb_cuda_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDoubleComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeDoubleComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDoubleComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeDoubleComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleComplex(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + 
! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine z_cuda_sctb_x + + subroutine z_cuda_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_cuda_env_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: i + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta + class(psb_z_vect_cuda) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_cuda) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_cuda_DeviceHasUVA()) then + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeDoubleComplex(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeDoubleComplex(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceDoubleComplex(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine z_cuda_sctb_buf + + + subroutine z_cuda_bld_x(x,this) + use psb_base_mod + complex(psb_dpk_), intent(in) :: this(:) + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_cuda_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() 
+ call x%sync() + + end subroutine z_cuda_bld_x + + subroutine z_cuda_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'z_cuda_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine z_cuda_bld_mn + + subroutine z_cuda_set_host(x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine z_cuda_set_host + + subroutine z_cuda_set_dev(x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine z_cuda_set_dev + + subroutine z_cuda_set_sync(x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine z_cuda_set_sync + + function z_cuda_is_dev(x) result(res) + implicit none + class(psb_z_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_cuda_is_dev + + function z_cuda_is_host(x) result(res) + implicit none + class(psb_z_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_cuda_is_host + + function z_cuda_is_sync(x) result(res) + implicit none + class(psb_z_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_cuda_is_sync + + + function z_cuda_get_nrows(x) result(res) + implicit none + class(psb_z_vect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function z_cuda_get_nrows + + function z_cuda_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'zGPU' + end function z_cuda_get_fmt + + subroutine z_cuda_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_z_vect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if 
(info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_cuda_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine z_cuda_all + + subroutine z_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(zzero) + end subroutine z_cuda_zero + + subroutine z_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_z_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'z_cuda_sync') + end if + + end subroutine z_cuda_sync + + subroutine z_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine z_cuda_free + + subroutine z_cuda_set_scal(x,val,first,last) + class(psb_z_vect_cuda), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + 
first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine z_cuda_set_scal + + + + function z_cuda_dot_v(n,x,y) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + integer(psb_ipk_) :: info + + res = zzero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_z_vect + ! + select type(yy => y) + type is (psb_z_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'z_cuda_dot_v') + end if + + class default + ! y%sync is done in dot_a + if (x%is_dev()) call x%sync() + res = y%dot(n,x%v) + end select + + end function z_cuda_dot_v + + function z_cuda_dot_a(n,x,y) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + complex(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + complex(psb_dpk_), external :: zdot + + if (x%is_dev()) call x%sync() + res = zdot(n,y,1,x%v,1) + + end function z_cuda_dot_a + + subroutine z_cuda_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: y + complex(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_z_vect_cuda) + ! 
Do something different here + if ((beta /= zzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + class is (psb_z_vect_cuda) + select type(yy => y) + class is (psb_z_vect_cuda) + select type(zz => z) + class is (psb_z_vect_cuda) + ! Do something different here + if ((beta /= zzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= zzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) + class is (psb_z_vect_cuda) + select type(yy => y) + class is (psb_z_vect_cuda) + select type(zz => z) + class is (psb_z_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + type is (psb_z_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_z_vect_cuda) + ! 
Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,zone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine z_cuda_mlt_v + + subroutine z_cuda_mlt_a(x, y, info) + use psi_serial_mod + implicit none + complex(psb_dpk_), intent(in) :: x(:) + class(psb_z_vect_cuda), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_z_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine z_cuda_mlt_a + + subroutine z_cuda_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha,beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(in) :: y(:) + class(psb_z_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine z_cuda_mlt_a_2 + + subroutine z_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha,beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. 
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_z_vect_cuda) + select type (yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= zzero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= zzero).and.(z%is_dev())) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= zzero).and.(z%is_dev())) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine z_cuda_mlt_v_2 + + subroutine z_cuda_scal(alpha, x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + complex(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine z_cuda_scal + + + function z_cuda_nrm2(n,x) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. 
+ if (x%is_host()) call x%sync() + info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_cuda_nrm2 + + function z_cuda_amax(n,x) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_cuda_amax + + function z_cuda_asum(n,x) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_cuda_asum + + subroutine z_cuda_absval1(x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,zone,x%deviceVect) + + end subroutine z_cuda_absval1 + + subroutine z_cuda_absval2(x,y) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_z_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,zone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_z_base_vect_type%absval(y) + end select + end subroutine z_cuda_absval2 + + + subroutine z_cuda_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine z_cuda_vect_finalize + + subroutine z_cuda_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + 
integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_z_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_cuda + + info = 0 + if (psb_errstatus_fatal()) return + + done_cuda = .false. + select type(virl => irl) + class is (psb_i_vect_cuda) + select type(vval => val) + class is (psb_z_vect_cuda) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceDoubleComplex(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_cuda=.true. + end select + end select + + if (.not.done_cuda) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'cuda_vect_ins') + return + end if + + end subroutine z_cuda_ins_v + + subroutine z_cuda_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_z_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine z_cuda_ins_a + +end module psb_z_cuda_vect_mod + + +! +! Multivectors +! 
+ + + +module psb_z_cuda_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_z_multivect_mod + use psb_z_base_multivect_mod + use psb_cuda_env_mod + use psb_i_multivect_mod + use psb_i_cuda_multivect_mod + use psb_z_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_multivect_type) :: psb_z_multivect_cuda + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => z_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => z_cuda_multi_get_ncols + procedure, nopass :: get_fmt => z_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => z_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => z_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => z_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => z_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => z_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => z_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => z_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => z_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => z_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => z_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => z_cuda_multi_amax +!!$ procedure, pass(x) :: asum => z_cuda_multi_asum + procedure, pass(x) :: all => z_cuda_multi_all + procedure, pass(x) :: zero => z_cuda_multi_zero + procedure, pass(x) :: asb => z_cuda_multi_asb + procedure, pass(x) :: sync => z_cuda_multi_sync + procedure, pass(x) :: sync_space => z_cuda_multi_sync_space + procedure, pass(x) :: bld_x => z_cuda_multi_bld_x + procedure, pass(x) :: bld_n => z_cuda_multi_bld_n + procedure, pass(x) :: free => z_cuda_multi_free + procedure, pass(x) :: ins => z_cuda_multi_ins + 
procedure, pass(x) :: is_host => z_cuda_multi_is_host + procedure, pass(x) :: is_dev => z_cuda_multi_is_dev + procedure, pass(x) :: is_sync => z_cuda_multi_is_sync + procedure, pass(x) :: set_host => z_cuda_multi_set_host + procedure, pass(x) :: set_dev => z_cuda_multi_set_dev + procedure, pass(x) :: set_sync => z_cuda_multi_set_sync + procedure, pass(x) :: set_scal => z_cuda_multi_set_scal + procedure, pass(x) :: set_vect => z_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => z_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => z_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => z_cuda_multi_sctb_x + final :: z_cuda_multi_vect_finalize + end type psb_z_multivect_cuda + + public :: psb_z_multivect_cuda + private :: mconstructor + interface psb_z_multivect_cuda + module procedure mconstructor + end interface + +contains + + function mconstructor(x) result(this) + complex(psb_dpk_) :: x(:,:) + type(psb_z_multivect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function mconstructor + + +!!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_dpk_) :: y(:) +!!$ class(psb_z_multivect_cuda) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end 
subroutine z_cuda_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine z_cuda_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ complex(psb_dpk_) :: beta, x(:) +!!$ class(psb_z_multivect_cuda) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_z_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine z_cuda_multi_sctb +!!$ +!!$ subroutine z_cuda_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_dpk_) :: beta, x(:) +!!$ class(psb_z_multivect_cuda) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine z_cuda_multi_sctb_x + + + subroutine z_cuda_multi_bld_x(x,this) + use psb_base_mod + complex(psb_dpk_), intent(in) :: this(:,:) + class(psb_z_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_cuda_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + 
call x%sync() + + end subroutine z_cuda_multi_bld_x + + subroutine z_cuda_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'z_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine z_cuda_multi_bld_n + + + subroutine z_cuda_multi_set_host(x) + implicit none + class(psb_z_multivect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine z_cuda_multi_set_host + + subroutine z_cuda_multi_set_dev(x) + implicit none + class(psb_z_multivect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine z_cuda_multi_set_dev + + subroutine z_cuda_multi_set_sync(x) + implicit none + class(psb_z_multivect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine z_cuda_multi_set_sync + + function z_cuda_multi_is_dev(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_cuda_multi_is_dev + + function z_cuda_multi_is_host(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_cuda_multi_is_host + + function z_cuda_multi_is_sync(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_cuda_multi_is_sync + + + function z_cuda_multi_get_nrows(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function z_cuda_multi_get_nrows + + function z_cuda_multi_get_ncols(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function z_cuda_multi_get_ncols + + function z_cuda_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'zGPU' + end function z_cuda_multi_get_fmt 
+ +!!$ function z_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ class(psb_z_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_dpk_) :: res +!!$ complex(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = zzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_z_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_z_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_z_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'z_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function z_cuda_multi_dot_v +!!$ +!!$ function z_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ complex(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_dpk_) :: res +!!$ complex(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function z_cuda_multi_dot_a +!!$ +!!$ subroutine z_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_z_base_multivect_type), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: y +!!$ complex(psb_dpk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_z_base_multivect_type) +!!$ if ((beta /= zzero).and.(y%is_dev()))& 
+!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_z_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= zzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_z_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_z_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine z_cuda_multi_mlt_v +!!$ +!!$ subroutine z_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_z_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_z_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine z_cuda_multi_mlt_a +!!$ +!!$ subroutine z_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: alpha,beta +!!$ complex(psb_dpk_), intent(in) :: x(:) +!!$ complex(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_z_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine z_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine z_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use 
psb_string_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: alpha,beta +!!$ class(psb_z_base_multivect_type), intent(inout) :: x +!!$ class(psb_z_base_multivect_type), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_z_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_z_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. 
+!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine z_cuda_multi_mlt_v_2 + + + subroutine z_cuda_multi_set_scal(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_z_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine z_cuda_multi_set_scal + + subroutine z_cuda_multi_set_vect(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_z_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine z_cuda_multi_set_vect + + + +!!$ subroutine z_cuda_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ complex(psb_dpk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_z_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine z_cuda_multi_scal +!!$ +!!$ +!!$ function z_cuda_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. 
+!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function z_cuda_multi_nrm2 +!!$ +!!$ function z_cuda_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function z_cuda_multi_amax +!!$ +!!$ function z_cuda_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function z_cuda_multi_asum + + subroutine z_cuda_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=zzero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_cuda_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine z_cuda_multi_all + + subroutine z_cuda_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_z_multivect_cuda), intent(inout) :: x + + if (allocated(x%v)) x%v=zzero + call x%set_host() + end subroutine z_cuda_multi_zero + + subroutine z_cuda_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_z_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) 
+ nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc +#include + +#include +#include +#include "fcusparse.h" + +#include "scusparse.h" +#include "fcusparse_dat.h" +#include "fcusparse_fct.h" diff --git a/cuda/scusparse.h b/cuda/scusparse.h new file mode 100644 index 00000000..a5e53446 --- /dev/null +++ b/cuda/scusparse.h @@ -0,0 +1,100 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef SCUSPARSE_ +#define SCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" + +/* Single precision real */ +#define TYPE float +#define CUSPARSE_BASE_TYPE CUDA_R_32F +#define T_CSRGDeviceMat s_CSRGDeviceMat +#define T_Cmat s_Cmat +#define T_spmvCSRGDevice s_spmvCSRGDevice +#define T_spsvCSRGDevice s_spsvCSRGDevice +#define T_CSRGDeviceAlloc s_CSRGDeviceAlloc +#define T_CSRGDeviceFree s_CSRGDeviceFree +#define T_CSRGHost2Device s_CSRGHost2Device +#define T_CSRGDevice2Host s_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode s_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType s_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms s_CSRGDeviceGetParms + +#if PSB_CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis s_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseScsrmv +#define cusparseTcsrsv_solve cusparseScsrsv_solve +#define cusparseTcsrsv_analysis cusparseScsrsv_analysis +#define T_HYBGDeviceMat s_HYBGDeviceMat +#define T_Hmat s_Hmat +#define T_HYBGDeviceFree s_HYBGDeviceFree +#define T_spmvHYBGDevice s_spmvHYBGDevice +#define T_HYBGDeviceAlloc s_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType s_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase s_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType 
s_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode s_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis s_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice s_spsvHYBGDevice +#define T_HYBGHost2Device s_HYBGHost2Device +#define cusparseThybmv cusparseShybmv +#define cusparseThybsv_solve cusparseShybsv_solve +#define cusparseThybsv_analysis cusparseShybsv_analysis +#define cusparseTcsr2hyb cusparseScsr2hyb + +#elif PSB_CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis s_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseScsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr + +#endif + +#include "fcusparse.h" + +#endif diff --git a/cuda/spgpu/Makefile b/cuda/spgpu/Makefile new file mode 100644 index 00000000..ae0b1be6 --- /dev/null +++ b/cuda/spgpu/Makefile @@ -0,0 +1,40 @@ +TOP=../.. 
+include $(TOP)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOP)/lib +INCDIR=$(TOP)/include +MODDIR=$(TOP)/modules +LIBNAME=libspgpu.a + +OBJS=coo.o core.o dia.o ell.o hdia.o hell.o +CU_INCLUDES=-I$(INCDIR) $(CUDA_INCLUDES) + +all: includes objs + +objs: $(OBJS) iobjs + +lib: objs iobjs ilib + $(AR) $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +iobjs: + $(MAKE) -C kernels objs +ilib: + $(MAKE) -C kernels lib LIBNAME=$(LIBNAME) + +includes: + /bin/cp -p *.h $(INCDIR) + +clean: iclean + /bin/rm -fr $(OBJS) $(LIBNAME) +iclean: + $(MAKE) -C kernels clean +.c.o: + $(CC) $(CCOPT) $(CINCLUDES) $(CDEFINES) $(CU_INCLUDES) -c $< +.cpp.o: + $(CXX) $(CXXOPT) $(CXXINCLUDES) $(CXXDEFINES) $(CU_INCLUDES) -c $< -o $@ + +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) $(CU_INCLUDES) -c $< diff --git a/cuda/spgpu/coo.cpp b/cuda/spgpu/coo.cpp new file mode 100644 index 00000000..eaeba95e --- /dev/null +++ b/cuda/spgpu/coo.cpp @@ -0,0 +1,98 @@ +#include "coo_conv.h" +#include "core.h" + +#include + +// returns the number of non-zero blocks +int computeBcooSize(int blockRows, int blockCols, const int* rows, const int* cols, int nonZeros) +{ + // use a map to count al non zero blocks + std::map blocksPositions; + + int blockCount = 0; + + int i; + for (i=0; i::iterator it = blocksPositions.find(blockId); + + // not found + if(it == blocksPositions.end()) + { + blocksPositions[blockId] = blockCount; + ++blockCount; + } + } + + return blockCount; +} + + +void cooToBcoo(int* bRows, int* bCols, void* blockValues, /*int isBlockColumnMajor,*/ int blockRows, int blockCols, + const int* rows, const int* cols, const void* values, int nonZeros, spgpuType_t valuesType) +{ + // use a map to count al non zero blocks + std::map blocksPositions; + + int blockCount = 0; + + size_t elementSize = spgpuSizeOf(valuesType); + size_t blockElementSize = elementSize*blockRows*blockCols; + int i; + for (i=0; i::iterator it = blocksPositions.find(blockId); + + int blockPos; + + // not found + 
if(it == blocksPositions.end()) + { + blocksPositions[blockId] = blockCount; + blockPos = blockCount; + + bRows[blockCount] = blockRowId; + bCols[blockCount] = blockColId; + + memset((char*)blockValues + blockCount*blockElementSize, 0, blockElementSize); + + ++blockCount; + } + else + blockPos = it->second; + + int blockRowOffset = rowId % blockRows; + int blockColOffset = colId % blockCols; + + int blockOffset; + + //if (isBlockColumnMajor) + blockOffset = blockRowOffset + blockColOffset*blockRows; + /*else + blockOffset = blockRowOffset*blockCols + blockColOffset; + */ + + char* dest = (char*)blockValues + blockPos*blockElementSize + blockOffset*elementSize; + const char* src = (const char*)values + elementSize*i; + + memcpy(dest, src, elementSize); + } +} + + diff --git a/cuda/spgpu/coo_conv.h b/cuda/spgpu/coo_conv.h new file mode 100644 index 00000000..feccb63a --- /dev/null +++ b/cuda/spgpu/coo_conv.h @@ -0,0 +1,40 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include "core.h" + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int computeBcooSize(int blockRows, int blockCols, const int* rows, const int* cols, int nonZeros); + +// column-major format for blocks +void cooToBcoo(int* bRows, int* bCols, void* blockValues, /*int isBlockColumnMajor,*/ int blockRows, int blockCols, + const int* rows, const int* cols, const void* values, int nonZeros, spgpuType_t valuesType); + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/core.c b/cuda/spgpu/core.c new file mode 100644 index 00000000..e89aa2cf --- /dev/null +++ b/cuda/spgpu/core.c @@ -0,0 +1,109 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "core.h" +#include "stdlib.h" +#include "cuda_runtime.h" + +spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device) +{ + struct cudaDeviceProp deviceProperties; + cudaError_t err = cudaGetDeviceProperties(&deviceProperties, device); + + SpgpuHandleStruct* handle = (SpgpuHandleStruct*) malloc(sizeof(SpgpuHandleStruct)); + + int currentDevice; + cudaGetDevice(¤tDevice); + cudaSetDevice(device); + cudaStreamCreate(&handle->defaultStream); + handle->currentStream = handle->defaultStream; + cudaSetDevice(currentDevice); + + handle->device = device; + handle->warpSize = deviceProperties.warpSize; + handle->maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; + handle->multiProcessorCount = deviceProperties.multiProcessorCount; + handle->maxGridSizeX = deviceProperties.maxGridSize[0]; + handle->maxGridSizeY = deviceProperties.maxGridSize[1]; + handle->maxGridSizeZ = deviceProperties.maxGridSize[2]; + handle->capabilityMajor = deviceProperties.major; + handle->capabilityMinor = deviceProperties.minor; + + *pHandle = handle; + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else + return SPGPU_UNSPECIFIED; +} + +void spgpuDestroy(spgpuHandle_t pHandle) +{ + cudaStreamDestroy(pHandle->defaultStream); + + free((void*)pHandle); +} + +void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream) +{ + int currentDevice; + cudaGetDevice(¤tDevice); + cudaSetDevice(pHandle->device); + cudaStreamCreate(stream); + cudaSetDevice(currentDevice); +} + +void spgpuStreamDestroy(cudaStream_t stream) +{ + cudaStreamDestroy(stream); +} + +void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream) +{ + SpgpuHandleStruct* handle = (SpgpuHandleStruct*)pHandle; + + if (stream) + { + handle->currentStream = stream; + } + else + handle->currentStream = pHandle->defaultStream; +} + +cudaStream_t spgpuGetStream(spgpuHandle_t pHandle) +{ + SpgpuHandleStruct* handle = (SpgpuHandleStruct*)pHandle; + return handle->currentStream; +} + +size_t 
spgpuSizeOf(spgpuType_t typeCode) +{ + switch (typeCode) + { + case SPGPU_TYPE_INT: + return sizeof(int); + case SPGPU_TYPE_FLOAT: + return sizeof(float); + case SPGPU_TYPE_DOUBLE: + return sizeof(double); + case SPGPU_TYPE_COMPLEX_FLOAT: + return sizeof(cuFloatComplex); + case SPGPU_TYPE_COMPLEX_DOUBLE: + return sizeof(cuDoubleComplex); + default: + return 0; // error + } +} diff --git a/cuda/spgpu/core.h b/cuda/spgpu/core.h new file mode 100644 index 00000000..35d5849d --- /dev/null +++ b/cuda/spgpu/core.h @@ -0,0 +1,186 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +/*! \mainpage The spGPU library documentation + * + * \section intro_sec Introduction + * + * spGPU is a set of custom matrix storages and CUDA kernels for sparse linear algebra computing on GPU. It isn't a replacement for cuBLAS/cuSPARSE that should be used for a full featured linear algebra environment on GPU.\n + * The main matrix storage used by spGPU is a GPU-friendly ELLpack format, as well as our HELL (Hacked ELLpack) format, an enhanced version of ELLpack with some interesting memory saving properties.\n + * HELL format provides a better memory storage compared to ELL (it avoids allocation inefficency provided by spikes in row sizes), while providing quite the same performances for sparse matrix-vector multiply routine.. 
+ * + * \section install_sec How to build spgpu + * \subsection linuxbuild Linux (and other unix systems) + * cd spgpu/build/cmake\n + * sh configure.sh\n + * make + * \section cr_sec Copyright + * Copyright (C) 2010 - 2015\n + * Davide Barbieri - University of Rome Tor Vergata\n + * Valeria Cardellini - University of Rome Tor Vergata\n + * Salvatore Filippone - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or\n + * modify it under the terms of the GNU General Public License\n + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful,\n + * but WITHOUT ANY WARRANTY; without even the implied warranty of\n + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n + * GNU General Public License for more details. + */ + +#include "psb_config.h" +#include "psb_types.h" +#include "driver_types.h" +#include "cuComplex.h" + +/** \addtogroup coreFun Core Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/// __host pointers reference host allocations (it's just a placeholder) +#define __host +/// __device pointers reference device allocations (it's just a placeholder) +#define __device + +/// The return code for synchronous functions +typedef int spgpuStatus_t; + +#define SPGPU_SUCCESS 0 +#define SPGPU_UNSUPPORTED 1 +#define SPGPU_UNSPECIFIED 2 +#define SPGPU_OUTOFMEMORY 3 + +/// Code to identify a primitive type +typedef int spgpuType_t; + +#define SPGPU_TYPE_INT 0 +#define SPGPU_TYPE_FLOAT 1 +#define SPGPU_TYPE_DOUBLE 2 +#define SPGPU_TYPE_COMPLEX_FLOAT 3 +#define SPGPU_TYPE_COMPLEX_DOUBLE 4 + +/// this struct should be modified only internally by spgpu +typedef struct spgpuHandleStruct { + /// the current stream used by every calls on spgpu routines (passing this handle) + cudaStream_t currentStream; + /// the default stream, created during the handle creation. 
+ cudaStream_t defaultStream; + /// the device associated to this handle + int device; + /// the warp size for this device + int warpSize; + /// the max threads per block count for this device + int maxThreadsPerBlock; + /// the max size for the X coordinate of the grid dimensions + int maxGridSizeX; + /// the max size for the Y coordinate of the grid dimensions + int maxGridSizeY; + /// the max size for the Z coordinate of the grid dimensions + int maxGridSizeZ; + /// Number of SM + int multiProcessorCount; + // compute capability + int capabilityMajor; + int capabilityMinor; +} SpgpuHandleStruct; + +/// A spGPU handle represents a single CUDA device on your platform. +typedef const SpgpuHandleStruct* spgpuHandle_t; + +/** +* \fn spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device) +* Create a spgpu context for a GPU device. Every call to spgpu routines using this +* handle will execute on the same GPU. This is re-entrant, so it will work if used by multiple host threads. +* \param pHandle outputs the handle +* \param device id of the device to be used by this context +*/ +spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device); + +/** +* \fn void spgpuDestroy(spgpuHandle_t pHandle) +* Destroy the spgpu context for pHandle. +* \param pHandle the handle previously created with spgpuCreate(). +*/ +void spgpuDestroy(spgpuHandle_t pHandle); + +/** +* \fn void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream) +* Create a cuda stream according to the device of the spgpu handle. +* \param stream outputs the new stream +* \param pHandle the handle used to obtain the device id for the stream +*/ +void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream); + +/** +* \fn void spgpuStreamDestroy(cudaStream_t stream) +* Destroy a stream, previously created with spgpuStreamCreate(). 
/*
 * Zero / non-zero tests for values that expose their real and imaginary
 * parts as members x and y.
 *
 * The argument is wrapped in parentheses so that any expression can be
 * passed (e.g. *(ptr + i) or cond ? a : b) without the member access
 * binding to only part of it. The argument is expanded twice, so it must
 * not have side effects.
 */
#define cuFloatComplex_isZero(a) ((a).x == 0.0f && (a).y == 0.0f)
#define cuDoubleComplex_isZero(a) ((a).x == 0.0 && (a).y == 0.0)
#define cuFloatComplex_isNotZero(a) ((a).x != 0.0f || (a).y != 0.0f)
#define cuDoubleComplex_isNotZero(a) ((a).x != 0.0 || (a).y != 0.0)
#ifndef _WIN32
#include <execinfo.h>

/*
 * Print up to 32 frames of the current call stack to stdout, one symbol
 * string per line. Declared static inline so that including this header
 * from several C translation units neither produces duplicate symbols nor
 * requires a separate external definition.
 */
static inline
void printTrace (void)
{
	void *array[32];
	char **strings;
	int size;	/* backtrace() reports the frame count as an int */
	int i;

	size = backtrace (array, 32);
	strings = backtrace_symbols (array, size);

	printf ("---- Obtained %d stack frames.\n", size);

	/* backtrace_symbols() allocates its result and may fail */
	if (strings == NULL)
		return;

	for (i = 0; i < size; i++)
		printf ("%s\n", strings[i]);

	free (strings);
}
#endif

/*
 * Run-time check: when `e` is zero, print the message `w` (followed by a
 * stack trace on non-Windows builds) and terminate the process with a
 * failure status, so that scripts and test drivers can detect the abort.
 * Does nothing when `e` is non-zero.
 */
static inline void __assert(int e, const char* w)
{
	if (!e)
	{
		printf("%s\n",w);

#ifndef _WIN32
		printTrace();
#endif

		/* a failed assertion must not be reported as success */
		exit(EXIT_FAILURE);
	}
}
/*
 * Pitch (in elements) suitable for both the indices and the values array
 * of an ELL matrix: for a non-negative rowsCount this is rowsCount rounded
 * up to a multiple of 32.
 */
int computeEllAllocPitch(int rowsCount)
{
	const int alignment = 32;
	const int chunks = (rowsCount + (alignment - 1)) / alignment;

	return chunks * alignment;
}
app_rIdx[k-start]; + } +} + +void mergesort(int *dstRs, int *rIdx, int size) { + int* app_dstRs = (int*)malloc(size*sizeof(int)); + int* app_rIdx = (int*)malloc(size*sizeof(int)); + + int sizetomerge=size-1; + size--; + int i; + int n=2; + + while (nsizetomerge) + merge (app_dstRs, app_rIdx, dstRs, rIdx, sizetomerge -((sizetomerge)%n),sizetomerge,size,size); + sizetomerge=sizetomerge-((sizetomerge+1)%n);} + n=n*2; + } + + if (size>sizetomerge) + merge (app_dstRs, app_rIdx, dstRs,rIdx,0,size-(size-sizetomerge),size,size); + + free(app_dstRs); + free(app_rIdx); +} + + + +void ellToOell( + int *rIdx, + void *dstEllValues, + int *dstEllIndices, + int *dstRs, + const void *srcEllValues, + const int *srcEllIndices, + const int *srcRs, + int ellValuesPitch, + int ellIndicesPitch, + int rowsCount, + spgpuType_t valuesType + ) +{ + size_t elementSize = spgpuSizeOf(valuesType); + + int i,j; + for (i=0; i + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void computeEllRowLenghts(int *ellRowLengths, int *ellMaxRowSize, int rowsCount, int nonZerosCount, const int* cooRowIndices, int cooBaseIndex) + * Compute the Ell row lengths array (and the greatest row size) from the COO matrix format. + * \param ellRowLengths Array of length rowsCount to be filled by the non zeros count for every matrix row + * \param ellMaxRowSize outputs the greatest row size (in non zeros) + * \param rowsCount the number of rows of the coo matrix to convert + * \param nonZerosCount the non zeros count of the coo matrix to convert + * \param cooRowIndices the row indices array for the coo matrix to convert + * \param cooBaseIndex the input base index (e.g. 
0 for C, 1 for Fortran) + */ +void computeEllRowLenghts( + int *ellRowLengths, + int *ellMaxRowSize, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + int cooBaseIndex + ); + +/** +* \fn int computeEllAllocPitch(int rowsCount) + * This function returns a pitch (in number of elements) that can be used to allocate both indices and values arrays for ELL matrix format. + * \param rowsCount the rows count + * \return the pitch for an ELL matrix of rowsCount rows. +*/ +int computeEllAllocPitch(int rowsCount); + + +/** +* \fn void cooToEll(void *ellValues,int *ellIndices,int ellValuesPitch,int ellIndicesPitch,int ellMaxRowSize,int ellBaseIndex,int rowsCount,int nonZerosCount,const int* cooRowIndices,const int* cooColsIndices,const void* cooValues,int cooBaseIndex, spgpuType_t valuesType) + * Convert a matrix in COO format to a matrix in ELL format. + * The matrix is stored in column-major format. The ellValues and ellIndices sizes are ellMaxRowSize * pitch (pitch is in bytes). + * \param ellValues pointer to the area that will be filled by the non zero coefficients + * \param ellIndices pointer to the area that will be filled by the non zero indices + * \param ellValuesPitch the column-major allocation's pitch of ellValues (in number of elements) + * \param ellIndicesPitch the column-major allocation's pitch of ellIndices (in number of elements) + * \param ellMaxRowSize the greatest row size + * \param ellBaseIndex the desired base index for the ELL matrix (e.g. 0 for C, 1 for Fortran) + * \param rowsCount input matrix rows count + * \param nonZerosCount input matrix non zeros count + * \param cooRowIndices input matrix row indices pointer + * \param cooColsIndices input matrix column indices pointer + * \param cooValues input matrix non zeros values pointer + * \param cooBaseIndex input matrix base index + * \param valuesType the type for elements in ellValues and cooValues (i.e. 
/*
 * Number of hacks (groups of hackSize consecutive rows) needed to cover
 * rowsCount rows, i.e. rowsCount / hackSize rounded up.
 * hackSize must be non-zero.
 */
int getHdiaHacksCount(int hackSize, int rowsCount)
{
	const int roundedUp = rowsCount + hackSize - 1;

	return roundedUp / hackSize;
}
int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + int cooBaseIndex + ) +{ + + int i,j,h; + + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + + // Find rows per hack + std::vector *rowsPerHack = new std::vector [hackCount]; + + for (i=0; i diagIdsToPos; + + hackOffsets[0] = 0; + for (h=0; h *hackRows = &rowsPerHack[h]; + int hackRowsSize = hackRows->size(); + + for (j=0; jat(j); + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int diagId = (colIdx-cooBaseIndex) - ((rowIdx-cooBaseIndex) % hackSize); + int diagPos = hackSize - 1 + diagId; + + std::map::iterator it = diagIdsToPos.find(diagPos); + + if(it == diagIdsToPos.end()) + { + diagIdsToPos[diagPos] = 1; + ++diagonalsCount; + } + } + + hackOffsets[h+1] = hackOffsets[h] + diagonalsCount; + } + + *allocationHeight = hackOffsets[hackCount]; + + delete[] rowsPerHack; +} + +void cooToHdia_size( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + size_t elementSize + ) +{ + int i,j,h; + + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + // Find rows per hack + std::vector *rowsPerHack = new std::vector [hackCount]; + + for (i=0; i hackDiagIdsToPos; + + for (h=0; h *hackRows = &rowsPerHack[h]; + int hackRowsSize = hackRows->size(); + + for (j=0; jat(j); + + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int globalDiagId = colIdx - rowIdx; + int diagId = (colIdx - cooBaseIndex) - ((rowIdx - cooBaseIndex) % hackSize); + int diagPos = hackSize - 1 + diagId; + + std::map::iterator it = hackDiagIdsToPos.find(diagPos); + + if(it == hackDiagIdsToPos.end()) + { + hackDiagIdsToPos[diagPos] = globalDiagId; + } + } + + // Reorder diags + for (std::map::iterator it = hackDiagIdsToPos.begin(); it != 
hackDiagIdsToPos.end(); ++it) + { + int i = it->first; + + int globalDiagId = it->second; + int diagPosInsideOffsets; + int diagId = i - hackSize + 1; + hackDiagIdsToPos[i] = diagPosInsideOffsets = diagonalsCount++; + hdiaOffsets[diagPosInsideOffsets] = globalDiagId; + } + + + hdiaOffsets += diagonalsCount; + + for (j=0; jat(j); + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int diagId = (colIdx - cooBaseIndex) - ((rowIdx - cooBaseIndex) % hackSize); + + int diagPosInsideOffsets = hackDiagIdsToPos[hackSize - 1 + diagId]; + + char* valAddr = (char*)hdiaValues + + elementSize*(((rowIdx - cooBaseIndex) % hackSize) + + hackSize* (hackOffsets[h] + diagPosInsideOffsets)); + + memcpy(valAddr, (const char*)cooValues + i*elementSize, elementSize); + } + } + + delete[] rowsPerHack; +} + + + +void cooToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ) +{ + size_t elementSize = spgpuSizeOf(valuesType); + + cooToHdia_size(hdiaValues, hdiaOffsets, + hackOffsets, hackSize, rowsCount, + columnsCount, nonZerosCount, + cooRowIndices, cooColsIndices, cooValues, cooBaseIndex, elementSize); +} + +void bcooToBhdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType, + int blockSize + ) +{ + size_t elementSize = blockSize*spgpuSizeOf(valuesType); + + cooToHdia_size(hdiaValues, hdiaOffsets, + hackOffsets, hackSize, rowsCount, + columnsCount, nonZerosCount, + cooRowIndices, cooColsIndices, cooValues, cooBaseIndex, elementSize); +} + diff --git a/cuda/spgpu/hdia.h b/cuda/spgpu/hdia.h new file mode 100644 index 00000000..e8808fb7 --- 
/dev/null +++ b/cuda/spgpu/hdia.h @@ -0,0 +1,159 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2013 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" + + +/** \addtogroup diaFun DIA/HDIA Format + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +/** +* \fn spgpuShdiaspmv (spgpuHandle_t handle, float* z, const float *y, float alpha, const float* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const float *x, float beta) + * Computes single precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). 
+ * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuShdiaspmv (spgpuHandle_t handle, + float* z, + const float *y, + float alpha, + const float* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const float *x, + float beta); + + +/** +* \fn spgpuDhdiaspmv (spgpuHandle_t handle, double* z, const double *y, double alpha, const double* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const double *x, double beta) + * Computes double precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). 
+ * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuDhdiaspmv (spgpuHandle_t handle, + double* z, + const double *y, + double alpha, + const double* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const double *x, + double beta); + + +/** +* \fn spgpuChdiaspmv (spgpuHandle_t handle, cuFloatComplex* z, const cuFloatComplex *y, cuFloatComplex alpha, const cuFloatComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuFloatComplex *x, cuFloatComplex beta) + * Computes single precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). 
+ * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuChdiaspmv (spgpuHandle_t handle, + cuFloatComplex* z, + const cuFloatComplex *y, + cuFloatComplex alpha, + const cuFloatComplex* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const cuFloatComplex *x, + cuFloatComplex beta); + + +/** +* \fn spgpuZhdiaspmv (spgpuHandle_t handle, cuDoubleComplex* z, const cuDoubleComplex *y, cuDoubleComplex alpha, const cuDoubleComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuDoubleComplex *x, cuDoubleComplex beta) + * Computes double precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). 
+ * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuZhdiaspmv (spgpuHandle_t handle, + cuDoubleComplex* z, + const cuDoubleComplex *y, + cuDoubleComplex alpha, + const cuDoubleComplex* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const cuDoubleComplex *x, + cuDoubleComplex beta); + +/** @}*/ + +#ifdef __cplusplus +} +#endif + diff --git a/cuda/spgpu/hdia_conv.h b/cuda/spgpu/hdia_conv.h new file mode 100644 index 00000000..c22bf990 --- /dev/null +++ b/cuda/spgpu/hdia_conv.h @@ -0,0 +1,102 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2013 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "core.h" + + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int getHdiaHacksCount(int hackSize, int rowsCount); + +void computeHdiaHackOffsets( + int *allocationHeight, + int *hackOffsets, + int hackSize, + const void* diaValues, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType); + +void diaToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + const void* diaValues, + const int* diaOffsets, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType + ); + +void computeHdiaHackOffsetsFromCoo( + int *allocationHeight, + int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + int cooBaseIndex + ); + +void cooToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ); + +void bcooToBhdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType, + int blockSize); + +#ifdef __cplusplus +} +#endif + + +/** @}*/ diff --git a/cuda/spgpu/hell.c b/cuda/spgpu/hell.c new file mode 100644 index 00000000..07764ee3 --- /dev/null +++ b/cuda/spgpu/hell.c @@ -0,0 +1,104 @@ +#include "hell.h" +#include "hell_conv.h" + +void computeHellAllocSize( + int* allocationHeight, + int hackSize, + int rowsCount, + const int *ellRowLengths + ) +{ + int totalLen = 0; + int i; + int remainings; + int done; + int maxLen; + + for (i=0; i maxLen) + maxLen = curLen; + } + totalLen += maxLen; + } + + remainings = 
rowsCount % hackSize; + done = (rowsCount/hackSize)*hackSize; + maxLen = 0; + + for (i=0; i maxLen) + maxLen = curLen; + } + + *allocationHeight = totalLen + maxLen; +} + +void ellToHell( + void *hellValues, + int *hellIndices, + int* hackOffsets, + int hackSize, + + const void *ellValues, + const int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int *ellRowLengths, + int rowsCount, + spgpuType_t valuesType + ) +{ + + size_t elementSize = spgpuSizeOf(valuesType); + + int hacks = (rowsCount + hackSize - 1)/hackSize; + + char* currValPos = (char*)hellValues; + int* currIndPos = hellIndices; + + int hackOffset = 0; + int i; + for (i=0; i= rowsCount) + break; + + rowLen = ellRowLengths[row]; + + if (rowLen > maxLen) + maxLen = rowLen; + + for (k=0; k + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void computeHellAllocSize(int* allocationHeight, int hackSize, int rowsCount, const int *ellRowLengths) + * Compute the HELL format allocation's height for the Hell allocation + * (the resulting size should be allocationHeight*hackSize*sizeof(elementsType)). + * \param allocationHeight outputs the total allocation's height + * \param hackSize the hack size for this matrix (32 or 64 are good choices) + * \param rowsCount the rows count + * \param ellRowLengths the row lengths array from the ell matrix to convert +*/ +void computeHellAllocSize( + int* allocationHeight, + int hackSize, + int rowsCount, + const int *ellRowLengths + ); + +/** +* \fn void ellToHell(void *hellValues, int *hellIndices, int* hackOffsets, int hackSize, const void *ellValues, const int *ellIndices, int ellValuesPitch, int ellIndicesPitch, int *ellRowLengths, int rowsCount, spgpuType_t valuesType) + * Convert a matrix from the ELL format to the HELL format. 
+ * \param hellValues pointer to the area that will be filled by the non zero coefficients + * \param hellIndices pointer to the area that will be filled by the non zero indices + * \param hackOffsets + * \param hackSize the hack size used to allocate hellValues and hellIndices (32 or 64 are good choices) + * \param ellValues the input matrix coefficients + * \param ellIndices the input matrix indices + * \param ellValuesPitch the input values allocation pitch (in number of elements) + * \param ellIndicesPitch the input indices allocation pitch (in number of elements) + * \param ellRowLengths the row lengths array of the input matrix + * \param rowsCount the rows count + * \param valuesType the type of hellValues and ellValues elements (i.e. SPGPU_TYPE_FLOAT or SPGPU_TYPE_DOUBLE) +*/ +void ellToHell( + void *hellValues, + int *hellIndices, + int* hackOffsets, + int hackSize, + const void *ellValues, + const int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int *ellRowLengths, + int rowsCount, + spgpuType_t valuesType + ); + +#ifdef __cplusplus +} +#endif + + +/** @}*/ diff --git a/cuda/spgpu/kernels/Makefile b/cuda/spgpu/kernels/Makefile new file mode 100644 index 00000000..37113c75 --- /dev/null +++ b/cuda/spgpu/kernels/Makefile @@ -0,0 +1,34 @@ +TOP=../../.. +include $(TOP)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOP)/lib +INCDIR=$(TOP)/include +MODDIR=$(TOP)/modules +UP=.. 
+LIBNAME=$(UP)/libspgpu.a +CINCLUDES=-I$(INCDIR) +CU_INCLUDES=-I$(INCDIR) $(CUDA_INCLUDES) +OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \ + cnrm2.o cscal.o cscat.o csetscal.o cupd_xyz.o\ + dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o dupd_xyz.o\ + dia_cspmv.o dia_dspmv.o dia_sspmv.o dia_zspmv.o dnrm2.o \ + dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o \ + ell_dcsput.o ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \ + hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \ + hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \ + saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o supd_xyz.o\ + zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zupd_xyz.o \ + sxyzw.o cxyzw.o dxyzw.o zxyzw.o + +objs: $(OBJS) +lib: objs + $(AR) $(UP)/$(LIBNAME) $(OBJS) + + +clean: + /bin/rm -fr $(OBJS) +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) $(CU_INCLUDES) -c $< + diff --git a/cuda/spgpu/kernels/abs_base.cuh b/cuda/spgpu/kernels/abs_base.cuh new file mode 100644 index 00000000..d48f815d --- /dev/null +++ b/cuda/spgpu/kernels/abs_base.cuh @@ -0,0 +1,110 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),abs) + +#define BLOCK_SIZE 256 + +// Define: +//#define RES_VALUE_TYPE +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__device__ __host__ static inline bool is_one_float(float x) { return (x==1.0f); } +__device__ __host__ static inline bool is_one_cuFloatComplex(cuFloatComplex x) { return ((x.x==1.0f)&&(x.y==0.0f));} + + +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static inline bool is_one_double(double x) { return (x==1.0); } +__device__ __host__ static inline bool is_one_cuDoubleComplex(cuDoubleComplex x) { return ((x.x==1.0)&&(x.y==0.0));} +#endif + + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern_alpha) + (RES_VALUE_TYPE *y, int n, RES_VALUE_TYPE alpha, VALUE_TYPE* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since y, and x are accessed with the same offset by the same thread, + // and the write to y follows the read of x, then x could be y. + + y[id] = CONCAT(RES_VALUE_TYPE, _mul)(alpha, CONCAT(VALUE_TYPE, _abs)(x[id])); + } +} + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (RES_VALUE_TYPE *y, int n, VALUE_TYPE* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since y, and x are accessed with the same offset by the same thread, + // and the write to y follows the read of x, then x could be y. 
+ + y[id] = CONCAT(VALUE_TYPE, _abs)(x[id]); + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, RES_VALUE_TYPE *y, int n, RES_VALUE_TYPE alpha, VALUE_TYPE* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + + if (CONCAT(is_one_,RES_VALUE_TYPE)(alpha)) + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(y, n, x); + else + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern_alpha)<<currentStream>>>(y, n, alpha, x); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device RES_VALUE_TYPE *y, + int n, + RES_VALUE_TYPE alpha, + __device VALUE_TYPE *x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, maxNForACall, alpha, x); + x = x + maxNForACall; + y = y + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) (handle, y, n, alpha, x); + cudaCheckError("CUDA error on abs"); +} diff --git a/cuda/spgpu/kernels/amax_base.cuh b/cuda/spgpu/kernels/amax_base.cuh new file mode 100644 index 00000000..4d7e7d4a --- /dev/null +++ b/cuda/spgpu/kernels/amax_base.cuh @@ -0,0 +1,233 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_AMAX_NAME +#define GEN_SPGPU_AMAX_NAME(x) CONCAT(CONCAT(spgpu,x),amax) + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#define BLOCK_SIZE 512 + +typedef float absType_float; +typedef float absType_cuFloatComplex; +typedef double absType_double; +typedef double absType_cuDoubleComplex; + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ __host__ static float abs_float(float a) { return fabsf(a); } +__device__ __host__ static float abs_cuFloatComplex(cuFloatComplex a) { return cuCabsf(a); } + + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ __host__ static 
double abs_double(float a) { return fabs(a); } +__device__ __host__ static double abs_cuDoubleComplex(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +static __device__ CONCAT(absType_,VALUE_TYPE) CONCAT(TYPE_SYMBOL,amaxReductionResult)[128]; + +#define MAX(a,b) ((a) > (b) ? 
(a) : (b)) + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxvv(VALUE_TYPE a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absa = CONCAT(abs_,VALUE_TYPE)(a); + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return MAX(absa,absb); +} + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxaa(CONCAT(absType_,VALUE_TYPE) a, CONCAT(absType_,VALUE_TYPE) b) +{ + return MAX(a,b); +} + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxav(CONCAT(absType_,VALUE_TYPE) a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return MAX(a,absb); +} + +__global__ void +CONCAT(GEN_SPGPU_AMAX_NAME(TYPE_SYMBOL),_kern) +(int n, VALUE_TYPE* x) +{ + __shared__ CONCAT(absType_,VALUE_TYPE) sSum[BLOCK_SIZE]; + + CONCAT(absType_,VALUE_TYPE) res = 0; + + VALUE_TYPE* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + VALUE_TYPE x1 = x[0]; + res = amaxav(res, x1); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! 
+ + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + CONCAT(absType_,VALUE_TYPE) tRes[128]; + + CONCAT(GEN_SPGPU_AMAX_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, CONCAT(TYPE_SYMBOL,amaxReductionResult), blocks*sizeof(CONCAT(absType_,VALUE_TYPE))); + + for (int i=0; i= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ __host__ static double abs_double(float a) { return fabs(a); } +__device__ __host__ static double abs_cuDoubleComplex(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ __host__ static double double_add(double a, double b) { +#ifndef __CUDA__ARCH__ + return a + b; +#else + return PREC_DADD (a, b); +#endif +} +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +static __device__ CONCAT(absType_,VALUE_TYPE) CONCAT(TYPE_SYMBOL,asumReductionResult)[128]; + + +static 
__device__ CONCAT(absType_,VALUE_TYPE) asumvv(VALUE_TYPE a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absa = CONCAT(abs_,VALUE_TYPE)(a); + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return absa + absb; +} + +static __device__ __host__ CONCAT(absType_,VALUE_TYPE) asumaa(CONCAT(absType_,VALUE_TYPE) a, CONCAT(absType_,VALUE_TYPE) b) +{ + return a + b; +} + +static __device__ CONCAT(absType_,VALUE_TYPE) asumav(CONCAT(absType_,VALUE_TYPE) a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return a + absb; +} + +__global__ void +CONCAT(GEN_SPGPU_ASUM_NAME(TYPE_SYMBOL),_kern) +(int n, VALUE_TYPE* x) +{ + __shared__ CONCAT(absType_,VALUE_TYPE) sSum[BLOCK_SIZE]; + + CONCAT(absType_,VALUE_TYPE) res = 0; + + VALUE_TYPE* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + VALUE_TYPE x1 = x[0]; + res = asumav(res, x1); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! 
+ + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + CONCAT(absType_,VALUE_TYPE) tRes[128]; + + CONCAT(GEN_SPGPU_ASUM_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, CONCAT(TYPE_SYMBOL,asumReductionResult), blocks*sizeof(CONCAT(absType_,VALUE_TYPE))); + + for (int i=0; icurrentStream>>>(z, n, alpha, x, y); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *z, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE *x, + __device VALUE_TYPE *y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, z, maxNForACall, alpha, x, y); + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) (handle, z, n, alpha, x, y); + cudaCheckError("CUDA error on axy"); +} + +void +GEN_SPGPU_MFUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *z, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y, + int count, + int pitch) +{ + for (int i=0; icurrentStream>>>(w, n, beta, z, alpha, x, y); + +} + +void +GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *w, + int n, + VALUE_TYPE beta, + __device VALUE_TYPE *z, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y + ) +{ + + if (CONCAT(VALUE_TYPE, _isZero(alpha))) + { + GEN_SPGPU_SCAL_NAME(TYPE_SYMBOL) + (handle, w, n, beta, z); + } + else if (CONCAT(VALUE_TYPE, _isZero(beta))) { + GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (handle, w, n, alpha, x, y); + } + else { + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + + CONCAT(GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL),_) + (handle, w, maxNForACall, beta, z, alpha, x, y); + + 
x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + w = w + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL),_) + (handle, w, n, beta, z, alpha, x, y); + } + + cudaCheckError("CUDA error on axypbz"); +} + +void +GEN_SPGPU_MFUNC_NAME_2(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *w, + int n, + VALUE_TYPE beta, + __device VALUE_TYPE *z, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y, + int count, + int pitch) +{ + for (int i=0; icurrentStream>>>(z, n, beta, y, alpha, x); +} + +#else + + +__global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (cuFloatComplex_isZero(beta)) + z[id] = cuCmulf(alpha,x[id]); + else + z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id])); + } +} + + +void spgpuCaxpby_(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuCaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + spgpuCaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuCaxpby_(handle, z, n, 
beta, y, alpha, x); + + cudaCheckError("CUDA error on saxpby"); +} +#endif +void spgpuCmaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + + cuFloatComplex tRes[128]; + + spgpuCdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, sdotReductionResult, blocks*sizeof(cuFloatComplex)); + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + + spgpuCnrm2_kern<<currentStream>>>(n, x); + cudaMemcpyFromSymbol(tRes, snrm2ReductionResult,blocks*sizeof(float)); + + for (int i=0; i= 200 +#define PREC_FADD(a,b) ((a) + (b)) +#define PREC_FMUL(a,b) ((a) * (b)) +#else +#define PREC_FADD(a,b) __fadd_rn((a),(b)) +#define PREC_FMUL(a,b) __fmul_rn((a),(b)) +#endif + +#define PREC_DADD(a,b) ((a) + (b)) +#define PREC_DMUL(a,b) ((a) * (b)) + + +inline __host__ __device__ double2 make_double2(double s) +{ + return make_double2(s, s); +} + +inline __host__ __device__ double2 operator+(double2 a, double2 b) +{ + return make_double2(a.x + b.x, a.y + b.y); +} + +inline __host__ __device__ void operator+=(double2 &a, double2 b) +{ + a.x += b.x; a.y += b.y; +} + +inline __host__ __device__ double2 operator-(double2 a, double2 b) +{ + return make_double2(a.x - b.x, a.y - b.y); +} + +inline __host__ __device__ void operator-=(double2 &a, double2 b) +{ + a.x -= b.x; a.y -= b.y; +} + +inline __host__ __device__ double2 operator*(double2 a, double s) +{ + return make_double2(a.x * s, a.y * s); +} + +inline __host__ __device__ double2 operator*(double s, double2 a) 
+{ + return make_double2(a.x * s, a.y * s); +} + +inline __host__ __device__ void operator*=(double2 &a, double s) +{ + a.x *= s; a.y *= s; +} diff --git a/cuda/spgpu/kernels/cupd_xyz.cu b/cuda/spgpu/kernels/cupd_xyz.cu new file mode 100644 index 00000000..e8dc9300 --- /dev/null +++ b/cuda/spgpu/kernels/cupd_xyz.cu @@ -0,0 +1,79 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuCupd_xyz_krn(int n, cuFloatComplex alpha, cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + cuFloatComplex * x, cuFloatComplex *y, cuFloatComplex *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuFloatComplex t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (cuFloatComplex_isZero(beta)) + t = cuCmulf(alpha,x[id]); + else + t = cuCfmaf(alpha, x[id], cuCmulf(beta,y[id])); + if (cuFloatComplex_isZero(delta)) + z[id] = cuCmulf(gamma, t); + else + z[id] = cuCfmaf(gamma, t, cuCmulf(delta,z[id])); + y[id] = t; + } +} + + +void spgpuCupd_xyz(spgpuHandle_t handle, + int n, + cuFloatComplex alpha, + cuFloatComplex beta, + cuFloatComplex gamma, + cuFloatComplex delta, + __device cuFloatComplex * x, + __device cuFloatComplex * y, + 
__device cuFloatComplex *z) +{ + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/cxyzw.cu b/cuda/spgpu/kernels/cxyzw.cu new file mode 100644 index 00000000..783c7714 --- /dev/null +++ b/cuda/spgpu/kernels/cxyzw.cu @@ -0,0 +1,77 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "cudadebug.h" +#include "cudalang.h" +#include +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuCxyzw_krn(int n, cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + cuFloatComplex * x, cuFloatComplex *y, + cuFloatComplex *z, cuFloatComplex *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuFloatComplex ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = cuCfmaf(a, x[id], cuCmulf(b,y[id])); + tz = cuCfmaf(c, ty, cuCmulf(d,z[id])); + w[id] = cuCfmaf(e, tz, cuCmulf(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuCxyzw(spgpuHandle_t handle, + int n, + cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + __device cuFloatComplex * x, + __device cuFloatComplex * y, + __device cuFloatComplex * z, + __device cuFloatComplex *w) +{ + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/dabs.cu b/cuda/spgpu/kernels/dabs.cu new file mode 100644 index 00000000..a27f21e4 --- /dev/null +++ b/cuda/spgpu/kernels/dabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define RES_VALUE_TYPE double +#define TYPE_SYMBOL D +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/damax.cu b/cuda/spgpu/kernels/damax.cu new file mode 100644 index 00000000..eee0b065 --- /dev/null +++ b/cuda/spgpu/kernels/damax.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "amax_base.cuh" + diff --git a/cuda/spgpu/kernels/dasum.cu b/cuda/spgpu/kernels/dasum.cu new file mode 100644 index 00000000..5d7d7f9b --- /dev/null +++ b/cuda/spgpu/kernels/dasum.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. 
+ * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "asum_base.cuh" + diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu new file mode 100644 index 00000000..41e493b1 --- /dev/null +++ b/cuda/spgpu/kernels/daxpby.cu @@ -0,0 +1,146 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "cudadebug.h" +#include "cudalang.h" +#include +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + + +#if 1 +__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + if (beta == 0.0) { + for ( ; id < n; id +=gridSize) + { + + z[id] = PREC_DMUL(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + { + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } + } +} + +void spgpuDaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else + +__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). 
+ + if (beta == 0.0) + z[id] = PREC_DMUL(alpha,x[id]); + else + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } +} + + +void spgpuDaxpby_(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuDaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuDaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuDaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on daxpby"); +} + +#endif +void spgpuDmaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! 
+ + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + double tRes[128]; + + spgpuDdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, ddotReductionResult,blocks*sizeof(double)); + + for (int i=0; i= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif + +#define GEN_SPGPU_DIA_NAME(x) 
CONCAT(CONCAT(spgpu,x),diaspmv_vanilla)
+#define GEN_SPGPU_DIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),diaspmv_vanilla)
+#include "dia_spmv_base_template.cuh"
+#if 0
+#undef GEN_SPGPU_DIA_NAME
+#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_prefetch)
+#define GEN_SPGPU_DIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),diaspmv_prefetch)
+#undef USE_PREFETCHING
+#define USE_PREFETCHING
+#include "dia_spmv_base_template.cuh"
+#define ENABLE_CACHE
+#undef ENABLE_CACHE
+#undef GEN_SPGPU_DIA_NAME
+#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache_prefetch)
+#define GEN_SPGPU_DIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache_prefetch)
+#include "dia_spmv_base_template.cuh"
+#undef GEN_SPGPU_DIA_NAME
+#undef USE_PREFETCHING
+#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache)
+#define GEN_SPGPU_DIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache)
+#include "dia_spmv_base_template.cuh"
+#endif
+// Public DIA SpMV entry point, spgpu<TYPE_SYMBOL>diaspmv. Only the "vanilla" variant is compiled; every call below goes to it.
+#undef GEN_SPGPU_DIA_NAME
+#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv)
+void
+GEN_SPGPU_DIA_NAME(TYPE_SYMBOL)
+(spgpuHandle_t handle,       // read here for maxGridSizeX, which sets the per-launch row limit
+  VALUE_TYPE* z,             // output vector; advanced by the batch size between launches
+  const VALUE_TYPE *y,       // input vector combined with beta; advanced together with z
+  VALUE_TYPE alpha,          // forwarded unchanged to the kernel launcher
+  const VALUE_TYPE* dM,      // matrix values; advanced by the batch size between launches
+  const int* offsets,        // forwarded unchanged
+  int dMPitch,               // forwarded unchanged
+  int rows,                  // total rows to process; consumed batch by batch
+  int cols,                  // forwarded unchanged
+  int diags,                 // forwarded unchanged
+  const VALUE_TYPE *x,       // forwarded unchanged (never advanced between batches)
+  VALUE_TYPE beta)           // forwarded unchanged
+{
+  int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX);
+  // NOTE(review): the product is computed in int; max() looks like a guard against it wrapping for large maxGridSizeX - confirm.
+  while (rows > maxNForACall) //managing large vectors
+    {
+      //if (diags < 10 && handle->capabilityMajor > 1)
+      //  CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta);
+      //else
+      CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta);
+      #if 0
+      if (diags < 20)
+        CONCAT(_,GEN_SPGPU_DIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta);
+      else
+        CONCAT(_,GEN_SPGPU_DIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta);
+#endif
+      y = y + maxNForACall;
+      z = z + maxNForACall;
+      dM = dM + maxNForACall;
+      // NOTE(review): x, offsets and cols stay fixed while the next launch restarts its row index at 0; verify column addressing for batches after the first.
+      rows -= maxNForACall;
+    }
+  CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); // remaining (or only) batch
+#if 0
+  //if (diags < 10 && handle->capabilityMajor > 1)
+  //  CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta);
+  //else
+  if (diags < 20)
+    CONCAT(_,GEN_SPGPU_DIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta);
+  else
+    CONCAT(_,GEN_SPGPU_DIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta);
+#endif
+  cudaCheckError("CUDA error on dia_spmv");
+}
+
diff --git a/cuda/spgpu/kernels/dia_spmv_base_template.cuh b/cuda/spgpu/kernels/dia_spmv_base_template.cuh
new file mode 100644
index 00000000..792fa7ed
--- /dev/null
+++ b/cuda/spgpu/kernels/dia_spmv_base_template.cuh
@@ -0,0 +1,217 @@
+/*
+ * spGPU - Sparse matrices on GPU library.
+ *
+ * Copyright (C) 2010 - 2015
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */ + + +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x, VALUE_TYPE beta) +{ + int i = threadIdx.x + blockIdx.x * (blockDim.x); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows && CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + dM += i; + + extern __shared__ int offsetsChunk[]; + + int rounds = (diags + blockDim.x - 1)/blockDim.x; + + for (int r = 0; r < rounds; r++) + { + // in the last round diags will be <= blockDim.x + if (threadIdx.x < diags) + offsetsChunk[threadIdx.x] = offsets[threadIdx.x]; + + __syncthreads(); + + if (i < rows) + { + int count = min(diags, blockDim.x ); + + +#ifdef USE_PREFETCHING + int j; + for (j=0; j<=count-3; j += 3) + { + // Prefetch 3 values + int column1 = offsetsChunk[j] + i; + int column2 = offsetsChunk[j+1] + i; + int column3 = offsetsChunk[j+2] + i; + + bool inside1 = column1 >= 0 && column1 < cols; + bool inside2 = column2 >= 0 && column2 < cols; + bool inside3 = column3 >= 0 && column3 < cols; + + // Anticipate global memory read + + VALUE_TYPE xValue1, xValue2, xValue3; + VALUE_TYPE mValue1, mValue2, mValue3; + + if(inside1) + { + mValue1 = dM[0]; +#ifdef ENABLE_CACHE + xValue1 = fetchTex (column1); +#else + xValue1 = x[column1]; +#endif + } + dM += dMPitch; + + if(inside2) + { + mValue2 = dM[0]; +#ifdef ENABLE_CACHE + xValue2 = fetchTex (column2); +#else + xValue2 = x[column2]; +#endif + } + dM += dMPitch; + + if(inside3) + { + mValue3 = dM[0]; +#ifdef ENABLE_CACHE + xValue3 = fetchTex (column3); +#else + xValue3 = x[column3]; +#endif + } + dM += dMPitch; + + if(inside1) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue1, xValue1, zProd); + if(inside2) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue2, xValue2, zProd); + if(inside3) + zProd = CONCAT(VALUE_TYPE, 
_fma)(mValue3, xValue3, zProd); + } + + for (;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + + } + + dM += dMPitch; + } +#else + for (int j=0; j= 0 && column < cols) + { + + VALUE_TYPE xValue; + +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += dMPitch; + } +#endif + } + + diags -= blockDim.x; + offsets += blockDim.x; + __syncthreads(); + } + + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (i >= rows) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[i] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[i] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x) +{ + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, CONCAT(zero_,VALUE_TYPE)()); +} + +__global__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x, VALUE_TYPE beta) +{ + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +} + +void +CONCAT(_,GEN_SPGPU_DIA_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* 
dM, const int* offsets, int dMPitch, int rows, int cols, int diags, + const VALUE_TYPE *x, VALUE_TYPE beta) +{ + dim3 block (THREAD_BLOCK ); + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK ); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + else + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x); + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/dia_sspmv.cu b/cuda/spgpu/kernels/dia_sspmv.cu new file mode 100644 index 00000000..17464b43 --- /dev/null +++ b/cuda/spgpu/kernels/dia_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "dia.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "dia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/dia_zspmv.cu b/cuda/spgpu/kernels/dia_zspmv.cu new file mode 100644 index 00000000..4f53bf5e --- /dev/null +++ b/cuda/spgpu/kernels/dia_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" +#include "core.h" + +extern "C" +{ +#include "dia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "dia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/dnrm2.cu b/cuda/spgpu/kernels/dnrm2.cu new file mode 100644 index 00000000..aae3edfc --- /dev/null +++ b/cuda/spgpu/kernels/dnrm2.cu @@ -0,0 +1,157 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + + +//#define USE_CUBLAS + +//#define ASSUME_LOCK_SYNC_PARALLELISM + + +#define BLOCK_SIZE 512 + +static __device__ double dnrm2ReductionResult[128]; + +__global__ void spgpuDnrm2_kern(int n, double* x) +{ + __shared__ double sSum[BLOCK_SIZE]; + + double res = 0; + + double* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + double x1 = x[0]; + res = PREC_DADD(res, PREC_DMUL(x1, x1)); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + double tRes[128]; + + spgpuDnrm2_kern<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, dnrm2ReductionResult,blocks*sizeof(double)); + + for (int i=0; i +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuDupd_xyz_krn(int n, double alpha, double beta, double gamma, double delta, + double* x, double *y, double *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + double t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (beta == 0.0) + t = PREC_DMUL(alpha,x[id]); + else + t = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + if (delta == 0.0) + z[id] = gamma * t; + else + z[id] = PREC_DADD(PREC_DMUL(gamma, t), PREC_DMUL(delta,z[id])); + y[id] = t; + } +} + + +void 
spgpuDupd_xyz(spgpuHandle_t handle,  // supplies the CUDA stream used for the launch
+              int n,                 // number of elements the kernel loop covers
+              double alpha,          // forwarded to spgpuDupd_xyz_krn
+              double beta,           // forwarded to spgpuDupd_xyz_krn
+              double gamma,          // forwarded to spgpuDupd_xyz_krn
+              double delta,          // forwarded to spgpuDupd_xyz_krn
+              __device double* x,    // device vector, forwarded to the kernel
+              __device double* y,    // device vector, forwarded to the kernel
+              __device double *z)    // device vector, forwarded to the kernel
+{
+  int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
+  dim3 block(BLOCK_SIZE);
+  num_mp = getGPUMultiProcessors();
+  max_threads_mp = getGPUMaxThreadsPerMP();
+  num_blocks_mp = max_threads_mp/BLOCK_SIZE;  // whole BLOCK_SIZE-thread blocks per max_threads_mp
+  num_blocks = num_blocks_mp*num_mp;          // grid is sized from the device queries, not from n
+  dim3 grid(num_blocks);
+  // Execution configuration: grid and block from above, no dynamic shared memory, on the handle's stream.
+  spgpuDupd_xyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
+                                                               x, y, z);
+}
+
diff --git a/cuda/spgpu/kernels/dxyzw.cu b/cuda/spgpu/kernels/dxyzw.cu
new file mode 100644
index 00000000..f9964cc7
--- /dev/null
+++ b/cuda/spgpu/kernels/dxyzw.cu
@@ -0,0 +1,77 @@
+/*
+ * spGPU - Sparse matrices on GPU library.
+ *
+ * Copyright (C) 2010 - 2012
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "cudadebug.h"
+#include "cudalang.h"
+#include <cuda_runtime.h>  // NOTE(review): the header name was absent on this line; cuda_runtime.h is assumed - confirm
+#include "core.h"
+
+extern "C"
+{
+#include "vector.h"
+  int getGPUMultiProcessors();
+  int getGPUMaxThreadsPerMP();
+}
+
+
+#include "debug.h"
+
+#define BLOCK_SIZE 512
+// Fused elementwise update over n entries:  y = a*x + b*y;  z = c*y + d*z;  w = e*z + f*w  (each stage uses the value just computed).
+__global__ void spgpuDxyzw_krn(int n, double a, double b,
+                               double c, double d,
+                               double e, double f,
+                               double * x, double *y,
+                               double *z, double *w)
+{
+  int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;    // first element for this thread; the launcher uses BLOCK_SIZE-thread blocks
+  unsigned int gridSize = blockDim.x * gridDim.x;  // total number of threads in the launch
+  double ty, tz;
+  for ( ; id < n; id +=gridSize)
+    // Each thread visits id, id+gridSize, ... so a grid smaller than n still reaches every element.
+    {
+      // Every term is coefficient*vector (PREC_DMUL) and the two terms of a stage are then summed (PREC_DADD).
+      ty = PREC_DADD(PREC_DMUL(a, x[id]), PREC_DMUL(b,y[id]));
+      tz = PREC_DADD(PREC_DMUL(c, ty), PREC_DMUL(d,z[id]));
+      w[id] = PREC_DADD(PREC_DMUL(e, tz), PREC_DMUL(f,w[id]));
+      y[id] = ty;
+      z[id] = tz;
+    }
+}
+// Host launcher for spgpuDxyzw_krn. The grid is sized from getGPUMultiProcessors() and
+// getGPUMaxThreadsPerMP() rather than from n; the kernel's strided loop covers the n elements.
+void spgpuDxyzw(spgpuHandle_t handle,
+                int n,
+                double a, double b,
+                double c, double d,
+                double e, double f,
+                __device double * x,
+                __device double * y,
+                __device double * z,
+                __device double *w)
+{
+  int num_mp, max_threads_mp, num_blocks_mp, num_blocks;
+  dim3 block(BLOCK_SIZE);
+  num_mp = getGPUMultiProcessors();
+  max_threads_mp = getGPUMaxThreadsPerMP();
+  num_blocks_mp = max_threads_mp/BLOCK_SIZE;
+  num_blocks = num_blocks_mp*num_mp;
+  dim3 grid(num_blocks);
+  // Execution configuration: grid and block from above, no dynamic shared memory, on the handle's stream.
+  spgpuDxyzw_krn<<<grid, block, 0, handle->currentStream>>>(n, a,b,c,d,e,f,
+                                                            x, y, z,w);
+}
+
diff --git a/cuda/spgpu/kernels/ell_ccsput.cu b/cuda/spgpu/kernels/ell_ccsput.cu
new file mode 100644
index 00000000..66126bd0
--- /dev/null
+++ b/cuda/spgpu/kernels/ell_ccsput.cu
@@ -0,0 +1,32 @@
+/*
+ * spGPU - Sparse matrices on GPU library.
+ *
+ * Copyright (C) 2010 - 2014
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" +#include "core.h" + +extern "C" +{ +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_cspmv.cu b/cuda/spgpu/kernels/ell_cspmv.cu new file mode 100644 index 00000000..b2a5ca3a --- /dev/null +++ b/cuda/spgpu/kernels/ell_cspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" +#include "core.h" + +extern "C" +{ +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_csput_base.cuh b/cuda/spgpu/kernels/ell_csput_base.cuh new file mode 100644 index 00000000..95e0e33d --- /dev/null +++ b/cuda/spgpu/kernels/ell_csput_base.cuh @@ -0,0 +1,126 @@ +/* + * spGPU - Sparse matrices on GPU library. 
+ *
+ * Copyright (C) 2010 - 2014
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#define PRE_CONCAT(A, B) A ## B
+#define CONCAT(A, B) PRE_CONCAT(A, B)
+
+#undef GEN_SPGPU_ELL_NAME
+#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellcsput)
+
+#define THREAD_BLOCK 256
+
+// Define:
+//#define VALUE_TYPE
+//#define TYPE_SYMBOL
+
+#include "mathbase.cuh"
+// Kernel: for each (aI, aJ, aVal) triplet, binary-search row (aI - baseIndex) for column aJ and, when it is
+// found, overwrite the stored value with aVal. Triplets whose column is not stored are dropped. alpha is unused.
+__global__ void
+CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn)
+(VALUE_TYPE alpha, VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS,
+  int nnz, int *aI, int *aJ, VALUE_TYPE *aVal, int baseIndex)
+{
+  int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK);
+  // One thread per triplet; threads with i >= nnz do nothing.
+  if (i < nnz)
+    {
+      int row = aI[i] - baseIndex;
+      int column = aJ[i];  // compared as-is against rP entries (baseIndex is not subtracted)
+      VALUE_TYPE value = aVal[i];
+      // Rows below the base index are skipped; no upper bound on row is checked here.
+      if (row < 0)
+        return;
+
+      // Select row
+      cM += row;
+      rP += row;
+      rS += row;
+
+      // Binary search over the *rS entries of this row; consecutive entries are rPPitch (cMPitch) apart.
+      int lower = 0;
+      int upper = (*rS - 1);
+      // The search is only correct if the row's column indices are stored in ascending order.
+      while(lower <= upper)
+        {
+          int medium = (lower + upper) / 2;
+
+          int currentColumn = rP[medium*rPPitch];
+
+          if(currentColumn == column)
+            {
+              cM[medium*cMPitch] = value;
+              break;
+            }
+          else if(currentColumn < column)
+            lower = medium + 1;
+          else
+            upper = medium - 1;
+        }
+    }
+}
+
+// Launches the kernel for one batch of nnz triplets: one thread per triplet in blocks of THREAD_BLOCK,
+// on handle->currentStream with no dynamic shared memory.
+void
+CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL))
+(spgpuHandle_t handle, VALUE_TYPE alpha, VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS,
+  int nnz, int *aI, int *aJ, VALUE_TYPE *aVal, int baseIndex)
+{
+  dim3 block (THREAD_BLOCK, 1);
+
+  dim3 grid ((nnz + THREAD_BLOCK - 1) / THREAD_BLOCK);  // ceil(nnz / THREAD_BLOCK) blocks
+
+  CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn)
+    <<< grid, block, 0, handle->currentStream >>> (alpha, cM, rP, cMPitch, rPPitch, rS, nnz, aI, aJ, aVal, baseIndex);
+}
+// Public entry point spgpu<TYPE_SYMBOL>ellcsput: applies the triplet update in batches of at most maxNForACall.
+void
+GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)
+  (spgpuHandle_t handle,
+  VALUE_TYPE alpha,
+  VALUE_TYPE* cM,
+  const int* rP,
+  int cMPitch,
+  int rPPitch,
+  const int* rS,
+  int nnz,
+  int *aI,
+  int *aJ,
+  VALUE_TYPE *aVal,
+  int baseIndex)
+{
+  int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX);
+  // NOTE(review): the product is computed in int; max() looks like a guard against it wrapping for large maxGridSizeX - confirm.
+  while (nnz > maxNForACall) //managing large vectors
+    {
+      CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL))
+        (handle, alpha, cM, rP, cMPitch, rPPitch, rS, maxNForACall, aI, aJ, aVal, baseIndex);
+      // Only the triplet arrays advance; cM, rP and rS are indexed by row inside the kernel.
+      aI = aI + maxNForACall;
+      aJ = aJ + maxNForACall;
+      aVal = aVal + maxNForACall;
+
+      nnz -= maxNForACall;
+    }
+  // Remaining (or only) batch.
+  CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL))
+    (handle, alpha, cM, rP, cMPitch, rPPitch, rS, nnz, aI, aJ, aVal, baseIndex);
+
+  cudaCheckError("CUDA error on ell_csput");
+}
+
diff --git a/cuda/spgpu/kernels/ell_dcsput.cu b/cuda/spgpu/kernels/ell_dcsput.cu
new file mode 100644
index 00000000..b1a25d52
--- /dev/null
+++ b/cuda/spgpu/kernels/ell_dcsput.cu
@@ -0,0 +1,31 @@
+/*
+ * spGPU - Sparse matrices on GPU library.
+ *
+ * Copyright (C) 2010 - 2014
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_dspmv.cu b/cuda/spgpu/kernels/ell_dspmv.cu new file mode 100644 index 00000000..e4e2e815 --- /dev/null +++ b/cuda/spgpu/kernels/ell_dspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#define TEX_FETCH_TYPE int2 +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_scsput.cu b/cuda/spgpu/kernels/ell_scsput.cu new file mode 100644 index 00000000..0e3ed7b3 --- /dev/null +++ b/cuda/spgpu/kernels/ell_scsput.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_spmv_base.cuh b/cuda/spgpu/kernels/ell_spmv_base.cuh new file mode 100644 index 00000000..e2af5896 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base.cuh @@ -0,0 +1,154 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */
+
+
+#define PRE_CONCAT(A, B) A ## B
+#define CONCAT(A, B) PRE_CONCAT(A, B)
+
+#undef GEN_SPGPU_ELL_NAME
+#undef X_TEX
+#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX)
+// Per-type helpers. Generic code picks one by pasting the type name, e.g. CONCAT(readValue_,VALUE_TYPE) below.
+__device__ __host__ static float zero_float() { return 0.0f; }
+__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); }
+__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; }
+// float arithmetic goes through the PREC_F* macros; float_fma is a multiply followed by an add.
+__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); }
+__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); }
+__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); }
+// Single-precision complex arithmetic forwards to the cuComplex functions.
+__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); }
+__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); }
+__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); }
+// readValue_* turn a fetched element into a value; for the single-precision types that is the identity.
+__device__ static float readValue_float(float fetch) { return fetch; }
+__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; }
+
+// host or c.c >= 1.3
+#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__)
+__device__ __host__ static double zero_double() { return 0.0; }
+__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); }
+__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; }
+// double arithmetic goes through the PREC_D* macros; double_fma is a multiply followed by an add.
+__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); }
+__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); }
+__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); }
+// Double-precision complex arithmetic forwards to the cuComplex functions.
+__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); }
+__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); }
+__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); }
+// Doubles are fetched as int2 / int4 words and rebuilt with __hiloint2double(high word, low word).
+__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); }
+__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch)
+{
+  cuDoubleComplex c;
+  c.x = __hiloint2double (fetch.y, fetch.x);  // real part from the first two words
+  c.y = __hiloint2double (fetch.w, fetch.z);  // imaginary part from the last two words
+  return c;
+}
+#endif
+// The texture path below is compiled out (#if 0), so fetchTex and bind_tex_x/unbind_tex_x are not defined by this header.
+#if 0
+// Texture cache management
+texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX;
+
+#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x)
+#define unbind_tex_x(x) cudaUnbindTexture(X_TEX)
+
+__device__ static VALUE_TYPE
+fetchTex (int pointer)
+{
+  TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer);
+  return CONCAT(readValue_,VALUE_TYPE) (fetch);
+}
+#endif
+#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_vanilla)
+#define GEN_SPGPU_ELL_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),ellspmv_vanilla)
+#include "ell_spmv_base_template.cuh"
+#if 0
+#undef GEN_SPGPU_ELL_NAME
+#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_prefetch)
+#define GEN_SPGPU_ELL_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),ellspmv_prefetch)
+#undef USE_PREFETCHING
+#define USE_PREFETCHING
+#include "ell_spmv_base_template.cuh"
+#define ENABLE_CACHE
+#undef GEN_SPGPU_ELL_NAME
+#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache_prefetch)
+#define GEN_SPGPU_ELL_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache_prefetch)
+#include "ell_spmv_base_template.cuh"
+#undef GEN_SPGPU_ELL_NAME
+#undef USE_PREFETCHING
+#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache)
+#define GEN_SPGPU_ELL_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache)
+#include "ell_spmv_base_template.cuh"
+#endif
+#undef GEN_SPGPU_ELL_NAME
+#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv)
+void
+GEN_SPGPU_ELL_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* cM, + const int* rP, + int cMPitch, + int rPPitch, + const int* rS, + const __device int* rIdx, + int avgNnzPerRow, + int maxNnzPerRow, + int rows, + const VALUE_TYPE *x, + VALUE_TYPE beta, + int baseIndex) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (rows > maxNForACall) //managing large vectors + { +#if 0 + if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) +#endif + CONCAT(_,GEN_SPGPU_ELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); +#if 0 + else if (avgNnzPerRow < 20) + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); + else + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); +#endif + y = y + maxNForACall; + z = z + maxNForACall; + cM = cM + maxNForACall; + rP = rP + maxNForACall; + rS = rS + maxNForACall; + + rows -= maxNForACall; + } +#if 0 + if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) +#endif + CONCAT(_,GEN_SPGPU_ELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); + #if 0 + else if (avgNnzPerRow < 20) + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); + else + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); +#endif + cudaCheckError("CUDA error on ell_spmv"); +} + diff --git 
a/cuda/spgpu/kernels/ell_spmv_base_nors.cuh b/cuda/spgpu/kernels/ell_spmv_base_nors.cuh new file mode 100644 index 00000000..8cd89704 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base_nors.cuh @@ -0,0 +1,340 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int rows, int maxNnzPerRow, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[2][THREAD_BLOCK+1]; + + if (i < rows) + { + rP += i; cM += i; + + int rowSizeM = maxNnzPerRow / 4; + + + if ((maxNnzPerRow % 4) > threadIdx.y) + ++rowSizeM; + + rP += threadIdx.y*rPPitch; + cM += threadIdx.y*cMPitch; + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += 4*rPPitch; + + value = cM[0]; + cM += 4*cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y > 1) + temp[threadIdx.y - 2][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y <= 1) + zProd = CONCAT(VALUE_TYPE, _add)(zProd, 
temp[threadIdx.y][threadIdx.x]); + + if (threadIdx.y == 1) + temp[1][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[1][threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, const int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) + { + rP += i; cM += i; + + int rowSizeM = maxNnzPerRow / 2; + + if (threadIdx.y == 0) + { + if (maxNnzPerRow % 2) + ++rowSizeM; + } + else + { + rP += rPPitch; + cM += cMPitch; + } + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base 
address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + rP += i; cM += i; + +#ifdef USE_PREFETCHING + for (int j = 0; j < maxNnzPerRow / 2; j++) + { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += rPPitch; + pointers2 = rP[0] - baseIndex; + rP += rPPitch; + + values1 = cM[0]; + cM += cMPitch; + + values2 = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (maxNnzPerRow % 2) + { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < maxNnzPerRow; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + + // Since z and y are accessed with the same offset by 
the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rIdx, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) + { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + */ +} + + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) + (i, yVal, i, z, y, alpha, cM, 
rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + */ + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) + (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) + (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); +} diff --git a/cuda/spgpu/kernels/ell_spmv_base_template.cuh b/cuda/spgpu/kernels/ell_spmv_base_template.cuh new file mode 100644 index 00000000..fa39d8a6 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base_template.cuh @@ -0,0 +1,426 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define THREAD_BLOCK 128 + +#include "ell_spmv_base_nors.cuh" + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[2][THREAD_BLOCK+1]; + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + int rowSizeM = rowSize / 4; + + + if ((rowSize % 4) > threadIdx.y) + ++rowSizeM; + + rP += threadIdx.y*rPPitch; + cM += threadIdx.y*cMPitch; + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += 4*rPPitch; + + value = cM[0]; + cM += 4*cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y > 1) + temp[threadIdx.y - 2][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y <= 1) + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.y][threadIdx.x]); + + if (threadIdx.y == 1) + temp[1][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[1][threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). 
+ + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + int rowSizeM = rowSize / 2; + + if (threadIdx.y == 0) + { + if (rowSize % 2) + ++rowSizeM; + } + else + { + rP += rPPitch; + cM += cMPitch; + } + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). 
+ + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + +#ifdef USE_PREFETCHING + for (int j = 0; j < rowSize / 2; j++) + { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += rPPitch; + pointers2 = rP[0] - baseIndex; + rP += rPPitch; + + values1 = cM[0]; + cM += cMPitch; + + values2 = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (rowSize % 2) + { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < rowSize; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + + // Since z and y are accessed with the same offset by the same thread, + 
// and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, const int* rIdx, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) + { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + */ +} + + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + + else //if (blockDim.y == 2) 
+ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + */ + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); +} + +void +CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, + const __device int* rIdx, int avgNnzPerRow, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int avgThreshold; + + if (handle->capabilityMajor < 2) + avgThreshold = 8; + else if (handle->capabilityMajor < 3) + avgThreshold = 16; + else + avgThreshold = 32; + +#if defined(ELL_FORCE_THREADS_1) + dim3 block (THREAD_BLOCK, 1); +#elif defined(ELL_FORCE_THREADS_2) + dim3 block (THREAD_BLOCK, 2); +#else + dim3 block (THREAD_BLOCK, avgNnzPerRow >= avgThreshold ? 
2 : 1); +#endif + + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + if (rIdx) + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs), cudaFuncCachePreferL1); + + if (rS) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rIdx, maxNnzPerRow, rows, x, beta, baseIndex); + } + else + { + + + if (rS) + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, baseIndex); + } + else + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, 
baseIndex); + } + } + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/ell_sspmv.cu b/cuda/spgpu/kernels/ell_sspmv.cu new file mode 100644 index 00000000..298ff1cb --- /dev/null +++ b/cuda/spgpu/kernels/ell_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_zcsput.cu b/cuda/spgpu/kernels/ell_zcsput.cu new file mode 100644 index 00000000..9fbd4311 --- /dev/null +++ b/cuda/spgpu/kernels/ell_zcsput.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" +#include "core.h" + +extern "C" +{ +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_zspmv.cu b/cuda/spgpu/kernels/ell_zspmv.cu new file mode 100644 index 00000000..dd1bc72d --- /dev/null +++ b/cuda/spgpu/kernels/ell_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" +#include "core.h" + +extern "C" +{ +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/gath_base.cuh b/cuda/spgpu/kernels/gath_base.cuh new file mode 100644 index 00000000..a0e77b44 --- /dev/null +++ b/cuda/spgpu/kernels/gath_base.cuh @@ -0,0 +1,86 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#define PRE_CONCAT(A, B) A ## B
+#define CONCAT(A, B) PRE_CONCAT(A, B)
+
+#undef GEN_SPGPU_FUNC_NAME
+#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),gath)
+
+#define BLOCK_SIZE 256
+
+// Define:
+//#define VALUE_TYPE
+//#define TYPE_SYMBOL
+
+#include "mathbase.cuh"
+
+// Gather kernel: values[id] = vector[indices[id] - firstIndex] for id < count.
+// Entries whose position is negative after subtracting firstIndex are left
+// untouched.
+__global__ void
+CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)
+    (VALUE_TYPE* values, int count, const int* indices, int firstIndex, const VALUE_TYPE* vector)
+{
+    int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
+
+    if (id < count)
+    {
+        int pos = indices[id]-firstIndex;
+
+        if (pos < 0)
+            return;
+
+        values[id] = vector[pos];
+    }
+}
+
+// Launches the gather kernel for one chunk of xNnz elements on the handle's
+// current stream, using one thread per element in blocks of BLOCK_SIZE.
+void
+CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_)
+    (spgpuHandle_t handle, VALUE_TYPE *xValues, int xNnz,
+    const __device int *xIndices, int xBaseIndex, const VALUE_TYPE* y)
+{
+    // Nothing to gather; a zero-sized grid would be an invalid launch configuration.
+    if (xNnz <= 0)
+        return;
+
+    int msize = (xNnz+BLOCK_SIZE-1)/BLOCK_SIZE;
+
+    dim3 block(BLOCK_SIZE);
+    dim3 grid(msize);
+
+    CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<< grid, block, 0, handle->currentStream >>>(xValues, xNnz, xIndices, xBaseIndex, y);
+
+}
+
+// Host entry point spgpu<TYPE_SYMBOL>gath: gathers xNnz elements of y into
+// xValues, splitting the work into chunks of at most maxNForACall elements.
+void
+GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL)
+(spgpuHandle_t handle,
+    __device VALUE_TYPE *xValues,
+    int xNnz,
+    const __device int *xIndices,
+    int xBaseIndex,
+    const __device VALUE_TYPE* y)
+{
+    // NOTE(review): BLOCK_SIZE*maxGridSizeX can exceed the int range for large
+    // grid limits; max() presumably falls back to maxGridSizeX in that case -
+    // confirm the type of handle->maxGridSizeX.
+    int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX);
+    while (xNnz > maxNForACall) //managing large vectors
+    {
+        CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_)
+            (handle, xValues, maxNForACall, xIndices, xBaseIndex, y);
+        xIndices += maxNForACall;
+        xValues += maxNForACall;
+        xNnz -= maxNForACall;
+    }
+
+    CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_)
+        (handle, xValues, xNnz, xIndices, xBaseIndex, y);
+    cudaCheckError("CUDA error on gath");
+}
+
diff --git a/cuda/spgpu/kernels/hdia_cspmv.cu
b/cuda/spgpu/kernels/hdia_cspmv.cu new file mode 100644 index 00000000..27bb30e1 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_cspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" +#include "core.h" + +extern "C" +{ +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "hdia_spmv_base.cuh" diff --git a/cuda/spgpu/kernels/hdia_dspmv.cu b/cuda/spgpu/kernels/hdia_dspmv.cu new file mode 100644 index 00000000..c1ac9af1 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_dspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "hdia.h" +} + +#include "debug.h" + +//#define ENABLE_CACHE +#define VALUE_TYPE double +#define TYPE_SYMBOL D +//#define TEX_FETCH_TYPE int2 +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hdia_spmv_base.cuh b/cuda/spgpu/kernels/hdia_spmv_base.cuh new file mode 100644 index 00000000..2c8cfbaf --- /dev/null +++ b/cuda/spgpu/kernels/hdia_spmv_base.cuh @@ -0,0 +1,149 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */
+
+// Two-level token pasting: CONCAT expands its arguments before PRE_CONCAT applies ##.
+#define PRE_CONCAT(A, B) A ## B
+#define CONCAT(A, B) PRE_CONCAT(A, B)
+
+#undef GEN_SPGPU_HDIA_NAME
+#undef X_TEX
+#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX)
+
+__device__ __host__ static float zero_float() { return 0.0f; }
+__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); }
+__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; }
+
+__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); }
+__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); }
+__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); }
+
+__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); }
+__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); }
+__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); }
+
+__device__ static float readValue_float(float fetch) { return fetch; }
+__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; }
+
+// host or c.c >= 1.3
+#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__)
+__device__ __host__ static double zero_double() { return 0.0; }
+__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); }
+__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; }
+
+__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); }
+__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); }
+__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); }
+
+__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); }
+__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); }
+__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); }
+
+__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); }
+__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch)
+{
+  cuDoubleComplex c;
+  c.x = __hiloint2double (fetch.y, fetch.x);
+  c.y = __hiloint2double (fetch.w, fetch.z);
+  return c;
+}
+#endif
+#if 0
+// Texture cache management
+texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX;
+
+#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x)
+#define unbind_tex_x(x) cudaUnbindTexture(X_TEX)
+
+__device__ static VALUE_TYPE
+fetchTex (int pointer)
+{
+  TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer);
+  return CONCAT(readValue_,VALUE_TYPE) (fetch);
+}
+#endif
+#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla)
+#define GEN_SPGPU_HDIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla)
+#include "hdia_spmv_base_template.cuh"
+#if 0
+#undef GEN_SPGPU_HDIA_NAME
+#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch)
+#define GEN_SPGPU_HDIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch)
+#undef USE_PREFETCHING
+#define USE_PREFETCHING
+#include "hdia_spmv_base_template.cuh"
+#define ENABLE_CACHE
+#undef GEN_SPGPU_HDIA_NAME
+#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch)
+#define GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch)
+#include "hdia_spmv_base_template.cuh"
+#undef GEN_SPGPU_HDIA_NAME
+#undef USE_PREFETCHING
+#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache)
+#define GEN_SPGPU_HDIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache)
+#include "hdia_spmv_base_template.cuh"
+#endif
+#undef GEN_SPGPU_HDIA_NAME
+#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv)
+// Host driver for z = beta*y + alpha*A*x with A stored in HDIA format.
+// offsets[] holds the diagonal offsets (column minus row) and dM the values, hackSize per diagonal;
+// hackOffsets[k] is the index of the first diagonal of hack k (a group of hackSize consecutive rows).
+// The rows are processed in slices of at most maxNForACall rows, each handed to the launcher
+// generated from hdia_spmv_base_template.cuh; only the "vanilla" variant is compiled (the
+// prefetch / texture-cache variants above are disabled with #if 0).
+// NOTE(review): y, z and hackOffsets are advanced per slice but x and cols are not, while the
+// kernel derives column indices from the slice-local row index -- confirm the result for
+// inputs that need more than one slice.
+void
+GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL)
+(spgpuHandle_t handle,
+  VALUE_TYPE* z,
+  const VALUE_TYPE *y,
+  VALUE_TYPE alpha,
+  const VALUE_TYPE* dM,
+  const int* offsets,
+  int hackSize,
+  const int* hackOffsets,
+  int rows,
+  int cols,
+  const VALUE_TYPE *x,
+  VALUE_TYPE beta)
+{
+  // Rows one launch may cover: THREAD_BLOCK rows per block times the grid-size limit,
+  // clamped so that neither that product nor the launcher's (rows + THREAD_BLOCK - 1)
+  // rounding can overflow int.
+  const int maxSafeRows = 0x7FFFFFFF - (THREAD_BLOCK - 1);
+  int maxNForACall = (handle->maxGridSizeX > maxSafeRows/THREAD_BLOCK) ? maxSafeRows : THREAD_BLOCK*handle->maxGridSizeX;
+
+  // maxNForACall should be a multiple of hackSize
+  maxNForACall = (maxNForACall/hackSize)*hackSize;
+
+  while (rows > maxNForACall) //managing large vectors
+  {
+    CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta);
+
+    y = y + maxNForACall;
+    z = z + maxNForACall;
+    hackOffsets += maxNForACall/hackSize;
+
+    rows -= maxNForACall;
+  }
+
+  // Remaining rows (the whole problem when it fits in a single launch).
+  CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta);
+
+  cudaCheckError("CUDA error on hdia_spmv");
+}
+
diff --git a/cuda/spgpu/kernels/hdia_spmv_base_template.cuh b/cuda/spgpu/kernels/hdia_spmv_base_template.cuh
new file mode 100644
index 00000000..155179fd
--- /dev/null
+++ b/cuda/spgpu/kernels/hdia_spmv_base_template.cuh
@@ -0,0 +1,253 @@
+/*
+ * spGPU - Sparse matrices on GPU library.
+ *
+ * Copyright (C) 2010 - 2015
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define THREAD_BLOCK 128
+
+// Per-thread body shared by the two kernels below: thread i accumulates row i of A*x and
+// stores z[i] = beta*y[i] + alpha*(A*x)[i]. Each warp stages the diagonal offsets of its hack
+// in shared memory, warpSize at a time, and every lane walks them for its own row.
+// Needs blockDim.x*sizeof(int) bytes of dynamic shared memory (supplied by the launcher below).
+__device__ void
+CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _)
+(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets,
+  int rows, int cols, const VALUE_TYPE *x, VALUE_TYPE beta, int hackCount)
+{
+  int i = threadIdx.x + blockIdx.x * (blockDim.x);
+  VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)();
+  if (i < rows && CONCAT(VALUE_TYPE, _isNotZero(beta)))
+    yVal = y[i];
+
+  VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)();
+
+  int hackId = i / hackSize;
+  int hackLaneId = i % hackSize;
+
+  // shared between offsetsChunks and warpHackOffsetTemp
+  extern __shared__ int dynShrMem[];
+
+  int hackOffset = 0;
+  int nextOffset = 0;
+
+  unsigned int laneId = threadIdx.x % warpSize;
+  unsigned int warpId = threadIdx.x / warpSize;
+
+#if __CUDA_ARCH__ < 300
+  int* warpHackOffset = dynShrMem;
+
+  if (laneId == 0 && i < rows)
+  {
+    warpHackOffset[warpId] = hackOffsets[hackId];
+    warpHackOffset[warpId + (blockDim.x / warpSize)] = hackOffsets[hackId+1];
+  }
+
+  __syncthreads();
+  hackOffset = warpHackOffset[warpId];
+  nextOffset = warpHackOffset[warpId + blockDim.x / warpSize];
+  __syncthreads();
+#elif __CUDA_ARCH__ < 700
+  if (laneId == 0 && i < rows)
+  {
+    hackOffset = hackOffsets[hackId];
+    nextOffset = hackOffsets[hackId+1];
+  }
+
+  hackOffset = __shfl(hackOffset, 0);
+  nextOffset = __shfl(nextOffset, 0);
+#else
+  if (laneId == 0 && i < rows)
+  {
+    hackOffset = hackOffsets[hackId];
+    nextOffset = hackOffsets[hackId+1];
+  }
+
+  hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0);
+  nextOffset = __shfl_sync(0xFFFFFFFF,nextOffset, 0);
+#endif
+
+  if (hackId >= hackCount)
+    return;
+
+  dM += hackOffset*hackSize + hackLaneId;
+  offsets += hackOffset;
+
+  // diags for this hack is next hackOffset minus current hackOffset
+  int diags = nextOffset - hackOffset;
+
+  // Warp oriented
+  int rounds = (diags + warpSize - 1)/warpSize;
+
+  volatile int *offsetsChunk = dynShrMem + warpId*warpSize;
+
+  for (int r = 0; r < rounds; r++)
+  {
+    // in the last round diags will be <= warpSize
+    if (laneId < diags)
+      offsetsChunk[laneId] = offsets[laneId];
+#if __CUDA_ARCH__ >= 700
+    // No implicit warp lock-step on Volta+: make the staged offsets visible to every lane.
+    __syncwarp();
+#endif
+    if (i < rows)
+    {
+      int count = min(diags, warpSize);
+#ifdef USE_PREFETCHING
+      int j;
+      for (j=0; j<=count-2; j += 2)
+      {
+        // prefetch 2 values
+        int column1 = offsetsChunk[j] + i;
+        int column2 = offsetsChunk[j+1] + i;
+
+        VALUE_TYPE xValue1, xValue2;
+        VALUE_TYPE mValue1, mValue2;
+
+        bool inside1 = column1 >= 0 && column1 < cols;
+        bool inside2 = column2 >= 0 && column2 < cols;
+
+        if(inside1)
+        {
+          mValue1 = dM[0];
+#ifdef ENABLE_CACHE
+          xValue1 = fetchTex (column1);
+#else
+          xValue1 = x[column1];
+#endif
+        }
+
+        dM += hackSize;
+
+        if(inside2)
+        {
+          mValue2 = dM[0];
+#ifdef ENABLE_CACHE
+          xValue2 = fetchTex (column2);
+#else
+          xValue2 = x[column2];
+#endif
+        }
+
+        dM += hackSize;
+
+        if(inside1)
+          zProd = CONCAT(VALUE_TYPE, _fma)(mValue1, xValue1, zProd);
+        if(inside2)
+          zProd = CONCAT(VALUE_TYPE, _fma)(mValue2, xValue2, zProd);
+      }
+
+      for (;j<count; ++j)
+      {
+        int column = offsetsChunk[j] + i;
+
+        if(column >= 0 && column < cols)
+        {
+          VALUE_TYPE xValue;
+#ifdef ENABLE_CACHE
+          xValue = fetchTex (column);
+#else
+          xValue = x[column];
+#endif
+          VALUE_TYPE mValue = dM[0];
+          zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd);
+        }
+
+        dM += hackSize;
+      }
+#else
+      for (int j=0;j<count; ++j)
+      {
+        int column = offsetsChunk[j] + i;
+
+        if(column >= 0 && column < cols)
+        {
+          VALUE_TYPE xValue;
+#ifdef ENABLE_CACHE
+          xValue = fetchTex (column);
+#else
+          xValue = x[column];
+#endif
+          VALUE_TYPE mValue = dM[0];
+          zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd);
+        }
+
+        dM += hackSize;
+      }
+#endif
+    }
+#if __CUDA_ARCH__ >= 700
+    // All lanes must be done reading before the next round overwrites offsetsChunk.
+    __syncwarp();
+#endif
+    diags -= warpSize;
+    offsets += warpSize;
+  }
+
+  // Since z and y are accessed with the same offset by the same thread,
+  // and the write to z follows the y read, y and z can share the same base address (in-place computing).
+
+  if (i >= rows)
+    return;
+
+  if (CONCAT(VALUE_TYPE, _isNotZero(beta)))
+    z[i] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd));
+  else
+    z[i] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd);
+}
+
+// Force to recompile and optimize with llvm
+__global__ void
+CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0)
+(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const VALUE_TYPE *x, int hackCount)
+{
+  CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _)
+    (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, CONCAT(zero_,VALUE_TYPE)(), hackCount);
+}
+
+__global__ void
+CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn)
+(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const VALUE_TYPE *x, VALUE_TYPE beta, int hackCount)
+{
+  CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _)
+    (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta, hackCount);
+}
+// Host launcher: one thread per row, THREAD_BLOCK rows per block; picks the beta == 0 kernel when y is not needed.
+void
+CONCAT(_,GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL))
+(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha,
+  const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols,
+  const VALUE_TYPE *x, VALUE_TYPE beta)
+{
+  dim3 block (THREAD_BLOCK);
+  dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK);
+
+  int hackCount = (rows + hackSize - 1)/hackSize;
+
+#ifdef ENABLE_CACHE
+  bind_tex_x ((const TEX_FETCH_TYPE *) x);
+#endif
+
+  cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1);
+  cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1);
+
+  if (CONCAT(VALUE_TYPE, _isNotZero(beta)))
+    CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta, hackCount);
+  else
+    CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, hackCount);
+
+#ifdef ENABLE_CACHE
+  unbind_tex_x ((const TEX_FETCH_TYPE *) x);
+#endif
+}
+
diff --git a/cuda/spgpu/kernels/hdia_sspmv.cu b/cuda/spgpu/kernels/hdia_sspmv.cu
new file mode 100644
index 00000000..ad1d77e7
--- /dev/null
+++ b/cuda/spgpu/kernels/hdia_sspmv.cu
@@ -0,0 +1,32 @@
+/*
+ * spGPU - Sparse matrices on GPU library.
+ *
+ * Copyright (C) 2010 - 2015
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hdia_zspmv.cu b/cuda/spgpu/kernels/hdia_zspmv.cu new file mode 100644 index 00000000..f0b592f6 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" +#include "core.h" + +extern "C" +{ +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_cspmv.cu b/cuda/spgpu/kernels/hell_cspmv.cu new file mode 100644 index 00000000..c2b17419 --- /dev/null +++ b/cuda/spgpu/kernels/hell_cspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" +#include "core.h" + +extern "C" +{ +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "hell_spmv_base.cuh" diff --git a/cuda/spgpu/kernels/hell_dspmv.cu b/cuda/spgpu/kernels/hell_dspmv.cu new file mode 100644 index 00000000..fe379841 --- /dev/null +++ b/cuda/spgpu/kernels/hell_dspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#define TEX_FETCH_TYPE int2 +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_spmv_base.cuh b/cuda/spgpu/kernels/hell_spmv_base.cuh new file mode 100644 index 00000000..ca074d33 --- /dev/null +++ b/cuda/spgpu/kernels/hell_spmv_base.cuh @@ -0,0 +1,159 @@ +/* + * spGPU - Sparse matrices on GPU library. 
+ *
+ * Copyright (C) 2010 - 2015
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+// Two-level token pasting: CONCAT expands its arguments before PRE_CONCAT applies ##.
+#define PRE_CONCAT(A, B) A ## B
+#define CONCAT(A, B) PRE_CONCAT(A, B)
+
+#undef GEN_SPGPU_HELL_NAME
+#undef X_TEX
+#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX)
+
+__device__ __host__ static float zero_float() { return 0.0f; }
+__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); }
+__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; }
+
+__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); }
+__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); }
+__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); }
+
+__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); }
+__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); }
+__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); }
+
+__device__ static float readValue_float(float fetch) { return fetch; }
+__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; }
+
+// host or c.c >= 1.3
+#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__)
+__device__ __host__ static double zero_double() { return 0.0; }
+__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); }
+__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; }
+
+__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); }
+__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); }
+__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); }
+
+__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); }
+__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); }
+__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); }
+
+__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); }
+__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch)
+{
+  cuDoubleComplex c;
+  c.x = __hiloint2double (fetch.y, fetch.x);
+  c.y = __hiloint2double (fetch.w, fetch.z);
+  return c;
+}
+#endif
+#if 0
+// Texture cache management
+texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX;
+
+#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x)
+#define unbind_tex_x(x) cudaUnbindTexture(X_TEX)
+
+__device__ static VALUE_TYPE
+fetchTex (int pointer)
+{
+  TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer);
+  return CONCAT(readValue_,VALUE_TYPE) (fetch);
+}
+#endif
+#if __CUDA_ARCH__ < 300
+extern __shared__ int dynShrMem[];
+#endif
+
+#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_vanilla)
+#define GEN_SPGPU_HELL_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hellspmv_vanilla)
+#include "hell_spmv_base_template.cuh"
+#undef GEN_SPGPU_HELL_NAME
+#if 0
+#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_prefetch)
+#define GEN_SPGPU_HELL_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hellspmv_prefetch)
+#undef USE_PREFETCHING
+#define USE_PREFETCHING
+#include "hell_spmv_base_template.cuh"
+#define ENABLE_CACHE
+#undef GEN_SPGPU_HELL_NAME
+#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache_prefetch)
+#define GEN_SPGPU_HELL_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache_prefetch)
+#include "hell_spmv_base_template.cuh"
+#undef GEN_SPGPU_HELL_NAME
+#undef USE_PREFETCHING
+#endif
+#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache)
+#define GEN_SPGPU_HELL_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache)
+#include "hell_spmv_base_template.cuh"
+#undef GEN_SPGPU_HELL_NAME
+#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv)
+// Host driver for z = beta*y + alpha*A*x with A stored in HELL format.
+// cM holds the matrix values and rP the matching column indices (baseIndex is subtracted from
+// them before x is read), rS the length of each row, and hackOffsets[k] the offset of hack k
+// (a group of hackSize consecutive rows) inside cM / rP.
+// When rIdx is non-NULL, row i reads y[rIdx[i]] and writes z[rIdx[i]] instead of element i.
+// avgNnzPerRow lets the launcher choose between one and two threads per row.
+// The rows are processed in slices of at most maxNForACall rows, each handed to the launcher
+// generated from hell_spmv_base_template.cuh. Only the "vanilla" launcher is called here;
+// no selection among the other generated variants is made.
+// NOTE(review): rIdx is not advanced together with y, z, rS and hackOffsets -- confirm the
+// multi-slice path when rIdx is supplied.
+void
+GEN_SPGPU_HELL_NAME(TYPE_SYMBOL)
+(spgpuHandle_t handle,
+  VALUE_TYPE* z,
+  const VALUE_TYPE *y,
+  VALUE_TYPE alpha,
+  const VALUE_TYPE* cM,
+  const int* rP,
+  int hackSize,
+  const __device int* hackOffsets,
+  const __device int* rS,
+  const __device int* rIdx,
+  int avgNnzPerRow,
+  int rows,
+  const VALUE_TYPE *x,
+  VALUE_TYPE beta,
+  int baseIndex)
+{
+  // Rows one launch may cover: THREAD_BLOCK rows per block times the grid-size limit,
+  // clamped so that neither that product nor the launcher's (rows + THREAD_BLOCK - 1)
+  // rounding can overflow int.
+  const int maxSafeRows = 0x7FFFFFFF - (THREAD_BLOCK - 1);
+  int maxNForACall = (handle->maxGridSizeX > maxSafeRows/THREAD_BLOCK) ? maxSafeRows : THREAD_BLOCK*handle->maxGridSizeX;
+
+  // maxNForACall should be a multiple of hackSize
+  maxNForACall = (maxNForACall/hackSize)*hackSize;
+
+  while (rows > maxNForACall) //managing large vectors
+  {
+    CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex);
+
+    y = y + maxNForACall;
+    z = z + maxNForACall;
+    hackOffsets = hackOffsets + maxNForACall/hackSize;
+    rS = rS + maxNForACall;
+
+    rows -= maxNForACall;
+  }
+
+  // Remaining rows (the whole problem when it fits in a single launch).
+  CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex);
+
+  cudaCheckError("CUDA error on hell_spmv");
+}
+
diff --git a/cuda/spgpu/kernels/hell_spmv_base_template.cuh b/cuda/spgpu/kernels/hell_spmv_base_template.cuh
new file mode 100644
index 00000000..9ecd8f74
--- /dev/null
+++ b/cuda/spgpu/kernels/hell_spmv_base_template.cuh
@@ -0,0 +1,357 @@
+/*
+ * spGPU - Sparse matrices on GPU library.
+ *
+ * Copyright (C) 2010 - 2015
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#define IDX2
+#define THREAD_BLOCK 128
+// Two threads per row: threadIdx.y == 0 takes the even entries of row i, threadIdx.y == 1 the odd ones; y == 0 adds both partial sums and writes z[outRow].
+__device__ void
+CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2)
+(int i, VALUE_TYPE yVal, int outRow,
+  VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex)
+{
+  VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)();
+
+  __shared__ VALUE_TYPE temp[THREAD_BLOCK];
+
+  // Threads past the last row still take part in the warp shuffle and the block barrier below, but touch no matrix or vector data.
+  const bool active = i < rows;
+  int hackId = i / hackSize;
+  int hackLaneId = i % hackSize;
+
+  int hackOffset = 0;
+  unsigned int laneId = threadIdx.x % 32;
+#if __CUDA_ARCH__ < 300
+  // "volatile" used to avoid __syncthreads()
+  volatile int* warpHackOffset = dynShrMem;
+
+  unsigned int warpId = threadIdx.x / 32;
+
+  if (laneId == 0 && active)
+    warpHackOffset[warpId] = hackOffsets[hackId];
+
+  hackOffset = warpHackOffset[warpId] + hackLaneId;
+#elif __CUDA_ARCH__ < 700
+  if (laneId == 0 && active)
+    hackOffset = hackOffsets[hackId];
+  // lane 0 broadcasts the hack offset to the whole warp
+  hackOffset = __shfl(hackOffset, 0) + hackLaneId;
+#else
+  if (laneId == 0 && active)
+    hackOffset = hackOffsets[hackId];
+  // the full mask names all 32 lanes, so every lane must execute this shuffle (no row guard here)
+  hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0) + hackLaneId;
+#endif
+
+  if (active) {
+    rP += hackOffset;
+    cM += hackOffset;
+
+    int rowSize = rS[i];
+    int rowSizeM = rowSize / 2;
+
+    if (threadIdx.y == 0) {
+      if (rowSize % 2)
+        ++rowSizeM;
+    } else {
+      rP += hackSize;
+      cM += hackSize;
+    }
+
+    for (int j = 0; j < rowSizeM; j++) {
+      int pointer;
+      VALUE_TYPE value;
+      VALUE_TYPE fetch;
+
+      pointer = rP[0] - baseIndex;
+      rP += hackSize;
+      rP += hackSize;
+
+      value = cM[0];
+      cM += hackSize;
+      cM += hackSize;
+
+#ifdef ENABLE_CACHE
+      fetch = fetchTex(pointer);
+#else
+      fetch = x[pointer];
+#endif
+
+      // avoid MAD on pre-Fermi
+      zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd);
+    }
+  }
+  // Reduction
+  if (threadIdx.y == 1 && active)
+    temp[threadIdx.x] = zProd;
+
+  __syncthreads();
+
+  if (threadIdx.y == 0 && active) {
+    zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]);
+    // Since z and y are accessed with the same offset by the same thread,
+    // and the write to z follows the y read, y and z can share the same base address (in-place computing).
+    if (CONCAT(VALUE_TYPE, _isNotZero(beta)))
+      z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd));
+    else
+      z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd);
+  }
+}
+// One thread per row: accumulates all rS[i] entries of row i and writes z[outRow] = beta*yVal + alpha*sum.
+__device__ void
+CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx)
+ (int i, VALUE_TYPE yVal, int outRow,
+  VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex)
+{
+  VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)();
+
+  // Threads past the last row still take part in the warp shuffle below, but touch no matrix or vector data.
+  const bool active = i < rows;
+  int hackId = i / hackSize;
+  int hackLaneId = i % hackSize;
+
+  int hackOffset = 0;
+  unsigned int laneId = threadIdx.x % 32;
+#if __CUDA_ARCH__ < 300
+  // "volatile" used to avoid __syncthreads()
+  volatile int* warpHackOffset = dynShrMem;
+
+  unsigned int warpId = threadIdx.x / 32;
+
+  if (laneId == 0 && active)
+    warpHackOffset[warpId] = hackOffsets[hackId];
+
+  hackOffset = warpHackOffset[warpId] + hackLaneId;
+#elif __CUDA_ARCH__ < 700
+  if (laneId == 0 && active)
+    hackOffset = hackOffsets[hackId];
+  // lane 0 broadcasts the hack offset to the whole warp
+  hackOffset = __shfl(hackOffset, 0) + hackLaneId;
+#else
+  if (laneId == 0 && active)
+    hackOffset = hackOffsets[hackId];
+  // the full mask names all 32 lanes, so every lane must execute this shuffle (no row guard here)
+  hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0) + hackLaneId;
+#endif
+  if (active) {
+    rP += hackOffset;
+    cM += hackOffset;
+
+    int rowSize = rS[i];
+
+#ifdef USE_PREFETCHING
+    for (int j = 0; j < rowSize / 2; j++) {
+      int pointers1, pointers2;
+      VALUE_TYPE values1, values2;
+      VALUE_TYPE fetches1, fetches2;
+
+      pointers1 = rP[0] - baseIndex;
+      rP += hackSize;
+      pointers2 = rP[0] - baseIndex;
+      rP += hackSize;
+
+      values1 = cM[0];
+      cM += hackSize;
+
+      values2 = cM[0];
+      cM += hackSize;
+
+#ifdef ENABLE_CACHE
+      fetches1 = fetchTex(pointers1);
+      fetches2 = fetchTex(pointers2);
+#else
+      fetches1 = x[pointers1];
+      fetches2 = x[pointers2];
+#endif
+
+      // avoid MAD on pre-Fermi
+      zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd);
+      zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd);
+    }
+
+    // odd row size
+    if (rowSize % 2) {
+      int pointer = rP[0] - baseIndex;
+      VALUE_TYPE value = cM[0];
+      VALUE_TYPE fetch;
+
+#ifdef ENABLE_CACHE
+      fetch = fetchTex (pointer);
+#else
+      fetch = x[pointer];
+#endif
+      zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd);
+    }
+#else
+    for (int j = 0; j < rowSize; j++) {
+      int pointer;
+      VALUE_TYPE value;
+      VALUE_TYPE fetch;
+
+      pointer = rP[0] - baseIndex;
+      rP += hackSize;
+
+      value = cM[0];
+      cM += hackSize;
+
+#ifdef ENABLE_CACHE
+      fetch = fetchTex (pointer);
+#else
+      fetch = x[pointer];
+#endif
+      zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd);
+    }
+#endif
+    // Since z and y are accessed with the same offset by the same thread,
+    // and the write to z follows the y read, y and z can share the same base address (in-place computing).
+
+    if (CONCAT(VALUE_TYPE, _isNotZero(beta)))
+      z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd));
+    else
+      z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd);
+  }
+
+}
+// Kernel used when a row index map is given: row i reads y[rIdx[i]] and writes z[rIdx[i]].
+__global__ void
+CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx)
+(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, const int* rIdx, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex)
+{
+  int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK);
+
+  VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)();
+  int outRow = 0;
+  if (i < rows) {
+
+    outRow = rIdx[i];
+    if (CONCAT(VALUE_TYPE, _isNotZero(beta)))
+      yVal = y[outRow];
+  }
+#if 1
+  if (blockDim.y == 1)
+    CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx)
+      (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex);
+  else
+    CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2)
+      (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex);
+#else
+  CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx)
+    (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex);
+#endif
+}
+
+// Body of the kernels without a row index map: row i reads y[i] and writes z[i]; blockDim.y selects one or two threads per row.
+__device__ void
+CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _)
+ (VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex)
+{
+  int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK);
+
+  VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)();
+
+  if (i < rows) {
+    if (CONCAT(VALUE_TYPE, _isNotZero(beta)))
+      yVal = y[i];
+
+  }
+
+#ifdef IDX2
+  if (blockDim.y == 1)
+    CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx)
+      (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex);
+  else
+    CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2)
+      (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex);
+#else
+  CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx)
+    (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex);
+#endif
+
+}
+
+// Force to recompile and optimize with llvm
+__global__ void
+CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0)
+(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, int baseIndex)
+{
+  CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _)
+    (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex);
+}
+
+__global__ void
+CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn)
+ (VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex)
+{
+  CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _)
+    (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex);
+}
+// Host launcher: THREAD_BLOCK rows per block and one or two threads per row, chosen from avgNnzPerRow and the device's major compute capability.
+void
+CONCAT(_,GEN_SPGPU_HELL_NAME(TYPE_SYMBOL))
+(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha,
+  const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS,
+  const __device int* rIdx, int avgNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex)
+{
+  int avgThreshold;
+
+  if (handle->capabilityMajor < 2)
+    avgThreshold = 8;
+  else if (handle->capabilityMajor < 3)
+    avgThreshold = 16;
+  else
+    avgThreshold = 32;
+#ifdef IDX2
+#if defined(HELL_FORCE_THREADS_1)
+  dim3 block (THREAD_BLOCK, 1);
+#elif defined(HELL_FORCE_THREADS_2)
+  dim3 block (THREAD_BLOCK, 2);
+#else
+  dim3 block (THREAD_BLOCK, avgNnzPerRow >= avgThreshold ? 2 : 1);
+#endif
+#else
+  dim3 block (THREAD_BLOCK, 1);
+#endif
+  dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK);
+
+  // Should we generalize the code to 1/2/4/8 threads per row?
+  // And maybe adjust THREAD_BLOCK size?
+  int shrMemSize;
+  shrMemSize=THREAD_BLOCK*sizeof(VALUE_TYPE);
+
+#ifdef ENABLE_CACHE
+  bind_tex_x ((const TEX_FETCH_TYPE *) x);
+#endif
+
+  if (rIdx) {
+    cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx), cudaFuncCachePreferL1);
+
+    CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx)
+      <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, rows, x, beta, baseIndex);
+  } else {
+    cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1);
+    cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1);
+
+    if (CONCAT(VALUE_TYPE, _isNotZero(beta)))
+      CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn)
+        <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex);
+    else
+      CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0)
+        <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, baseIndex);
+  }
+
+#ifdef ENABLE_CACHE
+  unbind_tex_x ((const TEX_FETCH_TYPE *) x);
+#endif
+
+}
diff --git a/cuda/spgpu/kernels/hell_sspmv.cu b/cuda/spgpu/kernels/hell_sspmv.cu
new file mode 100644
index 00000000..86b4c7b3
--- /dev/null
+++ b/cuda/spgpu/kernels/hell_sspmv.cu
@@ -0,0 +1,32 @@
+/*
+ * spGPU - Sparse matrices on GPU library.
+ *
+ * Copyright (C) 2010 - 2014
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_zspmv.cu b/cuda/spgpu/kernels/hell_zspmv.cu new file mode 100644 index 00000000..aa8f6418 --- /dev/null +++ b/cuda/spgpu/kernels/hell_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" +#include "core.h" + +extern "C" +{ +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/igath.cu b/cuda/spgpu/kernels/igath.cu new file mode 100644 index 00000000..a97a1c98 --- /dev/null +++ b/cuda/spgpu/kernels/igath.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "gath_base.cuh" diff --git a/cuda/spgpu/kernels/iscat.cu b/cuda/spgpu/kernels/iscat.cu new file mode 100644 index 00000000..dfb7e3ca --- /dev/null +++ b/cuda/spgpu/kernels/iscat.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "scat_base.cuh" diff --git a/cuda/spgpu/kernels/isetscal.cu b/cuda/spgpu/kernels/isetscal.cu new file mode 100644 index 00000000..fe028b0e --- /dev/null +++ b/cuda/spgpu/kernels/isetscal.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "setscal_base.cuh" diff --git a/cuda/spgpu/kernels/mathbase.cuh b/cuda/spgpu/kernels/mathbase.cuh new file mode 100644 index 00000000..e7d77778 --- /dev/null +++ b/cuda/spgpu/kernels/mathbase.cuh @@ -0,0 +1,53 @@ +#pragma once + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } +__device__ __host__ static bool float_isZero(float x) { return x == 0.0f; } +__device__ __host__ static bool int_isZero(int x) { return x == 0; } +__device__ __host__ static bool int_isNotZero(int x) { return x != 0; } + +__device__ static int int_fma(int a, int b, int c) { return ((a*b)+c); } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } +__device__ static float float_abs(float a) { return fabsf(a); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } +__device__ static cuFloatComplex 
cuFloatComplex_abs(cuFloatComplex a) { return make_cuFloatComplex(cuCabsf(a),0); } + +//__device__ static float cuFloatComplex_abs(cuFloatComplex a) { return cuCabsf(a); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } +__device__ __host__ static bool double_isZero(double x) { return x == 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } +__device__ static double double_abs(double a) { return fabs (a); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_abs(cuDoubleComplex a) { return make_cuDoubleComplex(cuCabs(a),0); } +//__device__ static double cuDoubleComplex_abs(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} 
+#endif diff --git a/cuda/spgpu/kernels/sabs.cu b/cuda/spgpu/kernels/sabs.cu new file mode 100644 index 00000000..ee0efd8b --- /dev/null +++ b/cuda/spgpu/kernels/sabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define RES_VALUE_TYPE float +#define TYPE_SYMBOL S +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/samax.cu b/cuda/spgpu/kernels/samax.cu new file mode 100644 index 00000000..8366e3e1 --- /dev/null +++ b/cuda/spgpu/kernels/samax.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "amax_base.cuh" + diff --git a/cuda/spgpu/kernels/sasum.cu b/cuda/spgpu/kernels/sasum.cu new file mode 100644 index 00000000..3620775b --- /dev/null +++ b/cuda/spgpu/kernels/sasum.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "asum_base.cuh" + diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu new file mode 100644 index 00000000..ab103bbb --- /dev/null +++ b/cuda/spgpu/kernels/saxpby.cu @@ -0,0 +1,149 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + */ +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + + +#if 1 +__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + if (beta == 0.0f) { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = PREC_FMUL(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } + } +} + +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +#else + +__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). 
+ + if (beta == 0.0f) + z[id] = PREC_FMUL(alpha,x[id]); + else + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } +} + + + +void spgpuSaxpby_(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuSaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuSaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on saxpby"); +} +#endif +void spgpuSmaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x, + int count, int pitch) +{ + + for (int i=0; icurrentStream>>>(y, n, alpha, x); +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *y, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE *x) +{ + + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + CONCAT(_,GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL))(handle, y, maxNForACall, alpha, x); + x = x + maxNForACall; + y = y + maxNForACall; + n -= maxNForACall; + } + + CONCAT(_,GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL))(handle, y, n, alpha, x); + + cudaCheckError("CUDA error on scal"); +} diff --git a/cuda/spgpu/kernels/scat_base.cuh b/cuda/spgpu/kernels/scat_base.cuh new file mode 100644 index 00000000..f6f619b1 --- /dev/null +++ b/cuda/spgpu/kernels/scat_base.cuh @@ -0,0 +1,89 @@ +/* + * spGPU - 
Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),scat) + +#define BLOCK_SIZE 256 + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (VALUE_TYPE* vector, int count, const int* indices, const VALUE_TYPE* values, int firstIndex, VALUE_TYPE beta) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < count) + { + int pos = indices[id]-firstIndex; + if (pos < 0) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + vector[pos] = CONCAT(VALUE_TYPE, _fma)(beta, vector[pos], values[id]); + else + vector[pos] = values[id]; + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, VALUE_TYPE* y, int xNnz, const VALUE_TYPE *xValues, + const __device int *xIndices, int xBaseIndex, VALUE_TYPE beta) +{ + int msize = (xNnz+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(y, xNnz, xIndices, xValues, xBaseIndex, beta); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE* y, + int xNnz, + const __device VALUE_TYPE *xValues, + const __device int *xIndices, + int xBaseIndex, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, 
BLOCK_SIZE*handle->maxGridSizeX); + while (xNnz > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, maxNForACall, xValues, xIndices, xBaseIndex, beta); + xIndices += maxNForACall; + xValues += maxNForACall; + xNnz -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, xNnz, xValues, xIndices, xBaseIndex, beta); + cudaCheckError("CUDA error on scat"); +} + diff --git a/cuda/spgpu/kernels/sdot.cu b/cuda/spgpu/kernels/sdot.cu new file mode 100644 index 00000000..5952c34e --- /dev/null +++ b/cuda/spgpu/kernels/sdot.cu @@ -0,0 +1,174 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +//#define USE_CUBLAS + +#ifdef USE_CUBLAS +#include "cublas.h" +#endif + +#define BLOCK_SIZE 320 +//#define BLOCK_SIZE 512 + +//#define ASSUME_LOCK_SYNC_PARALLELISM + +#ifndef USE_CUBLAS +static __device__ float sdotReductionResult[128]; +#endif + +__global__ void spgpuSdot_kern(int n, float* x, float* y) +{ + __shared__ float sSum[BLOCK_SIZE]; + + float res = 0; + + float* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + y += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + int numSteps = (lastX - x + blockOffset - 1)/blockOffset; + + // prefetching + for (int j = 0; j < numSteps / 2; j++) + { + float x1 = x[0]; x += blockOffset; + float y1 = y[0]; y += blockOffset; + float x2 = x[0]; x += blockOffset; + float y2 = y[0]; y += blockOffset; + + res = PREC_FADD(res, PREC_FMUL(x1,y1)); + res = PREC_FADD(res, PREC_FMUL(x2,y2)); + + } + + if (numSteps % 2) + { + res = PREC_FADD(res, PREC_FMUL(*x,*y)); + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! 
+ + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + spgpuSdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, sdotReductionResult, blocks*sizeof(float)); + + for (int i=0; icurrentStream>>>(y, n, val); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + int first, + int last, + int baseIndex, + VALUE_TYPE val, + __device VALUE_TYPE* y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + int n=last-first+1; + y += (first-baseIndex); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, maxNForACall, val,y); + y += maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, n, val,y); + cudaCheckError("CUDA error on scat"); +} + diff --git a/cuda/spgpu/kernels/sgath.cu b/cuda/spgpu/kernels/sgath.cu new file mode 100644 index 00000000..6ccfa796 --- /dev/null +++ b/cuda/spgpu/kernels/sgath.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "gath_base.cuh" diff --git a/cuda/spgpu/kernels/snrm2.cu b/cuda/spgpu/kernels/snrm2.cu new file mode 100644 index 00000000..1186cbe9 --- /dev/null +++ b/cuda/spgpu/kernels/snrm2.cu @@ -0,0 +1,166 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +//#define USE_CUBLAS + +#define BLOCK_SIZE 320 +//#define BLOCK_SIZE 512 + +//#define ASSUME_LOCK_SYNC_PARALLELISM + +static __device__ float snrm2ReductionResult[128]; + +__global__ void spgpuSnrm2_kern(int n, float* x) +{ + __shared__ float sSum[BLOCK_SIZE]; + + float res = 0; + + float* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + int numSteps = (lastX - x + blockOffset - 1)/blockOffset; + + // prefetching + for (int j = 0; j < numSteps / 2; j++) + { + float x1 = x[0]; x += blockOffset; + float x2 = x[0]; x += blockOffset; + + res = PREC_FADD(res, PREC_FMUL(x1,x1)); + res = PREC_FADD(res, PREC_FMUL(x2,x2)); + + } + + if (numSteps % 2) + { + float x1 = x[0]; + res = PREC_FADD(res, PREC_FMUL(x1,x1)); + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // 
Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + + spgpuSnrm2_kern<<currentStream>>>(n, x); + cudaMemcpyFromSymbol(tRes, snrm2ReductionResult,blocks*sizeof(float)); + + for (int i=0; i +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSupd_xyz_krn(int n, float alpha, float beta, float gamma, float delta, + float* x, float *y, float *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + float t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (beta == 0.0) + t = PREC_FMUL(alpha,x[id]); + else + t = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + if (delta == 0.0) + z[id] = gamma * t; + else + z[id] = PREC_FADD(PREC_FMUL(gamma, t), PREC_FMUL(delta,z[id])); + y[id] = t; + } +} + + +void spgpuSupd_xyz(spgpuHandle_t handle, + int n, + float alpha, + float beta, + float gamma, + float delta, + __device float* x, + __device float* y, + __device float *z) +{ + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/sxyzw.cu b/cuda/spgpu/kernels/sxyzw.cu new file mode 100644 index 00000000..26422e55 --- /dev/null +++ b/cuda/spgpu/kernels/sxyzw.cu @@ -0,0 +1,77 @@ +/* + * spGPU - Sparse matrices on GPU library. 
+ * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSxyzw_krn(int n, float a, float b, + float c, float d, + float e, float f, + float * x, float *y, + float *z, float *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + float ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = PREC_FADD(PREC_FMUL(a, x[id]), PREC_FMUL(b,y[id])); + tz = PREC_FADD(PREC_FMUL(c, ty), PREC_FMUL(d,z[id])); + w[id] = PREC_FADD(PREC_FMUL(e, tz), PREC_FMUL(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuSxyzw(spgpuHandle_t handle, + int n, + float a, float b, + float c, float d, + float e, float f, + __device float * x, + __device float * y, + __device float * z, + __device float *w) +{ + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/zabs.cu b/cuda/spgpu/kernels/zabs.cu new file mode 100644 index 00000000..4bef9294 --- /dev/null +++ b/cuda/spgpu/kernels/zabs.cu 
@@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define RES_VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/zamax.cu b/cuda/spgpu/kernels/zamax.cu new file mode 100644 index 00000000..4e65cf22 --- /dev/null +++ b/cuda/spgpu/kernels/zamax.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "amax_base.cuh" diff --git a/cuda/spgpu/kernels/zasum.cu b/cuda/spgpu/kernels/zasum.cu new file mode 100644 index 00000000..fa40b58c --- /dev/null +++ b/cuda/spgpu/kernels/zasum.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" +#include "core.h" + +extern "C" +{ +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "asum_base.cuh" diff --git a/cuda/spgpu/kernels/zaxpby.cu b/cuda/spgpu/kernels/zaxpby.cu new file mode 100644 index 00000000..07122ff5 --- /dev/null +++ b/cuda/spgpu/kernels/zaxpby.cu @@ -0,0 +1,148 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +#if 1 +__global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + if (cuDoubleComplex_isZero(beta)) { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = cuCmul(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + z[id] = cuCfma(beta, y[id], cuCmul(alpha, x[id])); + } + } +} + +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else +__global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). 
+ + if (cuDoubleComplex_isZero(beta)) + z[id] = cuCmul(alpha,x[id]); + else + z[id] = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + } +} + + +void spgpuZaxpby_(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuZaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuZaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuZaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on daxpby"); +} +#endif +void spgpuZmaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + cuDoubleComplex tRes[128]; + + spgpuZdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, ddotReductionResult,blocks*sizeof(cuDoubleComplex)); + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! 
+ + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + double tRes[128]; + + spgpuZnrm2_kern<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, dnrm2ReductionResult,blocks*sizeof(double)); + + for (int i=0; i +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuZupd_xyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta, + cuDoubleComplex gamma, cuDoubleComplex delta, + cuDoubleComplex * x, cuDoubleComplex *y, cuDoubleComplex *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuDoubleComplex t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (cuDoubleComplex_isZero(beta)) + t = cuCmul(alpha,x[id]); + else + t = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + if (cuDoubleComplex_isZero(delta)) + z[id] = cuCmul(gamma, t); + else + z[id] = cuCfma(gamma, t, cuCmul(delta,z[id])); + y[id] = t; + } +} + + +void spgpuZupd_xyz(spgpuHandle_t handle, + int n, + cuDoubleComplex alpha, + cuDoubleComplex beta, + cuDoubleComplex gamma, + cuDoubleComplex delta, + __device cuDoubleComplex * x, + __device cuDoubleComplex * y, + __device cuDoubleComplex *z) +{ + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZupd_xyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/zxyzw.cu b/cuda/spgpu/kernels/zxyzw.cu new file mode 100644 index 00000000..c02991dd --- /dev/null +++ b/cuda/spgpu/kernels/zxyzw.cu @@ -0,0 +1,77 @@ +/* + * spGPU - Sparse matrices on GPU library. 
+ * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include +#include "core.h" + +extern "C" +{ +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuZxyzw_krn(int n, cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + cuDoubleComplex * x, cuDoubleComplex *y, + cuDoubleComplex *z, cuDoubleComplex *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuDoubleComplex ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = cuCfma(a, x[id], cuCmul(b,y[id])); + tz = cuCfma(c, ty, cuCmul(d,z[id])); + w[id] = cuCfma(e, tz, cuCmul(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuZxyzw(spgpuHandle_t handle, + int n, + cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + __device cuDoubleComplex * x, + __device cuDoubleComplex * y, + __device cuDoubleComplex * z, + __device cuDoubleComplex *w) +{ + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + 
diff --git a/cuda/spgpu/vector.h b/cuda/spgpu/vector.h new file mode 100644 index 00000000..26c3443d --- /dev/null +++ b/cuda/spgpu/vector.h @@ -0,0 +1,1326 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * Salvatore Filippone - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" + +/** \addtogroup vecFun Vectors and sparse vectors routines + * @{ + */ + + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void spgpuIgath(spgpuHandle_t handle, __device int *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device int* y) + * Integer gather from y to sparse(x). Computes the integer gather from y to xValues (using xIndices). + * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuIgath(spgpuHandle_t handle, + __device int *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device int* y); + +/** +* \fn void spgpuIscat(spgpuHandle_t handle, __device int* y, int xNnz, const __device int *xValues, const __device int *xIndices, int xBaseIndex, int beta) + * Integer scatter from sparse(x) to y. 
Computes the integer scatter from xValues to y (using xIndices). + * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (to be noted that + * y values will be multiplied with beta just for scattered values). + * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param beta the beta value + */ +void spgpuIscat(spgpuHandle_t handle, + __device int* y, + int xNnz, + const __device int *xValues, + const __device int *xIndices, + int xBaseIndex, int beta); + + + + +/** +* \fn float spgpuSdot (spgpuHandle_t handle, int n, __device float* a, __device float* b) + * Computes single precision dot product of a and b vectors. + * \param handle The spgpu handle used to call this routine + * \param n the vectors length + * \param a the first input vector + * \param b the second input vector + * \return the dot product + */ +float spgpuSdot(spgpuHandle_t handle, + int n, + __device float* a, + __device float* b); + +/** +* \fn void spgpuSmdot (spgpuHandle_t handle, float* y, int n, __device float* a, __device float* b, int count, int pitch) + * Computes single precision dot product of a and b multivectors. 
+ * \param handle the spgpu handle used to call this routine + * \param y the result, made by dot products of every vector couples from the multivectors a and b + * \param n the vectors' length + * \param a the first input multivector + * \param b the second input multivector + * \param count the number of vectors in every multivector + * \param pitch the pitch, in number of elements, of every multivectors (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.). + */ +void spgpuSmdot(spgpuHandle_t handle, + float* y, + int n, + __device float* a, + __device float* b, + int count, + int pitch); + + +/** +* \fn void spgpuSabs (spgpuHandle_t handle, __device float* y, int n, float alpha, __device float* x) + * Computes single precision (y = alpha * abs(x)) for each element in x. + * \param handle The spgpu handle used to call this routine + * \param y the resulting vector (could be x) + * \param n the vectors length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuSabs(spgpuHandle_t handle, + __device float* y, + int n, + float alpha, + __device float* x); + +/** +* \fn float spgpuSnrm2(spgpuHandle_t handle, int n, __device float* x) + * Computes the single precision Euclidean vector norm of x. + * \param handle the spgpu handle used to call this routine + * \param n the vector's length + * \param x the input vector + * \return the euclidean vector norm + */ +float spgpuSnrm2(spgpuHandle_t handle, + int n, + __device float* x); + +/** +* \fn void spgpuSmnrm2(spgpuHandle_t handle, float *y, int n, __device float *x, int count, int pitch) + * Computes the single precision Euclidean vector norm for every vector in the multivector x. 
+ * \param handle the spgpu handle used to call this routine + * \param y the array of results + * \param n the vectors' length in the x multivector + * \param x the input multivector + * \param count the number of vectors in x + * \param pitch the multivector's pitch + */ +void spgpuSmnrm2(spgpuHandle_t handle, + float *y, + int n, + __device float *x, + int count, + int pitch); + + +/** +* \fn void spgpuSscal(spgpuHandle_t handle, __device float *y, int n, float alpha, __device float *x) + * Computes the single precision y = alpha * x. y could be exactly x (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param y the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuSscal(spgpuHandle_t handle, + __device float *y, + int n, + float alpha, + __device float *x); + +/** +* \fn void spgpuSaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x) + * Computes the single precision z = beta * y + alpha * x. z could be exactly x or y (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + */ +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x); + + +void spgpuSupd_xyz(spgpuHandle_t handle, + int n, + float alpha, + float beta, + float gamma, + float delta, + __device float* x, + __device float *y, + __device float *z) +; + + void spgpuSxyzw(spgpuHandle_t handle, + int n, + float a, float b, + float c, float d, + float e, float f, + __device float* x, + __device float *y, + __device float *z, + __device float *w) +; + +/** +* \fn void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x, int count, int pitch) + * Computes the single precision z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ + +void spgpuSmaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x, + int count, int pitch); + +/** +* \fn void spgpuSaxy(spgpuHandle_t handle, __device float *z, int n, float alpha, __device float *x, __device float* y) + * Computes the single precision z = alpha * x * y. z could be exactly x or y (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the first input vector + * \param y the second input vector + */ +void spgpuSaxy(spgpuHandle_t handle, + __device float *z, + int n, + float alpha, + __device float *x, + __device float *y); + +/** +* \fn void spgpuSaxypbz(spgpuHandle_t handle, __device float *w, int n, float beta, __device float *z, float alpha, __device float* x, __device float *y) + * Computes the single precision w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + */ +void spgpuSaxypbz(spgpuHandle_t handle, + __device float *w, + int n, + float beta, + __device float *z, + float alpha, + __device float* x, + __device float *y); + +/** +* \fn void spgpuSmaxy(spgpuHandle_t handle, __device float *z, int n, float alpha, __device float *x, __device float* y, int count, int pitch) + * Computes the single precision z = alpha * x * y for z,x and y multivectors. z could be exactly x or y (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param z the resulting multivector + * \param n the vectors' length in the multivectors + * \param alpha the alpha value + * \param x the first input multivector + * \param y the second input multivector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ +void spgpuSmaxy(spgpuHandle_t handle, + __device float *z, + int n, + float alpha, + __device float* x, + __device float *y, + int count, + int pitch); + +/** +* \fn void spgpuSmaxypbz(spgpuHandle_t handle, __device float *w, int n, float beta, __device float *z, float alpha, __device float* x, __device float *y, int count, int pitch) + * Computes the single precision w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + * \param count the number of vectors in w,z,x and y multivectors + * \param pitch the multivectors' pitch + */ + +void spgpuSmaxypbz(spgpuHandle_t handle, + __device float *w, + int n, + float beta, + __device float *z, + float alpha, + __device float* x, + __device float *y, + int count, + int pitch); + +/** +* \fn void spgpuSgath(spgpuHandle_t handle, __device float *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device float* y) + * Single precision gather from y to sparse(x). Computes the single precision gather from y to xValues (using xIndices). 
+ * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuSgath(spgpuHandle_t handle, + __device float *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device float* y); + +/** +* \fn void spgpuSscat(spgpuHandle_t handle, __device float* y, int xNnz, const __device float *xValues, const __device int *xIndices, int xBaseIndex, float beta) + * Single precision scatter from sparse(x) to y. Computes the single precision scatter from xValues to y (using xIndices). + * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (to be noted that + * y values will be multiplied with beta just for scattered values). + * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). 
+ * \param beta the beta value + */ +void spgpuSscat(spgpuHandle_t handle, + __device float* y, + int xNnz, + const __device float *xValues, + const __device int *xIndices, + int xBaseIndex, float beta); + + +float spgpuSasum(spgpuHandle_t handle, + int n, + float* x); + +float spgpuSamax(spgpuHandle_t handle, + int n, + float* x); + +void spgpuSmasum(spgpuHandle_t handle, + float* y, + int n, + float* x, + int count, + int pitch); + +void spgpuSmamax(spgpuHandle_t handle, + float* y, + int n, + float* x, + int count, + int pitch); + + +/** +* \fn void spgpuDscal(spgpuHandle_t handle, __device double *y, int n, double alpha, __device double *x) + * Computes the double precision y = alpha * x. y could be exactly x (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param y the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuDscal(spgpuHandle_t handle, + __device double *y, + int n, + double alpha, + __device double *x); + +/** +* \fn double spgpuDdot (spgpuHandle_t handle, int n, __device double* a, __device double* b) + * Computes double precision dot product of a and b vectors. + * \param handle The spgpu handle used to call this routine + * \param n the vectors length + * \param a the first input vector + * \param b the second input vector + * \return the dot product + */ + double spgpuDdot(spgpuHandle_t handle, + int n, + __device double* a, + __device double* b); + +/** +* \fn void spgpuDabs (spgpuHandle_t handle, __device double* y, int n, double alpha, __device double* x) + * Computes double precision (y = alpha * abs(x)) for each element in x. 
+ * \param handle The spgpu handle used to call this routine + * \param y the resulting vector (could be x) + * \param n the vectors length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuDabs(spgpuHandle_t handle, + __device double* y, + int n, + double alpha, + __device double* x); + +/** +* \fn void spgpuDmdot (spgpuHandle_t handle, double* y, int n, __device double* a, __device double* b, int count, int pitch) + * Computes double precision dot product of a and b multivectors. + * \param handle the spgpu handle used to call this routine + * \param y the result, made by dot products of every vector couples from the multivectors a and b + * \param n the vectors' length + * \param a the first input multivector + * \param b the second input multivector + * \param count the number of vectors in every multivector + * \param pitch the pitch, in number of elements, of every multivectors (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.). + */ +void spgpuDmdot(spgpuHandle_t handle, + double* y, + int n, + __device double* a, + __device double* b, + int count, + int pitch); + + +/** +* \fn double spgpuDnrm2(spgpuHandle_t handle, int n, __device double* x) + * Computes the double precision Euclidean vector norm of x. + * \param handle the spgpu handle used to call this routine + * \param n the vector's length + * \param x the input vector + * \return the euclidean vector norm + */ + double spgpuDnrm2(spgpuHandle_t handle, + int n, + __device double* x); + +/** +* \fn void spgpuDmnrm2(spgpuHandle_t handle, double *y, int n, __device double *x, int count, int pitch) + * Computes the double precision Euclidean vector norm for every vector in the multivector x. 
+ * \param handle the spgpu handle used to call this routine + * \param y the array of results + * \param n the vectors' length in the x multivector + * \param x the input multivector + * \param count the number of vectors in x + * \param pitch the multivector's pitch + */ + +void spgpuDmnrm2(spgpuHandle_t handle, + double *y, + int n, + __device double *x, + int count, + int pitch); + +/** +* \fn void spgpuDaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, __device double *y, double alpha, __device double* x) + * Computes the double precision z = beta * y + alpha * x. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + */ +void spgpuDaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x); + + +void spgpuDupd_xyz(spgpuHandle_t handle, + int n, + double alpha, + double beta, + double gamma, + double delta, + __device double* x, + __device double *y, + __device double *z) +; + + void spgpuDxyzw(spgpuHandle_t handle, + int n, + double a, double b, + double c, double d, + double e, double f, + __device double* x, + __device double *y, + __device double *z, + __device double *w) +; + + /** +* \fn void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, __device double *y, double alpha, __device double* x, int count, int pitch) + * Computes the double precision z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivector's pitch + */ + void spgpuDmaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x, + int count, int pitch); + +/** +* \fn void spgpuDaxy(spgpuHandle_t handle, __device double *z, int n, double alpha, __device double *x, __device double* y) + * Computes the double precision z = alpha * x * y. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the first input vector + * \param y the second input vector + */ +void spgpuDaxy(spgpuHandle_t handle, + __device double *z, + int n, + double alpha, + __device double *x, + __device double *y); + +/** +* \fn void spgpuDaxypbz(spgpuHandle_t handle, __device double *w, int n, double beta, __device double *z, double alpha, __device double* x, __device double *y) + * Computes the double precision w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + */void spgpuDaxypbz(spgpuHandle_t handle, + __device double *w, + int n, + double beta, + __device double *z, + double alpha, + __device double* x, + __device double *y); + +/** +* \fn void spgpuDmaxy(spgpuHandle_t handle, __device double *z, int n, double alpha, __device double *x, __device double* y, int count, int pitch) + * Computes the double precision z = alpha * x * y for z,x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting multivector + * \param n the vectors' length in the multivectors + * \param alpha the alpha value + * \param x the first input multivector + * \param y the second input multivector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ +void spgpuDmaxy(spgpuHandle_t handle, + __device double *z, + int n, + double alpha, + __device double* x, + __device double *y, + int count, + int pitch); + +/** +* \fn void spgpuDmaxypbz(spgpuHandle_t handle, __device double *w, int n, double beta, __device double *z, double alpha, __device double* x, __device double *y, int count, int pitch) + * Computes the double precision w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + * \param count the number of vectors in w,z,x and y multivectors + * \param pitch the multivectors' pitch + */ + +void spgpuDmaxypbz(spgpuHandle_t handle, + __device double *w, + int n, + double beta, + __device double *z, + double alpha, + __device double* x, + __device double *y, + int count, + int pitch); + +/** +* \fn void spgpuDgath(spgpuHandle_t handle, __device double *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device double* y) + * Double precision gather from y to sparse(x). Computes the double precision gather from y to xValues (using xIndices). + * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuDgath(spgpuHandle_t handle, + __device double *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device double* y); + +/** +* \fn void spgpuDscat(spgpuHandle_t handle, __device double* y, int xNnz, const __device double *xValues, const __device int *xIndices, int xBaseIndex, double beta) + * Double precision scatter from sparse(x) to y. Computes the double precision scatter from xValues to y (using xIndices). + * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (note that + * y values are multiplied by beta only at the scattered positions). 
+ * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param beta the beta value + */ +void spgpuDscat(spgpuHandle_t handle, + __device double* y, + int xNnz, + const __device double *xValues, + const __device int *xIndices, + int xBaseIndex, double beta); + +double spgpuDasum(spgpuHandle_t handle, + int n, + double* x); + +double spgpuDamax(spgpuHandle_t handle, + int n, + double* x); + +void spgpuDmasum(spgpuHandle_t handle, + double* y, + int n, + double* x, + int count, + int pitch); + +void spgpuDmamax(spgpuHandle_t handle, + double* y, + int n, + double* x, + int count, + int pitch); + +/** +* \fn cuFloatComplex spgpuCdot (spgpuHandle_t handle, int n, __device cuFloatComplex* a, __device cuFloatComplex* b) + * Computes single precision complex dot product of a and b vectors. + * \param handle The spgpu handle used to call this routine + * \param n the vectors' length + * \param a the first input vector + * \param b the second input vector + * \return the dot product + */ +cuFloatComplex spgpuCdot(spgpuHandle_t handle, + int n, + __device cuFloatComplex* a, + __device cuFloatComplex* b); + +/** +* \fn void spgpuCmdot (spgpuHandle_t handle, cuFloatComplex* y, int n, __device cuFloatComplex* a, __device cuFloatComplex* b, int count, int pitch) + * Computes single precision complex dot product of a and b multivectors. 
+ * \param handle the spgpu handle used to call this routine + * \param y the result, made by dot products of every vector couples from the multivectors a and b + * \param n the vectors' length + * \param a the first input multivector + * \param b the second input multivector + * \param count the number of vectors in every multivector + * \param pitch the pitch, in number of elements, of every multivectors (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.). + */ +void spgpuCmdot(spgpuHandle_t handle, + cuFloatComplex* y, + int n, + __device cuFloatComplex* a, + __device cuFloatComplex* b, + int count, + int pitch); + + +/** +* \fn void spgpuCabs (spgpuHandle_t handle, __device float* y, int n, float alpha, __device cuFloatComplex* x) + * Computes single precision complex (y = alpha * abs(x)) for each element in x. + * \param handle The spgpu handle used to call this routine + * \param y the resulting vector (could be x) + * \param n the vectors length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuCabs(spgpuHandle_t handle, + __device cuFloatComplex* y, + int n, + cuFloatComplex alpha, + __device cuFloatComplex* x); + +/** +* \fn float spgpuCnrm2(spgpuHandle_t handle, int n, __device cuFloatComplex* x) + * Computes the single precision complex Euclidean vector norm of x. + * \param handle the spgpu handle used to call this routine + * \param n the vector's length + * \param x the input vector + * \return the euclidean vector norm + */ +float spgpuCnrm2(spgpuHandle_t handle, + int n, + __device cuFloatComplex* x); + +/** +* \fn void spgpuCmnrm2(spgpuHandle_t handle, float *y, int n, __device cuFloatComplex *x, int count, int pitch) + * Computes the single precision complex Euclidean vector norm for every vector in the multivector x. 
+ * \param handle the spgpu handle used to call this routine + * \param y the array of results + * \param n the vectors' length in the x multivector + * \param x the input multivector + * \param count the number of vectors in x + * \param pitch the multivector's pitch + */ +void spgpuCmnrm2(spgpuHandle_t handle, + float *y, + int n, + __device cuFloatComplex *x, + int count, + int pitch); + + +/** +* \fn void spgpuCscal(spgpuHandle_t handle, __device cuFloatComplex *y, int n, cuFloatComplex alpha, __device cuFloatComplex *x) + * Computes the single precision complex y = alpha * x. y could be exactly x (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param y the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuCscal(spgpuHandle_t handle, + __device cuFloatComplex *y, + int n, + cuFloatComplex alpha, + __device cuFloatComplex *x); + +/** +* \fn void spgpuCaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x) + * Computes the single precision complex z = beta * y + alpha * x. z could be exactly x or y (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + */ +void spgpuCaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x); + + +void spgpuCupd_xyz(spgpuHandle_t handle, + int n, + cuFloatComplex alpha, + cuFloatComplex beta, + cuFloatComplex gamma, + cuFloatComplex delta, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + __device cuFloatComplex *z) +; + + void spgpuCxyzw(spgpuHandle_t handle, + int n, + cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + __device cuFloatComplex *z, + __device cuFloatComplex *w) +; + + +/** +* \fn void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x, int count, int pitch) + * Computes the single precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ + +void spgpuCmaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x, + int count, int pitch); + +/** +* \fn void spgpuCaxy(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex alpha, __device cuFloatComplex *x, __device cuFloatComplex* y) + * Computes the single precision complex z = alpha * x * y. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the first input vector + * \param y the second input vector + */ +void spgpuCaxy(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex alpha, + __device cuFloatComplex *x, + __device cuFloatComplex *y); + +/** +* \fn void spgpuCaxypbz(spgpuHandle_t handle, __device cuFloatComplex *w, int n, cuFloatComplex beta, __device cuFloatComplex *z, cuFloatComplex alpha, __device cuFloatComplex* x, __device cuFloatComplex *y) + * Computes the single precision complex w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + */ +void spgpuCaxypbz(spgpuHandle_t handle, + __device cuFloatComplex *w, + int n, + cuFloatComplex beta, + __device cuFloatComplex *z, + cuFloatComplex alpha, + __device cuFloatComplex* x, + __device cuFloatComplex *y); + +/** +* \fn void spgpuCmaxy(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex alpha, __device cuFloatComplex *x, __device cuFloatComplex* y, int count, int pitch) + * Computes the single precision complex z = alpha * x * y for z,x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting multivector + * \param n the vectors' length in the multivectors + * \param alpha the alpha value + * \param x the first input multivector + * \param y the second input multivector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ +void spgpuCmaxy(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex alpha, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + int count, + int pitch); + +/** +* \fn void spgpuCmaxypbz(spgpuHandle_t handle, __device cuFloatComplex *w, int n, cuFloatComplex beta, __device cuFloatComplex *z, cuFloatComplex alpha, __device cuFloatComplex* x, __device cuFloatComplex *y, int count, int pitch) + * Computes the single precision complex w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + * \param count the number of vectors in w,z,x and y multivectors + * \param pitch the multivectors' pitch + */ + +void spgpuCmaxypbz(spgpuHandle_t handle, + __device cuFloatComplex *w, + int n, + cuFloatComplex beta, + __device cuFloatComplex *z, + cuFloatComplex alpha, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + int count, + int pitch); + +/** +* \fn void spgpuCgath(spgpuHandle_t handle, __device cuFloatComplex *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device cuFloatComplex* y) + * Single precision complex gather from y to sparse(x). Computes the single precision complex gather from y to xValues (using xIndices). + * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuCgath(spgpuHandle_t handle, + __device cuFloatComplex *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device cuFloatComplex* y); + +/** +* \fn void spgpuCscat(spgpuHandle_t handle, __device cuFloatComplex* y, int xNnz, const __device cuFloatComplex *xValues, const __device int *xIndices, int xBaseIndex, cuFloatComplex beta) + * Single precision complex scatter from sparse(x) to y. Computes the single precision complex scatter from xValues to y (using xIndices). 
+ * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (to be noted that + * y values will be multiplied with beta just for scattered values). + * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param beta the beta value + */ +void spgpuCscat(spgpuHandle_t handle, + __device cuFloatComplex* y, + int xNnz, + const __device cuFloatComplex *xValues, + const __device int *xIndices, + int xBaseIndex, cuFloatComplex beta); + +float spgpuCasum(spgpuHandle_t handle, + int n, + cuFloatComplex* x); + +float spgpuCamax(spgpuHandle_t handle, + int n, + cuFloatComplex* x); + +void spgpuCmasum(spgpuHandle_t handle, + float* y, + int n, + cuFloatComplex* x, + int count, + int pitch); + +void spgpuCmamax(spgpuHandle_t handle, + float* y, + int n, + cuFloatComplex* x, + int count, + int pitch); + +/** +* \fn void spgpuZscal(spgpuHandle_t handle, __device cuDoubleComplex *y, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x) + * Computes the double precision complex y = alpha * x. y could be exactly x (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param y the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuZscal(spgpuHandle_t handle, + __device cuDoubleComplex *y, + int n, + cuDoubleComplex alpha, + __device cuDoubleComplex *x); + +/** +* \fn cuDoubleComplex spgpuZdot (spgpuHandle_t handle, int n, __device cuDoubleComplex* a, __device cuDoubleComplex* b) + * Computes double precision complex dot product of a and b vectors. 
+ * \param handle The spgpu handle used to call this routine + * \param n the vectors' length + * \param a the first input vector + * \param b the second input vector + * \return the dot product + */ + cuDoubleComplex spgpuZdot(spgpuHandle_t handle, + int n, + __device cuDoubleComplex* a, + __device cuDoubleComplex* b); + +/** +* \fn void spgpuZmdot (spgpuHandle_t handle, cuDoubleComplex* y, int n, __device cuDoubleComplex* a, __device cuDoubleComplex* b, int count, int pitch) + * Computes double precision complex dot product of a and b multivectors. + * \param handle the spgpu handle used to call this routine + * \param y the results: one dot product for each pair of corresponding vectors in the multivectors a and b + * \param n the vectors' length + * \param a the first input multivector + * \param b the second input multivector + * \param count the number of vectors in every multivector + * \param pitch the pitch, in number of elements, of every multivectors (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.). + */ +void spgpuZmdot(spgpuHandle_t handle, + cuDoubleComplex* y, + int n, + __device cuDoubleComplex* a, + __device cuDoubleComplex* b, + int count, + int pitch); + +/** +* \fn void spgpuZabs (spgpuHandle_t handle, __device double* y, int n, double alpha, __device cuDoubleComplex* x) + * Computes double precision complex (y = alpha * abs(x)) for each element in x. + * NOTE(review): the \fn signature above uses double* y and double alpha, while the declaration below uses cuDoubleComplex for both; confirm which is intended. + * \param handle The spgpu handle used to call this routine + * \param y the resulting vector (could be x) + * \param n the vectors' length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuZabs(spgpuHandle_t handle, + __device cuDoubleComplex* y, + int n, + cuDoubleComplex alpha, + __device cuDoubleComplex* x); + +/** +* \fn double spgpuZnrm2(spgpuHandle_t handle, int n, __device cuDoubleComplex* x) + * Computes the double precision complex Euclidean vector norm of x. 
+ * \param handle the spgpu handle used to call this routine + * \param n the vector's length + * \param x the input vector + * \return the euclidean vector norm + */ + double spgpuZnrm2(spgpuHandle_t handle, + int n, + __device cuDoubleComplex* x); + +/** +* \fn void spgpuZmnrm2(spgpuHandle_t handle, double *y, int n, __device cuDoubleComplex *x, int count, int pitch) + * Computes the double precision complex Euclidean vector norm for every vector in the multivector x. + * \param handle the spgpu handle used to call this routine + * \param y the array of results + * \param n the vectors' length in the x multivector + * \param x the input multivector + * \param count the number of vectors in x + * \param pitch the multivector's pitch + */ + +void spgpuZmnrm2(spgpuHandle_t handle, + double *y, + int n, + __device cuDoubleComplex *x, + int count, + int pitch); + +/** +* \fn void spgpuZaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex* x) + * Computes the double precision complex z = beta * y + alpha * x. z could be exactly x or y (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + */ +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x); + + +void spgpuZupd_xyz(spgpuHandle_t handle, + int n, + cuDoubleComplex alpha, + cuDoubleComplex beta, + cuDoubleComplex gamma, + cuDoubleComplex delta, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + __device cuDoubleComplex *z) +; + + + void spgpuZxyzw(spgpuHandle_t handle, + int n, + cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + __device cuDoubleComplex *z, + __device cuDoubleComplex *w) +; + + +/** +* \fn void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex* x, int count, int pitch) + * Computes the double precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivector's pitch + */ + + void spgpuZmaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + int count, int pitch); + +/** +* \fn void spgpuZaxy(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x, __device cuDoubleComplex* y) + * Computes the double precision complex z = alpha * x * y. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length in the x multivector + * \param alpha the alpha value + * \param x the second input vector + * \param y the first input vector + */ +void spgpuZaxy(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex alpha, + __device cuDoubleComplex *x, + __device cuDoubleComplex *y); + +/** +* \fn void spgpuZaxypbz(spgpuHandle_t handle, __device cuDoubleComplex *w, int n, cuDoubleComplex beta, __device cuDoubleComplex *z, cuDoubleComplex alpha, __device cuDoubleComplex* x, __device cuDoubleComplex *y) + * Computes the double precision complex w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + */void spgpuZaxypbz(spgpuHandle_t handle, + __device cuDoubleComplex *w, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *z, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y); + +/** +* \fn void spgpuZmaxy(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x, __device cuDoubleComplex* y, int count, int pitch) + * Computes the double precision complex z = alpha * x * y for z,x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting multivector + * \param n the vectors' length in the multivectors + * \param alpha the alpha value + * \param x the first input multivector + * \param y the second input multivector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ +void spgpuZmaxy(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + int count, + int pitch); + +/** +* \fn void spgpuZmaxypbz(spgpuHandle_t handle, __device cuDoubleComplex *w, int n, cuDoubleComplex beta, __device cuDoubleComplex *z, cuDoubleComplex alpha, __device cuDoubleComplex* x, __device cuDoubleComplex *y, int count, int pitch) + * Computes the double precision complex w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. 
+ * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + * \param count the number of vectors in w,z,x and y multivectors + * \param pitch the multivectors' pitch + */ + +void spgpuZmaxypbz(spgpuHandle_t handle, + __device cuDoubleComplex *w, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *z, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + int count, + int pitch); + +/** +* \fn void spgpuZgath(spgpuHandle_t handle, __device cuDoubleComplex *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device cuDoubleComplex* y) + * Computes the double precision complex gather from y to xValues (using xIndices). + * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuZgath(spgpuHandle_t handle, + __device cuDoubleComplex *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device cuDoubleComplex* y); + +/** +* \fn void spgpuZscat(spgpuHandle_t handle, __device cuDoubleComplex* y, int xNnz, const __device cuDoubleComplex *xValues, const __device int *xIndices, int xBaseIndex, cuDoubleComplex beta) + * Computes the double precision complex scatter from xValues to y (using xIndices). + * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (to be noted that + * y values will be multiplied with beta just for scattered values). 
+ * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param beta the beta value + */ +void spgpuZscat(spgpuHandle_t handle, + __device cuDoubleComplex* y, + int xNnz, + const __device cuDoubleComplex *xValues, + const __device int *xIndices, + int xBaseIndex, cuDoubleComplex beta); + + +double spgpuZasum(spgpuHandle_t handle, + int n, + cuDoubleComplex* x); + +double spgpuZamax(spgpuHandle_t handle, + int n, + cuDoubleComplex* x); + +void spgpuZmasum(spgpuHandle_t handle, + double* y, + int n, + cuDoubleComplex* x, + int count, + int pitch); + +void spgpuZmamax(spgpuHandle_t handle, + double* y, + int n, + cuDoubleComplex* x, + int count, + int pitch); + +/** @}*/ + +void spgpuIsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + int val, + __device int *y); + +void spgpuSsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + float val, + __device float *y); + +void spgpuDsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + double val, + __device double *y); + +void spgpuCsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + cuFloatComplex val, + __device cuFloatComplex* y); + +void spgpuZsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + cuDoubleComplex val, + __device cuDoubleComplex* y); + + +#ifdef __cplusplus +} +#endif diff --git a/cuda/svectordev.c b/cuda/svectordev.c new file mode 100644 index 00000000..e1c43b5e --- /dev/null +++ b/cuda/svectordev.c @@ -0,0 +1,341 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ 
+ + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + + +#include +#include +//#include "utils.h" +//#include "common.h" +#include "svectordev.h" + + +/* Forwards buff and d_p to registerMappedMemory with a byte count of n*sizeof(float); dummy is not used. */ +int registerMappedFloat(void *buff, void **d_p, int n, float dummy) +{ + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(float)); +} + +/* Passes hostVec and the device buffer v_ to writeRemoteBuffer with a byte count of pitch_*count_*sizeof(float). Returns that call's status; a non-zero status is also reported on stderr. */ +int writeMultiVecDeviceFloat(void* deviceVec, float* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(float)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloat",i); + } + return(i); +} + +/* Same as writeMultiVecDeviceFloat; the ld argument is accepted but not used. */ +int writeMultiVecDeviceFloatR2(void* deviceVec, float* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceFloat(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatR2",i); + } + return(i); +} + +/* Passes hostVec and the device buffer v_ to readRemoteBuffer with a byte count of pitch_*count_*sizeof(float). Returns that call's status; a non-zero status is also reported on stderr. */ +int readMultiVecDeviceFloat(void* deviceVec, float* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(float)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i); + } + return(i); +} + +/* Same as readMultiVecDeviceFloat; the ld argument is accepted but not used. */ +int readMultiVecDeviceFloatR2(void* deviceVec, float* hostVec, int ld) +{ int i; + i = readMultiVecDeviceFloat(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatR2",i); + } + return(i); +} + +/* Calls spgpuSsetscal with first, last, indexBase and val on the device buffer v_ of devMultiVecX. Always returns 0. */ +int setscalMultiVecDeviceFloat(float val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int 
indexBase, void* devMultiVecX) +{ int i=0; + float beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_cudaGetHandle(); + /* Reject requests longer than either the index or the value vector. */ + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + /* beta is 1 only for INS_ADD; INS_OVERWRITE and any other value use 0. */ + if (dupl == INS_OVERWRITE) + beta = 0.0; + else if (dupl == INS_ADD) + beta = 1.0; + else + beta = 0.0; + + spgpuSscat(handle, (float *) devVecX->v_, n, (float*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +/* Variant of igathMultiVecDeviceFloat whose index argument is a MultiVectDevice; its v_ buffer is passed on as the index array. */ +int igathMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceFloat(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +/* Calls spgpuSgath to gather n values from vector vectorId of deviceVec (v_ + vectorId*pitch_) into host_values, starting at hfirst-indexBase, using indexes starting at first-indexBase. Always returns 0. */ +int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes; + float *hv = (float *) host_values; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuSgath(handle,hv, n, idx,indexBase, (float *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, float beta) +{ + int i; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= 
iscatMultiVecDeviceFloat(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, float beta) +{ int i=0; + float *hv = (float *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuSscat(handle, (float *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, + alpha, (float *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceFloat(float* y_res, int n, 
void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_, + devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, + float beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuSaxpby(handle,(float*)devVecY->v_+pitch*j, n, beta, + (float*)devVecY->v_+pitch*j, alpha,(float*) devVecX->v_+pitch*j); + return(i); +} + +int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSupd_xyz(handle,n, alpha,beta,gamma,delta, + (float*)devVecX->v_,(float*) devVecY->v_,(float*) devVecZ->v_); + return(i); +} + + +int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice 
*devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSxyzw(handle,n, a,b,c,d,e,f, + (float*)devVecX->v_,(float*) devVecY->v_, + (float*) devVecZ->v_,(float*) devVecW->v_); + return(i); +} + +int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSmaxy(handle, (float*)devVecB->v_, n, alpha, (float*)devVecA->v_, + (float*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, + void *deviceVecB, float beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuSmaxypbz(handle, (float*)devVecZ->v_, n, beta, (float*)devVecZ->v_, + alpha, (float*) devVecA->v_, (float*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + +int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t 
handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSabs(handle, (float*)devVecB->v_, n, alpha, (float*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_cudaGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuSabs(handle, (float*)devVecA->v_, n, alpha, (float*)devVecA->v_); + + return(i); +} + diff --git a/cuda/svectordev.h b/cuda/svectordev.h new file mode 100644 index 00000000..887a7755 --- /dev/null +++ b/cuda/svectordev.h @@ -0,0 +1,82 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +/* Prototypes of the float multi-vector device routines implemented in svectordev.c. */ +#pragma once +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" +#include "vector.h" +/* Mapped-memory registration and host/device transfer wrappers; the R2 variants take an extra ld argument. */ +int registerMappedFloat(void *, void **, int, float); +int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); +int writeMultiVecDeviceFloatR2(void* deviceMultiVec, float* hostMultiVec, int ld); +int readMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); +int readMultiVecDeviceFloatR2(void* deviceMultiVec, float* hostMultiVec, int ld); + +int setscalMultiVecDeviceFloat(float val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceFloat(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); +/* Indexed gather/scatter; the VecIdx variants take the index list as a device multi-vector handle. */ +int igathMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, float beta); +int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, float beta); +/* Scaling and reductions; the y_res routines write their results through that pointer. */ +int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA); +int 
nrm2MultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int amaxMultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); +/* Element-wise vector updates; each takes the operand length n. */ +int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); +int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); +int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, + void *deviceVecB, float beta, void *deviceVecZ); +int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA); +int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, void *deviceVecB); + + diff --git a/cuda/vectordev.c b/cuda/vectordev.c new file mode 100644 index 00000000..db976fe9 --- /dev/null +++ b/cuda/vectordev.c @@ -0,0 +1,200 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. 
The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + + +#include <stdlib.h> +#include <stdio.h> +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" +/* Fills a parameter struct: pitch is size when count==1, otherwise size rounded up so one vector spans a multiple of 256 bytes, or 0 for an unknown elementType. */ +//new +MultiVectorDeviceParams getMultiVectorDeviceParams(unsigned int count, unsigned int size, + unsigned int elementType) +{ + struct MultiVectorDeviceParams params; + + if (count == 1) + params.pitch = size; + else + if (elementType == SPGPU_TYPE_INT) + { + //fprintf(stderr,"Getting parms for an INT vector\n"); + params.pitch = (((size*sizeof(int) + 255)/256)*256)/sizeof(int); + } + else if (elementType == SPGPU_TYPE_DOUBLE) + { + //fprintf(stderr,"Getting parms for a DOUBLE vector\n"); + params.pitch = (((size*sizeof(double) + 255)/256)*256)/sizeof(double); + } + else if (elementType == SPGPU_TYPE_FLOAT) + { + params.pitch = (((size*sizeof(float) + 255)/256)*256)/sizeof(float); + } + else if (elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + params.pitch = (((size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex); + } + else if (elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + params.pitch = (((size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex); + } + else + params.pitch = 0; + + params.elementType = elementType; + + params.count = count; + params.size = size; + + return params; + +} +//new +int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *params) +{ + if (params->pitch == 0) + return SPGPU_UNSUPPORTED; // Unsupported params + /* Allocates the descriptor, stores it in *remoteMultiVec, then allocates the device buffer for the element type. NOTE(review): the malloc result is not checked before use - confirm which status code should be returned on failure. */ + struct MultiVectDevice *tmp = (struct MultiVectDevice *)malloc(sizeof(struct MultiVectDevice)); + *remoteMultiVec = (void *)tmp; tmp->v_ = NULL; /* keep v_ defined when no buffer ends up allocated */ + tmp->size_ = params->size; + tmp->count_ = params->count; + + if (params->elementType == SPGPU_TYPE_INT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (((params->size*sizeof(int) + 255)/256)*256)/sizeof(int); + //fprintf(stderr,"Allocating an INT vector %ld\n",tmp->pitch_*tmp->count_*sizeof(int)); + + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) 
tmp->pitch_)*params->count*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (((params->size*sizeof(float) + 255)/256)*256)/sizeof(float); + + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) tmp->pitch_)*params->count*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(double) + 255)/256)*256)/sizeof(double); + //fprintf(stderr,"Allocating a DOUBLE vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double)); + + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) tmp->pitch_)*tmp->count_*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex); + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) tmp->pitch_)*tmp->count_*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex); + return allocRemoteBuffer((void **)&(tmp->v_), + ((size_t) tmp->pitch_)*tmp->count_*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return SPGPU_SUCCESS; // Success +} + +/* Thin wrapper: returns the status of unregisterMappedMemory(buff). */ +int unregisterMapped(void *buff) +{ + return unregisterMappedMemory(buff); +} +/* Releases the device buffer through freeRemoteBuffer and then frees the descriptor; a NULL deviceVec is ignored. */ +void freeMultiVecDevice(void* deviceVec) +{ + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // fprintf(stderr,"freeMultiVecDevice\n"); + if (devVec != NULL) { + //fprintf(stderr,"Before freeMultiVecDevice% ld\n",devVec->pitch_*devVec->count_*sizeof(double)); + freeRemoteBuffer(devVec->v_); + free(deviceVec); + } +} +/* Builds the parameters with getMultiVectorDeviceParams, calls allocMultiVecDevice and logs to stderr on a non-zero status, which is returned. */ +int 
FallocMultiVecDevice(void** deviceMultiVec, unsigned int count, + unsigned int size, unsigned int elementType) +{ int i; + struct MultiVectorDeviceParams p; + + p = getMultiVectorDeviceParams(count, size, elementType); + i = allocMultiVecDevice(deviceMultiVec, &p); + //cudaSync(); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d, %u %u \n","FallocMultiVecDevice",i, count, size); + } + return(i); +} +/* Returns the size_ field of the descriptor. */ +int getMultiVecDeviceSize(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->size_; + return(i); +} +/* Returns the count_ field of the descriptor. */ +int getMultiVecDeviceCount(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->count_; + return(i); +} +/* Returns the pitch_ field of the descriptor. */ +int getMultiVecDevicePitch(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->pitch_; + return(i); +} + diff --git a/cuda/vectordev.h b/cuda/vectordev.h new file mode 100644 index 00000000..93cf1189 --- /dev/null +++ b/cuda/vectordev.h @@ -0,0 +1,88 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. 
*/ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#pragma once +//#include "utils.h" +#include "cuda_runtime.h" +//#include "common.h" +//#include "cintrf.h" +#include "cuda_util.h" +#include +/* Descriptor of count_ vectors of size_ elements kept in one device allocation v_. */ +struct MultiVectDevice +{ + // number of vectors + int count_; + + //number of elements for a single vector + int size_; + + //pitch in number of elements + int pitch_; + + // Vectors in device memory (single allocation) + void *v_; +}; +/* Allocation request produced by getMultiVectorDeviceParams and consumed by allocMultiVecDevice. */ +typedef struct MultiVectorDeviceParams +{ + // number of vectors + unsigned int count; //1 for a simple vector + + // The resulting allocation will be pitch*s*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Size of a single vector (in number of elements). 
+ unsigned int size; +} MultiVectorDeviceParams; + +/* Values of the dupl argument tested by geinsMultiVecDeviceFloat: overwrite the target or add to it. */ +#define INS_OVERWRITE 0 +#define INS_ADD 1 + + +int unregisterMapped(void *); + +MultiVectorDeviceParams getMultiVectorDeviceParams(unsigned int count, + unsigned int size, + unsigned int elementType); +/* Allocation, release and field accessors for the MultiVectDevice descriptor (passed around as void*). */ +int FallocMultiVecDevice(void** deviceMultiVec, unsigned count, + unsigned int size, unsigned int elementType); +void freeMultiVecDevice(void* deviceVec); +int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *params); +int getMultiVecDeviceSize(void* deviceVec); +int getMultiVecDeviceCount(void* deviceVec); +int getMultiVecDevicePitch(void* deviceVec); + diff --git a/cuda/z_cusparse_mod.F90 b/cuda/z_cusparse_mod.F90 new file mode 100644 index 00000000..a4f15455 --- /dev/null +++ b/cuda/z_cusparse_mod.F90 @@ -0,0 +1,313 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +! bind(c) interfaces to the z_* (complex(c_double_complex)) CSRG device routines; the HYBG ones exist only when PSB_CUDA_SHORT_VERSION <= 10. +module z_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: z_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type z_Cmat + +#if PSB_CUDA_SHORT_VERSION <= 10 + type, bind(c) :: z_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type z_Hmat +#endif + + interface CSRGDeviceFree + function z_CSRGDeviceFree(Mat) & + & bind(c,name="z_CSRGDeviceFree") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function z_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function z_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function z_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function 
z_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatIndexBase + end interface + +#if PSB_CUDA_SHORT_VERSION <= 10 + interface CSRGDeviceCsrsmAnalysis + function z_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGDeviceCsrsmAnalysis + end interface +#else + interface CSRGIsNullSvBuffer + function z_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="z_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGIsNullSvBuffer + end interface +#endif + + interface CSRGDeviceAlloc + function z_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="z_CSRGDeviceAlloc") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function z_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function z_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="z_CSRGDeviceGetParms") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function z_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function z_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spsvCSRGDevice") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function z_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spmvCSRGDevice") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + 
type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function z_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_CSRGHost2Device") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function z_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_CSRGDevice2Host") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_CSRGDevice2Host + end interface + +#if PSB_CUDA_SHORT_VERSION <= 10 + interface HYBGDeviceAlloc + function z_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="z_HYBGDeviceAlloc") result(res) + use iso_c_binding + import z_hmat + type(z_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function z_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function z_HYBGDeviceFree(Mat) & + & bind(c,name="z_HYBGDeviceFree") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int) :: res + end function z_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function z_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function z_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + 
integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function z_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function z_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function z_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="z_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int) :: res + end function z_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function z_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spsvHYBGDevice") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function z_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spmvHYBGDevice") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function z_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_HYBGHost2Device") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), 
ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_HYBGHost2Device + end interface + +#endif + +end module z_cusparse_mod diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c new file mode 100644 index 00000000..93142d22 --- /dev/null +++ b/cuda/zcusparse.c @@ -0,0 +1,42 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#include +#include +#include "fcusparse.h" + +#include "zcusparse.h" +#include "fcusparse_dat.h" +#include "fcusparse_fct.h" diff --git a/cuda/zcusparse.h b/cuda/zcusparse.h new file mode 100644 index 00000000..ca0aeeff --- /dev/null +++ b/cuda/zcusparse.h @@ -0,0 +1,100 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. 
*/ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef ZCUSPARSE_ +#define ZCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" + +/* Double precision complex */ +#define TYPE double complex +#define CUSPARSE_BASE_TYPE CUDA_C_64F +#define T_CSRGDeviceMat z_CSRGDeviceMat +#define T_Cmat z_Cmat +#define T_spmvCSRGDevice z_spmvCSRGDevice +#define T_spsvCSRGDevice z_spsvCSRGDevice +#define T_CSRGDeviceAlloc z_CSRGDeviceAlloc +#define T_CSRGDeviceFree z_CSRGDeviceFree +#define T_CSRGHost2Device z_CSRGHost2Device +#define T_CSRGDevice2Host z_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode z_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType z_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms z_CSRGDeviceGetParms + +#if PSB_CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis z_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseZcsrmv +#define cusparseTcsrsv_solve cusparseZcsrsv_solve +#define cusparseTcsrsv_analysis cusparseZcsrsv_analysis +#define T_HYBGDeviceMat z_HYBGDeviceMat +#define T_Hmat z_Hmat 
+#define T_HYBGDeviceFree z_HYBGDeviceFree +#define T_spmvHYBGDevice z_spmvHYBGDevice +#define T_HYBGDeviceAlloc z_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType z_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase z_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType z_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode z_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis z_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice z_spsvHYBGDevice +#define T_HYBGHost2Device z_HYBGHost2Device +#define cusparseThybmv cusparseZhybmv +#define cusparseThybsv_solve cusparseZhybsv_solve +#define cusparseThybsv_analysis cusparseZhybsv_analysis +#define cusparseTcsr2hyb cusparseZcsr2hyb + +#elif PSB_CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis z_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr + +#endif + +#include "fcusparse.h" + +#endif diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c new file mode 100644 index 00000000..102ba0d2 --- /dev/null +++ b/cuda/zvectordev.c @@ -0,0 +1,359 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. 
Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + + +#include +#include +//#include "utils.h" +//#include "common.h" +#include "zvectordev.h" + + +int registerMappedDoubleComplex(void *buff, void **d_p, int n, cuDoubleComplex dummy) +{ + return registerMappedMemory(buff,d_p,((size_t) n)*sizeof(cuDoubleComplex)); +} + +int writeMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(cuDoubleComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleComplex",i); + } + return(i); +} + +int writeMultiVecDeviceDoubleComplexR2(void* deviceVec, cuDoubleComplex* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceDoubleComplex(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleComplexR2",i); + } + return(i); +} + +int readMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + ((size_t) devVec->pitch_)*devVec->count_*sizeof(cuDoubleComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplex",i); + } + return(i); +} + +int readMultiVecDeviceDoubleComplexR2(void* deviceVec, cuDoubleComplex* hostVec, int ld) +{ int i; + i = readMultiVecDeviceDoubleComplex(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplexR2",i); + } + return(i); +} + +int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_cudaGetHandle(); + + 
spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + cuDoubleComplex beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + if (dupl == INS_OVERWRITE) + beta = make_cuDoubleComplex(0.0, 0.0); + else if (dupl == INS_ADD) + beta = make_cuDoubleComplex(1.0, 0.0); + else + beta = make_cuDoubleComplex(0.0, 0.0); + + spgpuZscat(handle, (cuDoubleComplex *) devVecX->v_, n, (cuDoubleComplex*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, + first, (void*) devIdx->v_, + hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + cuDoubleComplex *hv = (cuDoubleComplex *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + 
spgpuZgath(handle,hv, n, idx,indexBase, + (cuDoubleComplex *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void *deviceIdx, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta) +{ int i=0; + cuDoubleComplex *hv = (cuDoubleComplex *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuZscat(handle, (cuDoubleComplex *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + 
spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, + alpha, (cuDoubleComplex *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, + void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_, + (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, + cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZupd_xyz(handle,n, alpha,beta,gamma,delta, + (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_); + return(i); +} + +int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* 
devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZxyzw(handle,n, a,b,c,d,e,f, + (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_, + (cuDoubleComplex *) devVecZ->v_,(cuDoubleComplex *) devVecW->v_); + return(i); +} + +int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX, + cuDoubleComplex beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;j<devVecY->count_;j++) + spgpuZaxpby(handle,(cuDoubleComplex*)devVecY->v_+pitch*j, n, beta, + (cuDoubleComplex*)devVecY->v_+pitch*j, alpha, + (cuDoubleComplex*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZmaxy(handle, (cuDoubleComplex*)devVecB->v_, n, alpha, + (cuDoubleComplex*)devVecA->v_, + (cuDoubleComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceDoubleComplex(int n, 
cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB, cuDoubleComplex beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuZmaxypbz(handle, (cuDoubleComplex*)devVecZ->v_, n, beta, + (cuDoubleComplex*)devVecZ->v_, + alpha, (cuDoubleComplex*) devVecA->v_, (cuDoubleComplex*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + + +int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZabs(handle, (cuDoubleComplex*)devVecB->v_, n, + alpha, (cuDoubleComplex*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_cudaGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuZabs(handle, (cuDoubleComplex*)devVecA->v_, n, + alpha, (cuDoubleComplex*)devVecA->v_); + + return(i); +} + diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h new file mode 100644 index 00000000..023c7f13 --- /dev/null +++ b/cuda/zvectordev.h @@ -0,0 +1,97 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are 
permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/ + + + +#pragma once +//#include "utils.h" +#include +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" +#include "vector.h" + +int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex); +int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); +int writeMultiVecDeviceDoubleComplexR2(void* deviceMultiVec, + cuDoubleComplex* hostMultiVec, int ld); +int readMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); +int readMultiVecDeviceDoubleComplexR2(void* deviceMultiVec, + cuDoubleComplex* hostMultiVec, int ld); +int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceDoubleComplex(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, + int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, + int n, int first, + void *deviceIdx, int hfirst, + void* host_values, + int indexBase, cuDoubleComplex beta); +int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta); + +int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA); +int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA); +int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA); +int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA); +int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, + void* devVecA, void* devVecB); + +int 
axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX, + cuDoubleComplex beta, void* devVecY); +int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, + cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); +int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB, cuDoubleComplex beta, + void *deviceVecZ); +int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA); +int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB); + diff --git a/docs/Makefile b/docs/Makefile index b723f0d1..6ac24e64 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,7 +1,7 @@ all: guide guide: - cd src && $(MAKE) + cd src && $(MAKE) clean all doxy: - doxygen doxypsb \ No newline at end of file + doxygen doxypsb diff --git a/docs/doxypsb b/docs/doxypsb index 51b8c033..1e7456a9 100644 --- a/docs/doxypsb +++ b/docs/doxypsb @@ -52,7 +52,7 @@ PROJECT_LOGO = # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. 
-OUTPUT_DIRECTORY = ../../psblas-3.4-doxygen +OUTPUT_DIRECTORY = ../../psblas-3.9-doxygen # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output diff --git a/docs/html/cmsy10-42.png b/docs/html/cmsy10-42.png index bd31f1f8..3ebb79e6 100644 Binary files a/docs/html/cmsy10-42.png and b/docs/html/cmsy10-42.png differ diff --git a/docs/html/cmsy10-48.png b/docs/html/cmsy10-48.png index df31f928..04f134fe 100644 Binary files a/docs/html/cmsy10-48.png and b/docs/html/cmsy10-48.png differ diff --git a/docs/html/cmsy10-49.png b/docs/html/cmsy10-49.png index cb292c09..37ce5dc4 100644 Binary files a/docs/html/cmsy10-49.png and b/docs/html/cmsy10-49.png differ diff --git a/docs/html/dia-.png b/docs/html/dia-.png new file mode 100644 index 00000000..de7db919 Binary files /dev/null and b/docs/html/dia-.png differ diff --git a/docs/html/dia.png b/docs/html/dia.png new file mode 100644 index 00000000..de7db919 Binary files /dev/null and b/docs/html/dia.png differ diff --git a/docs/html/ell-.png b/docs/html/ell-.png new file mode 100644 index 00000000..31911882 Binary files /dev/null and b/docs/html/ell-.png differ diff --git a/docs/html/ell.png b/docs/html/ell.png new file mode 100644 index 00000000..31911882 Binary files /dev/null and b/docs/html/ell.png differ diff --git a/docs/html/hdia-.png b/docs/html/hdia-.png new file mode 100644 index 00000000..08bfb5ff Binary files /dev/null and b/docs/html/hdia-.png differ diff --git a/docs/html/hdia.png b/docs/html/hdia.png new file mode 100644 index 00000000..08bfb5ff Binary files /dev/null and b/docs/html/hdia.png differ diff --git a/docs/html/hll-.png b/docs/html/hll-.png new file mode 100644 index 00000000..219b751a Binary files /dev/null and b/docs/html/hll-.png differ diff --git a/docs/html/hll.png b/docs/html/hll.png new file mode 100644 index 00000000..219b751a Binary files /dev/null and b/docs/html/hll.png differ diff --git 
a/docs/html/index.html b/docs/html/index.html index c4f777e4..aca21355 100644 --- a/docs/html/index.html +++ b/docs/html/index.html @@ -10,18 +10,20 @@ -

PSBLAS

PSBLAS
User’s and Reference Guide
User’s and Reference Guide
A reference guide for the Parallel Sparse BLAS library
A reference guide for the Parallel Sparse BLAS library
Salvatore Filippone
by Salvatore Filippone
Alfredo Buttari
Software version: 3.8.0
May 1st, 2022 +class="pplb7t-">Alfredo Buttari
Fabio Durastante
Software version: 3.9.0
June 9th, 2025 @@ -29,219 +31,44 @@ class="newline" />May 1st, 2022

- Contents -
1 Introduction -
2 General overview -
 2.1 Basic Nomenclature -
 2.2 Library contents -
 2.3 Application structure -
 2.4 Programming model -
3 Data Structures and Classes -
 3.1 Descriptor data structure -
 3.2 Sparse Matrix class -
 3.3 Dense Vector Data Structure -
 3.4 Preconditioner data structure -
 3.5 Heap data structure -
4 Computational routines -
 4.1 psb_geaxpby — General Dense Matrix Sum -
 4.2 psb_gedot — Dot Product -
 4.3 psb_gedots — Generalized Dot Product -
 4.4 psb_normi — Infinity-Norm of Vector -
 4.5 psb_geamaxs — Generalized Infinity Norm -
 4.6 psb_norm1 — 1-Norm of Vector -
 4.7 psb_geasums — Generalized 1-Norm of Vector -
 4.8 psb_norm2 — 2-Norm of Vector -
 4.9 psb_genrm2s — Generalized 2-Norm of Vector -
 4.10 psb_norm1 — 1-Norm of Sparse Matrix -
 4.11 psb_normi — Infinity Norm of Sparse Matrix -
 4.12 psb_spmm — Sparse Matrix by Dense Matrix Product -
 4.13 psb_spsm — Triangular System Solve -
 4.14 psb_gemlt — Entrywise Product -
 4.15 psb_gediv — Entrywise Division -
 4.16 psb_geinv — Entrywise Inversion -
5 Communication routines -
 5.1 psb_halo — Halo Data Communication -
 5.2 psb_ovrl — Overlap Update -
 5.3 psb_gather — Gather Global Dense Matrix -
 5.4 psb_scatter — Scatter Global Dense Matrix -
6 Data management routines -
 6.1 psb_cdall — Allocates a communication descriptor -
 6.2 psb_cdins — Communication descriptor insert routine -
 6.3 psb_cdasb — Communication descriptor assembly routine -
 6.4 psb_cdcpy — Copies a communication descriptor -
 6.5 psb_cdfree — Frees a communication descriptor -
 6.6 psb_cdbldext — Build an extended communication descriptor -
 6.7 psb_spall — Allocates a sparse matrix -
 6.8 psb_spins — Insert a set of coefficients into a sparse matrix -
 6.9 psb_spasb — Sparse matrix assembly routine -
 6.10 psb_spfree — Frees a sparse matrix -
 6.11 psb_sprn — Reinit sparse matrix structure for psblas routines. -
 6.12 psb_geall — Allocates a dense matrix - +
 Preface +
 Preface +
 1 Introduction +
 2 General overview +
 3 Data Structures and Classes +
 4 Computational routines +
 5 Communication routines +
 6 Data management routines +
 7 Parallel environment routines +
 8 Error handling +
 9 Utilities +
 10 Preconditioner routines +
 11 Iterative Methods +
 12 Extensions +
 13 CUDA Environment Routines +
 References +
+ - -
 6.13 psb_geins — Dense matrix insertion routine -
 6.14 psb_geasb — Assembly a dense matrix -
 6.15 psb_gefree — Frees a dense matrix -
 6.16 psb_gelp — Applies a left permutation to a dense matrix -
 6.17 psb_glob_to_loc — Global to local indices convertion -
 6.18 psb_loc_to_glob — Local to global indices conversion -
 6.19 psb_is_owned — -
 6.20 psb_owned_index — -
 6.21 psb_is_local — -
 6.22 psb_local_index — -
 6.23 psb_get_boundary — Extract list of boundary elements -
 6.24 psb_get_overlap — Extract list of overlap elements -
 6.25 psb_sp_getrow — Extract row(s) from a sparse matrix -
 6.26 psb_sizeof — Memory occupation -
 6.27 Sorting utilities — -
7 Parallel environment routines -
 7.1 psb_init — Initializes PSBLAS parallel environment -
 7.2 psb_info — Return information about PSBLAS parallel environment -
 7.3 psb_exit — Exit from PSBLAS parallel environment -
 7.4 psb_get_mpi_comm — Get the MPI communicator -
 7.5 psb_get_mpi_rank — Get the MPI rank -
 7.6 psb_wtime — Wall clock timing -
 7.7 psb_barrier — Sinchronization point parallel environment -
 7.8 psb_abort — Abort a computation -
 7.9 psb_bcast — Broadcast data -
 7.10 psb_sum — Global sum -
 7.11 psb_max — Global maximum -
 7.12 psb_min — Global minimum -
 7.13 psb_amx — Global maximum absolute value -
 7.14 psb_amn — Global minimum absolute value -
 7.15 psb_nrm2 — Global 2-norm reduction -
 7.16 psb_snd — Send data -
 7.17 psb_rcv — Receive data -
8 Error handling -
 8.1 psb_errpush — Pushes an error code onto the error stack -
 8.2 psb_error — Prints the error stack content and aborts execution -
 8.3 psb_set_errverbosity — Sets the verbosity of error messages -
 8.4 psb_set_erraction — Set the type of action to be taken upon error condition -
9 Utilities -
 9.1 hb_read — Read a sparse matrix from a file in the Harwell–Boeing format -
 9.2 hb_write — Write a sparse matrix to a file in the Harwell–Boeing format -
 9.3 mm_mat_read — Read a sparse matrix from a file in the MatrixMarket format -
 9.4 mm_array_read — Read a dense array from a file in the MatrixMarket format -
 9.5 mm_mat_write — Write a sparse matrix to a file in the MatrixMarket format -
 9.6 mm_array_write — Write a dense array from a file in the MatrixMarket format -
10 Preconditioner routines -
 10.1 init — Initialize a preconditioner -
 10.2 build — Builds a preconditioner -
 10.3 apply — Preconditioner application routine -
 10.4 descr — Prints a description of current preconditioner - - -
 10.5 clone — clone current preconditioner -
 10.6 free — Free a preconditioner -
11 Iterative Methods -
 11.1 psb_krylov — Krylov Methods Driver Routine -
References - diff --git a/docs/html/mat-.png b/docs/html/mat-.png new file mode 100644 index 00000000..d4f5c6f9 Binary files /dev/null and b/docs/html/mat-.png differ diff --git a/docs/html/mat.png b/docs/html/mat.png new file mode 100644 index 00000000..d4f5c6f9 Binary files /dev/null and b/docs/html/mat.png differ diff --git a/docs/html/psblaslibraryext.png b/docs/html/psblaslibraryext.png new file mode 100644 index 00000000..da3b3a9c Binary files /dev/null and b/docs/html/psblaslibraryext.png differ diff --git a/docs/html/userhtml.css b/docs/html/userhtml.css index 31d7eb99..7e85fe60 100644 --- a/docs/html/userhtml.css +++ b/docs/html/userhtml.css @@ -1,53 +1,87 @@ /* start css.sty */ -.cmr-7{font-size:70%;} -.cmmi-5{font-size:50%;font-style: italic;} -.cmmi-7{font-size:70%;font-style: italic;} -.cmmi-10{font-style: italic;} -.cmsy-7{font-size:70%;} -.cmbx-12x-x-144{font-size:172%; font-weight: bold;} -.cmbx-12x-x-144{ font-weight: bold;} -.cmbx-12x-x-144{ font-weight: bold;} -.cmti-10{ font-style: italic;} -.cmti-12{font-size:120%; font-style: italic;} -.cmbx-10{ font-weight: bold;} -.cmbx-10{ font-weight: bold;} -.cmbx-10{ font-weight: bold;} +.pplb7t-x-x-172{font-size:172%;font-weight: bold;} +.pplb7t-x-x-172{font-weight: bold;} +.pplb7t-x-x-172{font-weight: bold;} +.pplri7t-{font-style: italic;} +.pplri7t-{font-style: italic;} +.pplri7t-x-x-120{font-size:120%;font-style: italic;} +.pplri7t-x-x-120{font-style: italic;} +.pplb7t-{font-weight: bold;} +.pplb7t-{font-weight: bold;} +.pplb7t-{font-weight: bold;} .cmtt-10{font-family: monospace,monospace;} .cmtt-10{font-family: monospace,monospace;} .cmtt-10{font-family: monospace,monospace;} -.cmr-9{font-size:90%;} -.cmr-8{font-size:80%;} -.cmbx-12{font-size:120%; font-weight: bold;} -.cmbx-12{ font-weight: bold;} -.cmbx-12{ font-weight: bold;} +.pplr7t-x-x-76{font-size:76%;} +.zplmr7m-{font-style: italic;} +.zplmr7m-{font-style: italic;} +.zplmr7m-{font-style: italic;} +.zplmr7m-{font-style: italic;} 
+.zplmr7m-{font-style: italic;} +.zplmr7m-x-x-76{font-size:76%;font-style: italic;} +.zplmr7m-x-x-76{font-style: italic;} +.zplmr7m-x-x-76{font-style: italic;} +.zplmr7m-x-x-76{font-style: italic;} +.zplmr7m-x-x-76{font-style: italic;} +.zplmr7m-x-x-60{font-size:60%;font-style: italic;} +.zplmr7m-x-x-60{font-style: italic;} +.zplmr7m-x-x-60{font-style: italic;} +.zplmr7m-x-x-60{font-style: italic;} +.zplmr7m-x-x-60{font-style: italic;} +.zplmr7y-x-x-76{font-size:76%;} +.zplmr7t-x-x-76{font-size:76%;} +.pplr7t-x-x-90{font-size:90%;} +.pplr7t-x-x-80{font-size:80%;} +.pplb7t-x-x-120{font-size:120%;font-weight: bold;} +.pplb7t-x-x-120{font-weight: bold;} +.pplb7t-x-x-120{font-weight: bold;} .cmtt-8{font-size:80%;font-family: monospace,monospace;} .cmtt-8{font-family: monospace,monospace;} .cmtt-8{font-family: monospace,monospace;} .cmtt-9{font-size:90%;font-family: monospace,monospace;} .cmtt-9{font-family: monospace,monospace;} .cmtt-9{font-family: monospace,monospace;} -.cmmi-8{font-size:80%;font-style: italic;} +.pplr7t-x-x-70{font-size:70%;} +.zplmr7m-x-x-90{font-size:90%;font-style: italic;} +.zplmr7m-x-x-90{font-style: italic;} +.zplmr7m-x-x-90{font-style: italic;} +.zplmr7m-x-x-90{font-style: italic;} +.zplmr7m-x-x-90{font-style: italic;} +.zplmr7y-x-x-90{font-size:90%;} +.zplmr7m-x-x-80{font-size:80%;font-style: italic;} +.zplmr7m-x-x-80{font-style: italic;} +.zplmr7m-x-x-80{font-style: italic;} +.zplmr7m-x-x-80{font-style: italic;} +.zplmr7m-x-x-80{font-style: italic;} +.zplmr7t-x-x-80{font-size:80%;} +.pplrc7t-x-x-90{font-size:90%;} +.small-caps{font-variant: small-caps; } p{margin-top:0;margin-bottom:0} p.indent{text-indent:0;} p + p{margin-top:1em;} p + div, p + pre {margin-top:1em;} div + p, pre + p {margin-top:1em;} +a { overflow-wrap: break-word; word-wrap: break-word; word-break: break-word; hyphens: auto; } @media print {div.crosslinks {visibility:hidden;}} +table.tabular{border-collapse: collapse; border-spacing: 0;} a img { border-top: 0; 
border-left: 0; border-right: 0; } center { margin-top:1em; margin-bottom:1em; } td center { margin-top:0em; margin-bottom:0em; } .Canvas { position:relative; } img.math{vertical-align:middle;} +div.par-math-display, div.math-display{text-align:center;} li p.indent { text-indent: 0em } li p:first-child{ margin-top:0em; } li p:last-child, li div:last-child { margin-bottom:0.5em; } +li p:first-child{ margin-bottom:0; } li p~ul:last-child, li p~ol:last-child{ margin-bottom:0.5em; } .enumerate1 {list-style-type:decimal;} .enumerate2 {list-style-type:lower-alpha;} .enumerate3 {list-style-type:lower-roman;} .enumerate4 {list-style-type:upper-alpha;} div.newtheorem { margin-bottom: 2em; margin-top: 2em;} +div.newtheorem .head{font-weight: bold;} .obeylines-h,.obeylines-v {white-space: nowrap; } div.obeylines-v p { margin-top:0; margin-bottom:0; } .overline{ text-decoration:overline; } @@ -91,6 +125,9 @@ table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; } .hline hr, .cline hr{ height : 0px; margin:0px; } .hline td, .cline td{ padding: 0; } .hline hr, .cline hr{border:none;border-top:1px solid black;} +.hline {border-top: 1px solid black;} +.hline + .vspace:last-child{display:none;} +.hline:first-child{border-bottom:1px solid black;border-top:none;} .tabbing-right {text-align:right;} div.float, div.figure {margin-left: auto; margin-right: auto;} div.float img {text-align:center;} @@ -115,15 +152,16 @@ table.pmatrix {width:100%;} span.bar-css {text-decoration:overline;} img.cdots{vertical-align:middle;} .partToc a, .partToc, .likepartToc a, .likepartToc {line-height: 200%; font-weight:bold; font-size:110%;} +.chapterToc a, .chapterToc, .likechapterToc a, .likechapterToc, .appendixToc a, .appendixToc {line-height: 200%; font-weight:bold;} .index-item, .index-subitem, .index-subsubitem {display:block} div.caption {text-indent:-2em; margin-left:3em; margin-right:1em; text-align:left;} div.caption span.id{font-weight: bold; white-space: nowrap; } 
h1.partHead{text-align: center} p.bibitem { text-indent: -2em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; } p.bibitem-p { text-indent: 0em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; } +.subsubsectionHead, .likesubsubsectionHead { font-size: 1em; } .paragraphHead, .likeparagraphHead { margin-top:2em; font-weight: bold;} .subparagraphHead, .likesubparagraphHead { font-weight: bold;} -.quote {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; margin-right:1em; text-align:justify;} .verse{white-space:nowrap; margin-left:2em} div.maketitle {text-align:center;} h2.titleHead{text-align:center;} @@ -131,19 +169,29 @@ div.maketitle{ margin-bottom: 2em; } div.author, div.date {text-align:center;} div.thanks{text-align:left; margin-left:10%; font-size:85%; font-style:italic; } div.author{white-space: nowrap;} -.quotation {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; } -.abstract p {margin-left:5%; margin-right:5%;} +div.abstract p {margin-left:5%; margin-right:5%;} div.abstract {width:100%;} +.abstracttitle{text-align:center;margin-bottom:1em;} .subsectionToc, .likesubsectionToc {margin-left:2em;} .subsubsectionToc, .likesubsubsectionToc {margin-left:4em;} +.paragraphToc, .likeparagraphToc {margin-left:6em;} +.subparagraphToc, .likesubparagraphToc {margin-left:8em;} .ovalbox { padding-left:3pt; padding-right:3pt; border:solid thin; } .Ovalbox-thick { padding-left:3pt; padding-right:3pt; border:solid thick; } .shadowbox { padding-left:3pt; padding-right:3pt; border:solid thin; border-right:solid thick; border-bottom:solid thick; } .doublebox { padding-left:3pt; padding-right:3pt; border-style:double; border:solid thick; } .rotatebox{display: inline-block;} +code.lstinline{font-family:monospace,monospace;} +pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; } .lstlisting .label{margin-right:0.5em; } -div.lstlisting{font-family: monospace,monospace; white-space: nowrap; 
margin-top:0.5em; margin-bottom:0.5em; } -div.lstinputlisting{ font-family: monospace,monospace; white-space: nowrap; } +pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; } +pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; } .lstinputlisting .label{margin-right:0.5em;} +#TBL-24-1{border-left: 1px solid black;} +#TBL-24-1{border-right:1px solid black;} +#TBL-24-2{border-right:1px solid black;} +#TBL-24-3{border-right:1px solid black;} +#TBL-24-4{border-right:1px solid black;} +#TBL-24-5{border-right:1px solid black;} /* end css.sty */ diff --git a/docs/html/userhtml.html b/docs/html/userhtml.html index c4f777e4..aca21355 100644 --- a/docs/html/userhtml.html +++ b/docs/html/userhtml.html @@ -10,18 +10,20 @@ -

PSBLAS

PSBLAS
User’s and Reference Guide
User’s and Reference Guide
A reference guide for the Parallel Sparse BLAS library
A reference guide for the Parallel Sparse BLAS library
Salvatore Filippone
by Salvatore Filippone
Alfredo Buttari
Software version: 3.8.0
May 1st, 2022 +class="pplb7t-">Alfredo Buttari
Fabio Durastante
Software version: 3.9.0
June 9th, 2025 @@ -29,219 +31,44 @@ class="newline" />May 1st, 2022

- Contents -
1 Introduction -
2 General overview -
 2.1 Basic Nomenclature -
 2.2 Library contents -
 2.3 Application structure -
 2.4 Programming model -
3 Data Structures and Classes -
 3.1 Descriptor data structure -
 3.2 Sparse Matrix class -
 3.3 Dense Vector Data Structure -
 3.4 Preconditioner data structure -
 3.5 Heap data structure -
4 Computational routines -
 4.1 psb_geaxpby — General Dense Matrix Sum -
 4.2 psb_gedot — Dot Product -
 4.3 psb_gedots — Generalized Dot Product -
 4.4 psb_normi — Infinity-Norm of Vector -
 4.5 psb_geamaxs — Generalized Infinity Norm -
 4.6 psb_norm1 — 1-Norm of Vector -
 4.7 psb_geasums — Generalized 1-Norm of Vector -
 4.8 psb_norm2 — 2-Norm of Vector -
 4.9 psb_genrm2s — Generalized 2-Norm of Vector -
 4.10 psb_norm1 — 1-Norm of Sparse Matrix -
 4.11 psb_normi — Infinity Norm of Sparse Matrix -
 4.12 psb_spmm — Sparse Matrix by Dense Matrix Product -
 4.13 psb_spsm — Triangular System Solve -
 4.14 psb_gemlt — Entrywise Product -
 4.15 psb_gediv — Entrywise Division -
 4.16 psb_geinv — Entrywise Inversion -
5 Communication routines -
 5.1 psb_halo — Halo Data Communication -
 5.2 psb_ovrl — Overlap Update -
 5.3 psb_gather — Gather Global Dense Matrix -
 5.4 psb_scatter — Scatter Global Dense Matrix -
6 Data management routines -
 6.1 psb_cdall — Allocates a communication descriptor -
 6.2 psb_cdins — Communication descriptor insert routine -
 6.3 psb_cdasb — Communication descriptor assembly routine -
 6.4 psb_cdcpy — Copies a communication descriptor -
 6.5 psb_cdfree — Frees a communication descriptor -
 6.6 psb_cdbldext — Build an extended communication descriptor -
 6.7 psb_spall — Allocates a sparse matrix -
 6.8 psb_spins — Insert a set of coefficients into a sparse matrix -
 6.9 psb_spasb — Sparse matrix assembly routine -
 6.10 psb_spfree — Frees a sparse matrix -
 6.11 psb_sprn — Reinit sparse matrix structure for psblas routines. -
 6.12 psb_geall — Allocates a dense matrix - +
 Preface +
 Preface +
 1 Introduction +
 2 General overview +
 3 Data Structures and Classes +
 4 Computational routines +
 5 Communication routines +
 6 Data management routines +
 7 Parallel environment routines +
 8 Error handling +
 9 Utilities +
 10 Preconditioner routines +
 11 Iterative Methods +
 12 Extensions +
 13 CUDA Environment Routines +
 References +
+ - -
 6.13 psb_geins — Dense matrix insertion routine -
 6.14 psb_geasb — Assembly a dense matrix -
 6.15 psb_gefree — Frees a dense matrix -
 6.16 psb_gelp — Applies a left permutation to a dense matrix -
 6.17 psb_glob_to_loc — Global to local indices convertion -
 6.18 psb_loc_to_glob — Local to global indices conversion -
 6.19 psb_is_owned — -
 6.20 psb_owned_index — -
 6.21 psb_is_local — -
 6.22 psb_local_index — -
 6.23 psb_get_boundary — Extract list of boundary elements -
 6.24 psb_get_overlap — Extract list of overlap elements -
 6.25 psb_sp_getrow — Extract row(s) from a sparse matrix -
 6.26 psb_sizeof — Memory occupation -
 6.27 Sorting utilities — -
7 Parallel environment routines -
 7.1 psb_init — Initializes PSBLAS parallel environment -
 7.2 psb_info — Return information about PSBLAS parallel environment -
 7.3 psb_exit — Exit from PSBLAS parallel environment -
 7.4 psb_get_mpi_comm — Get the MPI communicator -
 7.5 psb_get_mpi_rank — Get the MPI rank -
 7.6 psb_wtime — Wall clock timing -
 7.7 psb_barrier — Sinchronization point parallel environment -
 7.8 psb_abort — Abort a computation -
 7.9 psb_bcast — Broadcast data -
 7.10 psb_sum — Global sum -
 7.11 psb_max — Global maximum -
 7.12 psb_min — Global minimum -
 7.13 psb_amx — Global maximum absolute value -
 7.14 psb_amn — Global minimum absolute value -
 7.15 psb_nrm2 — Global 2-norm reduction -
 7.16 psb_snd — Send data -
 7.17 psb_rcv — Receive data -
8 Error handling -
 8.1 psb_errpush — Pushes an error code onto the error stack -
 8.2 psb_error — Prints the error stack content and aborts execution -
 8.3 psb_set_errverbosity — Sets the verbosity of error messages -
 8.4 psb_set_erraction — Set the type of action to be taken upon error condition -
9 Utilities -
 9.1 hb_read — Read a sparse matrix from a file in the Harwell–Boeing format -
 9.2 hb_write — Write a sparse matrix to a file in the Harwell–Boeing format -
 9.3 mm_mat_read — Read a sparse matrix from a file in the MatrixMarket format -
 9.4 mm_array_read — Read a dense array from a file in the MatrixMarket format -
 9.5 mm_mat_write — Write a sparse matrix to a file in the MatrixMarket format -
 9.6 mm_array_write — Write a dense array from a file in the MatrixMarket format -
10 Preconditioner routines -
 10.1 init — Initialize a preconditioner -
 10.2 build — Builds a preconditioner -
 10.3 apply — Preconditioner application routine -
 10.4 descr — Prints a description of current preconditioner - - -
 10.5 clone — clone current preconditioner -
 10.6 free — Free a preconditioner -
11 Iterative Methods -
 11.1 psb_krylov — Krylov Methods Driver Routine -
References - diff --git a/docs/html/userhtml0x.png b/docs/html/userhtml0x.png index bed5b304..7b06aa11 100644 Binary files a/docs/html/userhtml0x.png and b/docs/html/userhtml0x.png differ diff --git a/docs/html/userhtml10x.png b/docs/html/userhtml10x.png index a0d4d0d4..d95c1025 100644 Binary files a/docs/html/userhtml10x.png and b/docs/html/userhtml10x.png differ diff --git a/docs/html/userhtml11x.png b/docs/html/userhtml11x.png index 634594e1..af3a720c 100644 Binary files a/docs/html/userhtml11x.png and b/docs/html/userhtml11x.png differ diff --git a/docs/html/userhtml12x.png b/docs/html/userhtml12x.png index 7257667a..b3d9749d 100644 Binary files a/docs/html/userhtml12x.png and b/docs/html/userhtml12x.png differ diff --git a/docs/html/userhtml13x.png b/docs/html/userhtml13x.png index 4c70ecb3..cb244f75 100644 Binary files a/docs/html/userhtml13x.png and b/docs/html/userhtml13x.png differ diff --git a/docs/html/userhtml14x.png b/docs/html/userhtml14x.png index 711ce998..9d583958 100644 Binary files a/docs/html/userhtml14x.png and b/docs/html/userhtml14x.png differ diff --git a/docs/html/userhtml15x.png b/docs/html/userhtml15x.png index 00e0d73d..3a2dbe01 100644 Binary files a/docs/html/userhtml15x.png and b/docs/html/userhtml15x.png differ diff --git a/docs/html/userhtml16.html b/docs/html/userhtml16.html new file mode 100644 index 00000000..5e120f6c --- /dev/null +++ b/docs/html/userhtml16.html @@ -0,0 +1,19 @@ + + + + + + + + + + +
+

4The string is case-insensitive

+ + diff --git a/docs/html/userhtml16x.png b/docs/html/userhtml16x.png index 95794ed0..19ca0c98 100644 Binary files a/docs/html/userhtml16x.png and b/docs/html/userhtml16x.png differ diff --git a/docs/html/userhtml17.html b/docs/html/userhtml17.html new file mode 100644 index 00000000..4680e0be --- /dev/null +++ b/docs/html/userhtml17.html @@ -0,0 +1,19 @@ + + + + + + + + + + +
+

4The string is case-insensitive

+ + diff --git a/docs/html/userhtml17x.png b/docs/html/userhtml17x.png index 27a7387e..99871208 100644 Binary files a/docs/html/userhtml17x.png and b/docs/html/userhtml17x.png differ diff --git a/docs/html/userhtml18.html b/docs/html/userhtml18.html new file mode 100644 index 00000000..9271993a --- /dev/null +++ b/docs/html/userhtml18.html @@ -0,0 +1,23 @@ + + + + + + + + + + +
+

5Note: the implementation is for FCG(1).

+ diff --git a/docs/html/userhtml18x.png b/docs/html/userhtml18x.png index ebcb935d..f781e90c 100644 Binary files a/docs/html/userhtml18x.png and b/docs/html/userhtml18x.png differ diff --git a/docs/html/userhtml19.html b/docs/html/userhtml19.html new file mode 100644 index 00000000..ab4e0ad1 --- /dev/null +++ b/docs/html/userhtml19.html @@ -0,0 +1,23 @@ + + + + + + + + + + +
+

5Note: the implementation is for FCG(1).

+ diff --git a/docs/html/userhtml19x.png b/docs/html/userhtml19x.png index 5ac0302a..92346849 100644 Binary files a/docs/html/userhtml19x.png and b/docs/html/userhtml19x.png differ diff --git a/docs/html/userhtml1x.png b/docs/html/userhtml1x.png index 465c255b..42bbd7ec 100644 Binary files a/docs/html/userhtml1x.png and b/docs/html/userhtml1x.png differ diff --git a/docs/html/userhtml20x.png b/docs/html/userhtml20x.png index f9f0de44..ed2fd192 100644 Binary files a/docs/html/userhtml20x.png and b/docs/html/userhtml20x.png differ diff --git a/docs/html/userhtml21x.png b/docs/html/userhtml21x.png index 85201b8c..85e9af03 100644 Binary files a/docs/html/userhtml21x.png and b/docs/html/userhtml21x.png differ diff --git a/docs/html/userhtml22x.png b/docs/html/userhtml22x.png index 8c882bc2..f0a71dab 100644 Binary files a/docs/html/userhtml22x.png and b/docs/html/userhtml22x.png differ diff --git a/docs/html/userhtml23x.png b/docs/html/userhtml23x.png index 30c84d3d..d8e518ee 100644 Binary files a/docs/html/userhtml23x.png and b/docs/html/userhtml23x.png differ diff --git a/docs/html/userhtml24x.png b/docs/html/userhtml24x.png index 0910fe9c..3ade441e 100644 Binary files a/docs/html/userhtml24x.png and b/docs/html/userhtml24x.png differ diff --git a/docs/html/userhtml25x.png b/docs/html/userhtml25x.png index 88630c50..e1c548ac 100644 Binary files a/docs/html/userhtml25x.png and b/docs/html/userhtml25x.png differ diff --git a/docs/html/userhtml26x.png b/docs/html/userhtml26x.png index ac0518e5..e70146b3 100644 Binary files a/docs/html/userhtml26x.png and b/docs/html/userhtml26x.png differ diff --git a/docs/html/userhtml27x.png b/docs/html/userhtml27x.png index 57beeb01..f94ad5e3 100644 Binary files a/docs/html/userhtml27x.png and b/docs/html/userhtml27x.png differ diff --git a/docs/html/userhtml28x.png b/docs/html/userhtml28x.png index b2106916..6a7c4b7f 100644 Binary files a/docs/html/userhtml28x.png and b/docs/html/userhtml28x.png differ diff --git 
a/docs/html/userhtml29x.png b/docs/html/userhtml29x.png index b4efccdb..e3d801ea 100644 Binary files a/docs/html/userhtml29x.png and b/docs/html/userhtml29x.png differ diff --git a/docs/html/userhtml2x.png b/docs/html/userhtml2x.png index 49dbf305..4b12ea9e 100644 Binary files a/docs/html/userhtml2x.png and b/docs/html/userhtml2x.png differ diff --git a/docs/html/userhtml30x.png b/docs/html/userhtml30x.png index 509285f3..8846254f 100644 Binary files a/docs/html/userhtml30x.png and b/docs/html/userhtml30x.png differ diff --git a/docs/html/userhtml31x.png b/docs/html/userhtml31x.png index 72203987..0229601d 100644 Binary files a/docs/html/userhtml31x.png and b/docs/html/userhtml31x.png differ diff --git a/docs/html/userhtml32x.png b/docs/html/userhtml32x.png index a92c5455..b906670d 100644 Binary files a/docs/html/userhtml32x.png and b/docs/html/userhtml32x.png differ diff --git a/docs/html/userhtml33x.png b/docs/html/userhtml33x.png index b4a91874..6e996fed 100644 Binary files a/docs/html/userhtml33x.png and b/docs/html/userhtml33x.png differ diff --git a/docs/html/userhtml34x.png b/docs/html/userhtml34x.png new file mode 100644 index 00000000..8846254f Binary files /dev/null and b/docs/html/userhtml34x.png differ diff --git a/docs/html/userhtml35x.png b/docs/html/userhtml35x.png new file mode 100644 index 00000000..6b89f4fe Binary files /dev/null and b/docs/html/userhtml35x.png differ diff --git a/docs/html/userhtml36x.png b/docs/html/userhtml36x.png new file mode 100644 index 00000000..269291b5 Binary files /dev/null and b/docs/html/userhtml36x.png differ diff --git a/docs/html/userhtml3x.png b/docs/html/userhtml3x.png index 3676aa53..14357fc6 100644 Binary files a/docs/html/userhtml3x.png and b/docs/html/userhtml3x.png differ diff --git a/docs/html/userhtml4x.png b/docs/html/userhtml4x.png index 1ce118eb..4734ca48 100644 Binary files a/docs/html/userhtml4x.png and b/docs/html/userhtml4x.png differ diff --git a/docs/html/userhtml5.html b/docs/html/userhtml5.html 
index 0664a74b..a407f8e2 100644 --- a/docs/html/userhtml5.html +++ b/docs/html/userhtml5.html @@ -10,9 +10,10 @@ -
-

1In our prototype implementation we provide sample scatter/gather routines.

- +
+

1In our prototype implementation we provide sample scatter/gather routines.

+ diff --git a/docs/html/userhtml5x.png b/docs/html/userhtml5x.png index cabc66d4..e94d3514 100644 Binary files a/docs/html/userhtml5x.png and b/docs/html/userhtml5x.png differ diff --git a/docs/html/userhtml6.html b/docs/html/userhtml6.html new file mode 100644 index 00000000..655a0046 --- /dev/null +++ b/docs/html/userhtml6.html @@ -0,0 +1,19 @@ + + + + + + + + + + +
+

1In our prototype implementation we provide sample scatter/gather routines.

+ + diff --git a/docs/html/userhtml6x.png b/docs/html/userhtml6x.png index a0ef30ce..acf15505 100644 Binary files a/docs/html/userhtml6x.png and b/docs/html/userhtml6x.png differ diff --git a/docs/html/userhtml7.html b/docs/html/userhtml7.html index ae82b72f..aed722b3 100644 --- a/docs/html/userhtml7.html +++ b/docs/html/userhtml7.html @@ -11,13 +11,14 @@
-

2This is the normal situation when the pattern of the sparse matrix is symmetric, which is +

2This is the normal situation when the pattern of the sparse matrix is symmetric, which is equivalent to equivalent to say that the interaction between two variables is reciprocal. If the matrix pattern is +class="pplr7t-x-x-80">say that the interaction between two variables is reciprocal. If the matrix pattern is non-symmetric we may non-symmetric we may have one-way interactions, and these could cause a situation in which a +class="pplr7t-x-x-80">have one-way interactions, and these could cause a situation in which a boundary point is not a halo point boundary point is not a halo point for its neighbour.

+class="pplr7t-x-x-80">for its neighbour. diff --git a/docs/html/userhtml7x.png b/docs/html/userhtml7x.png index a0d4d0d4..d95c1025 100644 Binary files a/docs/html/userhtml7x.png and b/docs/html/userhtml7x.png differ diff --git a/docs/html/userhtml8.html b/docs/html/userhtml8.html new file mode 100644 index 00000000..61ffca5a --- /dev/null +++ b/docs/html/userhtml8.html @@ -0,0 +1,26 @@ + + + + + + + + + + +
+

3The subroutine style psb_precinit and psb_precbld are still supported for backward + compatibility

+ diff --git a/docs/html/userhtml8x.png b/docs/html/userhtml8x.png index 147d6b03..4a85f66b 100644 Binary files a/docs/html/userhtml8x.png and b/docs/html/userhtml8x.png differ diff --git a/docs/html/userhtml9x.png b/docs/html/userhtml9x.png index dcf8438f..d381c15a 100644 Binary files a/docs/html/userhtml9x.png and b/docs/html/userhtml9x.png differ diff --git a/docs/html/userhtmlli1.html b/docs/html/userhtmlli1.html index 2f706b63..5f64b2a6 100644 --- a/docs/html/userhtmlli1.html +++ b/docs/html/userhtmlli1.html @@ -10,306 +10,356 @@ - +

4 Computational routines

-
-  4.1 psb_geaxpby — General Dense Matrix Sum -
 4.2 psb_gedot — Dot Product -
 4.3 psb_gedots — Generalized Dot Product -
 4.4 psb_normi — Infinity-Norm of Vector -
 4.5 psb_geamaxs — Generalized Infinity Norm -
 4.6 psb_norm1 — 1-Norm of Vector -
 4.7 psb_geasums — Generalized 1-Norm of Vector -
 4.8 psb_norm2 — 2-Norm of Vector -
 4.9 psb_genrm2s — Generalized 2-Norm of Vector -
 4.10 psb_norm1 — 1-Norm of Sparse Matrix -
 4.11 psb_normi — Infinity Norm of Sparse Matrix -
 4.12 psb_spmm — Sparse Matrix by Dense Matrix Product -
 4.13 psb_spsm — Triangular System Solve -
 4.14 psb_gemlt — Entrywise Product -
 4.15 psb_gediv — Entrywise Division -
 4.16 psb_geinv — Entrywise Inversion +

4.1 psb_geaxpby — General Dense Matrix Sum

+

This subroutine is an interface to the computational kernel for dense matrix +sum: +

+$y \leftarrow \alpha x + \beta y$
+
+

+

call psb_geaxpby(alpha, x, beta, y, desc_a, info) +

+ + + +


+ + + +
+

+

+ + + + +


x, y, α, β Subroutine


Short Precision Real psb_geaxpby
Long Precision Real psb_geaxpby
Short Precision Complex psb_geaxpby
Long Precision Complex psb_geaxpby


+
Table 1: Data types
+ + + +

+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+alpha

+

the scalar α.
Scope: global
Type: required
Intent: in.
Specified as: a number of the data type indicated in Table 1. +

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 1. The + rank of x must be the same as that of y. +

+

+beta

+

the scalar β.
Scope: global
Type: required
Intent: in.
Specified as: a number of the data type indicated in Table 1. +

+

+y

+

the local portion of the global dense matrix y.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of the type indicated in Table 1. + The rank of y must be the same as that of x. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+
+

+On Return

+

+

+

+y

+

the local portion of result submatrix y.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of the type indicated in Table 1. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + +

4.2 psb_gedot — Dot Product

+

This function computes dot product between two vectors x and y.
If x and y are real vectors it computes dot-product as: +

+$dot \leftarrow x^T y$
+
+

Else if x and y are complex vectors then it computes dot-product as: +

+$dot \leftarrow x^H y$
+
+

+

psb_gedot(x, y, desc_a, info [,global])

+ + +


+ + +
+

+

+ + + + +


dot, x, y Function


Short Precision Real psb_gedot
Long Precision Real psb_gedot
Short Precision Complex psb_gedot
Long Precision Complex psb_gedot


+
Table 2: Data types
+ + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 2. The + rank of x must be the same as that of y. +

+

+y

+

the local portion of global dense matrix y.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 2. The + rank of y must be the same as that of x. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+global

+ + +

Specifies whether the computation should include the global reduction + across all processes.
Scope: global
Type: optional.
Intent: in.
Specified as: a logical scalar. Default: global=.true.
+

+

+On Return

+

+

+

+Function value

+

is the dot product of vectors x and y.
Scope: global unless the optional variable global=.false. has been + specified
Specified as: a number of the data type indicated in Table 2. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable + to compute multiple dot products at the same time; in this case, it is + possible to improve the runtime efficiency by using the following scheme: + +

       vres(1) = psb_gedot(x1,y1,desc_a,info,global=.false.) 
    +   vres(2) = psb_gedot(x2,y2,desc_a,info,global=.false.) 
    +   vres(3) = psb_gedot(x3,y3,desc_a,info,global=.false.) 
    +   call psb_sum(ctxt,vres(1:3))
    + +

    In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

+ + +

4.3 psb_gedots — Generalized Dot Product

+

This subroutine computes a series of dot products among the columns of two dense +matrices x and y: +

+$res(i) \leftarrow x(:,i)^T y(:,i)$
+
+

If the matrices are complex, then the usual convention applies, i.e. the conjugate +transpose of x is used. If x and y are of rank one, then res is a scalar, else it is a rank +one array. +

call psb_gedots(res, x, y, desc_a, info) +

+ + +


+ + +
+

+

+ + + + +


res, x, y Subroutine


Short Precision Real psb_gedots
Long Precision Real psb_gedots
Short Precision Complex psb_gedots
Long Precision Complex psb_gedots


+
Table 3: Data types
+ + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 3. The + rank of x must be the same as that of y. +

+

+y

+

the local portion of global dense matrix y.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 3. The + rank of y must be the same as that of x. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+On Return

+

+ + +

+

+res

+

is the dot product of vectors x and y.
Scope: global
Intent: out.
Specified as: a number or a rank-one array of the data type indicated in + Table 3. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + +

4.4 psb_normi — Infinity-Norm of Vector

+

This function computes the infinity-norm of a vector x.
If x is a real vector it computes infinity norm as: +

+$amax \leftarrow \max_i |x_i|$
+
+

else if x is a complex vector then it computes the infinity-norm as: +

+$amax \leftarrow \max_i \left( |re(x_i)| + |im(x_i)| \right)$
+
+

+

psb_geamax(x, desc_a, info [,global])
psb_normi(x, desc_a, info [,global]) +

+ + +


+ + +
+

+

+ + + +



amax x Function



Short Precision Real | Short Precision Real | psb_geamax
Long Precision Real | Long Precision Real | psb_geamax
Short Precision Real | Short Precision Complex | psb_geamax
Long Precision Real | Long Precision Complex | psb_geamax



+
Table 4: Data types
+ + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 4. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+global

+

Specifies whether the computation should include the global reduction + across all processes.
Scope: global
Type: optional.
Intent: in.
Specified as: a logical scalar. Default: global=.true.
+

+

+On Return

+

+ + + +

+

+Function value

+

is the infinity norm of vector x.
Scope: global unless the optional variable global=.false. has been + specified
Specified as: a long precision real number. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to + compute multiple norms at the same time; in this case, it is possible to improve + the runtime efficiency by using the following scheme: +

       vres(1) = psb_geamax(x1,desc_a,info,global=.false.) 
    +   vres(2) = psb_geamax(x2,desc_a,info,global=.false.) 
    +   vres(3) = psb_geamax(x3,desc_a,info,global=.false.) 
    +   call psb_amx(ctxt,vres(1:3))
    + +

    In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

+ + + +

4.5 psb_geamaxs — Generalized Infinity Norm

+

This subroutine computes a series of infinity norms on the columns of a dense matrix +x: +

+$res(i) \leftarrow \max_k |x(k,i)|$
+
+

+

call psb_geamaxs(res, x, desc_a, info) +

+ + + +


+ + + +
+

+

+ + + +



res x Subroutine



Short Precision Real | Short Precision Real | psb_geamaxs
Long Precision Real | Long Precision Real | psb_geamaxs
Short Precision Real | Short Precision Complex | psb_geamaxs
Long Precision Real | Long Precision Complex | psb_geamaxs



+
Table 5: Data types
+ + + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 5. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+On Return

+

+

+

+res

+

is the infinity norm of the columns of x.
Scope: global
Intent: out.
Specified as: a number or a rank-one array of long precision real numbers. +

+

+info

+ + + +

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

4.6 psb_norm1 — 1-Norm of Vector

+

This function computes the 1-norm of a vector x.
If x is a real vector it computes 1-norm as: +

+$asum \leftarrow \|x\|_1$
+
+

else if x is a complex vector then it computes 1-norm as: +

+$asum \leftarrow \|re(x)\|_1 + \|im(x)\|_1$
+
+

+

psb_geasum(x, desc_a, info [,global])
psb_norm1(x, desc_a, info [,global]) +

+ + + +


+ + + +
+

+

+ + + +



asum x Function



Short Precision Real | Short Precision Real | psb_geasum
Long Precision Real | Long Precision Real | psb_geasum
Short Precision Real | Short Precision Complex | psb_geasum
Long Precision Real | Long Precision Complex | psb_geasum



+
Table 6: Data types
+ + + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 6. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+global

+

Specifies whether the computation should include the global reduction + across all processes.
Scope: global
Type: optional.
Intent: in.
Specified as: a logical scalar. Default: global=.true.
+

+

+On Return

+

+ + + +

+

+Function value

+

is the 1-norm of vector x.
Scope: global unless the optional variable global=.false. has been + specified
Specified as: a long precision real number. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to + compute multiple norms at the same time; in this case, it is possible to improve + the runtime efficiency by using the following scheme: +

       vres(1) = psb_geasum(x1,desc_a,info,global=.false.) 
    +   vres(2) = psb_geasum(x2,desc_a,info,global=.false.) 
    +   vres(3) = psb_geasum(x3,desc_a,info,global=.false.) 
    +   call psb_sum(ctxt,vres(1:3))
    + +

    In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

+ + + +

4.7 psb_geasums — Generalized 1-Norm of Vector

+

This subroutine computes a series of 1-norms on the columns of a dense matrix +x: +

+$res(i) \leftarrow \max_{k} |x(k,i)|$
+
+

This function computes the 1-norm of a vector x.
If x is a real vector it computes 1-norm as: +

+res(i) ← ∥xi∥
+
+

else if x is a complex vector then it computes 1-norm as: +

+$res(i) \leftarrow \|re(x)\|_1 + \|im(x)\|_1$
+
+

+

call psb_geasums(res, x, desc_a, info) +

+ + + +


+ + + +
+

+

+ + + +



res x Subroutine



Short Precision RealShort Precision Real psb_geasums
Long Precision RealLong Precision Real psb_geasums
Short Precision RealShort Precision Complexpsb_geasums
Long Precision RealLong Precision Complexpsb_geasums



+
Table 7: Data types
+ + + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 7. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+On Return

+

+

+

+res

+

contains the 1-norm of (the columns of) x.
Scope: global
Intent: out.
Specified as: a long precision real + number. + + + +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

4.8 psb_norm2 — 2-Norm of Vector

+

This function computes the 2-norm of a vector x.
If x is a real vector it computes 2-norm as: +

+$nrm2 \leftarrow \sqrt{x^T x}$
+
+

else if x is a complex vector then it computes 2-norm as: +

+$nrm2 \leftarrow \sqrt{x^H x}$
+
+

+

+ + + +


+ + + +
+

+

+ + + +



nrm2 x Function



Short Precision RealShort Precision Real psb_genrm2
Long Precision RealLong Precision Real psb_genrm2
Short Precision RealShort Precision Complexpsb_genrm2
Long Precision RealLong Precision Complexpsb_genrm2



+
Table 8: Data types
+ + + +

+
+

psb_genrm2(x, desc_a, info [,global])
psb_norm2(x, desc_a, info [,global])
+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 8. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+global

+

Specifies whether the computation should include the global reduction + across all processes.
Scope: global
Type: optional.
Intent: in.
Specified as: a logical scalar. Default: global=.true.
+

+

+On Return

+ + + +

+

+

+Function Value

+

is the 2-norm of vector x.
Scope: global unless the optional variable global=.false. has been + specified
Type: required
Specified as: a long precision real number. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to + compute multiple norms at the same time; in this case, it is possible to improve + the runtime efficiency by using the following scheme: +

       vres(1) = psb_genrm2(x1,desc_a,info,global=.false.) 
    +   vres(2) = psb_genrm2(x2,desc_a,info,global=.false.) 
    +   vres(3) = psb_genrm2(x3,desc_a,info,global=.false.) 
    +   call psb_nrm2(ctxt,vres(1:3))
    + +

    In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

+ + + +

4.9 psb_genrm2s — Generalized 2-Norm of Vector

+

This subroutine computes a series of 2-norms on the columns of a dense matrix +x: +

+res(i) ← ∥x(:,i)∥2
+
+

+

call psb_genrm2s(res, x, desc_a, info) +

+ + + +


+ + + +
+

+

+ + + +



res x Subroutine



Short Precision RealShort Precision Real psb_genrm2s
Long Precision RealLong Precision Real psb_genrm2s
Short Precision RealShort Precision Complexpsb_genrm2s
Long Precision RealLong Precision Complexpsb_genrm2s



+
Table 9: Data types
+ + + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 9. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+On Return

+

+

+

+res

+

contains the 2-norm of (the columns of) x.
Scope: global
Intent: out.
Specified as: a long precision real number. +

+

+info

+ + + +

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

4.10 psb_norm1 — 1-Norm of Sparse Matrix

+

This function computes the 1-norm of a matrix A:
+

+nrm1 ← ∥A ∥1
+
+

where: +

+

+A

+

represents the global matrix A

+
+ + + +


+ +
+

+

+ + + +


A Function


Short Precision Real psb_spnrm1
Long Precision Real psb_spnrm1
Short Precision Complexpsb_spnrm1
Long Precision Complexpsb_spnrm1


+
Table 10: Data types
+ + + +

+
+ + + +
+psb_spnrm1(A, desc_a, info)
+psb_norm1(A, desc_a, info)
+
+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+a

+

the local portion of the global sparse matrix A.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_Tspmat_type. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+On Return

+

+

+

+Function value

+

is the 1-norm of sparse submatrix A.
Scope: global
Specified as: a long precision real number. + + + +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

4.11 psb_normi — Infinity Norm of Sparse Matrix

+

This function computes the infinity-norm of a matrix A:
+

+nrmi ←  ∥A∥∞
+
+

where: +

+

+A

+

represents the global matrix A

+
+ + + +


+ + + +
+

+

+ + + +


A Function


Short Precision Real psb_spnrmi
Long Precision Real psb_spnrmi
Short Precision Complexpsb_spnrmi
Long Precision Complexpsb_spnrmi


+
Table 11: Data types
+ + + +

+
+ + + +
+psb_spnrmi(A, desc_a, info)
+psb_normi(A, desc_a, info)
+
+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+a

+

the local portion of the global sparse matrix A.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_Tspmat_type. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+On Return

+

+

+

+Function value

+

is the infinity-norm of sparse submatrix A.
Scope: global
Specified as: a long precision real number. + + + +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

4.12 psb_spmm — Sparse Matrix by Dense Matrix Product

+

This subroutine computes the Sparse Matrix by Dense Matrix Product: +
+
+y ←  αAx + βy
+
+
(1)
+

+
+
+$y \leftarrow \alpha A^T x + \beta y$
+
+
(2)
+

+
+
+$y \leftarrow \alpha A^H x + \beta y$
+
+
(3)
+

+

where: + + + +

+

+x

+

is the global dense matrix x:,: +

+

+y

+

is the global dense matrix y:,: +

+

+A

+

is the global sparse matrix A

+
+ + + +


+ + + +
+

+

+ + + + +


A, x, y, α, β Subroutine


Short Precision Real psb_spmm
Long Precision Real psb_spmm
Short Precision Complexpsb_spmm
Long Precision Complexpsb_spmm


+
Table 12: Data types
+ + + +

+
+

call psb_spmm(alpha, a, x, beta, y, desc_a, info)
call psb_spmm(alpha, a, x, beta, y,desc_a, info, trans, work) +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+alpha

+

the scalar α.
Scope: global
Type: required
Intent: in.
Specified as: a number of the data type indicated in Table 12. +

+

+a

+

the local portion of the sparse matrix A.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_Tspmat_type. +

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 12. The + rank of x must be the same as that of y. +

+

+beta

+ + + +

the scalar β.
Scope: global
Type: required
Intent: in.
Specified as: a number of the data type indicated in Table 12. +

+

+y

+

the local portion of global dense matrix y.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 12. The + rank of y must be the same as that of x. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+trans

+

indicates what kind of operation to perform. +

+

+ trans = N

+

the operation is specified by equation 1 +

+

+ trans = T

+

the operation is specified by equation 2 +

+

+ trans = C

+

the operation is specified by equation 3

+

Scope: global
Type: optional
Intent: in.
Default: trans = N
Specified as: a character variable. + + + +

+

+work

+

work array.
Scope: local
Type: optional
Intent: inout.
Specified as: a rank one array of the same type of x and y with the TARGET + attribute. +

+

+On Return

+

+

+

+y

+

the local portion of result matrix y.
Scope: local
Type: required
Intent: inout.
Specified as: an array of rank one or two containing numbers of type specified + in Table 12. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

4.13 psb_spsm — Triangular System Solve

+

This subroutine computes the Triangular System Solve: +

+
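+\begin{align*}
+y &\leftarrow \alpha T^{-1} x + \beta y \\
+y &\leftarrow \alpha D T^{-1} x + \beta y \\
+y &\leftarrow \alpha T^{-1} D x + \beta y \\
+y &\leftarrow \alpha T^{-T} x + \beta y \\
+y &\leftarrow \alpha D T^{-T} x + \beta y \\
+y &\leftarrow \alpha T^{-T} D x + \beta y \\
+y &\leftarrow \alpha T^{-H} x + \beta y \\
+y &\leftarrow \alpha D T^{-H} x + \beta y \\
+y &\leftarrow \alpha T^{-H} D x + \beta y
+\end{align*}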
+
+
+

where: +

+

+x

+

is the global dense matrix x:,: +

+

+y

+

is the global dense matrix y:,: +

+

+T

+

is the global sparse block triangular submatrix T +

+

+D

+

is the scaling diagonal matrix.

+ + + +

call psb_spsm(alpha, t, x, beta, y, desc_a, info)
call psb_spsm(alpha, t, x, beta, y, desc_a, info, trans, unit, choice, diag, work)
+

+ + + +


+ + + +
+

+

+ + + + +


T, x, y, D, α, β Subroutine


Short Precision Real psb_spsm
Long Precision Real psb_spsm
Short Precision Complexpsb_spsm
Long Precision Complexpsb_spsm


+
Table 13: Data types
+ + + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+alpha

+

the scalar α.
Scope: global
Type: required
Intent: in.
Specified as: a number of the data type indicated in Table 13. +

+

+t

+

the local portion of the sparse matrix T.
Scope: local
Type: required
Intent: in.
Specified as: an object type specified in § 3. +

+

+x

+

the local portion of global dense matrix x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 13. The + rank of x must be the same as that of y. +

+

+beta

+

the scalar β.
Scope: global
Type: required
Intent: in.
Specified as: a number of the data type indicated in Table 13. + + + +

+

+y

+

the local portion of global dense matrix y.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 13. The + rank of y must be the same as that of x. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+trans

+

specify with unitd the operation to perform. +

+

+ trans = ’N’

+

the operation is with no transposed matrix +

+

+ trans = ’T’

+

the operation is with transposed matrix. +

+

+ trans = ’C’

+

the operation is with conjugate transposed matrix.

+

Scope: global
Type: optional
Intent: in.
Default: trans = N
Specified as: a character variable. +

+

+unitd

+

specify with trans the operation to perform. + + + +

+

+ unitd = ’U’

+

the operation is with no scaling +

+

+ unitd = ’L’

+

the operation is with left scaling +

+

+ unitd = ’R’

+

the operation is with right scaling.

+

Scope: global
Type: optional
Intent: in.
Default: unitd = U
Specified as: a character variable. +

+

+choice

+

specifies the update of overlap elements to be performed on exit: +

+

+

+

psb_none_ +

+

+

+

psb_sum_ +

+

+

+

psb_avg_ +

+

+

+

psb_square_root_

+

Scope: global
Type: optional
Intent: in.
Default: psb_avg_
Specified as: an integer variable. + + + +

+

+diag

+

the diagonal scaling matrix.
Scope: local
Type: optional
Intent: in.
Default: diag(1) = 1(noscaling)
Specified as: a rank one array containing numbers of the type indicated in + Table 13. +

+

+work

+

a work array.
Scope: local
Type: optional
Intent: inout.
Specified as: a rank one array of the same type of x with the TARGET + attribute. +

+

+On Return

+

+

+

+y

+

the local portion of global dense matrix y.
Scope: local
Type: required
Intent: inout.
Specified as: an array of rank one or two containing numbers of type specified + in Table 13. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

4.14 psb_gemlt — Entrywise Product

+

This function computes the entrywise product between two vectors x and +y +

+dot ← x(i)y(i).
+
+

+

psb_gemlt(x, y, desc_a, info) +

+ + + +


+ + + +
+

+

+ + + + +


dot, x, y Function


Short Precision Real psb_gemlt
Long Precision Real psb_gemlt
Short Precision Complexpsb_gemlt
Long Precision Complexpsb_gemlt


+
Table 14: Data types
+ + + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense vector x.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

+

+y

+

the local portion of global dense vector y.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+On Return

+

+ + + +

+

+y

+

the local portion of result submatrix y.
Scope: local
Type: required
Intent: inout.
Specified as: an object of type psb_T_vect_type containing numbers of + the type indicated in Table 14. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

4.15 psb_gediv — Entrywise Division

+

This function computes the entrywise division between two vectors x and +y +

+$x(i) \leftarrow x(i)/y(i)$.
+
+

+

psb_gediv(x, y, desc_a, info [,flag]) +

+ + + +


+ + + +
+

+

+ + + + +


/, x, y Function


Short Precision Real psb_gediv
Long Precision Real psb_gediv
Short Precision Complexpsb_gediv
Long Precision Complexpsb_gediv


+
Table 15: Data types
+ + + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense vector x.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

+

+y

+

the local portion of global dense vector y.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+flag

+

checks whether any of the y(i) = 0 and, if so, returns an error and halts the + computation.
Scope: local
Type: optional Intent: in.
Specified as: the logical value flag=.true. + + + +

+

+On Return

+

+

+

+x

+

the local portion of result submatrix x.
Scope: local
Type: required
Intent: inout.
Specified as: an object of type psb_T_vect_type containing numbers of + the type indicated in Table 15. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

4.16 psb_geinv — Entrywise Inversion

+

This function computes the entrywise inverse of a vector x and puts it into +y +

+$y(i) \leftarrow 1/x(i)$.
+
+

+

psb_geinv(x, y, desc_a, info [,flag]) +

+ + + +


+ + + +
+

+

+ + + + +


/, x, y Function


Short Precision Real psb_geinv
Long Precision Real psb_geinv
Short Precision Complexpsb_geinv
Long Precision Complexpsb_geinv


+
Table 16: Data types
+ + + +

+
+
+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

the local portion of global dense vector x.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: an object of type psb_desc_type. +

+

+flag

+

checks whether any of the x(i) = 0 and, if so, returns an error and halts the + computation.
Scope: local
Type: optional Intent: in.
Specified as: the logical value flag=.true. +

+

+On Return

+

+ + + +

+

+y

+

the local portion of result submatrix y.
Scope: local
Type: required
Intent: out.
Specified as: an object of type psb_T_vect_type containing numbers of + the type indicated in Table 16. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + + + + + + + + +href="userhtml.html#userhtmlse7.html" >up]

+ id="tailuserhtmlse4.html"> diff --git a/docs/html/userhtmlse5.html b/docs/html/userhtmlse5.html index c897c0eb..fa33cd5e 100644 --- a/docs/html/userhtmlse5.html +++ b/docs/html/userhtmlse5.html @@ -11,42 +11,2318 @@

-

5 Communication routines

+href="#tailuserhtmlse5.html">tail] [up]

+

5 Communication routines

The routines in this chapter implement various global communication operators on vectors associated with a discretization mesh. For auxiliary communication routines not tied to a discretization space see 6. +href="userhtmlse7.html#x13-1060007">7. -

-  5.1 psb_halo — Halo Data Communication -
 5.2 psb_ovrl — Overlap Update -
 5.3 psb_gather — Gather Global Dense Matrix -
 5.4 psb_scatter — Scatter Global Dense Matrix +

5.1 psb_halo — Halo Data Communication

+

These subroutines gather the values of the halo elements: +

+x ← x
+
+

where: +

+

+x

+

is a global dense submatrix.

+
+ + + +


+ + + +
+

+

+ + + + + +


α, x Subroutine


Integer psb_halo
Short Precision Real psb_halo
Long Precision Real psb_halo
Short Precision Complexpsb_halo
Long Precision Complexpsb_halo


+
Table 17: Data types
+ + + +

+

call psb_halo(x, desc_a, info)
call psb_halo(x, desc_a, info, work, data) +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

global dense matrix x.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 17. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+work

+

the work array.
Scope: local
Type: optional
Intent: inout.
Specified as: a rank one array of the same type of x. +

+

+data

+ + +

index list selector.
Scope: global
Type: optional
Specified + as: an integer. Values:psb_comm_halo_,psb_comm_mov_, psb_comm_ext_, + default: psb_comm_halo_. Chooses the index list on which to base the + data exchange. +

+

+On Return

+

+

+

+x

+

global dense result matrix x.
Scope: local
Type: required
Intent: inout.
Returned as: a rank one or two array containing numbers of type + specified in Table 17. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value that contains an error code.

+


+
+

+

PIC

+
Figure 3: Sample discretization mesh.
+


+

Usage Example Consider the discretization mesh depicted in fig. 3, partitioned +among two processes as shown by the dashed line; the data distribution is such that +each process will own 32 entries in the index space, with a halo made of 8 entries +placed at local indices 33 through 40. If process 0 assigns an initial value of 1 +to its entries in the x vector, and process 1 assigns a value of 2, then after +a call to psb_halo the contents of the local vectors will be the following: +

+ + +


+ + + +
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Process 0
Process 1
IGLOB(I)X(I) IGLOB(I)X(I)
1 1 1.0 1 33 2.0
2 2 1.0 2 34 2.0
3 3 1.0 3 35 2.0
4 4 1.0 4 36 2.0
5 5 1.0 5 37 2.0
6 6 1.0 6 38 2.0
7 7 1.0 7 39 2.0
8 8 1.0 8 40 2.0
9 9 1.0 9 41 2.0
10 10 1.010 42 2.0
11 11 1.011 43 2.0
12 12 1.012 44 2.0
13 13 1.013 45 2.0
14 14 1.014 46 2.0
15 15 1.015 47 2.0
16 16 1.016 48 2.0
17 17 1.017 49 2.0
18 18 1.018 50 2.0
19 19 1.019 51 2.0
20 20 1.020 52 2.0
21 21 1.021 53 2.0
22 22 1.022 54 2.0
23 23 1.023 55 2.0
24 24 1.024 56 2.0
25 25 1.025 57 2.0
26 26 1.026 58 2.0
27 27 1.027 59 2.0
28 28 1.028 60 2.0
29 29 1.029 61 2.0
30 30 1.030 62 2.0
31 31 1.031 63 2.0
32 32 1.032 64 2.0
33 33 2.033 25 1.0
34 34 2.034 26 1.0
35 35 2.035 27 1.0
36 36 2.036 28 1.0
37 37 2.037 29 1.0
38 38 2.038 30 1.0
39 39 2.039 31 1.0
40 40 2.040 32 1.0
+ + + +

+
+ + + +

5.2 psb_ovrl — Overlap Update

+

These subroutines apply an overlap operator to the input vector: +

+x ← Qx
+
+

where: +

+

+x

+

is the global dense submatrix x +

+

+Q

+

is the overlap operator; it is the composition of two operators Pa and PT.

+
+ +


+ + + +
+

+

+ + + +


x Subroutine


Short Precision Real psb_ovrl
Long Precision Real psb_ovrl
Short Precision Complexpsb_ovrl
Long Precision Complexpsb_ovrl


+
Table 18: Data types
+ + + +

+
+

call psb_ovrl(x, desc_a, info)
call psb_ovrl(x, desc_a, info, update=update_type, work=work) +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

global dense matrix x.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 18. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+update

+

Update operator.
+

+

+ update = psb_none_

+

Do nothing; +

+

+ update = psb_add_

+

Sum overlap entries, i.e. apply PT; + + + +

+

+ update = psb_avg_

+

Average overlap entries, i.e. apply PaPT;

+

Scope: global
Intent: in.
Default: update_type = psb_avg_
Scope: global
Specified as: an integer variable. +

+

+work

+

the work array.
Scope: local
Type: optional
Intent: inout.
Specified as: a one dimensional array of the same type of x. +

+

+On Return

+

+

+

+x

+

global dense result matrix x.
Scope: local
Type: required
Intent: inout.
Specified as: an array of rank one or two containing numbers of type specified + in Table 18. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. + + + +

    If there is no overlap in the data distribution associated with the + descriptor, no operations are performed; +

  2. +
  3. +

    The operator PT performs the reduction sum of overlap elements; it is a + “prolongation” operator PT that replicates overlap elements, accounting + for the physical replication of data; +

  4. +
  5. +

    The operator Pa performs a scaling on the overlap elements by the + amount of replication; thus, when combined with the reduction operator, + it implements the average of replicated elements over all of their + instances.

+


+ + + + + + + +
+

+

PIC

+
Figure 4: Sample discretization mesh.
+ + + +


+

Example of use Consider the discretization mesh depicted in fig. 4, partitioned +among two processes as shown by the dashed lines, with an overlap of 1 extra +layer with respect to the partition of fig. 3; the data distribution is such that +each process will own 40 entries in the index space, with an overlap of 16 +entries placed at local indices 25 through 40; the halo will run from local +index 41 through local index 48. If process 0 assigns an initial value of 1 to +its entries in the x vector, and process 1 assigns a value of 2, then after a +call to psb_ovrl with psb_avg_ and a call to psb_halo_ the contents of the +local vectors will be the following (showing a transition among the two +subdomains) +

+ + + +


+ + + +
+

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Process 0
Process 1
IGLOB(I)X(I) IGLOB(I)X(I)
1 1 1.0 1 33 1.5
2 2 1.0 2 34 1.5
3 3 1.0 3 35 1.5
4 4 1.0 4 36 1.5
5 5 1.0 5 37 1.5
6 6 1.0 6 38 1.5
7 7 1.0 7 39 1.5
8 8 1.0 8 40 1.5
9 9 1.0 9 41 2.0
10 10 1.010 42 2.0
11 11 1.011 43 2.0
12 12 1.012 44 2.0
13 13 1.013 45 2.0
14 14 1.014 46 2.0
15 15 1.015 47 2.0
16 16 1.016 48 2.0
17 17 1.017 49 2.0
18 18 1.018 50 2.0
19 19 1.019 51 2.0
20 20 1.020 52 2.0
21 21 1.021 53 2.0
22 22 1.022 54 2.0
23 23 1.023 55 2.0
24 24 1.024 56 2.0
25 25 1.525 57 2.0
26 26 1.526 58 2.0
27 27 1.527 59 2.0
28 28 1.528 60 2.0
29 29 1.529 61 2.0
30 30 1.530 62 2.0
31 31 1.531 63 2.0
32 32 1.532 64 2.0
33 33 1.533 25 1.5
34 34 1.534 26 1.5
35 35 1.535 27 1.5
36 36 1.536 28 1.5
37 37 1.537 29 1.5
38 38 1.538 30 1.5
39 39 1.539 31 1.5
40 40 1.540 32 1.5
41 41 2.041 17 1.0
42 42 2.042 18 1.0
43 43 2.043 19 1.0
44 44 2.044 20 1.0
45 45 2.045 21 1.0
46 46 2.046 22 1.0
47 47 2.047 23 1.0
48 48 2.048 24 1.0
+ + + +

+
+ + + +

5.3 psb_gather — Gather Global Dense Matrix

+

These subroutines collect the portions of a global dense matrix distributed over all +processes into one single array stored on one process. +

+$glob\_x \leftarrow collect(loc\_x_i)$
+
+

where: +

+

+glob_x

+

is the global submatrix glob_x1:m,1:n +

+

+loc_xi

+

is the local portion of global dense matrix on process i. +

+

+collect

+

is the collect function.

+
+ + + +


+ + + +
+

+

+ + + + + +


xi, y Subroutine


Integer psb_gather
Short Precision Real psb_gather
Long Precision Real psb_gather
Short Precision Complexpsb_gather
Long Precision Complexpsb_gather


+
Table 19: Data types
+ + + +

+
+

call psb_gather(glob_x, loc_x, desc_a, info, root) +call psb_gather(glob_x, loc_x, desc_a, info, root) +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+loc_x

+

the local portion of global dense matrix glob_x.
Scope: local
Type: required
Intent: in.
Specified as: a rank one or two array or an object of type + psb_T_vect_type indicated in Table 19. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+root

+

The process that holds the global copy. If root = -1 all the processes will + have a copy of the global vector.
Scope: global
Type: optional
Intent: in.
Specified as: an integer variable -1 ≤ root ≤ np-1, default -1. +

+

+On Return

+

+ + + +

+

+glob_x

+

The array where the local parts must be gathered.
Scope: global
Type: required
Intent: out.
Specified as: a rank one or two array with the ALLOCATABLE attribute. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

5.4 psb_scatter — Scatter Global Dense Matrix

+

These subroutines scatter the portions of a global dense matrix owned by a process to +all the processes in the process grid. +

+$loc\_x_i \leftarrow scatter(glob\_x)$
+
+

where: +

+

+glob_x

+

is the global matrix glob_x1:m,1:n +

+

+loc_xi

+

is the local portion of global dense matrix on process i. +

+

+scatter

+

is the scatter function.

+
+ + + +


+ + + +
+

+

+ + + + + +


xi, y Subroutine


Integer psb_scatter
Short Precision Real psb_scatter
Long Precision Real psb_scatter
Short Precision Complexpsb_scatter
Long Precision Complexpsb_scatter


+
Table 20: Data types
+ + + +

+
+

call psb_scatter(glob_x, loc_x, desc_a, info, root, mold) +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+glob_x

+

The array that must be scattered into local pieces.
Scope: global
Type: required
Intent: in.
Specified as: a rank one or two array. +

+

+desc_a

+

contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+root

+

The process that holds the global copy. If root = -1 all the processes have + a copy of the global vector.
Scope: global
Type: optional
Intent: in.
Specified as: an integer variable -1 ≤ root ≤ np-1, default psb_root_, + i.e. process 0. +

+

+mold

+ + + +

The desired dynamic type for the internal vector storage.
Scope: local.
Type: optional.
Intent: in.
Specified as: an object of a class derived from psb_T_base_vect_type; + this is only allowed when loc_x is of type psb_T_vect_type. +

+

+On Return

+

+

+

+loc_x

+

the local portion of global dense matrix glob_x.
Scope: local
Type: required
Intent: out.
Specified as: a rank one or two ALLOCATABLE array or an object of type + psb_T_vect_type containing numbers of the type indicated in Table 20. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + + + + + + + + +href="userhtml.html#userhtmlse8.html" >up]

+ id="tailuserhtmlse5.html"> diff --git a/docs/html/userhtmlse6.html b/docs/html/userhtmlse6.html index 78d2fa1e..7d23f86f 100644 --- a/docs/html/userhtmlse6.html +++ b/docs/html/userhtmlse6.html @@ -11,105 +11,4132 @@

-

6 Data management routines

+href="userhtmlse3.html#tailuserhtmlse6.html">tail] [up]

+

6 Data management routines

-

+

6.1 psb_cdall — Allocates a communication descriptor

+ + + +
+call psb_cdall(icontxt, desc_a, info,mg=mg,parts=parts)
+call psb_cdall(icontxt, desc_a, info,vg=vg,[mg=mg,flag=flag])
+call psb_cdall(icontxt, desc_a, info,vl=vl,[nl=nl,globalcheck=.false.,lidx=lidx])
+call psb_cdall(icontxt, desc_a, info,nl=nl)
+call psb_cdall(icontxt, desc_a, info,mg=mg,repl=.true.)
+
+

+

This subroutine initializes the communication descriptor associated with an +index space. One of the optional arguments parts, vg, vl, nl or repl must be +specified, thereby choosing the specific initialization strategy. +

+

+On Entry

+

+

+

+Type:

+

Synchronous. +

+

+icontxt

+

the communication context.
Scope:global.
Type:required.
Intent: in.
Specified as: an integer value. +

+

+vg

+

Data allocation: each index i ∈ {1, …, mg} is allocated to process vg(i).
Scope:global.
Type:optional.
Intent: in.
Specified as: an integer array. +

+

+flag

+

Specifies whether entries in vg are zero- or one-based.
Scope:global.
Type:optional.
Intent: in.
Specified as: an integer value 0, 1, default 0. + + + +

+

+mg

+

the (global) number of rows of the problem.
Scope:global.
Type:optional.
Intent: in.
Specified as: an integer value. It is required if parts or repl is specified, + it is optional if vg is specified. +

+

+parts

+

the subroutine that defines the partitioning scheme.
Scope:global.
Type:required.
Specified as: a subroutine. +

+

+vl

+

Data allocation: the set of global indices vl(1 : nl) belonging to the calling + process.
Scope:local.
Type:optional.
Intent: in.
Specified as: an integer array. +

+

+nl

+

Data allocation: in a generalized block-row distribution the number of + indices belonging to the current process.
Scope:local.
Type:optional.
Intent: in.
Specified as: an integer value. May be specified together with vl. +

+

+repl

+

Data allocation: build a replicated index space (i.e. all processes own all + indices).
Scope:global.
Type:optional.
Intent: in.
Specified as: the logical value .true. + + + +

+

+globalcheck

+

Data allocation: do global checks on the local index lists vl
Scope:global.
Type:optional.
Intent: in.
Specified as: a logical value, default: .false. +

+

+lidx

+

Data allocation: the set of local indices lidx(1 : nl) to be assigned to the + global indices vl.
Scope:local.
Type:optional.
Intent: in.
Specified as: an integer array.

+

+

+

+On Return

+

+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: out.
Specified as: a structured data of type psb_desc_type. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. + + + +

    One of the optional arguments parts, vg, vl, nl or repl must be specified, + thereby choosing the initialization strategy as follows: +

    +

    + parts

    +

    In this case we have a subroutine specifying the mapping between global + indices and process/local index pairs. If this optional argument is + specified, then it is mandatory to specify the argument mg as well. The + subroutine must conform to the following interface: + + + +

    +           interface
    +              subroutine psb_parts(glob_index,mg,np,pv,nv)
    +                integer, intent (in)  :: glob_index,np,mg
    +                integer, intent (out) :: nv, pv(*)
    +              end subroutine psb_parts
    +           end interface
    +
    +

    The input arguments are: +

    +

    + glob_index

    +

    The global index to be mapped; +

    +

    + np

    +

    The number of processes in the mapping; +

    +

    + mg

    +

    The total number of global rows in the mapping;

    +

    The output arguments are: +

    +

    + nv

    +

    The number of entries in pv; +

    +

    + pv

    +

    A vector containing the indices of the processes to which the + global index should be assigned; each entry must satisfy 0 ≤ + pv(i) < np; if nv > 1 we have an index assigned to multiple + processes, i.e. we have an overlap among the subdomains.

    +
    +

    + vg

    +

    In this case the association between an index and a process is specified via + an integer vector vg(1:mg); each index i ∈ {1, …, mg} is assigned to process + vg(i). The vector vg must be identical on all calling processes; its + entries may have the ranges (0, …, np - 1) or (1, …, np) according to + the value of flag. The size mg may be specified via the optional + argument mg; the default is to use the entire vector vg, thus having + mg=size(vg). +

    +

    + vl

    + + + +

    In this case we are specifying the list of indices vl(1:nl) assigned to the + current process; thus, the global problem size mg is given by the range of + the aggregate of the individual vectors vl specified in the calling + processes. The size may be specified via the optional argument nl; the + default is to use the entire vector vl, thus having nl=size(vl). If + globalcheck=.true. the subroutine will check how many times each + entry in the global index space (1, …, mg) is specified in the input lists vl, + thus allowing for the presence of overlap in the input, and checking for + “orphan” indices. If globalcheck=.false., the subroutine will not + check for overlap, and may be significantly faster, but the user is + implicitly guaranteeing that there are neither orphan nor overlap + indices. +

    +

    + lidx

    +

    The optional argument lidx is available for those cases in which the user + has already established a global-to-local mapping; if it is specified, each + index in vl(i) will be mapped to the corresponding local index lidx(i). + When specifying the argument lidx the user would also likely employ + lidx in calls to psb_cdins and local in calls to psb_spins and + psb_geins; see also sec. 2.3.1. +

    +

    + nl

    +

    If this argument is specified alone (i.e. without vl) the result is a + generalized row-block distribution in which each process I gets assigned + a consecutive chunk of N_I = nl global indices. +

    +

    + repl

    +

    This argument specifies that all indices are to be replicated on all processes. This is a + special purpose data allocation that is useful in the construction of some + multilevel preconditioners.

    +
  2. +
  3. +

    On exit from this routine the descriptor is in the build state. +

  4. +
  5. +

    Calling the routine with vg or parts implies that every process will scan the + entire index space to figure out the local indices. +

  6. +
  7. +

    Overlapped indices are possible with both parts and vl invocations. + + + +

  8. +
  9. +

    When the subroutine is invoked with vl in conjunction with globalcheck=.true., + it will perform a scan of the index space to search for overlap or orphan + indices. +

  10. +
  11. +

    When the subroutine is invoked with vl in conjunction with + globalcheck=.false., no index space scan will take place. Thus it is the + responsibility of the user to make sure that the indices specified in vl have + neither orphans nor overlaps; if this assumption fails, results will be + unpredictable. +

  12. +
  13. +

    Orphan and overlap indices are impossible by construction when the + subroutine is invoked with nl (alone), or vg.

+ + + +

6.2 psb_cdins — Communication descriptor insert routine

+ + + +
+call psb_cdins(nz, ia, ja, desc_a, info [,ila,jla])
+call psb_cdins(nz,ja,desc,info[,jla,mask,lidx])
+
+

+

This subroutine examines the edges of the graph associated with the +discretization mesh (and isomorphic to the sparsity pattern of a linear system +coefficient matrix), storing them as necessary into the communication descriptor. In +the first form the edges are specified as pairs of indices ia(i), ja(i); the starting index +ia(i) should belong to the current process. In the second form only the remote +indices ja(i) are specified. +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+nz

+

the number of points being inserted.
Scope: local.
Type: required.
Intent: in.
Specified as: an integer value. +

+

+ia

+

the indices of the starting vertex of the edges being inserted.
Scope: local.
Type: required.
Intent: in.
Specified as: an integer array of length nz. +

+

+ja

+ + + +

the indices of the end vertex of the edges being inserted.
Scope: local.
Type: required.
Intent: in.
Specified as: an integer array of length nz. +

+

+mask

+

Mask for the entries in ja: they are inserted only when the corresponding mask + entries are .true.
Scope: local.
Type: optional.
Intent: in.
Specified as: a logical array of length nz, default .true.. +

+

+lidx

+

User defined local indices for ja.
Scope: local.
Type: optional.
Intent: in.
Specified as: an integer array of length nz.

+

+

+

+On Return

+

+

+

+desc_a

+

the updated communication descriptor.
Scope:local.
Type:required.
Intent: inout.
Specified as: a structured data of type psb_desc_type. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected. + + + +

+

+ila

+

the local indices of the starting vertex of the edges being inserted.
Scope: local.
Type: optional.
Intent: out.
Specified as: an integer array of length nz. +

+

+jla

+

the local indices of the end vertex of the edges being inserted.
Scope: local.
Type: optional.
Intent: out.
Specified as: an integer array of length nz. +

+

Notes +

    +
  1. +

    This routine may only be called if the descriptor is in the build state; +

  2. +
  3. +

    This routine automatically ignores edges that are not incident on the current + process, i.e. edges for which neither the starting nor the end vertex belongs + to the current process. +

  4. +
  5. +

    The second form of this routine will be useful when dealing with + user-specified index mappings; see also 2.3.1.

+ + + +

6.3 psb_cdasb — Communication descriptor assembly routine

+ + + +
+call psb_cdasb(desc_a, info [, mold])
+
+

+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: inout.
Specified as: a structured data of type psb_desc_type. +

+

+mold

+

The desired dynamic type for the internal index storage.
Scope: local.
Type: optional.
Intent: in.
Specified as: an object of a type derived from (integer) + psb_T_base_vect_type.

+

+

+

+On Return

+

+

+

+desc_a

+ + + +

the communication descriptor.
Scope:local.
Type:required.
Intent: inout.
Specified as: a structured data of type psb_desc_type. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    On exit from this routine the descriptor is in the assembled state.

+

This call will set up all the necessary information for the halo data exchanges. In doing +so, the library will need to identify the set of processes owning the halo indices +through the use of the desc%fnd_owner() method; the owning processes +are the topological neighbours of the calling process. If the user has some +background information on the processes that are neighbours of the current one, +it is possible to specify explicitly the list of adjacent processes with a call +to desc%set_p_adjcncy(list); this will speed up the subsequent call to +psb_cdasb. + + + +

6.4 psb_cdcpy — Copies a communication descriptor

+ + + +
+call psb_cdcpy(desc_in, desc_out, info)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+desc_in

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+

+

+On Return

+

+

+

+desc_out

+

the communication descriptor copy.
Scope:local.
Type:required.
Intent: out.
Specified as: a structured data of type psb_desc_type. +

+

+info

+ + + +

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

6.5 psb_cdfree — Frees a communication descriptor

+ + + +
+call psb_cdfree(desc_a, info)
+
+

+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+desc_a

+

the communication descriptor to be freed.
Scope:local.
Type:required.
Intent: inout.
Specified as: a structured data of type psb_desc_type.

+

+

+

+On Return

+

+

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

6.6 psb_cdbldext — Build an extended communication descriptor

+ + + +
+call psb_cdbldext(a,desc_a,nl,desc_out, info, extype)
+
+

+

This subroutine builds an extended communication descriptor, based on the +input descriptor desc_a and on the stencil specified through the input sparse matrix +a. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+a

+

A sparse matrix. Scope:local.
Type:required.
Intent: in.
Specified as: a structured data type. +

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+nl

+

the number of additional layers desired.
Scope:global.
Type:required.
Intent: in.
Specified as: an integer value nl ≥ 0. + + + +

+

+extype

+

the kind of extension required.
Scope:global.
Type:optional .
Intent: in.
Specified as: an integer value psb_ovt_xhal_, psb_ovt_asov_, default: + psb_ovt_xhal_ +

+

+

+

+On Return

+

+

+

+desc_out

+

the extended communication descriptor.
Scope:local.
Type:required.
Intent: inout.
Specified as: a structured data of type psb_desc_type. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    Specifying psb_ovt_xhal_ for the extype argument the user will obtain + a descriptor for a domain partition in which the additional layers are + fetched as part of an (extended) halo; however the index-to-process + mapping is identical to that of the base descriptor; +

  2. +
  3. + + + +

    Specifying psb_ovt_asov_ for the extype argument the user will obtain + a descriptor with an overlapped decomposition: the additional layer is + aggregated to the local subdomain (and thus is an overlap), and a new + halo extending beyond the last additional layer is formed.

+ + + +

6.7 psb_spall — Allocates a sparse matrix

+ + + +
+call psb_spall(a, desc_a, info [, nnz, dupl, bldmode])
+
+

+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+nnz

+

An estimate of the number of nonzeroes in the local part of the assembled + matrix.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value. +

+

+dupl

+

How to handle duplicate coefficients.
Scope: global.
Type: optional.
Intent: in.
Specified as: integer, possible values: psb_dupl_ovwrt_, psb_dupl_add_, + psb_dupl_err_. + + + +

+

+bldmode

+

Whether to keep track of matrix entries that do not belong to the current + process.
Scope: global.
Type: optional.
Intent: in.
Specified as: + an integer value psb_matbld_noremote_, psb_matbld_remote_. Default: + psb_matbld_noremote_.

+

+

+

+On Return

+

+

+

+a

+

the matrix to be allocated.
Scope:local
Type:required
Intent: out.
Specified as: a structured data of type psb_Tspmat_type. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    On exit from this routine the sparse matrix is in the build state. +

  2. +
  3. +

    The descriptor may be in either the build or assembled state. + + + +

  4. +
  5. +

    Providing a good estimate for the number of nonzeroes nnz in the + assembled matrix may substantially improve performance in the matrix + build phase, as it will reduce or eliminate the need for (potentially + multiple) data reallocations; +

  6. +
  7. +

    Using psb_matbld_remote_ is likely to cause a runtime overhead at + assembly time;

+ + + +

6.8 psb_spins — Insert a set of coefficients into a sparse matrix

+ + + +
+call psb_spins(nz, ia, ja, val, a, desc_a, info [,local])
+call psb_spins(nr, irw, irp, ja, val, a, desc_a, info [,local])
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+nz

+

the number of coefficients to be inserted.
Scope:local.
Type:required.
Intent: in.
Specified as: an integer scalar. +

+

+nr

+

the number of rows to be inserted.
Scope:local.
Type:required.
Intent: in.
Specified as: an integer scalar. +

+

+irw

+

the first row to be inserted.
Scope:local.
Type:required.
Intent: in.
Specified as: an integer scalar. + + + +

+

+ia

+

the row indices of the coefficients to be inserted.
Scope:local.
Type:required.
Intent: in.
Specified as: an integer array of size nz. +

+

+irp

+

the row pointers of the coefficients to be inserted.
Scope:local.
Type:required.
Intent: in.
Specified as: an integer array of size nr + 1. +

+

+ja

+

the column indices of the coefficients to be inserted.
Scope:local.
Type:required.
Intent: in.
Specified as: an integer array of size nz. +

+

+val

+

the coefficients to be inserted.
Scope:local.
Type:required.
Intent: in.
Specified as: an array of size nz. Must be of the same type and kind as the + coefficients of the sparse matrix a. +

+

+desc_a

+

The communication descriptor.
Scope: local.
Type: required.
Intent: inout.
Specified as: a variable of type psb_desc_type.
+ + + +

+

+local

+

Whether the entries in the indices vectors ia, ja are already in local + numbering.
Scope:local.
Type:optional.
Specified as: a logical value; default: .false.. +

+

+

+

+On Return

+

+

+

+a

+

the matrix into which coefficients will be inserted.
Scope:local
Type:required
Intent: inout.
Specified as: a structured data of type psb_Tspmat_type. +

+

+desc_a

+

The communication descriptor.
Scope: local.
Type: required.
Intent: inout.
Specified as: a variable of type psb_desc_type.
+

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes + + + +

    +
  1. +

    On entry to this routine the descriptor may be in either the build or + assembled state. +

  2. +
  3. +

    On entry to this routine the sparse matrix may be in either the build or + update state. +

  4. +
  5. +

    If the descriptor is in the build state, then the sparse matrix must also be + in the build state; the action of the routine is to (implicitly) call psb_cdins + to add entries to the sparsity pattern; each sparse matrix entry implicitly + defines a graph edge, that is passed to the descriptor routine for the + appropriate processing; +

  6. +
  7. +

    The input data can be passed in either COO or CSR formats; +

  8. +
  9. +

    In COO format the coefficients to be inserted are represented by the + ordered triples ia(i), ja(i), val(i), for i = 1, …, nz; these triples are + arbitrary; +

  10. +
  11. +

    In CSR format the coefficients to be inserted for each input row i = 1, …, nr + are represented by the ordered triples (i + irw - 1), ja(j), val(j), for j = + irp(i), …, irp(i + 1) - 1; these triples should belong to the current process, + i.e. i + irw - 1 should be one of the local indices, but are otherwise + arbitrary; +

  12. +
  13. +

    There is no requirement that a given row must be passed in its entirety + to a single call to this routine: the buildup of a row may be split into as + many calls as desired (even in the CSR format); + + + +

  14. +
  15. +

    Coefficients from different rows may also be mixed up freely in a single + call, according to the application needs; +

  16. +
  17. +

    Coefficients from matrix rows not owned by the calling process are + treated according to the value of bldmode specified at allocation time; if + bldmode was chosen as psb_matbld_remote_ the library will keep track + of them, otherwise they are silently ignored; +

  18. +
  19. +

    If the descriptor is in the assembled state, then any entries in the sparse + matrix that would generate additional communication requirements are + ignored; +

  20. +
  21. +

    If the matrix is in the update state, any entries in positions that were not + present in the original matrix are ignored.

+ + + +

6.9 psb_spasb — Sparse matrix assembly routine

+ + + +
+call psb_spasb(a, desc_a, info [, afmt, upd,  mold])
+
+

+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in/out.
Specified as: a structured data of type psb_desc_type. +

+

+afmt

+

the storage format for the sparse matrix.
Scope: local.
Type: optional.
Intent: in.
Specified as: an array of characters. Default: ’CSR’. +

+

+upd

+

Provide for updates to the matrix coefficients.
Scope: global.
Type: optional.
Intent: in.
Specified as: integer, possible values: psb_upd_srch_, psb_upd_perm_ +

+

+mold

+ + + +

The desired dynamic type for the internal matrix storage.
Scope: local.
Type: optional.
Intent: in.
Specified as: an object of a class derived from psb_T_base_sparse_mat.

+

+

+

+On Return

+

+

+

+a

+

the matrix to be assembled.
Scope:local
Type:required
Intent: inout.
Specified as: a structured data of type psb_Tspmat_type. +

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in/out.
Specified as: a structured data of type psb_desc_type. If the matrix was + allocated with bldmode=psb_matbld_remote_, then the descriptor will be + reassembled. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    On entry to this routine the descriptor must be in the assembled state, i.e. + psb_cdasb must already have been called. + + + +

  2. +
  3. +

    The sparse matrix may be in either the build or update state; +

  4. +
  5. +

    Duplicate entries are detected and handled in both build and update + state, with the exception of the error action that is only taken in the build + state, i.e. on the first assembly; +

  6. +
  7. +

    If the update choice is psb_upd_perm_, then subsequent calls to + psb_spins to update the matrix must be arranged in such a way as to + produce exactly the same sequence of coefficient values as encountered + at the first assembly; +

  8. +
  9. +

    The output storage format need not be the same on all processes; +

  10. +
  11. +

    On exit from this routine the matrix is in the assembled state, and thus is + suitable for the computational routines; +

  12. +
  13. +

    If the bldmode=psb_matbld_remote_ value was specified at allocation + time, contributions defined on the current process but belonging to a + remote process will be handled accordingly. This is most likely to occur + in finite element applications, with dupl=psb_dupl_add_; it is necessary + to check for possible updates needed in the descriptor, hence there will + be a runtime overhead.

+ + + +

6.10 psb_spfree — Frees a sparse matrix

+ + + +
+call psb_spfree(a, desc_a, info)
+
+

+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+a

+

the matrix to be freed.
Scope:local
Type:required
Intent: inout.
Specified as: a structured data of type psb_Tspmat_type. +

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type.

+

+

+

+On Return

+

+

+

+info

+ + + +

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

6.11 psb_sprn — Reinit sparse matrix structure for psblas routines.

+ + + +
+call psb_sprn(a, desc_a, info, clear)
+
+

+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+a

+

the matrix to be reinitialized.
Scope:local
Type:required
Intent: inout.
Specified as: a structured data of type psb_Tspmat_type. +

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+clear

+

Choose whether to zero out matrix coefficients
Scope:local.
Type:optional.
Intent: in.
Default: true.

+

+ + + +

+

+On Return

+

+

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    On exit from this routine the sparse matrix is in the update state.

+ + + +

6.12 psb_geall — Allocates a dense matrix

+ + + +
+call psb_geall(x, desc_a, info[, dupl, bldmode, n, lb])
+
+

+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+desc_a

+

The communication descriptor.
Scope: local
Type: required
Intent: in.
Specified as: a variable of type psb_desc_type.
+

+

+n

+

The number of columns of the dense matrix to be allocated.
Scope: local
Type: optional
Intent: in.
Specified as: Integer scalar, default 1. It is not a valid argument if x is a + rank-1 array. +

+

+lb

+

The lower bound for the column index range of the dense matrix to be + allocated.
Scope: local
Type: optional
Intent: in.
Specified as: Integer scalar, default 1. It is not a valid argument if x is a + rank-1 array. + + + +

+

+dupl

+

How to handle duplicate coefficients.
Scope: global.
Type: optional.
Intent: in.
Specified as: integer, possible values: psb_dupl_ovwrt_, psb_dupl_add_; + psb_dupl_err_ has no effect. +

+

+bldmode

+

Whether to keep track of matrix entries that do not belong to the current + process.
Scope: global.
Type: optional.
Intent: in.
Specified as: + an integer value psb_matbld_noremote_, psb_matbld_remote_. Default: + psb_matbld_noremote_.

+

+

+

+On Return

+

+

+

+x

+

The dense matrix to be allocated.
Scope: local
Type: required
Intent: out.
Specified as: a rank one or two array with the ALLOCATABLE attribute + or an object of type psb_T_vect_type, of type real, complex or integer.
+

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + +

Notes +

    +
  1. +

    Using psb_matbld_remote_ is likely to cause a runtime overhead at + assembly time;

+ + +

6.13 psb_geins — Dense matrix insertion routine

+ + +
+call psb_geins(m, irw, val, x, desc_a, info [,local])
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+m

+

Number of rows in val to be inserted.
Scope:local.
Type:required.
Intent: in.
Specified as: an integer value. +

+

+irw

+

Indices of the rows to be inserted. Specifically, row i of val will be + inserted into the local row corresponding to the global row index irw(i). + Scope:local.
Type:required.
Intent: in.
Specified as: an integer array. +

+

+val

+

the dense submatrix to be inserted.
Scope:local.
Type:required.
Intent: in.
Specified as: a rank 1 or 2 array. + + +

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+local

+

Whether the entries in the index vector irw, are already in local + numbering.
Scope:local.
Type:optional.
Specified as: a logical value; default: .false.. +

+

+

+

+On Return

+

+

+

+x

+

the output dense matrix.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one or two array or an object of type + psb_T_vect_type, of type real, complex or integer.
+

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes + + +

    +
  1. +

    Dense vectors/matrices do not have an associated state; +

  2. +
  3. +

    Duplicate entries are either overwritten or added, there is no provision + for raising an error condition.

+ + +

6.14 psb_geasb — Assembles a dense matrix

+ + +
+call psb_geasb(x, desc_a, info, mold)
+
+

+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+desc_a

+

The communication descriptor.
Scope: local
Type: required
Intent: in.
Specified as: a variable of type psb_desc_type.
+

+

+mold

+

The desired dynamic type for the internal vector storage.
Scope: local.
Type: optional.
Intent: in.
Specified as: an object of a class derived from psb_T_base_vect_type; + this is only allowed when x is of type psb_T_vect_type.

+

+

+

+On Return

+

+

+

+x

+ + +

The dense matrix to be assembled.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one or two array with the ALLOCATABLE attribute, or an + object of type psb_T_vect_type, of type real, complex or integer.
+

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    On entry to this routine the descriptor must be in the assembled state, i.e. + psb_cdasb must already have been called. +

  2. +
  3. +

    If the bldmode=psb_matbld_remote_ value was specified at allocation + time, contributions defined on the current process but belonging to a + remote process will be handled accordingly. This is most likely to occur + in finite element applications, with dupl=psb_dupl_add_.

+ + +

6.15 psb_gefree — Frees a dense matrix

+ + +
+call psb_gefree(x, desc_a, info)
+
+

+

+

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+x

+

The dense matrix to be freed.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one or two array with the ALLOCATABLE attribute, or an + object of type psb_T_vect_type, of type real, complex or integer.
+

+

+desc_a

+

The communication descriptor.
Scope: local
Type: required
Intent: in.
Specified as: a variable of type psb_desc_type.

+

+

+

+On Return

+

+ + +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + +

6.16 psb_gelp — Applies a left permutation to a dense matrix

+ + +
+call psb_gelp(trans, iperm, x, info)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+trans

+

A character that specifies whether to permute A or A^T.
Scope: local
Type: required
Intent: in.
Specified as: a single character with value ’N’ for A or ’T’ for A^T.
+

+

+iperm

+

An integer array containing permutation information.
Scope: local
Type: required
Intent: in.
Specified as: an integer one-dimensional array.
+

+

+x

+

The dense matrix to be permuted.
Scope: local
Type: required
Intent: inout.
Specified as: a one or two dimensional array.

+

+ + +

+

+On Return

+

+

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + +

6.17 psb_glob_to_loc — Global to local indices conversion

+ + +
+call psb_glob_to_loc(x, y, desc_a, info, iact,owned)
+call psb_glob_to_loc(x, desc_a, info, iact,owned)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+x

+

An integer vector of indices to be converted.
Scope: local
Type: required
Intent: in, inout.
Specified as: a rank one integer array.
+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+iact

+

specifies action to be taken in case of range errors. Scope: global
Type: optional
Intent: in.
Specified as: a character variable Ignore, Warning or Abort, default Ignore. + + +

+

+owned

+

Specifies valid range of input. Scope: global
Type: optional
Intent: in.
If true, then only indices strictly owned by the current process are + considered valid, if false then halo indices are also accepted. Default: + false.

+

+

+

+On Return

+

+

+

+x

+

If y is not present, then x is overwritten with the translated integer + indices. Scope: global
Type: required
Intent: inout.
Specified as: a rank one integer array. +

+

+y

+

If y is present, then y is overwritten with the translated integer indices, + and x is left unchanged. Scope: global
Type: optional
Intent: out.
Specified as: a rank one integer array. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. + + +

    If an input index is out of range, then the corresponding output index is + set to a negative number; +

  2. +
  3. +

    The default Ignore means that the negative output is the only action + taken on an out-of-range input.

+ + + +

6.18 psb_loc_to_glob — Local to global indices conversion

+ + + +
+call psb_loc_to_glob(x, y, desc_a, info, iact)
+call psb_loc_to_glob(x, desc_a, info, iact)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+x

+

An integer vector of indices to be converted.
Scope: local
Type: required
Intent: in, inout.
Specified as: a rank one integer array.
+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+iact

+

specifies action to be taken in case of range errors. Scope: global
Type: optional
Intent: in.
Specified as: a character variable Ignore, Warning or Abort, default Ignore.

+

+ + + +

+

+On Return

+

+

+

+x

+

If y is not present, then x is overwritten with the translated integer + indices. Scope: global
Type: required
Intent: inout.
Specified as: a rank one integer array. +

+

+y

+

If y is present, then y is overwritten with the translated integer + indices, and x is left unchanged. Scope: global
Type: optional
Intent: out.
Specified as: a rank one integer array. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+ + + +

6.19 psb_is_owned —

+ + + +
+call psb_is_owned(x, desc_a)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+x

+

Integer index.
Scope: local
Type: required
Intent: in.
Specified as: a scalar integer.
+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type.

+

+

+

+On Return

+

+

+

+Function value

+ + +

A logical mask which is true if x is owned by the current process Scope: + local
Type: required
Intent: out.

+

Notes +

    +
  1. +

    This routine returns a .true. value for an index that is strictly owned by + the current process, excluding the halo indices

+ + +

6.20 psb_owned_index —

+ + +
+call psb_owned_index(y, x, desc_a, info)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+x

+

Integer indices.
Scope: local
Type: required
Intent: in, inout.
Specified as: a scalar or a rank one integer array.
+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+iact

+

specifies action to be taken in case of range errors. Scope: global
Type: optional
Intent: in.
Specified as: a character variable Ignore, Warning or Abort, default Ignore.

+

+ + +

+

+On Return

+

+

+

+y

+

A logical mask which is true for all corresponding entries of x that are + owned by the current process Scope: local
Type: required
Intent: out.
Specified as: a scalar or rank one logical array. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    This routine returns a .true. value for those indices that are strictly + owned by the current process, excluding the halo indices

+ + +

6.21 psb_is_local —

+ + +
+call psb_is_local(x, desc_a)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+x

+

Integer index.
Scope: local
Type: required
Intent: in.
Specified as: a scalar integer.
+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type.

+

+

+

+On Return

+

+

+

+Function value

+ + +

A logical mask which is true if x is local to the current process Scope: + local
Type: required
Intent: out.

+

Notes +

    +
  1. +

    This routine returns a .true. value for an index that is local to the current + process, including the halo indices

+ +

6.22 psb_local_index —

+ + + +
+call psb_local_index(y, x, desc_a, info)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+x

+

Integer indices.
Scope: local
Type: required
Intent: in, inout.
Specified as: a scalar or a rank one integer array.
+

+

+desc_a

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+iact

+

specifies action to be taken in case of range errors. Scope: global
Type: optional
Intent: in.
Specified as: a character variable Ignore, Warning or Abort, default Ignore.

+

+ + + +

+

+On Return

+

+

+

+y

+

A logical mask which is true for all corresponding entries of x that are + local to the current process Scope: local
Type: required
Intent: out.
Specified as: a scalar or rank one logical array. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    This routine returns a .true. value for those indices that are local to the + current process, including the halo indices.

+ + + +

6.23 psb_get_boundary — Extract list of boundary elements

+ + + +
+call psb_get_boundary(bndel, desc, info)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+desc

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type.

+

+

+

+On Return

+

+

+

+bndel

+

The list of boundary elements on the calling process, in local numbering.
Scope: local
Type: required
Intent: out.
Specified as: a rank one array with the ALLOCATABLE attribute, of type + integer.
+

+

+info

+ + + +

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    If there are no boundary elements (i.e., if the local part of the connectivity + graph is self-contained) the output vector is set to the “not allocated” + state. +

  2. +
  3. +

    Otherwise the size of bndel will be exactly equal to the number of + boundary elements.

+ + + +

6.24 psb_get_overlap — Extract list of overlap elements

+ + + +
+call psb_get_overlap(ovrel, desc, info)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+desc

+

the communication descriptor.
Scope:local.
Type:required.
Intent: in.
Specified as: a structured data of type psb_desc_type.

+

+

+

+On Return

+

+

+

+ovrel

+

The list of overlap elements on the calling process, in local numbering.
Scope: local
Type: required
Intent: out.
Specified as: a rank one array with the ALLOCATABLE attribute, of type + integer.
+

+

+info

+ + + +

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    If there are no overlap elements the output vector is set to the “not + allocated” state. +

  2. +
  3. +

    Otherwise the size of ovrel will be exactly equal to the number of overlap + elements.

+ + + +

6.25 psb_sp_getrow — Extract row(s) from a sparse matrix

+ + + +
+call psb_sp_getrow(row, a, nz, ia, ja, val, info, &
+              & append, nzin, lrw)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+row

+

The (first) row to be extracted.
Scope:local
Type:required
Intent: in.
Specified as: an integer > 0. +

+

+a

+

the matrix from which to get rows.
Scope:local
Type:required
Intent: in.
Specified as: a structured data of type psb_Tspmat_type. +

+

+append

+

Whether to append or overwrite existing output.
Scope:local
Type:optional
Intent: in.
Specified as: a logical value default: false (overwrite). + + + +

+

+nzin

+

Input size to be appended to.
Scope:local
Type:optional
Intent: in.
Specified as: an integer > 0. When append is true, specifies how many + entries in the output vectors are already filled. +

+

+lrw

+

The last row to be extracted.
Scope:local
Type:optional
Intent: in.
Specified as: an integer > 0, default: row. +

+

+

+

+On Return

+

+

+

+nz

+

the number of elements returned by this call.
Scope:local.
Type:required.
Intent: out.
Returned as: an integer scalar. +

+

+ia

+

the row indices.
Scope:local.
Type:required.
Intent: inout.
Specified as: an integer array with the ALLOCATABLE attribute. +

+

+ja

+ + + +

the column indices of the elements to be inserted.
Scope:local.
Type:required.
Intent: inout.
Specified as: an integer array with the ALLOCATABLE attribute. +

+

+val

+

the elements to be inserted.
Scope:local.
Type:required.
Intent: inout.
Specified as: a real array with the ALLOCATABLE attribute. +

+

+info

+

Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.

+

Notes +

    +
  1. +

    The output nz is always the size of the output generated by the current + call; thus, if append=.true., the total output size will be nzin + nz, with + the newly extracted coefficients stored in entries nzin+1:nzin+nz of the + array arguments; +

  2. +
  3. +

    When append=.true. the output arrays are reallocated as necessary; +

  4. +
  5. +

    The row and column indices are returned in the local numbering + scheme; if the global numbering is desired, the user may employ the + psb_loc_to_glob routine on the output.

+ + + +

6.26 psb_sizeof — Memory occupation

+

This function computes the memory occupation of a PSBLAS object. + + + +

+isz = psb_sizeof(a)
+isz = psb_sizeof(desc_a)
+isz = psb_sizeof(prec)
+
+

+

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+a

+

A sparse matrix A.
Scope: local
Type: required
Intent: in.
Specified as: a structured data of type psb_Tspmat_type. +

+

+desc_a

+

Communication descriptor.
Scope: local
Type: required
Intent: in.
Specified as: a structured data of type psb_desc_type. +

+

+prec

+

Scope: local
Type: required
Intent: in.
Specified as: a preconditioner data structure psb_Tprec_type. + + + +

+

+On Return

+

+

+

+Function value

+

The memory occupation of the object specified in the calling sequence, in + bytes.
Scope: local
Returned as: an integer(psb_long_int_k_) number.

+ + + +

6.27 Sorting utilities —

+

psb_msort — Sorting by the Merge-sort algorithm +

psb_qsort — Sorting by the Quicksort algorithm +

psb_hsort — Sorting by the Heapsort algorithm + + + +

+call psb_msort(x,ix,dir,flag)
+call psb_qsort(x,ix,dir,flag)
+call psb_hsort(x,ix,dir,flag)
+
+

+

These serial routines sort a sequence X into ascending or descending order. The +argument meaning is identical for the three calls; the only difference is the algorithm +used to accomplish the task (see Usage Notes below). +

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+x

+

The sequence to be sorted.
Type:required.
Specified as: an integer, real or complex array of rank 1. +

+

+ix

+

A vector of indices.
Type:optional.
Specified as: an integer array of (at least) the same size as X. +

+

+dir

+

The desired ordering.
Type:optional.
Specified as: an integer value: +

+

+ Integer and real data:

+

psb_sort_up_, psb_sort_down_, psb_asort_up_, + psb_asort_down_; default psb_sort_up_. + + + +

+

+ Complex data:

+

psb_lsort_up_, psb_lsort_down_, psb_asort_up_, + psb_asort_down_; default psb_lsort_up_.

+
+

+flag

+

Whether to keep the original values in IX.
Type:optional.
Specified as: an integer value psb_sort_ovw_idx_ or psb_sort_keep_idx_; + default psb_sort_ovw_idx_. +

+

+

+

+On Return

+

+

+

+x

+

The sequence of values, in the chosen ordering.
Type:required.
Specified as: an integer, real or complex array of rank 1. +

+

+ix

+

A vector of indices.
Type: Optional
An integer array of rank 1, whose entries are moved to the same position + as the corresponding entries in x.

+

Notes +

    +
  1. +

    For integer or real data the sorting can be performed in the up/down + direction, on the natural or absolute values; +

  2. +
  3. +

    For complex data the sorting can be done in a lexicographic order (i.e.: + sort on the real part with ties broken according to the imaginary part) or + on the absolute values; + + + +

  4. +
  5. +

    The routines return the items in the chosen ordering; the output + difference is the handling of ties (i.e. items with an equal value) in the + original input. With the merge-sort algorithm ties are preserved in the + same relative order as they had in the original sequence, while this is not + guaranteed for quicksort or heapsort; +

  6. +
  7. +

    If flag = psb_sort_ovw_idx_ then the entries in ix(1 : n) where n is the size + of x are initialized to ix(i) ← i; thus, upon return from the subroutine, + for each index i we have in ix(i) the position that the item x(i) occupied + in the original data sequence; +

  8. +
  9. +

    If flag = psb_sort_keep_idx_ the routine will assume that the entries in + ix(:) have already been initialized by the user; +

  10. +
  11. +

    The three sorting algorithms have a similar O(n log n) expected running time; + in the average case quicksort will be the fastest and merge-sort the slowest. + However note that: +

      +
    1. +

      The worst case running time + for quicksort is O(n^2); the algorithm implemented here follows the + well-known median-of-three heuristics, but the worst case may still + apply; +

    2. +
    3. +

      The worst case running time for merge-sort and heap-sort is + O(n log n) as the average case; +

    4. +
    5. +

      The merge-sort algorithm is implemented to take advantage of + subsequences that may be already in the desired ordering prior to + the subroutine call; this situation is relatively common when dealing + with groups of indices of sparse matrix entries, thus merge-sort is + + + + the preferred choice when a sorting is needed by other routines in + the library.

    +
+ + + + + + + + + + +href="userhtml.html#userhtmlse9.html" >up]

+ id="tailuserhtmlse6.html"> diff --git a/docs/html/userhtmlse7.html b/docs/html/userhtmlse7.html index 16277374..575dfb43 100644 --- a/docs/html/userhtmlse7.html +++ b/docs/html/userhtmlse7.html @@ -11,77 +11,2225 @@

-

7 Parallel environment routines

+href="userhtmlse4.html#tailuserhtmlse7.html">tail] [up]

+

7 Parallel environment routines

- +

7.1 psb_init — Initializes PSBLAS parallel environment

+ + + +
+call psb_init(ctxt, np, basectxt, ids, extcomm)
+
+

+

This subroutine initializes the PSBLAS parallel environment, defining a virtual +parallel machine. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+np

+

Number of processes in the PSBLAS virtual parallel machine.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value.  Default: use all available processes. +

+

+basectxt

+

the initial PSBLAS communication context. The new context will be + defined from the processes participating in the initial one.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value.  Default: use MPI_COMM_WORLD. +

+

+ids

+

Identities of the processes to use for the new context; the argument is + ignored when np is not specified. This allows the processes in the new + environment to be in an order different from the original one.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer array.  Default: use the indices (0 … np - 1). + + + +

+

+extcomm

+

an alternative initial MPI communicator. The new context will be defined + from the processes participating in the initial one.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value.  Default: use MPI_COMM_WORLD.

+

+

+

+On Return

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine, + type psb_ctxt_type. Note that this is always a duplicate of basectxt, + so that library communications are completely separated from other + communication operations.
Scope: global.
Type: required.
Intent: out.
Specified as: an integer variable.

+

Notes +

    +
  1. +

    A call to this routine must precede any other PSBLAS call. +

  2. +
  3. +

    It is an error to specify a value for np greater than the number of processes + available in the underlying base parallel environment.

+ + + +

7.2 psb_info — Return information about PSBLAS parallel environment

+ + + +
+call psb_info(ctxt, iam, np)
+
+

+

This subroutine returns information about the PSBLAS parallel environment, +defining a virtual parallel machine. +

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable.

+

+

+

+On Return

+

+

+

+iam

+

Identifier of current process in the PSBLAS virtual parallel machine.
Scope: local.
Type: required.
Intent: out.
Specified as: an integer value. -1 ≤ iam ≤ np - 1 +

+

+np

+ + + +

Number of processes in the PSBLAS virtual parallel machine.
Scope: global.
Type: required.
Intent: out.
Specified as: an integer variable.  

+

Notes +

    +
  1. +

    For processes in the virtual parallel machine the identifier will satisfy + 0 ≤ iam ≤ np - 1; +

  2. +
  3. +

    If the user has requested on psb_init a number of processes less than + the total available in the parallel execution environment, the remaining + processes will have on return iam = -1; the only call involving ctxt that + any such process may execute is to psb_exit.

+ + + +

7.3 psb_exit — Exit from PSBLAS parallel environment

+ + + +
+call psb_exit(ctxt)
+call psb_exit(ctxt,close)
+
+

+

This subroutine exits from the PSBLAS parallel virtual machine. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+close

+

Whether to close all data structures related to the virtual parallel + machine, besides those associated with ctxt.
Scope: global.
Type: optional.
Intent: in.
Specified as: a logical variable, default value: true.

+

Notes +

    +
  1. +

    This routine may be called even if a previous call to psb_info has + returned with iam = -1; indeed, it is the only routine that may be + called with argument ctxt in this situation. + + + +

  2. +
  3. +

    A call to this routine with close=.true. implies a call to MPI_Finalize, + after which no parallel routine may be called. +

  4. +
  5. +

    If the user wishes to use multiple communication contexts in the + same program, or to enter and exit multiple times into the parallel + environment, this routine may be called to selectively close the + contexts with close=.false., while on the last call it should be called + with close=.true. to shut down in a clean way the entire parallel + environment.

+ + + +

7.4 psb_get_mpi_comm — Get the MPI communicator

+ + + +
+icomm = psb_get_mpi_comm(ctxt)
+
+

+

This function returns the MPI communicator associated with a PSBLAS +context +

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable.

+

+

+

+On Return

+

+

+

+Function value

+

The MPI communicator associated with the PSBLAS virtual parallel + machine.
Scope: global.
Type: required.
Intent: out.

+

Notes The subroutine version psb_get_mpicomm is still available but is +deprecated. + + + +

7.5 psb_get_mpi_rank — Get the MPI rank

+ + + +
+rank = psb_get_mpi_rank(ctxt, id)
+
+

+

This function returns the MPI rank of the PSBLAS process id +

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+id

+

Identifier of a process in the PSBLAS virtual parallel machine.
Scope: local.
Type: required.
Intent: in.
Specified as: an integer value. 0 ≤ id ≤ np - 1

+

+

+

+On Return

+

+

+

+Function value

+ + + +

The MPI rank associated with the PSBLAS process id.
Scope: local.
Type: required.
Intent: out.

+

Notes The subroutine version psb_get_rank is still available but is deprecated. + + + +

7.6 psb_wtime — Wall clock timing

+ + +
+time = psb_wtime()
+
+

+

This function returns a wall clock timer. The resolution of the timer is dependent +on the underlying parallel environment implementation. +

+

+Type:

+

Asynchronous. +

+

+On Exit

+

+

+

+Function value

+

the elapsed time in seconds.
Returned as: a real(psb_dpk_) variable.

+ + +

7.7 psb_barrier — Synchronization point parallel environment

+ + +
+call psb_barrier(ctxt)
+
+

+

This subroutine acts as an explicit synchronization point for the PSBLAS parallel +virtual machine. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable.

+ + +

7.8 psb_abort — Abort a computation

+ + +
+call psb_abort(ctxt)
+
+

+

This subroutine aborts computation on the parallel virtual machine. +

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable.

+ + +

7.9 psb_bcast — Broadcast data

+ + +
+call psb_bcast(ctxt, dat [, root, mode, request])
+
+

+

This subroutine implements a broadcast operation based on the underlying +communication library. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+dat

+

On the root process, the data to be broadcast.
Scope: global.
Type: required.
Intent: inout.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical variable, which may be + a scalar or rank 1 array.  Type, kind, rank and size must agree on all + processes. +

+

+root

+

Root process holding data to be broadcast.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value 0 <= root <= np- 1, default 0   + + +

+

+mode

+

Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
+

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+

+

+

+On Return

+

+

+

+dat

+

On all processes other than root, the broadcast data.
Scope: global.
Type: required.
Intent: inout.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical scalar.  Type, kind, rank + and size must agree on all processes. +

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+ + +

Notes +

    +
  1. +

    The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

  2. +
  3. +

    The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument need not be specified: +

    +

    +

    +  call psb_bcast(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
    +
    +

    +
  4. +
  5. +

    When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

    +

    +

    +  call psb_bcast(ctxt,dat,mode=psb_collective_start_,request=bcast_request)
    +  ....... ! Do not access dat
    +  call psb_bcast(ctxt,dat,mode=psb_collective_end_, request=bcast_request)
    +
    +

    +
+ + +

7.10 psb_sum — Global sum

+ + + +
+call psb_sum(ctxt, dat [, root, mode, request])
+
+

+

This subroutine implements a sum reduction operation based on the underlying +communication library. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+dat

+

The local contribution to the global sum.
Scope: global.
Type: required.
Intent: inout.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.  Type, kind, rank and size must agree on all + processes. +

+

+root

+

Process to hold the final sum, or -1 to make it available on all processes.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value -1 <= root <= np- 1, default -1.   + + +

+

+mode

+

Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
+

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+

+

+

+On Return

+

+

+

+dat

+

On destination process(es), the result of the sum operation.
Scope: global.
Type: required.
Intent: inout.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.
Type, kind, rank and size must agree on all processes. +

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+ + +

Notes +

    +
  1. +

    The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

  2. +
  3. +

    The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument need not be specified: +

    +

    +

    +  call psb_sum(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
    +
    +

    +
  4. +
  5. +

    When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

    +

    +

    +  call psb_sum(ctxt,dat,mode=psb_collective_start_,request=sum_request)
    +  ....... ! Do not access dat
    +  call psb_sum(ctxt,dat,mode=psb_collective_end_,request=sum_request)
    +
    +

    +
+ + +

7.11 psb_max — Global maximum

+ + +
+call psb_max(ctxt, dat [, root, mode, request])
+
+

+

This subroutine implements a maximum value reduction operation based on the +underlying communication library. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+dat

+

The local contribution to the global maximum.
Scope: local.
Type: required.
Intent: inout.
Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

+

+root

+

Process to hold the final maximum, or -1 to make it available on all + processes.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value -1 <= root <= np- 1, default -1.
+ + +

+

+mode

+

Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
+

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+

+

+

+On Return

+

+

+

+dat

+

On destination process(es), the result of the maximum operation.
Scope: global.
Type: required.
Intent: inout.
Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+ +

Notes +

    +
  1. +

    The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

  2. +
  3. +

    The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument need not be specified: +

    +

    +

    +  call psb_max(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
    +
    +

    +
  4. +
  5. +

    When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

    +

    +

    +  call psb_max(ctxt,dat,mode=psb_collective_start_,request=max_request)
    +  ....... ! Do not access dat
    +  call psb_max(ctxt,dat,mode=psb_collective_end_,request=max_request)
    +
    +

    +
+ + + +

7.12 psb_min — Global minimum

+ + + +
+call psb_min(ctxt, dat [, root, mode, request])
+
+

+

This subroutine implements a minimum value reduction operation based on the +underlying communication library. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+dat

+

The local contribution to the global minimum.
Scope: local.
Type: required.
Intent: inout.
Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

+

+root

+

Process to hold the final value, or -1 to make it available on all processes.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value -1 <= root <= np- 1, default -1.
+ + + +

+

+mode

+

Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
+

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+

+

+

+On Return

+

+

+

+dat

+

On destination process(es), the result of the minimum operation.
Scope: global.
Type: required.
Intent: inout.
Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.
Type, kind, rank and size must agree on all processes. +

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+ + + +

Notes +

    +
  1. +

    The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

  2. +
  3. +

    The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument need not be specified: +

    +

    +

    +  call psb_min(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
    +
    +

    +
  4. +
  5. +

    When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

    +

    +

    +  call psb_min(ctxt,dat,mode=psb_collective_start_,request=min_request)
    +  ....... ! Do not access dat
    +  call psb_min(ctxt,dat,mode=psb_collective_end_,request=min_request)
    +
    +

    +
+ + + +

7.13 psb_amx — Global maximum absolute value

+ + + +
+call psb_amx(ctxt, dat [, root, mode, request])
+
+

+

This subroutine implements a maximum absolute value reduction operation +based on the underlying communication library. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+dat

+

The local contribution to the global maximum.
Scope: local.
Type: required.
Intent: inout.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.  Type, kind, rank and size must agree on all + processes. +

+

+root

+

Process to hold the final value, or -1 to make it available on all processes.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value -1 <= root <= np- 1, default -1.
+ + + +

+

+mode

+

Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
+

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+

+

+

+On Return

+

+

+

+dat

+

On destination process(es), the result of the maximum operation.
Scope: global.
Type: required.
Intent: inout.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.  Type, kind, rank and size must agree on all + processes. +

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+ + + +

Notes +

    +
  1. +

    The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

  2. +
  3. +

    The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument need not be specified: +

    +

    +

    +  call psb_amx(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
    +
    +

    +
  4. +
  5. +

    When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

    +

    +

    +  call psb_amx(ctxt,dat,mode=psb_collective_start_,request=amx_request)
    +  ....... ! Do not access dat
    +  call psb_amx(ctxt,dat,mode=psb_collective_end_,request=amx_request)
    +
    +

    +
+ + + +

7.14 psb_amn — Global minimum absolute value

+ + + +
+call psb_amn(ctxt, dat [, root, mode, request])
+
+

+

This subroutine implements a minimum absolute value reduction operation +based on the underlying communication library. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+dat

+

The local contribution to the global minimum.
Scope: local.
Type: required.
Intent: inout.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.  Type, kind, rank and size must agree on all + processes. +

+

+root

+

Process to hold the final value, or -1 to make it available on all processes.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value -1 <= root <= np- 1, default -1.
+ + + +

+

+mode

+

Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
+

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+

+

+

+On Return

+

+

+

+dat

+

On destination process(es), the result of the minimum operation.
Scope: global.
Type: required.
Intent: inout.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.
Type, kind, rank and size must agree on all processes. +

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+ + + +

Notes +

    +
  1. +

    The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

  2. +
  3. +

    The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument need not be specified: +

    +

    +

    +  call psb_amn(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
    +
    +

    +
  4. +
  5. +

    When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

    +

    +

    +  call psb_amn(ctxt,dat,mode=psb_collective_start_,request=amn_request)
    +  ....... ! Do not access dat
    +  call psb_amn(ctxt,dat,mode=psb_collective_end_,request=amn_request)
    +
    +

    +
+ + + +

7.15 psb_nrm2 — Global 2-norm reduction

+ + + +
+call psb_nrm2(ctxt, dat [, root, mode, request])
+
+

+

This subroutine implements a 2-norm value reduction operation based on the +underlying communication library. +

+

+Type:

+

Synchronous. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+dat

+

The local contribution to the global 2-norm.
Scope: local.
Type: required.
Intent: inout.
Specified as: a real variable, which may be a scalar, or a rank 1 array. +  Kind, rank and size must agree on all processes. +

+

+root

+

Process to hold the final value, or -1 to make it available on all processes.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value -1 <= root <= np- 1, default -1.
+ + + +

+

+mode

+

Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
Scope: global.
Type: optional.
Intent: in.
Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
+

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+

+

+

+On Return

+

+

+

+dat

+

On destination process(es), the result of the 2-norm reduction.
Scope: global.
Type: required.
Intent: inout.
Specified as: a real variable, which may be a scalar, or a rank 1 array.
Kind, rank and size must agree on all processes. +

+

+request

+

A request variable to check for operation completion.
Scope: local.
Type: optional.
Intent: inout.
If mode specifies non-blocking action, then this variable must be present.

+ + + +

Notes +

    +
  1. +

    This reduction is appropriate to compute the results of multiple (local) + NRM2 operations at the same time. +

  2. +
  3. +

    Denoting by $dat_i$ the value of the variable dat on process $i$, the output res + is equivalent to the computation of +

    +
+\[ res = \sqrt{\sum_i dat_i^2}, \]
+
    +

    with care taken to avoid unnecessary overflow. +

  4. +
  5. +

    The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

  6. +
  7. +

    The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument need not be specified: +

    +

    +

    +  call psb_nrm2(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
    +
    +

    +
  8. +
  9. +

    When splitting the operation in two calls, the dat argument must not be + accessed between calls: + + + +

    +

    +

    +  call psb_nrm2(ctxt,dat,mode=psb_collective_start_,request=nrm2_request)
    +  ....... ! Do not access dat
    +  call psb_nrm2(ctxt,dat,mode=psb_collective_end_,request=nrm2_request)
    +
    +

    +
+ + + +

7.16 psb_snd — Send data

+ + + +
+call psb_snd(ctxt, dat, dst, m)
+
+

+

This subroutine sends a packet of data to a destination. +

+

+Type:

+

Synchronous: see usage notes. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+dat

+

The data to be sent.
Scope: local.
Type: required.
Intent: in.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical scalar.  Type, kind and + rank must agree on sender and receiver process; if m is not specified, size + must agree as well. +

+

+dst

+

Destination process.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer value 0 <= dst <= np- 1.
+ + + +

+

+m

+

Number of rows.
Scope: global.
Type: Optional.
Intent: in.
Specified as: an integer value 0 <= m <= size(dat, 1).
When dat is a rank 2 array, specifies the number of rows to be sent + independently of the leading dimension size(dat, 1); must have the same + value on sending and receiving processes.

+

+

+

+On Return

+

+

Notes +

    +
  1. +

    This subroutine implies a synchronization, but only between the calling + process and the destination process dst.

+ + + +

7.17 psb_rcv — Receive data

+ + + +
+call psb_rcv(ctxt, dat, src, m)
+
+

+

This subroutine receives a packet of data from a source. +

+

+Type:

+

Synchronous: see usage notes. +

+

+On Entry

+

+

+

+ctxt

+

the communication context identifying the virtual parallel machine.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer variable. +

+

+src

+

Source process.
Scope: global.
Type: required.
Intent: in.
Specified as: an integer value 0 <= src <= np- 1.
+

+

+m

+

Number of rows.
Scope: global.
Type: Optional.
Intent: in.
Specified as: an integer value 0 <= m <= size(dat, 1).
When dat is a rank 2 array, specifies the number of rows to be received + independently of the leading dimension size(dat, 1); must have the same + value on sending and receiving processes.

+ + + +

+

+

+On Return

+

+

+

+dat

+

The data to be received.
Scope: local.
Type: required.
Intent: inout.
Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical scalar.  Type, kind and + rank must agree on sender and receiver process; if m is not specified, size + must agree as well.

+

Notes +

    +
  1. +

    This subroutine implies a synchronization, but only between the calling + process and the source process src.

+ + + + +href="userhtml.html#userhtmlse10.html" >up]

+ id="tailuserhtmlse7.html"> diff --git a/docs/html/userhtmlse8.html b/docs/html/userhtmlse8.html index d4260b27..029136a8 100644 --- a/docs/html/userhtmlse8.html +++ b/docs/html/userhtmlse8.html @@ -11,32 +11,32 @@

-

8 Error handling

+href="userhtmlse5.html#tailuserhtmlse8.html">tail] [up]

+

8 Error handling

The PSBLAS library error handling policy has been completely rewritten in version 2.0. The idea behind the design of this new error handling strategy is to keep error messages on a stack allowing the user to trace back up to the point where the first error message has been generated. Every routine in the PSBLAS-2.0 library has, as last non-optional argument, an integer info variable; whenever, inside the routine, an -error is detected, this variable is set to a value corresponding to a specific -error code. Then this error code is also pushed on the error stack and then -either control is returned to the caller routine or the execution is aborted, -depending on the users choice. At the time when the execution is aborted, -an error message is printed on standard output with a level of verbosity -than can be chosen by the user. If the execution is not aborted, then, the -caller routine checks the value returned in the info variable; whenever, inside the +routine, an error is detected, this variable is set to a value corresponding to a +specific error code. Then this error code is also pushed on the error stack +and then either control is returned to the caller routine or the execution is +aborted, depending on the user's choice. At the time when the execution is +aborted, an error message is printed on standard output with a level of +verbosity that can be chosen by the user. If the execution is not aborted, then, +the caller routine checks the value returned in the info variable and, if not zero, an error condition is raised. This process continues on all the levels of nested calls until the level where the user decides to abort the program execution.

Figure 5 shows the layout of a generic 5 shows the layout of a generic psb_foo routine with respect to the PSBLAS-2.0 error handling policy. It is possible to see how, whenever an error condition is detected, the



@@ -67,270 +67,232 @@ explicitly. >

-
subroutinesubroutine psb_foo(some args, info) + psb_foo(some args, ... + info) 
  if(error detected) then +  ... 
   info=errcode1 + if(error detected) then 
  call psb_errpush(psb_foo, errcode1) +     goto 9999 + info=errcode1 
  end if +   ... +   call psb_bar(some args, info) + call psb_errpush(psb_foo, if(info .ne. zero) then + errcode1) 
    info=errcode2 +   goto 9999 
 call psb_errpush(psb_foo, errcode2) +   end if 
 goto 9999 +   end if + ... 
  ... +9999 continue +  call if (err_act .eq. act_abort) then + psb_bar(some args, info) 
 call psb_error(icontxt) +   if(info return + .ne. zero) else + then 
   return +    info=errcode2 
      call psb_errpush(psb_foo, errcode2) 
      goto 9999 
   end if 
   ... 
9999 continue 
   if (err_act .eq. act_abort) then 
     call psb_error(icontxt) 
     return 
   else 
     return 
   end if 
 
end subroutine psb_foo -
+class="cmtt-9"> end if + +end subroutine psb_foo

Listing 5: The layout of a generic psb_foo routine with respect to PSBLAS-2.0 -error handling policy.
+error handling policy.

Figure 6 reports a sample error message generated by the PSBLAS-2.0 +href="#x14-124026r6">6 reports a sample error message generated by the PSBLAS-2.0 library. This error has been generated by the fact that the user has chosen the invalid “FOO” storage format to represent the sparse matrix. From this error message it is possible to see that the error has been detected inside @@ -342,7 +304,7 @@ process).



@@ -366,12 +328,12 @@ Format FOO is unknown ========================================================== Aborting... -

+


Listing 6: A sample PSBLAS-3.0 error message. Process 0 detected an error -condition inside the psb_cest subroutine
+condition inside the psb_cest subroutine @@ -379,28 +341,236 @@ condition inside the psb_cest subroutine +
call psb_errpush(err_c, r_name, i_err, a_err)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+err_c

+

the error code
Scope: local
Type: required
Intent: in.
Specified as: an integer. +

+

+r_name

+

the routine where the error has been caught.
Scope: local
Type: required
Intent: in.
Specified as: a string.
+

+

+i_err

+

additional info for error code
Scope: local
Type: optional
Specified as: an integer array
+

+

+a_err

+ + + +

additional info for error code
Scope: local
Type: optional
Specified as: a string.

+ + + +

8.2 psb_error — Prints the error stack content and aborts execution

+ +
call psb_error(icontxt)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+icontxt

+

the communication context.
Scope: global
Type: optional
Intent: in.
Specified as: an integer.

+ + + +

8.3 psb_set_errverbosity — Sets the verbosity of error messages

+ +
call psb_set_errverbosity(v)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+v

+

the verbosity level
Scope: global
Type: required
Intent: in.
Specified as: an integer.

+ + +

8.4 psb_set_erraction — Set the type of action to be taken upon error +condition

+ +
call psb_set_erraction(err_act)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+err_act

+

the type of action.
Scope: global
Type: required
Intent: in.
Specified as: an integer. Possible values: psb_act_ret, psb_act_abort.

+ + + + + + + +href="userhtml.html#userhtmlse11.html" >up]

+ id="tailuserhtmlse8.html"> diff --git a/docs/html/userhtmlse9.html b/docs/html/userhtmlse9.html index 878297b4..ecb64010 100644 --- a/docs/html/userhtmlse9.html +++ b/docs/html/userhtmlse9.html @@ -11,47 +11,728 @@

-

9 Utilities

+href="userhtmlse6.html#tailuserhtmlse9.html">tail] [up]

+

9 Utilities

We have some utilities available for input and output of sparse matrices; the interfaces to these routines are available in the module psb_util_mod. -

+

9.1 hb_read — Read a sparse matrix from a file in the Harwell–Boeing +format

+ +
call hb_read(a, iret, iunit, filename, b, mtitle)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+filename

+

The name of the file to be read.
Type:optional.
Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

+

+iunit

+

The Fortran file unit number.
Type:optional.
Specified as: an integer value. Only meaningful if filename is not -.

+

+

+

+On Return

+

+

+

+a

+

the sparse matrix read from file.
Type:required.
Specified as: a structured data of type psb_Tspmat_type. + + + +

+

+b

+

Right hand side(s).
Type: Optional
An array of type real or complex, rank 2 and having the ALLOCATABLE + attribute; will be allocated and filled in if the input file contains a right + hand side, otherwise will be left in the UNALLOCATED state. +

+

+mtitle

+

Matrix title.
Type: Optional
A character variable of length 72 holding a copy of the matrix title as + specified by the Harwell-Boeing format and contained in the input file. +

+

+iret

+

Error code.
Type: required
An integer value; 0 means no error has been detected.

+ + + +

9.2 hb_write — Write a sparse matrix to a file in the Harwell–Boeing +format

+ +
call hb_write(a, iret, iunit, filename, key, rhs, mtitle)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+a

+

the sparse matrix to be written.
Type:required.
Specified as: a structured data of type psb_Tspmat_type. +

+

+b

+

Right hand side.
Type: Optional
An array of type real or complex, rank 1 and having the ALLOCATABLE + attribute; will be allocated and filled in if the input file contains a right + hand side. +

+

+filename

+

The name of the file to be written to.
Type:optional.
Specified as: a character variable containing a valid file name, or -, in + which case the default output unit 6 (i.e. standard output in Unix jargon) + is used. Default: -. +

+

+iunit

+

The Fortran file unit number.
Type:optional.
Specified as: an integer value. Only meaningful if filename is not -. + + + +

+

+key

+

Matrix key.
Type: Optional
A character variable of length 8 holding the matrix key as specified by + the Harwell-Boeing format and to be written to file. +

+

+mtitle

+

Matrix title.
Type: Optional
A character variable of length 72 holding the matrix title as specified by + the Harwell-Boeing format and to be written to file.

+

+

+

+On Return

+

+

+

+iret

+

Error code.
Type: required
An integer value; 0 means no error has been detected.

+ + + +

9.3 mm_mat_read — Read a sparse matrix from a file in the MatrixMarket +format

+ +
call mm_mat_read(a, iret, iunit, filename)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+filename

+

The name of the file to be read.
Type:optional.
Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

+

+iunit

+

The Fortran file unit number.
Type:optional.
Specified as: an integer value. Only meaningful if filename is not -.

+

+

+

+On Return

+

+

+

+a

+

the sparse matrix read from file.
Type:required.
Specified as: a structured data of type psb_Tspmat_type. + + +

+

+iret

+

Error code.
Type: required
An integer value; 0 means no error has been detected.

+ + +

9.4 mm_array_read — Read a dense array from a file in the MatrixMarket +format

+ +
call mm_array_read(b, iret, iunit, filename)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+filename

+

The name of the file to be read.
Type:optional.
Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

+

+iunit

+

The Fortran file unit number.
Type:optional.
Specified as: an integer value. Only meaningful if filename is not -.

+

+

+

+On Return

+

+

+

+b

+

Right hand side(s).
Type: required
An array of type real or complex, rank 1 or 2 and having the + + + ALLOCATABLE attribute, or an object of type psb_T_vect_type, of type + real or complex.
Will be allocated and filled in if the input file contains a right hand side, + otherwise will be left in the UNALLOCATED state.
+

+

+iret

+

Error code.
Type: required
An integer value; 0 means no error has been detected.

+ + +

9.5 mm_mat_write — Write a sparse matrix to a file in the MatrixMarket +format

+ +
call mm_mat_write(a, mtitle, iret, iunit, filename)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+a

+

the sparse matrix to be written.
Type:required.
Specified as: a structured data of type psb_Tspmat_type. +

+

+mtitle

+

Matrix title.
Type: required
A character variable holding a descriptive title for the matrix to be + written to file. +

+

+filename

+

The name of the file to be written to.
Type:optional.
Specified as: a character variable containing a valid file name, or -, in + which case the default output unit 6 (i.e. standard output in Unix jargon) + is used. Default: -. +

+

+iunit

+

The Fortran file unit number.
Type:optional.
Specified as: an integer value. Only meaningful if filename is not -.

+ +

+

+

+On Return

+

+

+

+iret

+

Error code.
Type: required
An integer value; 0 means no error has been detected.

+

Notes +

If this function is called on a matrix a on a distributed communicator only the +local part is written in output. To get a single MatrixMarket file with the whole +matrix when appropriate, e.g. for debugging purposes, one could gather the whole +matrix on a single rank and then write it. Consider the following example for a +double precision matrix +

+

+

+type(psb_ldspmat_type) :: aglobal
+
+call psb_gather(aglobal,a,desc_a,info)
+if (iam == psb_root_) then
+call mm_mat_write(aglobal,mtitle,info,filename=filename)
+end if
+call psb_spfree(aglobal, desc_a, info)
+
+

+

To simplify this procedure in C, there is a utility function +

+

+

+psb_i_t psb_c_<s,d,c,z>global_mat_write(ah,cdh);
+
+

+

that produces exactly this result. + + + +

9.6 mm_array_write — Write a dense array to a file in the MatrixMarket +format

+ +
call mm_array_write(b, vtitle, iret, iunit, filename)
+ +

+

+

+Type:

+

Asynchronous. +

+

+On Entry

+

+

+

+b

+

Right hand side(s).
Type: required
An array of type real or complex, rank 1 or 2, or an object of type + psb_T_vect_type, of type real or complex; its contents will be written to + disk.
+

+

+filename

+

The name of the file to be written to.
Type:optional.
Specified as: a character variable containing a valid file name, or -, in + which case the default output unit 6 (i.e. standard output in Unix jargon) + is used. Default: -. +

+

+vtitle

+

Vector title.
Type: required
A character variable holding a descriptive title for the vector to be + written to file. +

+

+iunit

+ + + +

The Fortran file unit number.
Type:optional.
Specified as: an integer value. Only meaningful if filename is not -.

+

+

+

+On Return

+

+

+

+iret

+

Error code.
Type: required
An integer value; 0 means no error has been detected.

+

Notes +

If this function is called on a vector v on a distributed communicator only the +local part is written in output. To get a single MatrixMarket file with the whole +vector when appropriate, e.g. for debugging purposes, one could gather the whole +vector on a single rank and then write it. Consider the following example for a double +precision vector +

+

+

+real(psb_dpk_), allocatable :: vglobal(:)
+
+call psb_gather(vglobal,v,desc,info)
+if (iam == psb_root_) then
+call mm_array_write(vglobal,vtitle,info,filename=filename)
+end if
+deallocate(vglobal, stat=info)
+
+

+

To simplify this procedure in C, there is a utility function +

+

+

+psb_i_t psb_c_<s,d,c,z>global_vec_write(vh,cdh);
+
+

+

that produces exactly this result. + + + + + + + + + +

+href="userhtml.html#userhtmlse12.html" >up]

+ id="tailuserhtmlse9.html"> diff --git a/docs/html/zplmr7y-42.png b/docs/html/zplmr7y-42.png new file mode 100644 index 00000000..1879254d Binary files /dev/null and b/docs/html/zplmr7y-42.png differ diff --git a/docs/html/zplmr7y-48.png b/docs/html/zplmr7y-48.png new file mode 100644 index 00000000..a3d208c1 Binary files /dev/null and b/docs/html/zplmr7y-48.png differ diff --git a/docs/html/zplmr7y-49.png b/docs/html/zplmr7y-49.png new file mode 100644 index 00000000..230ddcab Binary files /dev/null and b/docs/html/zplmr7y-49.png differ diff --git a/docs/psblas-3.8.pdf b/docs/psblas-3.9.pdf similarity index 60% rename from docs/psblas-3.8.pdf rename to docs/psblas-3.9.pdf index 95e953d8..6f6505c6 100644 --- a/docs/psblas-3.8.pdf +++ b/docs/psblas-3.9.pdf @@ -4,220 +4,220 @@ << /Type /ObjStm /N 100 -/First 819 -/Length 6377 ->> -stream -1 0 5 38 6 77 9 117 10 144 13 185 14 217 17 263 18 299 21 345 -22 379 25 425 26 464 29 515 30 562 33 608 34 643 37 684 38 727 41 773 -42 816 45 867 46 905 49 956 50 1026 53 1077 54 1147 57 1198 58 1270 61 1321 -62 1393 65 1444 66 1522 69 1573 70 1638 73 1689 74 1740 77 1791 78 1851 81 1903 -82 2006 85 2058 86 2161 89 2213 90 2285 93 2337 94 2409 97 2461 98 2542 101 2594 -102 2631 105 2678 106 2716 109 2768 110 2810 113 2862 114 2938 117 2990 118 3069 121 3121 -122 3211 125 3263 126 3358 129 3410 130 3492 133 3544 134 3618 137 3670 138 3740 141 3792 -142 3883 145 3936 146 4006 149 4059 150 4114 153 4167 154 4236 157 4289 158 4345 161 4398 -162 4459 165 4512 166 4569 169 4622 170 4679 173 4732 174 4814 177 4867 178 4920 181 4973 -182 5010 185 5057 186 5103 189 5155 190 5190 193 5242 194 5317 197 5369 198 5450 201 5502 +/First 862 +/Length 15859 +>> +stream +1 0 5 38 6 195 9 236 10 292 13 333 14 428 17 469 18 587 21 633 +22 771 25 817 26 945 29 991 30 1144 33 1195 34 1391 37 1437 38 1570 41 1611 +42 1790 45 1836 46 2012 49 2063 50 2211 53 2262 54 2549 57 2600 58 2887 61 2938 +62 3235 65 3286 66 3583 69 3634 70 3961 73 4012 74 4280 77 
4331 78 4541 81 4592 +82 4850 85 4902 86 5328 89 5380 90 5806 93 5858 94 6152 97 6204 98 6498 101 6550 +102 6917 105 6970 106 7109 109 7156 110 7303 113 7355 114 7522 117 7574 118 7908 121 7960 +122 8309 125 8361 126 8768 129 8820 130 9255 133 9307 134 9686 137 9738 138 10056 141 10108 +142 10376 145 10428 146 10792 149 10845 150 11155 153 11208 154 11437 157 11490 158 11774 161 11827 +162 12046 165 12099 166 12346 169 12399 170 12638 173 12691 174 12930 177 12983 178 13311 181 13364 +182 13580 185 13633 186 13772 189 13819 190 14009 193 14061 194 14190 197 14242 198 14571 201 14623 % 1 0 obj << /S /GoTo /D (title.0) >> % 5 0 obj -(PSBLAS-v3.8.0 User's Guide) +(\376\377\000P\000S\000B\000L\000A\000S\000-\000v\0003\000.\0009\000.\0000\000\040\000U\000s\000e\000r\000'\000s\000\040\000G\000u\000i\000d\000e) % 6 0 obj -<< /S /GoTo /D (section.1) >> +<< /S /GoTo /D (section*.2) >> % 9 0 obj -(1 Introduction) +(\376\377\000P\000r\000e\000f\000a\000c\000e) % 10 0 obj -<< /S /GoTo /D (section.2) >> +<< /S /GoTo /D (section.1) >> % 13 0 obj -(2 General overview) +(\376\377\0001\000\040\000I\000n\000t\000r\000o\000d\000u\000c\000t\000i\000o\000n) % 14 0 obj -<< /S /GoTo /D (subsection.2.1) >> +<< /S /GoTo /D (section.2) >> % 17 0 obj -(2.1 Basic Nomenclature) +(\376\377\0002\000\040\000G\000e\000n\000e\000r\000a\000l\000\040\000o\000v\000e\000r\000v\000i\000e\000w) % 18 0 obj -<< /S /GoTo /D (subsection.2.2) >> +<< /S /GoTo /D (subsection.2.1) >> % 21 0 obj -(2.2 Library contents) +(\376\377\0002\000.\0001\000\040\000B\000a\000s\000i\000c\000\040\000N\000o\000m\000e\000n\000c\000l\000a\000t\000u\000r\000e) % 22 0 obj -<< /S /GoTo /D (subsection.2.3) >> +<< /S /GoTo /D (subsection.2.2) >> % 25 0 obj -(2.3 Application structure) +(\376\377\0002\000.\0002\000\040\000L\000i\000b\000r\000a\000r\000y\000\040\000c\000o\000n\000t\000e\000n\000t\000s) % 26 0 obj -<< /S /GoTo /D (subsubsection.2.3.1) >> +<< /S /GoTo /D (subsection.2.3) >> % 29 0 obj -(2.3.1 User-defined index 
mappings) +(\376\377\0002\000.\0003\000\040\000A\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 30 0 obj -<< /S /GoTo /D (subsection.2.4) >> +<< /S /GoTo /D (subsubsection.2.3.1) >> % 33 0 obj -(2.4 Programming model) +(\376\377\0002\000.\0003\000.\0001\000\040\000U\000s\000e\000r\000-\000d\000e\000f\000i\000n\000e\000d\000\040\000i\000n\000d\000e\000x\000\040\000m\000a\000p\000p\000i\000n\000g\000s) % 34 0 obj -<< /S /GoTo /D (section.3) >> +<< /S /GoTo /D (subsection.2.4) >> % 37 0 obj -(3 Data Structures and Classes) +(\376\377\0002\000.\0004\000\040\000P\000r\000o\000g\000r\000a\000m\000m\000i\000n\000g\000\040\000m\000o\000d\000e\000l) % 38 0 obj -<< /S /GoTo /D (subsection.3.1) >> +<< /S /GoTo /D (section.3) >> % 41 0 obj -(3.1 Descriptor data structure) +(\376\377\0003\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000s\000\040\000a\000n\000d\000\040\000C\000l\000a\000s\000s\000e\000s) % 42 0 obj -<< /S /GoTo /D (subsubsection.3.1.1) >> +<< /S /GoTo /D (subsection.3.1) >> % 45 0 obj -(3.1.1 Descriptor Methods) +(\376\377\0003\000.\0001\000\040\000D\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000d\000a\000t\000a\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 46 0 obj -<< /S /GoTo /D (subsubsection.3.1.2) >> +<< /S /GoTo /D (subsubsection.3.1.1) >> % 49 0 obj -(3.1.2 get\137local\137rows \204 Get number of local rows) +(\376\377\0003\000.\0001\000.\0001\000\040\000D\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000M\000e\000t\000h\000o\000d\000s) % 50 0 obj -<< /S /GoTo /D (subsubsection.3.1.3) >> +<< /S /GoTo /D (subsubsection.3.1.2) >> % 53 0 obj -(3.1.3 get\137local\137cols \204 Get number of local cols) 
+(\376\377\0003\000.\0001\000.\0002\000\040\000g\000e\000t\000\137\000l\000o\000c\000a\000l\000\137\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000l\000o\000c\000a\000l\000\040\000r\000o\000w\000s) % 54 0 obj -<< /S /GoTo /D (subsubsection.3.1.4) >> +<< /S /GoTo /D (subsubsection.3.1.3) >> % 57 0 obj -(3.1.4 get\137global\137rows \204 Get number of global rows) +(\376\377\0003\000.\0001\000.\0003\000\040\000g\000e\000t\000\137\000l\000o\000c\000a\000l\000\137\000c\000o\000l\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000l\000o\000c\000a\000l\000\040\000c\000o\000l\000s) % 58 0 obj -<< /S /GoTo /D (subsubsection.3.1.5) >> +<< /S /GoTo /D (subsubsection.3.1.4) >> % 61 0 obj -(3.1.5 get\137global\137cols \204 Get number of global cols) +(\376\377\0003\000.\0001\000.\0004\000\040\000g\000e\000t\000\137\000g\000l\000o\000b\000a\000l\000\137\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000g\000l\000o\000b\000a\000l\000\040\000r\000o\000w\000s) % 62 0 obj -<< /S /GoTo /D (subsubsection.3.1.6) >> +<< /S /GoTo /D (subsubsection.3.1.5) >> % 65 0 obj -(3.1.6 get\137global\137indices \204 Get vector of global indices) +(\376\377\0003\000.\0001\000.\0005\000\040\000g\000e\000t\000\137\000g\000l\000o\000b\000a\000l\000\137\000c\000o\000l\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000g\000l\000o\000b\000a\000l\000\040\000c\000o\000l\000s) % 66 0 obj -<< /S /GoTo /D (subsubsection.3.1.7) >> +<< /S /GoTo /D (subsubsection.3.1.6) >> % 69 0 obj -(3.1.7 get\137context \204 Get communication context) 
+(\376\377\0003\000.\0001\000.\0006\000\040\000g\000e\000t\000\137\000g\000l\000o\000b\000a\000l\000\137\000i\000n\000d\000i\000c\000e\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000v\000e\000c\000t\000o\000r\000\040\000o\000f\000\040\000g\000l\000o\000b\000a\000l\000\040\000i\000n\000d\000i\000c\000e\000s) % 70 0 obj -<< /S /GoTo /D (subsubsection.3.1.8) >> +<< /S /GoTo /D (subsubsection.3.1.7) >> % 73 0 obj -(3.1.8 Clone \204 clone current object) +(\376\377\0003\000.\0001\000.\0007\000\040\000g\000e\000t\000\137\000c\000o\000n\000t\000e\000x\000t\000\040\040\024\000\040\000G\000e\000t\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000c\000o\000n\000t\000e\000x\000t) % 74 0 obj -<< /S /GoTo /D (subsubsection.3.1.9) >> +<< /S /GoTo /D (subsubsection.3.1.8) >> % 77 0 obj -(3.1.9 CNV \204 convert internal storage format) +(\376\377\0003\000.\0001\000.\0008\000\040\000C\000l\000o\000n\000e\000\040\040\024\000\040\000c\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000o\000b\000j\000e\000c\000t) % 78 0 obj -<< /S /GoTo /D (subsubsection.3.1.10) >> +<< /S /GoTo /D (subsubsection.3.1.9) >> % 81 0 obj -(3.1.10 psb\137cd\137get\137large\137threshold \204 Get threshold for index mapping switch) +(\376\377\0003\000.\0001\000.\0009\000\040\000C\000N\000V\000\040\040\024\000\040\000c\000o\000n\000v\000e\000r\000t\000\040\000i\000n\000t\000e\000r\000n\000a\000l\000\040\000s\000t\000o\000r\000a\000g\000e\000\040\000f\000o\000r\000m\000a\000t) % 82 0 obj -<< /S /GoTo /D (subsubsection.3.1.11) >> +<< /S /GoTo /D (subsubsection.3.1.10) >> % 85 0 obj -(3.1.11 psb\137cd\137set\137large\137threshold \204 Set threshold for index mapping switch) 
+(\376\377\0003\000.\0001\000.\0001\0000\000\040\000p\000s\000b\000\137\000c\000d\000\137\000g\000e\000t\000\137\000h\000a\000s\000h\000\137\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\040\024\000\040\000G\000e\000t\000\040\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\000f\000o\000r\000\040\000i\000n\000d\000e\000x\000\040\000m\000a\000p\000p\000i\000n\000g\000\040\000s\000w\000i\000t\000c\000h) % 86 0 obj -<< /S /GoTo /D (subsubsection.3.1.12) >> +<< /S /GoTo /D (subsubsection.3.1.11) >> % 89 0 obj -(3.1.12 get\137p\137adjcncy \204 Get process adjacency list) +(\376\377\0003\000.\0001\000.\0001\0001\000\040\000p\000s\000b\000\137\000c\000d\000\137\000s\000e\000t\000\137\000h\000a\000s\000h\000\137\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\040\024\000\040\000S\000e\000t\000\040\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\000f\000o\000r\000\040\000i\000n\000d\000e\000x\000\040\000m\000a\000p\000p\000i\000n\000g\000\040\000s\000w\000i\000t\000c\000h) % 90 0 obj -<< /S /GoTo /D (subsubsection.3.1.13) >> +<< /S /GoTo /D (subsubsection.3.1.12) >> % 93 0 obj -(3.1.13 set\137p\137adjcncy \204 Set process adjacency list) +(\376\377\0003\000.\0001\000.\0001\0002\000\040\000g\000e\000t\000\137\000p\000\137\000a\000d\000j\000c\000n\000c\000y\000\040\040\024\000\040\000G\000e\000t\000\040\000p\000r\000o\000c\000e\000s\000s\000\040\000a\000d\000j\000a\000c\000e\000n\000c\000y\000\040\000l\000i\000s\000t) % 94 0 obj -<< /S /GoTo /D (subsubsection.3.1.14) >> +<< /S /GoTo /D (subsubsection.3.1.13) >> % 97 0 obj -(3.1.14 fnd\137owner \204 Find the owner process of a set of indices) +(\376\377\0003\000.\0001\000.\0001\0003\000\040\000s\000e\000t\000\137\000p\000\137\000a\000d\000j\000c\000n\000c\000y\000\040\040\024\000\040\000S\000e\000t\000\040\000p\000r\000o\000c\000e\000s\000s\000\040\000a\000d\000j\000a\000c\000e\000n\000c\000y\000\040\000l\000i\000s\000t) % 98 0 obj -<< /S /GoTo /D (subsubsection.3.1.15) >> +<< /S /GoTo /D 
(subsubsection.3.1.14) >> % 101 0 obj -(3.1.15 Named Constants) +(\376\377\0003\000.\0001\000.\0001\0004\000\040\000f\000n\000d\000\137\000o\000w\000n\000e\000r\000\040\040\024\000\040\000F\000i\000n\000d\000\040\000t\000h\000e\000\040\000o\000w\000n\000e\000r\000\040\000p\000r\000o\000c\000e\000s\000s\000\040\000o\000f\000\040\000a\000\040\000s\000e\000t\000\040\000o\000f\000\040\000i\000n\000d\000i\000c\000e\000s) % 102 0 obj -<< /S /GoTo /D (subsection.3.2) >> +<< /S /GoTo /D (subsubsection.3.1.15) >> % 105 0 obj -(3.2 Sparse Matrix class) +(\376\377\0003\000.\0001\000.\0001\0005\000\040\000N\000a\000m\000e\000d\000\040\000C\000o\000n\000s\000t\000a\000n\000t\000s) % 106 0 obj -<< /S /GoTo /D (subsubsection.3.2.1) >> +<< /S /GoTo /D (subsection.3.2) >> % 109 0 obj -(3.2.1 Sparse Matrix Methods) +(\376\377\0003\000.\0002\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000c\000l\000a\000s\000s) % 110 0 obj -<< /S /GoTo /D (subsubsection.3.2.2) >> +<< /S /GoTo /D (subsubsection.3.2.1) >> % 113 0 obj -(3.2.2 get\137nrows \204 Get number of rows in a sparse matrix) +(\376\377\0003\000.\0002\000.\0001\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000M\000e\000t\000h\000o\000d\000s) % 114 0 obj -<< /S /GoTo /D (subsubsection.3.2.3) >> +<< /S /GoTo /D (subsubsection.3.2.2) >> % 117 0 obj -(3.2.3 get\137ncols \204 Get number of columns in a sparse matrix) +(\376\377\0003\000.\0002\000.\0002\000\040\000g\000e\000t\000\137\000n\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000r\000o\000w\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 118 0 obj -<< /S /GoTo /D (subsubsection.3.2.4) >> +<< /S /GoTo /D (subsubsection.3.2.3) >> % 121 0 obj -(3.2.4 get\137nnzeros \204 Get number of nonzero elements in a sparse matrix) 
+(\376\377\0003\000.\0002\000.\0003\000\040\000g\000e\000t\000\137\000n\000c\000o\000l\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000c\000o\000l\000u\000m\000n\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 122 0 obj -<< /S /GoTo /D (subsubsection.3.2.5) >> +<< /S /GoTo /D (subsubsection.3.2.4) >> % 125 0 obj -(3.2.5 get\137size \204 Get maximum number of nonzero elements in a sparse matrix) +(\376\377\0003\000.\0002\000.\0004\000\040\000g\000e\000t\000\137\000n\000n\000z\000e\000r\000o\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000n\000o\000n\000z\000e\000r\000o\000\040\000e\000l\000e\000m\000e\000n\000t\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 126 0 obj -<< /S /GoTo /D (subsubsection.3.2.6) >> +<< /S /GoTo /D (subsubsection.3.2.5) >> % 129 0 obj -(3.2.6 sizeof \204 Get memory occupation in bytes of a sparse matrix) +(\376\377\0003\000.\0002\000.\0005\000\040\000g\000e\000t\000\137\000s\000i\000z\000e\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000a\000x\000i\000m\000u\000m\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000n\000o\000n\000z\000e\000r\000o\000\040\000e\000l\000e\000m\000e\000n\000t\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 130 0 obj -<< /S /GoTo /D (subsubsection.3.2.7) >> +<< /S /GoTo /D (subsubsection.3.2.6) >> % 133 0 obj -(3.2.7 get\137fmt \204 Short description of the dynamic type) 
+(\376\377\0003\000.\0002\000.\0006\000\040\000s\000i\000z\000e\000o\000f\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000e\000m\000o\000r\000y\000\040\000o\000c\000c\000u\000p\000a\000t\000i\000o\000n\000\040\000i\000n\000\040\000b\000y\000t\000e\000s\000\040\000o\000f\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 134 0 obj -<< /S /GoTo /D (subsubsection.3.2.8) >> +<< /S /GoTo /D (subsubsection.3.2.7) >> % 137 0 obj -(3.2.8 is\137bld, is\137upd, is\137asb \204 Status check) +(\376\377\0003\000.\0002\000.\0007\000\040\000g\000e\000t\000\137\000f\000m\000t\000\040\040\024\000\040\000S\000h\000o\000r\000t\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000i\000o\000n\000\040\000o\000f\000\040\000t\000h\000e\000\040\000d\000y\000n\000a\000m\000i\000c\000\040\000t\000y\000p\000e) % 138 0 obj -<< /S /GoTo /D (subsubsection.3.2.9) >> +<< /S /GoTo /D (subsubsection.3.2.8) >> % 141 0 obj -(3.2.9 is\137lower, is\137upper, is\137triangle, is\137unit \204 Format check) +(\376\377\0003\000.\0002\000.\0008\000\040\000i\000s\000\137\000b\000l\000d\000,\000\040\000i\000s\000\137\000u\000p\000d\000,\000\040\000i\000s\000\137\000a\000s\000b\000\040\040\024\000\040\000S\000t\000a\000t\000u\000s\000\040\000c\000h\000e\000c\000k) % 142 0 obj -<< /S /GoTo /D (subsubsection.3.2.10) >> +<< /S /GoTo /D (subsubsection.3.2.9) >> % 145 0 obj -(3.2.10 cscnv \204 Convert to a different storage format) +(\376\377\0003\000.\0002\000.\0009\000\040\000i\000s\000\137\000l\000o\000w\000e\000r\000,\000\040\000i\000s\000\137\000u\000p\000p\000e\000r\000,\000\040\000i\000s\000\137\000t\000r\000i\000a\000n\000g\000l\000e\000,\000\040\000i\000s\000\137\000u\000n\000i\000t\000\040\040\024\000\040\000F\000o\000r\000m\000a\000t\000\040\000c\000h\000e\000c\000k) % 146 0 obj -<< /S /GoTo /D (subsubsection.3.2.11) >> +<< /S /GoTo /D (subsubsection.3.2.10) >> % 149 0 obj -(3.2.11 csclip \204 Reduce to a submatrix) 
+(\376\377\0003\000.\0002\000.\0001\0000\000\040\000c\000s\000c\000n\000v\000\040\040\024\000\040\000C\000o\000n\000v\000e\000r\000t\000\040\000t\000o\000\040\000a\000\040\000d\000i\000f\000f\000e\000r\000e\000n\000t\000\040\000s\000t\000o\000r\000a\000g\000e\000\040\000f\000o\000r\000m\000a\000t) % 150 0 obj -<< /S /GoTo /D (subsubsection.3.2.12) >> +<< /S /GoTo /D (subsubsection.3.2.11) >> % 153 0 obj -(3.2.12 clean\137zeros \204 Eliminate zero coefficients) +(\376\377\0003\000.\0002\000.\0001\0001\000\040\000c\000s\000c\000l\000i\000p\000\040\040\024\000\040\000R\000e\000d\000u\000c\000e\000\040\000t\000o\000\040\000a\000\040\000s\000u\000b\000m\000a\000t\000r\000i\000x) % 154 0 obj -<< /S /GoTo /D (subsubsection.3.2.13) >> +<< /S /GoTo /D (subsubsection.3.2.12) >> % 157 0 obj -(3.2.13 get\137diag \204 Get main diagonal) +(\376\377\0003\000.\0002\000.\0001\0002\000\040\000c\000l\000e\000a\000n\000\137\000z\000e\000r\000o\000s\000\040\040\024\000\040\000E\000l\000i\000m\000i\000n\000a\000t\000e\000\040\000z\000e\000r\000o\000\040\000c\000o\000e\000f\000f\000i\000c\000i\000e\000n\000t\000s) % 158 0 obj -<< /S /GoTo /D (subsubsection.3.2.14) >> +<< /S /GoTo /D (subsubsection.3.2.13) >> % 161 0 obj -(3.2.14 clip\137diag \204 Cut out main diagonal) +(\376\377\0003\000.\0002\000.\0001\0003\000\040\000g\000e\000t\000\137\000d\000i\000a\000g\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000a\000i\000n\000\040\000d\000i\000a\000g\000o\000n\000a\000l) % 162 0 obj -<< /S /GoTo /D (subsubsection.3.2.15) >> +<< /S /GoTo /D (subsubsection.3.2.14) >> % 165 0 obj -(3.2.15 tril \204 Return the lower triangle) +(\376\377\0003\000.\0002\000.\0001\0004\000\040\000c\000l\000i\000p\000\137\000d\000i\000a\000g\000\040\040\024\000\040\000C\000u\000t\000\040\000o\000u\000t\000\040\000m\000a\000i\000n\000\040\000d\000i\000a\000g\000o\000n\000a\000l) % 166 0 obj -<< /S /GoTo /D (subsubsection.3.2.16) >> +<< /S /GoTo /D (subsubsection.3.2.15) >> % 169 0 obj -(3.2.16 triu \204 Return 
the upper triangle) +(\376\377\0003\000.\0002\000.\0001\0005\000\040\000t\000r\000i\000l\000\040\040\024\000\040\000R\000e\000t\000u\000r\000n\000\040\000t\000h\000e\000\040\000l\000o\000w\000e\000r\000\040\000t\000r\000i\000a\000n\000g\000l\000e) % 170 0 obj -<< /S /GoTo /D (subsubsection.3.2.17) >> +<< /S /GoTo /D (subsubsection.3.2.16) >> % 173 0 obj -(3.2.17 psb\137set\137mat\137default \204 Set default storage format) +(\376\377\0003\000.\0002\000.\0001\0006\000\040\000t\000r\000i\000u\000\040\040\024\000\040\000R\000e\000t\000u\000r\000n\000\040\000t\000h\000e\000\040\000u\000p\000p\000e\000r\000\040\000t\000r\000i\000a\000n\000g\000l\000e) % 174 0 obj -<< /S /GoTo /D (subsubsection.3.2.18) >> +<< /S /GoTo /D (subsubsection.3.2.17) >> % 177 0 obj -(3.2.18 clone \204 Clone current object) +(\376\377\0003\000.\0002\000.\0001\0007\000\040\000p\000s\000b\000\137\000s\000e\000t\000\137\000m\000a\000t\000\137\000d\000e\000f\000a\000u\000l\000t\000\040\040\024\000\040\000S\000e\000t\000\040\000d\000e\000f\000a\000u\000l\000t\000\040\000s\000t\000o\000r\000a\000g\000e\000\040\000f\000o\000r\000m\000a\000t) % 178 0 obj -<< /S /GoTo /D (subsubsection.3.2.19) >> +<< /S /GoTo /D (subsubsection.3.2.18) >> % 181 0 obj -(3.2.19 Named Constants) +(\376\377\0003\000.\0002\000.\0001\0008\000\040\000c\000l\000o\000n\000e\000\040\040\024\000\040\000C\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000o\000b\000j\000e\000c\000t) % 182 0 obj -<< /S /GoTo /D (subsection.3.3) >> +<< /S /GoTo /D (subsubsection.3.2.19) >> % 185 0 obj -(3.3 Dense Vector Data Structure) +(\376\377\0003\000.\0002\000.\0001\0009\000\040\000N\000a\000m\000e\000d\000\040\000C\000o\000n\000s\000t\000a\000n\000t\000s) % 186 0 obj -<< /S /GoTo /D (subsubsection.3.3.1) >> +<< /S /GoTo /D (subsection.3.3) >> % 189 0 obj -(3.3.1 Vector Methods) 
+(\376\377\0003\000.\0003\000\040\000D\000e\000n\000s\000e\000\040\000V\000e\000c\000t\000o\000r\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e) % 190 0 obj -<< /S /GoTo /D (subsubsection.3.3.2) >> +<< /S /GoTo /D (subsubsection.3.3.1) >> % 193 0 obj -(3.3.2 get\137nrows \204 Get number of rows in a dense vector) +(\376\377\0003\000.\0003\000.\0001\000\040\000V\000e\000c\000t\000o\000r\000\040\000M\000e\000t\000h\000o\000d\000s) % 194 0 obj -<< /S /GoTo /D (subsubsection.3.3.3) >> +<< /S /GoTo /D (subsubsection.3.3.2) >> % 197 0 obj -(3.3.3 sizeof \204 Get memory occupation in bytes of a dense vector) +(\376\377\0003\000.\0003\000.\0002\000\040\000g\000e\000t\000\137\000n\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000r\000o\000w\000s\000\040\000i\000n\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000v\000e\000c\000t\000o\000r) % 198 0 obj -<< /S /GoTo /D (subsubsection.3.3.4) >> +<< /S /GoTo /D (subsubsection.3.3.3) >> % 201 0 obj -(3.3.4 set \204 Set contents of the vector) +(\376\377\0003\000.\0003\000.\0003\000\040\000s\000i\000z\000e\000o\000f\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000e\000m\000o\000r\000y\000\040\000o\000c\000c\000u\000p\000a\000t\000i\000o\000n\000\040\000i\000n\000\040\000b\000y\000t\000e\000s\000\040\000o\000f\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000v\000e\000c\000t\000o\000r) endstream endobj @@ -225,287 +225,508 @@ endobj << /Type /ObjStm /N 100 -/First 877 -/Length 6351 ->> -stream -202 0 206 52 207 123 210 175 211 227 214 274 215 322 218 369 219 407 222 449 -223 488 226 535 227 598 230 645 231 693 234 740 235 801 238 848 239 908 242 955 -243 1019 246 1066 247 1119 250 1166 251 1233 254 1280 255 1333 258 1380 259 1447 262 1495 -263 1556 266 1604 267 1672 270 1720 271 1794 274 1842 275 1902 278 1950 279 2005 282 2053 -283 2109 286 2157 287 2214 290 2256 291 2295 294 2342 295 2401 298 2448 
299 2498 302 2545 -303 2609 306 2656 307 2722 310 2764 311 2805 314 2852 315 2925 318 2972 319 3048 322 3095 -323 3173 326 3220 327 3290 330 3337 331 3407 334 3454 335 3536 338 3583 339 3645 342 3692 -343 3778 346 3825 347 3892 350 3940 351 4000 354 4048 355 4136 358 4184 359 4246 362 4294 -363 4362 366 4410 367 4471 370 4519 371 4578 374 4626 375 4707 378 4755 379 4839 382 4887 -383 4971 386 5019 387 5063 390 5111 391 5158 394 5206 395 5250 398 5298 399 5345 402 5393 +/First 919 +/Length 15777 +>> +stream +202 0 206 52 207 289 210 341 211 647 214 699 215 910 218 957 219 1154 222 1201 +223 1348 226 1390 227 1539 230 1586 231 1843 234 1890 235 2066 238 2113 239 2357 242 2404 +243 2643 246 2690 247 2949 250 2996 251 3200 254 3247 255 3524 258 3571 259 3775 262 3822 +263 4099 266 4147 267 4394 270 4442 271 4727 274 4775 275 5093 278 5141 279 5380 282 5428 +283 5639 286 5687 287 5903 290 5951 291 6172 294 6214 295 6363 298 6410 299 6644 302 6691 +303 6877 306 6924 307 7186 310 7233 311 7505 314 7547 315 7709 318 7756 319 8063 322 8110 +323 8432 326 8479 327 8811 330 8858 331 9150 334 9197 335 9489 338 9536 339 9891 342 9938 +343 10190 346 10237 347 10624 350 10671 351 10948 354 10996 355 11238 358 11286 359 11677 362 11725 +363 11977 366 12025 367 12307 370 12355 371 12602 374 12650 375 12887 378 12935 379 13294 382 13342 +383 13683 386 13731 387 14072 390 14120 391 14261 394 14309 395 14465 398 14513 399 14654 402 14702 % 202 0 obj -<< /S /GoTo /D (subsubsection.3.3.5) >> +<< /S /GoTo /D (subsubsection.3.3.4) >> % 206 0 obj -(3.3.5 get\137vect \204 Get a copy of the vector contents) +(\376\377\0003\000.\0003\000.\0004\000\040\000s\000e\000t\000\040\040\024\000\040\000S\000e\000t\000\040\000c\000o\000n\000t\000e\000n\000t\000s\000\040\000o\000f\000\040\000t\000h\000e\000\040\000v\000e\000c\000t\000o\000r) % 207 0 obj -<< /S /GoTo /D (subsubsection.3.3.6) >> +<< /S /GoTo /D (subsubsection.3.3.5) >> % 210 0 obj -(3.3.6 clone \204 Clone current object) 
+(\376\377\0003\000.\0003\000.\0005\000\040\000g\000e\000t\000\137\000v\000e\000c\000t\000\040\040\024\000\040\000G\000e\000t\000\040\000a\000\040\000c\000o\000p\000y\000\040\000o\000f\000\040\000t\000h\000e\000\040\000v\000e\000c\000t\000o\000r\000\040\000c\000o\000n\000t\000e\000n\000t\000s) % 211 0 obj -<< /S /GoTo /D (subsection.3.4) >> +<< /S /GoTo /D (subsubsection.3.3.6) >> % 214 0 obj -(3.4 Preconditioner data structure) +(\376\377\0003\000.\0003\000.\0006\000\040\000c\000l\000o\000n\000e\000\040\040\024\000\040\000C\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000o\000b\000j\000e\000c\000t) % 215 0 obj -<< /S /GoTo /D (subsection.3.5) >> +<< /S /GoTo /D (subsection.3.4) >> % 218 0 obj -(3.5 Heap data structure) +(\376\377\0003\000.\0004\000\040\000P\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000d\000a\000t\000a\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 219 0 obj -<< /S /GoTo /D (section.4) >> +<< /S /GoTo /D (subsection.3.5) >> % 222 0 obj -(4 Computational routines) +(\376\377\0003\000.\0005\000\040\000H\000e\000a\000p\000\040\000d\000a\000t\000a\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 223 0 obj -<< /S /GoTo /D (subsection.4.1) >> +<< /S /GoTo /D (section.4) >> % 226 0 obj -(4.1 psb\137geaxpby \204 General Dense Matrix Sum) +(\376\377\0004\000\040\000C\000o\000m\000p\000u\000t\000a\000t\000i\000o\000n\000a\000l\000\040\000r\000o\000u\000t\000i\000n\000e\000s) % 227 0 obj -<< /S /GoTo /D (subsection.4.2) >> +<< /S /GoTo /D (subsection.4.1) >> % 230 0 obj -(4.2 psb\137gedot \204 Dot Product) +(\376\377\0004\000.\0001\000\040\000p\000s\000b\000\137\000g\000e\000a\000x\000p\000b\000y\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000S\000u\000m) % 231 0 obj -<< /S /GoTo /D (subsection.4.3) >> +<< /S /GoTo /D (subsection.4.2) >> % 234 0 obj -(4.3 psb\137gedots \204 Generalized Dot 
Product) +(\376\377\0004\000.\0002\000\040\000p\000s\000b\000\137\000g\000e\000d\000o\000t\000\040\040\024\000\040\000D\000o\000t\000\040\000P\000r\000o\000d\000u\000c\000t) % 235 0 obj -<< /S /GoTo /D (subsection.4.4) >> +<< /S /GoTo /D (subsection.4.3) >> % 238 0 obj -(4.4 psb\137normi \204 Infinity-Norm of Vector) +(\376\377\0004\000.\0003\000\040\000p\000s\000b\000\137\000g\000e\000d\000o\000t\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\000D\000o\000t\000\040\000P\000r\000o\000d\000u\000c\000t) % 239 0 obj -<< /S /GoTo /D (subsection.4.5) >> +<< /S /GoTo /D (subsection.4.4) >> % 242 0 obj -(4.5 psb\137geamaxs \204 Generalized Infinity Norm) +(\376\377\0004\000.\0004\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\000i\000\040\040\024\000\040\000I\000n\000f\000i\000n\000i\000t\000y\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 243 0 obj -<< /S /GoTo /D (subsection.4.6) >> +<< /S /GoTo /D (subsection.4.5) >> % 246 0 obj -(4.6 psb\137norm1 \204 1-Norm of Vector) +(\376\377\0004\000.\0005\000\040\000p\000s\000b\000\137\000g\000e\000a\000m\000a\000x\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\000I\000n\000f\000i\000n\000i\000t\000y\000\040\000N\000o\000r\000m) % 247 0 obj -<< /S /GoTo /D (subsection.4.7) >> +<< /S /GoTo /D (subsection.4.6) >> % 250 0 obj -(4.7 psb\137geasums \204 Generalized 1-Norm of Vector) +(\376\377\0004\000.\0006\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\0001\000\040\040\024\000\040\0001\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 251 0 obj -<< /S /GoTo /D (subsection.4.8) >> +<< /S /GoTo /D (subsection.4.7) >> % 254 0 obj -(4.8 psb\137norm2 \204 2-Norm of Vector) 
+(\376\377\0004\000.\0007\000\040\000p\000s\000b\000\137\000g\000e\000a\000s\000u\000m\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\0001\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 255 0 obj -<< /S /GoTo /D (subsection.4.9) >> +<< /S /GoTo /D (subsection.4.8) >> % 258 0 obj -(4.9 psb\137genrm2s \204 Generalized 2-Norm of Vector) +(\376\377\0004\000.\0008\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\0002\000\040\040\024\000\040\0002\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 259 0 obj -<< /S /GoTo /D (subsection.4.10) >> +<< /S /GoTo /D (subsection.4.9) >> % 262 0 obj -(4.10 psb\137norm1 \204 1-Norm of Sparse Matrix) +(\376\377\0004\000.\0009\000\040\000p\000s\000b\000\137\000g\000e\000n\000r\000m\0002\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\0002\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 263 0 obj -<< /S /GoTo /D (subsection.4.11) >> +<< /S /GoTo /D (subsection.4.10) >> % 266 0 obj -(4.11 psb\137normi \204 Infinity Norm of Sparse Matrix) +(\376\377\0004\000.\0001\0000\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\0001\000\040\040\024\000\040\0001\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 267 0 obj -<< /S /GoTo /D (subsection.4.12) >> +<< /S /GoTo /D (subsection.4.11) >> % 270 0 obj -(4.12 psb\137spmm \204 Sparse Matrix by Dense Matrix Product) +(\376\377\0004\000.\0001\0001\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\000i\000\040\040\024\000\040\000I\000n\000f\000i\000n\000i\000t\000y\000\040\000N\000o\000r\000m\000\040\000o\000f\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 271 0 obj -<< /S /GoTo /D (subsection.4.13) >> +<< /S /GoTo /D (subsection.4.12) >> % 274 0 obj -(4.13 psb\137spsm \204 Triangular System Solve) 
+(\376\377\0004\000.\0001\0002\000\040\000p\000s\000b\000\137\000s\000p\000m\000m\000\040\040\024\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000b\000y\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000P\000r\000o\000d\000u\000c\000t) % 275 0 obj -<< /S /GoTo /D (subsection.4.14) >> +<< /S /GoTo /D (subsection.4.13) >> % 278 0 obj -(4.14 psb\137gemlt \204 Entrywise Product) +(\376\377\0004\000.\0001\0003\000\040\000p\000s\000b\000\137\000s\000p\000s\000m\000\040\040\024\000\040\000T\000r\000i\000a\000n\000g\000u\000l\000a\000r\000\040\000S\000y\000s\000t\000e\000m\000\040\000S\000o\000l\000v\000e) % 279 0 obj -<< /S /GoTo /D (subsection.4.15) >> +<< /S /GoTo /D (subsection.4.14) >> % 282 0 obj -(4.15 psb\137gediv \204 Entrywise Division) +(\376\377\0004\000.\0001\0004\000\040\000p\000s\000b\000\137\000g\000e\000m\000l\000t\000\040\040\024\000\040\000E\000n\000t\000r\000y\000w\000i\000s\000e\000\040\000P\000r\000o\000d\000u\000c\000t) % 283 0 obj -<< /S /GoTo /D (subsection.4.16) >> +<< /S /GoTo /D (subsection.4.15) >> % 286 0 obj -(4.16 psb\137geinv \204 Entrywise Inversion) +(\376\377\0004\000.\0001\0005\000\040\000p\000s\000b\000\137\000g\000e\000d\000i\000v\000\040\040\024\000\040\000E\000n\000t\000r\000y\000w\000i\000s\000e\000\040\000D\000i\000v\000i\000s\000i\000o\000n) % 287 0 obj -<< /S /GoTo /D (section.5) >> +<< /S /GoTo /D (subsection.4.16) >> % 290 0 obj -(5 Communication routines) +(\376\377\0004\000.\0001\0006\000\040\000p\000s\000b\000\137\000g\000e\000i\000n\000v\000\040\040\024\000\040\000E\000n\000t\000r\000y\000w\000i\000s\000e\000\040\000I\000n\000v\000e\000r\000s\000i\000o\000n) % 291 0 obj -<< /S /GoTo /D (subsection.5.1) >> +<< /S /GoTo /D (section.5) >> % 294 0 obj -(5.1 psb\137halo \204 Halo Data Communication) +(\376\377\0005\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000r\000o\000u\000t\000i\000n\000e\000s) % 295 0 obj -<< /S 
/GoTo /D (subsection.5.2) >> +<< /S /GoTo /D (subsection.5.1) >> % 298 0 obj -(5.2 psb\137ovrl \204 Overlap Update) +(\376\377\0005\000.\0001\000\040\000p\000s\000b\000\137\000h\000a\000l\000o\000\040\040\024\000\040\000H\000a\000l\000o\000\040\000D\000a\000t\000a\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n) % 299 0 obj -<< /S /GoTo /D (subsection.5.3) >> +<< /S /GoTo /D (subsection.5.2) >> % 302 0 obj -(5.3 psb\137gather \204 Gather Global Dense Matrix) +(\376\377\0005\000.\0002\000\040\000p\000s\000b\000\137\000o\000v\000r\000l\000\040\040\024\000\040\000O\000v\000e\000r\000l\000a\000p\000\040\000U\000p\000d\000a\000t\000e) % 303 0 obj -<< /S /GoTo /D (subsection.5.4) >> +<< /S /GoTo /D (subsection.5.3) >> % 306 0 obj -(5.4 psb\137scatter \204 Scatter Global Dense Matrix) +(\376\377\0005\000.\0003\000\040\000p\000s\000b\000\137\000g\000a\000t\000h\000e\000r\000\040\040\024\000\040\000G\000a\000t\000h\000e\000r\000\040\000G\000l\000o\000b\000a\000l\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 307 0 obj -<< /S /GoTo /D (section.6) >> +<< /S /GoTo /D (subsection.5.4) >> % 310 0 obj -(6 Data management routines) +(\376\377\0005\000.\0004\000\040\000p\000s\000b\000\137\000s\000c\000a\000t\000t\000e\000r\000\040\040\024\000\040\000S\000c\000a\000t\000t\000e\000r\000\040\000G\000l\000o\000b\000a\000l\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 311 0 obj -<< /S /GoTo /D (subsection.6.1) >> +<< /S /GoTo /D (section.6) >> % 314 0 obj -(6.1 psb\137cdall \204 Allocates a communication descriptor) +(\376\377\0006\000\040\000D\000a\000t\000a\000\040\000m\000a\000n\000a\000g\000e\000m\000e\000n\000t\000\040\000r\000o\000u\000t\000i\000n\000e\000s) % 315 0 obj -<< /S /GoTo /D (subsection.6.2) >> +<< /S /GoTo /D (subsection.6.1) >> % 318 0 obj -(6.2 psb\137cdins \204 Communication descriptor insert routine) 
+(\376\377\0006\000.\0001\000\040\000p\000s\000b\000\137\000c\000d\000a\000l\000l\000\040\040\024\000\040\000A\000l\000l\000o\000c\000a\000t\000e\000s\000\040\000a\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 319 0 obj -<< /S /GoTo /D (subsection.6.3) >> +<< /S /GoTo /D (subsection.6.2) >> % 322 0 obj -(6.3 psb\137cdasb \204 Communication descriptor assembly routine) +(\376\377\0006\000.\0002\000\040\000p\000s\000b\000\137\000c\000d\000i\000n\000s\000\040\040\024\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000i\000n\000s\000e\000r\000t\000\040\000r\000o\000u\000t\000i\000n\000e) % 323 0 obj -<< /S /GoTo /D (subsection.6.4) >> +<< /S /GoTo /D (subsection.6.3) >> % 326 0 obj -(6.4 psb\137cdcpy \204 Copies a communication descriptor) +(\376\377\0006\000.\0003\000\040\000p\000s\000b\000\137\000c\000d\000a\000s\000b\000\040\040\024\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000a\000s\000s\000e\000m\000b\000l\000y\000\040\000r\000o\000u\000t\000i\000n\000e) % 327 0 obj -<< /S /GoTo /D (subsection.6.5) >> +<< /S /GoTo /D (subsection.6.4) >> % 330 0 obj -(6.5 psb\137cdfree \204 Frees a communication descriptor) +(\376\377\0006\000.\0004\000\040\000p\000s\000b\000\137\000c\000d\000c\000p\000y\000\040\040\024\000\040\000C\000o\000p\000i\000e\000s\000\040\000a\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 331 0 obj -<< /S /GoTo /D (subsection.6.6) >> +<< /S /GoTo /D (subsection.6.5) >> % 334 0 obj -(6.6 psb\137cdbldext \204 Build an extended communication descriptor) 
+(\376\377\0006\000.\0005\000\040\000p\000s\000b\000\137\000c\000d\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000s\000\040\000a\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 335 0 obj -<< /S /GoTo /D (subsection.6.7) >> +<< /S /GoTo /D (subsection.6.6) >> % 338 0 obj -(6.7 psb\137spall \204 Allocates a sparse matrix) +(\376\377\0006\000.\0006\000\040\000p\000s\000b\000\137\000c\000d\000b\000l\000d\000e\000x\000t\000\040\040\024\000\040\000B\000u\000i\000l\000d\000\040\000a\000n\000\040\000e\000x\000t\000e\000n\000d\000e\000d\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 339 0 obj -<< /S /GoTo /D (subsection.6.8) >> +<< /S /GoTo /D (subsection.6.7) >> % 342 0 obj -(6.8 psb\137spins \204 Insert a set of coefficients into a sparse matrix) +(\376\377\0006\000.\0007\000\040\000p\000s\000b\000\137\000s\000p\000a\000l\000l\000\040\040\024\000\040\000A\000l\000l\000o\000c\000a\000t\000e\000s\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 343 0 obj -<< /S /GoTo /D (subsection.6.9) >> +<< /S /GoTo /D (subsection.6.8) >> % 346 0 obj -(6.9 psb\137spasb \204 Sparse matrix assembly routine) +(\376\377\0006\000.\0008\000\040\000p\000s\000b\000\137\000s\000p\000i\000n\000s\000\040\040\024\000\040\000I\000n\000s\000e\000r\000t\000\040\000a\000\040\000s\000e\000t\000\040\000o\000f\000\040\000c\000o\000e\000f\000f\000i\000c\000i\000e\000n\000t\000s\000\040\000i\000n\000t\000o\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 347 0 obj -<< /S /GoTo /D (subsection.6.10) >> +<< /S /GoTo /D (subsection.6.9) >> % 350 0 obj -(6.10 psb\137spfree \204 Frees a sparse matrix) 
+(\376\377\0006\000.\0009\000\040\000p\000s\000b\000\137\000s\000p\000a\000s\000b\000\040\040\024\000\040\000S\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000a\000s\000s\000e\000m\000b\000l\000y\000\040\000r\000o\000u\000t\000i\000n\000e) % 351 0 obj -<< /S /GoTo /D (subsection.6.11) >> +<< /S /GoTo /D (subsection.6.10) >> % 354 0 obj -(6.11 psb\137sprn \204 Reinit sparse matrix structure for psblas routines.) +(\376\377\0006\000.\0001\0000\000\040\000p\000s\000b\000\137\000s\000p\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000s\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 355 0 obj -<< /S /GoTo /D (subsection.6.12) >> +<< /S /GoTo /D (subsection.6.11) >> % 358 0 obj -(6.12 psb\137geall \204 Allocates a dense matrix) +(\376\377\0006\000.\0001\0001\000\040\000p\000s\000b\000\137\000s\000p\000r\000n\000\040\040\024\000\040\000R\000e\000i\000n\000i\000t\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e\000\040\000f\000o\000r\000\040\000p\000s\000b\000l\000a\000s\000\040\000r\000o\000u\000t\000i\000n\000e\000s\000.) 
% 359 0 obj -<< /S /GoTo /D (subsection.6.13) >> +<< /S /GoTo /D (subsection.6.12) >> % 362 0 obj -(6.13 psb\137geins \204 Dense matrix insertion routine) +(\376\377\0006\000.\0001\0002\000\040\000p\000s\000b\000\137\000g\000e\000a\000l\000l\000\040\040\024\000\040\000A\000l\000l\000o\000c\000a\000t\000e\000s\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 363 0 obj -<< /S /GoTo /D (subsection.6.14) >> +<< /S /GoTo /D (subsection.6.13) >> % 366 0 obj -(6.14 psb\137geasb \204 Assembly a dense matrix) +(\376\377\0006\000.\0001\0003\000\040\000p\000s\000b\000\137\000g\000e\000i\000n\000s\000\040\040\024\000\040\000D\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000i\000n\000s\000e\000r\000t\000i\000o\000n\000\040\000r\000o\000u\000t\000i\000n\000e) % 367 0 obj -<< /S /GoTo /D (subsection.6.15) >> +<< /S /GoTo /D (subsection.6.14) >> % 370 0 obj -(6.15 psb\137gefree \204 Frees a dense matrix) +(\376\377\0006\000.\0001\0004\000\040\000p\000s\000b\000\137\000g\000e\000a\000s\000b\000\040\040\024\000\040\000A\000s\000s\000e\000m\000b\000l\000y\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 371 0 obj -<< /S /GoTo /D (subsection.6.16) >> +<< /S /GoTo /D (subsection.6.15) >> % 374 0 obj -(6.16 psb\137gelp \204 Applies a left permutation to a dense matrix) +(\376\377\0006\000.\0001\0005\000\040\000p\000s\000b\000\137\000g\000e\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000s\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 375 0 obj -<< /S /GoTo /D (subsection.6.17) >> +<< /S /GoTo /D (subsection.6.16) >> % 378 0 obj -(6.17 psb\137glob\137to\137loc \204 Global to local indices convertion) 
+(\376\377\0006\000.\0001\0006\000\040\000p\000s\000b\000\137\000g\000e\000l\000p\000\040\040\024\000\040\000A\000p\000p\000l\000i\000e\000s\000\040\000a\000\040\000l\000e\000f\000t\000\040\000p\000e\000r\000m\000u\000t\000a\000t\000i\000o\000n\000\040\000t\000o\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 379 0 obj -<< /S /GoTo /D (subsection.6.18) >> +<< /S /GoTo /D (subsection.6.17) >> % 382 0 obj -(6.18 psb\137loc\137to\137glob \204 Local to global indices conversion) +(\376\377\0006\000.\0001\0007\000\040\000p\000s\000b\000\137\000g\000l\000o\000b\000\137\000t\000o\000\137\000l\000o\000c\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000t\000o\000\040\000l\000o\000c\000a\000l\000\040\000i\000n\000d\000i\000c\000e\000s\000\040\000c\000o\000n\000v\000e\000r\000t\000i\000o\000n) % 383 0 obj -<< /S /GoTo /D (subsection.6.19) >> +<< /S /GoTo /D (subsection.6.18) >> % 386 0 obj -(6.19 psb\137is\137owned \204 ) +(\376\377\0006\000.\0001\0008\000\040\000p\000s\000b\000\137\000l\000o\000c\000\137\000t\000o\000\137\000g\000l\000o\000b\000\040\040\024\000\040\000L\000o\000c\000a\000l\000\040\000t\000o\000\040\000g\000l\000o\000b\000a\000l\000\040\000i\000n\000d\000i\000c\000e\000s\000\040\000c\000o\000n\000v\000e\000r\000s\000i\000o\000n) % 387 0 obj -<< /S /GoTo /D (subsection.6.20) >> +<< /S /GoTo /D (subsection.6.19) >> % 390 0 obj -(6.20 psb\137owned\137index \204 ) +(\376\377\0006\000.\0001\0009\000\040\000p\000s\000b\000\137\000i\000s\000\137\000o\000w\000n\000e\000d\000\040\040\024\000\040) % 391 0 obj -<< /S /GoTo /D (subsection.6.21) >> +<< /S /GoTo /D (subsection.6.20) >> % 394 0 obj -(6.21 psb\137is\137local \204 ) +(\376\377\0006\000.\0002\0000\000\040\000p\000s\000b\000\137\000o\000w\000n\000e\000d\000\137\000i\000n\000d\000e\000x\000\040\040\024\000\040) % 395 0 obj -<< /S /GoTo /D (subsection.6.22) >> +<< /S /GoTo /D (subsection.6.21) >> % 398 0 obj -(6.22 psb\137local\137index \204 ) 
+(\376\377\0006\000.\0002\0001\000\040\000p\000s\000b\000\137\000i\000s\000\137\000l\000o\000c\000a\000l\000\040\040\024\000\040) % 399 0 obj -<< /S /GoTo /D (subsection.6.23) >> +<< /S /GoTo /D (subsection.6.22) >> % 402 0 obj -(6.23 psb\137get\137boundary \204 Extract list of boundary elements) - -endstream -endobj -581 0 obj -<< -/Length 728 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 24.7871 Tf 169.511 626.367 Td [(PSBLAS)-250(3.8.0)-250(User)-55(')55(s)-250(guide)]TJ -ET -q -1 0 0 1 125.3 609.739 cm -0 0 343.711 4.981 re f -Q -BT -/F52 14.3462 Tf 156.541 586.546 Td [(A)-250(r)18(efer)18(ence)-250(guide)-250(for)-250(the)-250(Parallel)-250(Sparse)-250(BLAS)-250(library)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -0 g 0 G -0 g 0 G -ET -1 0 0 1 168.637 345.042 cm -q -.42964 0 0 .42964 0 0 cm -q -480 0 0 360 0 0 cm -/Im1 Do -Q -Q -0 g 0 G -0 g 0 G -0 g 0 G -1 0 0 1 -168.637 -345.042 cm -BT -/F51 9.9626 Tf 365.51 263.977 Td [(by)-250(Salvatore)-250(Filippone)]TJ 14.396 -11.956 Td [(and)-250(Alfredo)-250(Buttari)]TJ/F54 9.9626 Tf 29.31 -11.955 Td [(May)-250(1st,)-250(2022)]TJ -0 g 0 G -0 g 0 G -ET +(\376\377\0006\000.\0002\0002\000\040\000p\000s\000b\000\137\000l\000o\000c\000a\000l\000\137\000i\000n\000d\000e\000x\000\040\040\024\000\040) endstream endobj -578 0 obj +404 0 obj << -/Type /XObject -/Subtype /Image -/Width 640 -/Height 480 -/BitsPerComponent 8 -/ColorSpace /DeviceRGB -/SMask 588 0 R -/Length 921600 +/Type /ObjStm +/N 100 +/First 927 +/Length 16886 >> stream 
-ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþýýýýýýýýýþþþÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýýûûûûûûûûûûûûúúúúúúøøø÷÷÷÷÷÷÷÷÷öööõõõõõõððððððððððððððððððíííùùùþþþÿÿÿÿÿÿþþþùùùæææäääááááááááááááááááááÝÝÝÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÛÛÛÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÔÔÔÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÍÍÍÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌËËËÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÄÄĽ½½½½½½½½½½½½½½½½½ººº¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôþþþÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´
ðððþþþÿÿÿÿÿÿþþþúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿþþþûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿ
ûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···üüüûûýüüþýýþýýþüüþüüþüüþüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿûûýòòøòòøòòøòòøòñøòñøéèóäãñäãñäãñãâðâáïàßîÓÑçÓÑçÓÑçÓÑçÑÏæÑÏæÉÇâ¿Þ¿Þ¿Þûûýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄýýþÑÎæ£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ™”ȔŔŔŔŔŒŽÄ…€½…€½…€½…€½…€½…€½~x¹vpµvpµvpµvpµvpµvpµhb­f`¬f`¬f`¬f`¬f`¬b[ªWP¤WP¤WP¤WP¤WP¤WP¤LDžH@œH@œH@œH@œH@œF>›90”90”90”90”90”90”0') ‹) ‹) ‹) ‹) ‹) ‹„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•Æÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³õõõùùü7.“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúîíö'ŠƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞÿÿÿvpµƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³···ûûüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿üüþ½ºÛƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³òòòýýþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„Ð
Îæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉþþþŸ›ËƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùóòø,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿ|»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüãâ𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üûýÌÊッƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ïïïþþÿJBƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆþýþ«§Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³öö÷ùøû5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿˆÁƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ †àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³úúûêéô$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿpj²ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¾¼ÜýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûýÖÔ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2•ûúýÿÿÿÿÿÿþþþììì³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀýüþ·³Øƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ôôôüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌþþÿš•ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùòñø+"ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüÞÝí…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççÿÿÿ_X¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½üüýÅÃàƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ñññþþþG?›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆþþþ¥¡Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š$Š,#+"$Іƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³÷÷÷ööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$ŠJC¤fa·~Ê—•Ù¨§äµ´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é«ªæ›ÜЇÐxtÃgb¸UO«B;Ÿ) ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿˆƒ¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ2*”gb¸š˜Ú··î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æÓvrÂYT¯=5›†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúûèçò#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒIB£‘ŽÔ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³²ë’Õlh¼H@¡#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ãããÿÿÿkd¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;3™”’׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶îlg»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüýÐÎ僃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„id¹¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtõ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííÿÿÿPH 
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆŒÒ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š—•Ù¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtˆÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³õõõûûý91”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ŒÒ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtà 
†àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎþþÿ”Ńƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒql¾¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ<5›¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtþ¼ÜýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿwq¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ:2•ûúýÿÿÿÿÿÿþþþîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ûúüÜÚì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC<Ÿ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÕÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒʸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï£¢áŽ‹ÒyuÄni¼gb¸d_¶d_¶hc¸rm¿|É‘ŽÔª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿ýüþ¾»Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“×UO«*!Žƒƒƒƒƒƒƒƒƒƒƒƒ) 
G?¡d_¶~Ê©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³òòòýýþB9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIB£¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨åF?¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„;3šid¹—•Ø··î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃÇÅáüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉþþþ œËƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi½¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç/&‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
\V°›™Û¸¸ï¸¸ï¸¸ïxtÃB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùõõú/&ŽƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠˆÐ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïTN«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;4š{w۰êxtàœËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿƒ~¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ žÞ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­è†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰91˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüäã𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»ûûýÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
‡¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ïïïþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì$Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆýýþ¬¨Òƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïrnÀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³öööùøû5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïd_¶„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ’Õ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•ØMF¦…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúíìõ&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsoÀ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨åzvÄLE¥&‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒNH§¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïœÝ{wÅ[U°<5›†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íš˜Û{wÅ[U¯:2™„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€}ɸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ëŽ‹Òid¹D= †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀüüý¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;4š··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê}yÇE>¡„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
†àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³óóóüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„€Ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨årnÀ1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌþþÿš–Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ*!ެ«æ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··îƒÊ1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¾¼ÜýýþÿÿÿÿÿÿþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùòñø+"ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@¡µµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ìoj½†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2•ûúýÿÿÿÿÿÿþþþîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿ~x¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒUO¬¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï›Ü1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúüâàƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒLE¥°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççÿÿÿ`Y©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*“š˜Ú¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç4+”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½üüýÈÅჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…`Z²­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞ 
‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ñññþþþH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰hc¸¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆýýþ¦¢Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„HA¢…‚̵´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³÷÷÷ööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&ŒQJ¨{wÅ¥¤â¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
‹ðï÷üüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿ‰„¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“YS®}yÆŸžß··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï«ªæ„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúûéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$ŠH@¡jeºŠÑ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï?7œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿmf°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…;3šb]´‘ŽÔ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_µƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒPH¡ÿÿÿþþþþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþûûûûûûûûûûûûúúúúúúøøø÷÷÷÷÷÷÷÷÷õõõõõõõõõððððððððððððððððððíííëëëëëëëëëëëëëëëîîîÿÿÿÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµ½½½½½½½½½½½½½½½½½½ÇÇÇüüýÑÏæƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(`Z²¢¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„€Ëƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¦¢ÏÿÿÿçççááááááÝÝÝÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÛÛÛÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÔÔÔÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÍÍÍÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌËËËÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÄÄĽ½½½½½½½½½½½½½½½½½»»»¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþØØØÎÎÎÒÒÒÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÜÜÜÝÝÝÝÝÝÝÝÝÝÝÝÝÝÝßßßããããããããããããããããããèèèééééééééééééééééééïïïïïïïïïïïïïïïïïïñññóóóóóóóóóóóóóóóóóóööö÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷úúúúúúúúúúúúúúúúúúûûûüüüûûûúùûúùûúùûúúûüûüüûüûûýúúüúúüúúüûûüýüýýüýüûýüûýüûýüûýþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿaZ©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„`Z²²²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïš˜Úƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-#üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿýüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýþóòøóòøóòøóòøòñøòñøêéóåäñåäñåäñãâðâáïâáïÕÓèÕÓèÕÓèÕÓèÒÐæÑÏæÌÊã¿Þ¿Þ¿Þ¿Þ¿Þ¿޶³Ø³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö°­Ô£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍš–ɔŔŔŔŔŔŅ€½…€½…€½…€½…€½…€½y¹vpµŠ„¿ýýþ²¯Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒA:ž³²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©åƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿ‡¾WP¤WP¤WP¤WP¤WP¤WP¤OG H@œH@œH@œH@œH@œH@œ91”90”90”90”90”90”3*‘) ‹) ‹) ‹) ‹) ‹) ‹…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÄàÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒid¹¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ìƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿüüýÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* 
‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Œ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßïþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿþþÿIAœƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–’Æÿÿÿ~x¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒœ›Ý¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿýýþª¦Ðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠÑ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯®éƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷ùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅþþþÿÿÿþþþùøü4+‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMF¦MF¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¥¤âƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ˜”ÇÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï›™ÛLE¥„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰úùüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýþéèó$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¡ŸßZT¯ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKD¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¹ÚÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£ 
Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«çrm¿5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰£¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïb]´ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿüüþÕÓ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷ðð÷+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï–”Ø^X±+"ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ)“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï?7œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒØÖéþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿÿÿÿVO¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï–”Øid¹A9ž†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†^X±©©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U¦ÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿýýþ¶³ØƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙØëýýþD<šƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³²ë—”ØxtÃ\V°G?¡5,•#‰ƒƒƒƒƒ(>6›XR®ƒʯ®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëôüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿûûý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë®­è±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG?¡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿþþþ˜“ǃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
…÷÷úöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿüüþññ÷*!Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿÿÿÿ{u¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒwqµÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‹ˆÐƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿüüýßÝî…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸
¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä*!ŽƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊäÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾Ýƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë;3šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKCžþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿüüýÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¼ºÛÿÿÿ^W¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êC<Ÿƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒäãðýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿþþÿG?›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰÷÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\V°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞ6.–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþÿ¤ Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„¼ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“d_¶—•Ø··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ítpÁ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒóòøùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿþþþööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-$`[³“‘Ö¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµµí„€Ë2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŸ›ËÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰G?¡lg»‘Ô²²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞjeº+"ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(‹ûûüñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿýýþéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…<4š`Z²wsÊѣ¢á¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î¢ ß|È\V°2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÁ¾ÝÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿÿÿÿjc®ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* 
‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†2*”HA¢WQ­b]µmi½xtÃxtÃ}yÆ~ÊxtÃxtÃql¾fa·YS®KD¤6-•†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜þþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿüüýÏÍ僃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÛÚìþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿþþþPH 
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb[ªÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅþþþÿÿÿÿÿÿýýþ°­Ôƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒîíõûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿûúý91”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŽ‰ÂÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡ùùüõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿþþÿîíö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¤ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ´±ÖÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿÿÿÿvpµƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷îíö(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ýýýììì³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿüüþÛÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜ÊƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚØëýýþD<šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿýýþ¾»Ûƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ6.–:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™80˜0'’)„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿýýþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶î©©å•“×zvÄ`[³<4š…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzt·ÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþÿŸ›Ëƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êˆ…ÏSLª#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õõùøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿþþÿôôù/&Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì~{È3*“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒwqµÿÿÿ‘ăƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡ÎÿÿÿÏÏϳ³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿ‚}»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î~{È'Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûûüñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿýýþãâ𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æJC£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾ÝƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿÿÿÿe^«ƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïni¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ½ºÛÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿüüþÎÌ䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|É…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰÷÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàÞîþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþJBƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï{wŃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„¼ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿÜÜܳ³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿýýþ«§Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒéèóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒðï÷úúú¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿùøü5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì5-–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘ÆÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿމƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰úùüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿüüþêéô$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG@¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* 
‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒº·ÚÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿqk²ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï}yÇNH§NH§NH§NH§NH§NH§NH§NH§NH§UO«_Y²lh¼ŠÑ¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ80“ýýþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿüüýÖÔ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒ†UO«œÝ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï0'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒììõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÖÔéþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†rm¿¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ[T¦ÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿýýþ¸µÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŒ‰ÑƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒëêôüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï
¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿþþþš•ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒA9¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï/&‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ø÷û÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿþþÿòòø+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïC<Ÿƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷îíö(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¬©ÒÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïSLªƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&üüýïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿüüýßÝî…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒlh¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚØëýýþC;™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒËÉãÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿÿÿÿ_X¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ`[³¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIAþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿüüýÆÄჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒgb¸¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï]W±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒãâðýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþÿH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒtpÁ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïVP¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
…õõúèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒrl³ÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿýýþ¥¡Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒxr¶ÿÿÿ‘ăƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒóòøùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿþþþööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï7/—ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ˜ÊÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿÿÿÿˆƒ¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒni¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾Ýƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Šúúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿýýþéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ6.–³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ½ºÛÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÀ½ÝÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿ
ÿÿÿÿÿÿÿÿle¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ$‰ø÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜þþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿüüýÐÎæƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…XR®¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïHA¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„½ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚÙëþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿÿÿÿPH ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒ"ˆC< 
jeº¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒéèóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ_X¨ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿýýþ³¯Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à©¨å³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïmi¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿúúü:2•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë'ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÈÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŒ‡ÁÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿþþþ”Ńƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïc^µƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …ùùüõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿþþÿïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•Ø…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ²®ÕÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿwq¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ/'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒììõõôù0&ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ýýýîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿüüþÜÚì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ¦¢ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÎæÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿÿÿÿ[S¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢þþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýþ¾¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•Ø/'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒæåñýýý»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿýýþB9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ëgb¸…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¤ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3)7/“7/“7/“7/“7/“>6–SL¢SL¢–‘ÆÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççþþþÿÿÿÿÿÿþþÿ¡Ìƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì{wÅ,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòñ÷ñð÷†¾„¼„¼„¼„¼„¼ˆÁ™•È™•È™•È™•È™•È™•Ȫ¦Ñ­©Ó­©Ó­©Ó­©Ó­©Ó±­Õ¾»Ü¾»Ü¾»Ü¾»Ü¾»Ü¾»ÜÈÆáÍËäÍËäÍËäÍËäÍËäÎÌåÚÙëÚÙëÚÙëÚÙëÚÙëÚÙëàÞîæäñæäñæäñæäñæäñæäñíìõïîöïîöïîöïîöïîöñð÷öõúöõúõôúôôùôôùôôù÷÷ûùùüùøûøøüøøüøøü÷÷ûüüýüüýûûýûûýûûýûûýýýþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýý···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³ÅÅÅÿÿÿÿÿÿþþÿôôù.%Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßhc¸)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–’ÆþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿöõúóòøóòøóòøòñøòñøïïöåäñåäñåäñåäñâáïâáïÛÙëÕÓèÕÓèÕÓèÓÑçÑÏæÑÏæÂ¿Þ¿Þ¿Þ¿Þ¿Þ¿޽ºÛ³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö§£Ï£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ¡Ì”ŔŔŔŔŔŋ†À…€½…€½…€½…€½…€½…€½vpµvpµvpµvpµ½ºÛÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©å‡„Î`[³5-–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëôôóù\U§WP¤WP¤WP¤TM£H@œH@œH@œH@œH@œH@œ@7—90”90”90”90”90”90”*!Œ) ‹) ‹) ‹) ‹) 
‹$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòò÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿ@7—ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï›™Û„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€ËƒÊxtÃrnÀc^µUO«A9&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…½ÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
…ööúäãð!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ®«Ôÿÿÿf`¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿüüýáàï 
†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââþþþÿÿÿÿÿÿÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÍËäþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒQI¡ýýþ­©ÒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ìììÿÿÿÿÿÿÿÿÿýýþ®«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒåãñùøü6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷íìõ'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
œÌÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ)‹ùùü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÂ¿ÞÿÿÿXQ¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜üüý¹¶ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÜÛìüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒf`­þþÿ›—Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññþþþÿÿÿÿÿÿüüþíìõ(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒííöòñø+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþš•ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠÂÿÿÿy¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùÿÿÿÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡÷öúáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ÿÿÿaZ©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒ7/“ûûýÈÅჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÒÐçþþþH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÃÃÃþþþÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTL£ýýþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòööú1(ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ}w¹ÿÿÿŠ…ÀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµöööÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùéèó$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ§£Ïÿÿÿmf°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûûÿÿÿÿÿÿÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+!ŒúùüÕÓ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿQI¡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿýýþ®«ÓƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œýýþ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßîûûý:2•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³ÉÉÉÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿ–‘ƃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒðð÷ïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿys·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰øøûÜÛì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸Úÿÿÿ\T¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”üûýÅÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³æææþþþÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ×ÕéýýþB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿüüþíìõ(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U¦þþþ¢žÌƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿþþþš•ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒëêôõõú0&ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
…ööúåãñ!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓÿÿÿga­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûþþþÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(ûûýÎÌ䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒÌÊäþþÿOG ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁþþþÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKCžýýþ¯«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëþþþÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒäãðùùü6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿ‘ŒÃƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòñ÷ííõ'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒž™Êÿÿÿtn´ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(‹ùùü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞþþþÿÿÿÿÿÿýýþ®«ÓƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÁ¾ÝÿÿÿXQ¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜üüýº·ÚƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÛÙìüüþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ_X¨þþþœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõòòø,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ0'’91˜A:žA9:2™3*“(ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŽ‰ÂÿÿÿzºƒƒƒƒƒtpÁxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃni¼lg»d_¶]W±G?¡0(“…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&ŒxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃ[U°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ>6›xtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃMF¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š\V°ƒÊ£¢á¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì£¡
àŽ‹ÒsoÀXR®;4š†ƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡÷÷ûâáï †ƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ìŠÑ`[³,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„̸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+"~{ȳ²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ì‘Õlg» ‡ƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ³°Öÿÿÿb[ªƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ìvrÂ) 
ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï;3™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ­¯®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ûûýÇÅჃƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä80—ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\V°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïtpÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ`Z²¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿüüþíìõ'ŠƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþIAœƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç*!Žƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTMª··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿþþþ™”ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢ýýþ©¥Ðƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï†ƒÍƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*“¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰ª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒçæòùøû4+‘ƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï0(“ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï†ƒÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjeº¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î«ªæ¢¡à®­è··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿ‹†Àƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã0'’0'’0'’0'’0'’2*”C< 
`[³¥¤â¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïb]µƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì&ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒžÞ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïƒÊG@¢'Œƒƒƒƒƒ+"IB£fa·Œ‰Ñ³²ë¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõÿÿÿÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùéèó$ˆƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒ#‰š˜Û¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„̃ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï^X±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&‘¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïOI¨ƒƒƒƒƒƒƒƒƒƒƒƒ!‡KD¤~Ê´³ì¸¸ï*!Žƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡Îÿÿÿnh±ƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¢ 
ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒG@¢¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œa\´$Šƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûûÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹úùüÕÓ脃ƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ†··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA:ž³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒUO¬¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïfa·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿRJ¡ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ„¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–Ùƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVP¬¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¡Ÿßƒ„€Ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk½ƒƒƒƒƒƒƒƒƒƒƒƒƒ`[³¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïmi½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿýýþ¹¶ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œýýþµ²×ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ4+”¸¸ï¸¸
ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï~ʃƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‘ŽÔ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïkfºƒNH§¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä„ƒƒƒƒƒƒƒƒƒƒƒƒXR®¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¥¤â"ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒßÞîûûý;3•ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ~zǸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïWQ­ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ.%‘··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï4+”ƒ†°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG?¡ƒƒƒƒƒƒƒƒƒƒƒƒKD¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜJC¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒib­þþÿ—’ǃƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒ4+”~{ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï«ªæ"ˆƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒgb¸¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï~ʃƒƒƒƒƒƒƒƒƒƒƒ) 
¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã}yÇTMª1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒïî÷ñð÷*!Œƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì˜–Ù˜–Ù˜–Ù˜–Ù˜–Ù žÞ´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïRK©ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¢ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_¶ƒƒƒIB£¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ì#‰ƒƒƒƒƒƒƒƒƒƒƒƒœ›Ý¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯®éÓpk½LE¥&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘Æÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïa\´ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ?7œ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï0'’ƒƒƒ†­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïXR®ƒƒƒƒƒƒƒƒƒƒƒƒWQ­¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïœ›Ýkfº2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿýýþ®«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰øøûÝÜì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸
¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–ÙA9ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒxtø¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï–“׃ƒƒƒƒ{wŸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒ…š˜Ú¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘ÕA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßïþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¹¹¹½½½½½½½½½½½½½½½ÞÞÞÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¹·Ùÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íxtÃ6.–ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†®­è¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`Z²ƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï/'’ƒƒƒƒƒƒƒƒƒƒƒƒ.%‘«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„€Ë#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC;™þþþëëëÇÇÇÇÇÇÇÇÇÉÉÉÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÓÓÓÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÙÙÙÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜááááááááááááááááááãããææææææææææææææææææêêêëëëëëëëëëëëëëëëìììððððððñññðððððððððôôôõõõööö÷÷÷÷÷÷÷÷÷öööúúúúúúûûûûûûûûûûûûýýýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”üüý¿Þƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··îws†ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒPJ¨¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î*!Žƒƒƒƒƒ„©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïid¹ƒƒƒƒƒƒƒƒƒƒƒƒƒ/&‘›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¡Ÿß) 
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊãÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþþþþþþþþþþÿÿÿþþþýýýýýýýýýýýýÿÿÿÿÿÿþþþþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&7/“7/“7/“7/“7/“;2•SL¢SL¢SL¢SL¢SL¢SL¢`Y©mf°mf°mf°mf°mf°mf°‚}¼„¼„¼„¼„¼„¼‹…¿™•È™•È™•È™•È™•È™•ȧ£Ï­©Ó­©Ó­©Ó­©Ó­©Ó®ªÓ¾»ÜðïöýýþC;™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘Ô†ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠˆÐ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘ŽÔƒƒƒƒƒƒƒvr¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï£¢áƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†f`¶°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“׆ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+"ŒòòøþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüüþøøûÍËäÙØêÚÙëÚÙëÚÙëÚÙëÚÙëÞÝíæäñæäñæäñæäñæäñæäñììõïîöïîöïîöïîöïîöðï÷öõúöõúöõùööúööúööúøøûúúüúúüûûýûûýûûýûûýýýþýýþýýþýýýýýýýýýþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿúúü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµµí¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à©¨å¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï}yǃƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) µµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒ?7œ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&‹`[³œÝ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ™Êþþþÿÿÿÿÿÿÿÿÿûûûûûûûûûûûûúúúúúú÷÷÷÷÷÷÷÷÷÷÷÷õõõõõõóóóñññññññññðððððððððìììëëëëëëëëëëëëëëëêêêææææææææææææææææææâââáááááááááááááááàààÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÙÙÙÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÒÒÒÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÐÐÐÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÉÉÉÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÀÀÀ½½½½½½½½½½½½½½½½½½¹¹¹¸¸¸¸¸¸ÎÎÎÿÿÿµ±×ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒ 
‡F?¡Œ‰Ð¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb]´¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í&Œƒƒƒƒƒƒƒ„¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï{wŃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“YT¯„€Ë¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5–ûûýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒoj½¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‡„΃ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŠˆÐƒƒƒƒƒƒƒƒƒql¾¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ,#OI¨tpÁ¡Ÿß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¼¹Úýýþÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒ…£¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í 
‡ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ80˜¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïЇÐd_¶d_¶d_¶d_¶d_¶d_¶d_¶d_¶d_¶|xƸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïSL©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’c^µ©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒUN£ÿÿÿÿÿÿÿÿÿÿÿÿÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒrm¿¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï>6›ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsoÀ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŒ‰Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š•“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïjeºƒƒƒƒƒƒƒƒƒƒƒƒ„ÔÒèüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒ_Y²¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïPI¨ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶î*!Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+”¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïvrƒƒƒƒƒƒƒƒƒƒƒƒuo´ÿÿÿÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ¨¤Ðƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒJC¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸
¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïd_¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïql¾ƒƒƒƒƒƒƒƒƒƒƒ%‰ëêôüüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒ€}ɸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïOI¨ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„̸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒ) b]´"ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¬«ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_µƒƒƒƒƒƒƒƒƒƒƒ‘ÄÿÿÿÿÿÿÿÿÿÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒ) °°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9žƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï;3šƒƒƒƒƒƒ0'’¸¸ï­¬çjeº&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2™¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïWQ­ƒƒƒƒƒƒƒƒƒƒ3*‘øøûÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ)–“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î) 
ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ[U°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïvrƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï´³ì†ƒÍMF¦†ƒƒƒƒƒƒƒƒƒƒƒ)›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï4+”ƒƒƒƒƒƒƒƒƒƒ¯¬Óýýþÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã0'’0'’0'’0'’0'’1)“;3™NH§vr¯®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à›Üƒƒƒƒ—”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­èxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtâ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç†ƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­èˆ…Îf`¶C<Ÿ'Œƒƒƒƒ…5-–hc¸«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¦¥ãƒƒƒƒƒƒƒƒƒƒKDžþþþÿÿÿÿÿÿþþþááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–Ƀƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒƒ3*“¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃZT¯¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïMF¦ƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï¬«æ¢¡à§¦ãµ´ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïsoÀƒƒƒƒƒƒƒƒƒƒËÉãüüýÿÿÿþþþüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸
ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–Ù„ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œµµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‡„΃ƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë-$ƒƒƒƒƒƒƒƒƒib®ÿÿÿÿÿÿÿÿÿÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©å5,•ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒ„§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¦¥ã„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹‰Ñ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í'Œƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï_Y²ƒƒƒƒƒƒƒƒƒ†âáïýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£á2*”ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒD= 
¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒUO«¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï^X±ƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïwsÆ€½ÿÿÿÿÿÿÿÿÿÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êgb¸ ‡ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï91˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰³²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï™—Úƒƒƒƒ&‹Ó¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê^X±ƒƒƒƒƒƒƒƒƒƒ.%Žóóøþþÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­è†ƒÍSL©#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒ"ˆ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†ƒÍ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï6-•ƒƒƒƒƒ'ŒYS®Š‡Ð¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë}yÆ,#ƒƒƒƒƒƒƒƒƒƒƒ¢žÍþþÿÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü'Šƒƒƒƒƒƒjeºni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼d_¶^X±WQ­C< ) 
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼jeºƒ3*“ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼KD¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5›ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼A9ƒƒƒƒƒƒƒƒ„80—^X±}yÆ”’ת©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì™—Ú|xÆTN«"ˆƒƒƒƒƒƒƒƒƒƒƒƒ@7—üüýÿÿÿÿÿÿÿÿÿæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ0'’7/—>6›D= ;4š4+”+"…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÀ½Ýüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U§ÿÿÿÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÛÚìüüýÿÿÿÿÿÿúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzt·ÿÿÿÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Šíìõüüþÿÿÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’ÇþþþÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ8/“ûúýÿÿÿÿÿÿÿÿÿééé³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ´±ÖýýþÿÿÿÿÿÿÿÿÿÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæüüþÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒnh±ÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡çæòýýþÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žõõùþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨¤ÐýýþÿÿÿÿÿÿþþþÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–ɃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒE=šþþÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÄÁßüüýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb[ªÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…àÞîüüþÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzºÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
‹ñð÷þþÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒœ˜ÊþþþÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5–ûûýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸Úýýþÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTM£ÿÿÿÿÿÿÿÿÿÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÓÑçüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒtn´ÿÿÿÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰ëêôüüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‘ŒÃÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*‘øøûÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓþþÿÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒJCþþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÊÈâüüþÿÿÿþþþüüü»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒga­ÿÿÿÿÿÿÿÿÿÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†âáïýýþÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…€½ÿÿÿÿÿÿÿÿÿÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-$óóøþþÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¢žÌþþþÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ?6—üüýÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¿½Üýýþÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\T¦ÿÿÿÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒ„ÙØêüüþÿÿÿÿÿÿúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒys·ÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&‰íìõüüþÿÿÿÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘ÆÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ8/“ûúýÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ³¯ÕýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÎæüüýÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmf°ÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡çæòýýþÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜ
Ú샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠ…ÀÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žõõúþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ§£ÏþþÿÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒD<šýýþÿÿÿÿÿÿÿÿÿäää³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³îîîýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÂ¿Þüüýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿd]«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰) ‹) ‹) ‹) ‹) 
‹+"Œ90”90”90”90”90”90”A8˜H@œH@œH@œH@œH@œH@œUN£WP¤WP¤WP¤WP¤WP¤\T¦f`¬f`¬f`¬f`¬f`¬f`¬qk²vpµvpµvpµvpµvpµwq¶…€½…€½…€½…€½àßîüüýÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹úúúõôù£ŸÍ£ŸÍ£ŸÍ£ŸÍ¨¤Ð³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö½»Û¿Þ¿Þ¿Þ¿Þ¿ÞÃÀÞÑÏæÑÏæÓÑçÓÑçÓÑçÓÑçÛÚìâáïâáïäãñäãñäãñäãññð÷òñøòòøòòøòòøòòøööúÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþýýþýýþýýþùùüùùüùùüùùüüüýýýþûûýûûþûûþûûþûûýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿþþþôôôµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµ÷÷÷þþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³×××ÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿÿÿÿÿÿÿþþþÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿþþþûûû¹¹¹³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´óóóþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³ÝÝÝþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççþþþÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ãããþþþÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ìììþþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸ûûûÿÿÿÿÿÿþþþÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññþþþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùÿÿÿÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÃÃÃþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³îîîþþþÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµöööÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³×××ÿÿÿÿÿÿÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿÿÿÿèèè³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´óóóþþþÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿÿÿÿþþþÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³æææþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûþþþÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëþþþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞþþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»ýýýÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿþþþ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸»»»½½½½½½½½½½½½½½½½½½ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÄÄÄÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇËËËÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÍÍÍÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÔÔÔÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÞÞÞááááááááááááááááááäääæææææææææææææææçççëëëëëëëëëëëëëëëëëëîîîððððððþþþÿÿÿÿÿÿþþþüüüúúúúúúûûûûûûûûûûûûþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿþþþýýýýýýýýýþþþÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ -endstream -endobj -588 0 obj -<< -/Type /XObject -/Subtype /Image +403 0 
407 48 408 386 411 434 412 762 415 810 416 1152 419 1200 420 1416 423 1464 +424 1627 427 1669 428 1856 431 1903 432 2220 435 2267 436 2655 439 2702 440 3012 443 3059 +444 3347 447 3394 448 3642 451 3689 452 3898 455 3945 456 4292 459 4339 460 4558 463 4605 +464 4796 467 4844 468 5010 471 5058 472 5244 475 5292 476 5478 479 5526 480 5793 483 5841 +484 6108 487 6156 488 6395 491 6443 492 6604 495 6652 496 6828 499 6870 500 6979 503 7026 +504 7380 507 7427 508 7821 511 7868 512 8239 515 8286 516 8752 519 8794 520 8875 523 8922 +524 9376 527 9423 528 9869 531 9916 532 10372 535 10419 536 10875 539 10922 540 11378 543 11425 +544 11891 547 11934 548 12093 551 12141 552 12377 555 12425 556 12666 559 12714 560 12935 563 12983 +564 13259 567 13307 568 13652 571 13700 572 13946 575 13994 576 14200 579 14248 580 14456 583 14504 +584 14722 587 14765 588 14894 591 14942 592 15232 595 15280 596 15620 599 15663 600 15754 603 15802 +% 403 0 obj +<< /S /GoTo /D (subsection.6.23) >> +% 407 0 obj +(\376\377\0006\000.\0002\0003\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000b\000o\000u\000n\000d\000a\000r\000y\000\040\040\024\000\040\000E\000x\000t\000r\000a\000c\000t\000\040\000l\000i\000s\000t\000\040\000o\000f\000\040\000b\000o\000u\000n\000d\000a\000r\000y\000\040\000e\000l\000e\000m\000e\000n\000t\000s) +% 408 0 obj +<< /S /GoTo /D (subsection.6.24) >> +% 411 0 obj +(\376\377\0006\000.\0002\0004\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000o\000v\000e\000r\000l\000a\000p\000\040\040\024\000\040\000E\000x\000t\000r\000a\000c\000t\000\040\000l\000i\000s\000t\000\040\000o\000f\000\040\000o\000v\000e\000r\000l\000a\000p\000\040\000e\000l\000e\000m\000e\000n\000t\000s) +% 412 0 obj +<< /S /GoTo /D (subsection.6.25) >> +% 415 0 obj 
+(\376\377\0006\000.\0002\0005\000\040\000p\000s\000b\000\137\000s\000p\000\137\000g\000e\000t\000r\000o\000w\000\040\040\024\000\040\000E\000x\000t\000r\000a\000c\000t\000\040\000r\000o\000w\000\050\000s\000\051\000\040\000f\000r\000o\000m\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) +% 416 0 obj +<< /S /GoTo /D (subsection.6.26) >> +% 419 0 obj +(\376\377\0006\000.\0002\0006\000\040\000p\000s\000b\000\137\000s\000i\000z\000e\000o\000f\000\040\040\024\000\040\000M\000e\000m\000o\000r\000y\000\040\000o\000c\000c\000u\000p\000a\000t\000i\000o\000n) +% 420 0 obj +<< /S /GoTo /D (subsection.6.27) >> +% 423 0 obj +(\376\377\0006\000.\0002\0007\000\040\000S\000o\000r\000t\000i\000n\000g\000\040\000u\000t\000i\000l\000i\000t\000i\000e\000s\000\040\040\024\000\040) +% 424 0 obj +<< /S /GoTo /D (section.7) >> +% 427 0 obj +(\376\377\0007\000\040\000P\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t\000\040\000r\000o\000u\000t\000i\000n\000e\000s) +% 428 0 obj +<< /S /GoTo /D (subsection.7.1) >> +% 431 0 obj +(\376\377\0007\000.\0001\000\040\000p\000s\000b\000\137\000i\000n\000i\000t\000\040\040\024\000\040\000I\000n\000i\000t\000i\000a\000l\000i\000z\000e\000s\000\040\000P\000S\000B\000L\000A\000S\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 432 0 obj +<< /S /GoTo /D (subsection.7.2) >> +% 435 0 obj +(\376\377\0007\000.\0002\000\040\000p\000s\000b\000\137\000i\000n\000f\000o\000\040\040\024\000\040\000R\000e\000t\000u\000r\000n\000\040\000i\000n\000f\000o\000r\000m\000a\000t\000i\000o\000n\000\040\000a\000b\000o\000u\000t\000\040\000P\000S\000B\000L\000A\000S\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 436 0 obj +<< /S /GoTo /D (subsection.7.3) >> +% 439 0 obj 
+(\376\377\0007\000.\0003\000\040\000p\000s\000b\000\137\000e\000x\000i\000t\000\040\040\024\000\040\000E\000x\000i\000t\000\040\000f\000r\000o\000m\000\040\000P\000S\000B\000L\000A\000S\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 440 0 obj +<< /S /GoTo /D (subsection.7.4) >> +% 443 0 obj +(\376\377\0007\000.\0004\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000m\000p\000i\000\137\000c\000o\000m\000m\000\040\040\024\000\040\000G\000e\000t\000\040\000t\000h\000e\000\040\000M\000P\000I\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000o\000r) +% 444 0 obj +<< /S /GoTo /D (subsection.7.5) >> +% 447 0 obj +(\376\377\0007\000.\0005\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000m\000p\000i\000\137\000r\000a\000n\000k\000\040\040\024\000\040\000G\000e\000t\000\040\000t\000h\000e\000\040\000M\000P\000I\000\040\000r\000a\000n\000k) +% 448 0 obj +<< /S /GoTo /D (subsection.7.6) >> +% 451 0 obj +(\376\377\0007\000.\0006\000\040\000p\000s\000b\000\137\000w\000t\000i\000m\000e\000\040\040\024\000\040\000W\000a\000l\000l\000\040\000c\000l\000o\000c\000k\000\040\000t\000i\000m\000i\000n\000g) +% 452 0 obj +<< /S /GoTo /D (subsection.7.7) >> +% 455 0 obj +(\376\377\0007\000.\0007\000\040\000p\000s\000b\000\137\000b\000a\000r\000r\000i\000e\000r\000\040\040\024\000\040\000S\000i\000n\000c\000h\000r\000o\000n\000i\000z\000a\000t\000i\000o\000n\000\040\000p\000o\000i\000n\000t\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 456 0 obj +<< /S /GoTo /D (subsection.7.8) >> +% 459 0 obj +(\376\377\0007\000.\0008\000\040\000p\000s\000b\000\137\000a\000b\000o\000r\000t\000\040\040\024\000\040\000A\000b\000o\000r\000t\000\040\000a\000\040\000c\000o\000m\000p\000u\000t\000a\000t\000i\000o\000n) +% 460 0 obj +<< /S /GoTo /D (subsection.7.9) >> +% 463 0 obj 
+(\376\377\0007\000.\0009\000\040\000p\000s\000b\000\137\000b\000c\000a\000s\000t\000\040\040\024\000\040\000B\000r\000o\000a\000d\000c\000a\000s\000t\000\040\000d\000a\000t\000a) +% 464 0 obj +<< /S /GoTo /D (subsection.7.10) >> +% 467 0 obj +(\376\377\0007\000.\0001\0000\000\040\000p\000s\000b\000\137\000s\000u\000m\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000s\000u\000m) +% 468 0 obj +<< /S /GoTo /D (subsection.7.11) >> +% 471 0 obj +(\376\377\0007\000.\0001\0001\000\040\000p\000s\000b\000\137\000m\000a\000x\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000a\000x\000i\000m\000u\000m) +% 472 0 obj +<< /S /GoTo /D (subsection.7.12) >> +% 475 0 obj +(\376\377\0007\000.\0001\0002\000\040\000p\000s\000b\000\137\000m\000i\000n\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000i\000n\000i\000m\000u\000m) +% 476 0 obj +<< /S /GoTo /D (subsection.7.13) >> +% 479 0 obj +(\376\377\0007\000.\0001\0003\000\040\000p\000s\000b\000\137\000a\000m\000x\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000a\000x\000i\000m\000u\000m\000\040\000a\000b\000s\000o\000l\000u\000t\000e\000\040\000v\000a\000l\000u\000e) +% 480 0 obj +<< /S /GoTo /D (subsection.7.14) >> +% 483 0 obj +(\376\377\0007\000.\0001\0004\000\040\000p\000s\000b\000\137\000a\000m\000n\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000i\000n\000i\000m\000u\000m\000\040\000a\000b\000s\000o\000l\000u\000t\000e\000\040\000v\000a\000l\000u\000e) +% 484 0 obj +<< /S /GoTo /D (subsection.7.15) >> +% 487 0 obj +(\376\377\0007\000.\0001\0005\000\040\000p\000s\000b\000\137\000n\000r\000m\0002\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\0002\000-\000n\000o\000r\000m\000\040\000r\000e\000d\000u\000c\000t\000i\000o\000n) +% 488 0 obj +<< /S /GoTo /D (subsection.7.16) >> +% 491 0 obj 
+(\376\377\0007\000.\0001\0006\000\040\000p\000s\000b\000\137\000s\000n\000d\000\040\040\024\000\040\000S\000e\000n\000d\000\040\000d\000a\000t\000a) +% 492 0 obj +<< /S /GoTo /D (subsection.7.17) >> +% 495 0 obj +(\376\377\0007\000.\0001\0007\000\040\000p\000s\000b\000\137\000r\000c\000v\000\040\040\024\000\040\000R\000e\000c\000e\000i\000v\000e\000\040\000d\000a\000t\000a) +% 496 0 obj +<< /S /GoTo /D (section.8) >> +% 499 0 obj +(\376\377\0008\000\040\000E\000r\000r\000o\000r\000\040\000h\000a\000n\000d\000l\000i\000n\000g) +% 500 0 obj +<< /S /GoTo /D (subsection.8.1) >> +% 503 0 obj +(\376\377\0008\000.\0001\000\040\000p\000s\000b\000\137\000e\000r\000r\000p\000u\000s\000h\000\040\040\024\000\040\000P\000u\000s\000h\000e\000s\000\040\000a\000n\000\040\000e\000r\000r\000o\000r\000\040\000c\000o\000d\000e\000\040\000o\000n\000t\000o\000\040\000t\000h\000e\000\040\000e\000r\000r\000o\000r\000\040\000s\000t\000a\000c\000k) +% 504 0 obj +<< /S /GoTo /D (subsection.8.2) >> +% 507 0 obj +(\376\377\0008\000.\0002\000\040\000p\000s\000b\000\137\000e\000r\000r\000o\000r\000\040\040\024\000\040\000P\000r\000i\000n\000t\000s\000\040\000t\000h\000e\000\040\000e\000r\000r\000o\000r\000\040\000s\000t\000a\000c\000k\000\040\000c\000o\000n\000t\000e\000n\000t\000\040\000a\000n\000d\000\040\000a\000b\000o\000r\000t\000s\000\040\000e\000x\000e\000c\000u\000t\000i\000o\000n) +% 508 0 obj +<< /S /GoTo /D (subsection.8.3) >> +% 511 0 obj +(\376\377\0008\000.\0003\000\040\000p\000s\000b\000\137\000s\000e\000t\000\137\000e\000r\000r\000v\000e\000r\000b\000o\000s\000i\000t\000y\000\040\040\024\000\040\000S\000e\000t\000s\000\040\000t\000h\000e\000\040\000v\000e\000r\000b\000o\000s\000i\000t\000y\000\040\000o\000f\000\040\000e\000r\000r\000o\000r\000\040\000m\000e\000s\000s\000a\000g\000e\000s) +% 512 0 obj +<< /S /GoTo /D (subsection.8.4) >> +% 515 0 obj 
+(\376\377\0008\000.\0004\000\040\000p\000s\000b\000\137\000s\000e\000t\000\137\000e\000r\000r\000a\000c\000t\000i\000o\000n\000\040\040\024\000\040\000S\000e\000t\000\040\000t\000h\000e\000\040\000t\000y\000p\000e\000\040\000o\000f\000\040\000a\000c\000t\000i\000o\000n\000\040\000t\000o\000\040\000b\000e\000\040\000t\000a\000k\000e\000n\000\040\000u\000p\000o\000n\000\040\000e\000r\000r\000o\000r\000\040\000c\000o\000n\000d\000i\000t\000i\000o\000n) +% 516 0 obj +<< /S /GoTo /D (section.9) >> +% 519 0 obj +(\376\377\0009\000\040\000U\000t\000i\000l\000i\000t\000i\000e\000s) +% 520 0 obj +<< /S /GoTo /D (subsection.9.1) >> +% 523 0 obj +(\376\377\0009\000.\0001\000\040\000\040\000h\000b\000\137\000r\000e\000a\000d\000\040\040\024\000\040\000R\000e\000a\000d\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000H\000a\000r\000w\000e\000l\000l\040\023\000B\000o\000e\000i\000n\000g\000\040\000f\000o\000r\000m\000a\000t) +% 524 0 obj +<< /S /GoTo /D (subsection.9.2) >> +% 527 0 obj +(\376\377\0009\000.\0002\000\040\000h\000b\000\137\000w\000r\000i\000t\000e\000\040\040\024\000\040\000W\000r\000i\000t\000e\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000t\000o\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000H\000a\000r\000w\000e\000l\000l\040\023\000B\000o\000e\000i\000n\000g\000\040\000f\000o\000r\000m\000a\000t) +% 528 0 obj +<< /S /GoTo /D (subsection.9.3) >> +% 531 0 obj 
+(\376\377\0009\000.\0003\000\040\000m\000m\000\137\000m\000a\000t\000\137\000r\000e\000a\000d\000\040\040\024\000\040\000R\000e\000a\000d\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 532 0 obj +<< /S /GoTo /D (subsection.9.4) >> +% 535 0 obj +(\376\377\0009\000.\0004\000\040\000m\000m\000\137\000a\000r\000r\000a\000y\000\137\000r\000e\000a\000d\000\040\040\024\000\040\000R\000e\000a\000d\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000a\000r\000r\000a\000y\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 536 0 obj +<< /S /GoTo /D (subsection.9.5) >> +% 539 0 obj +(\376\377\0009\000.\0005\000\040\000m\000m\000\137\000m\000a\000t\000\137\000w\000r\000i\000t\000e\000\040\040\024\000\040\000W\000r\000i\000t\000e\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000t\000o\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 540 0 obj +<< /S /GoTo /D (subsection.9.6) >> +% 543 0 obj +(\376\377\0009\000.\0006\000\040\000m\000m\000\137\000a\000r\000r\000a\000y\000\137\000w\000r\000i\000t\000e\000\040\040\024\000\040\000W\000r\000i\000t\000e\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000a\000r\000r\000a\000y\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 544 0 obj +<< /S 
/GoTo /D (section.10) >> +% 547 0 obj +(\376\377\0001\0000\000\040\000P\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000r\000o\000u\000t\000i\000n\000e\000s) +% 548 0 obj +<< /S /GoTo /D (subsection.10.1) >> +% 551 0 obj +(\376\377\0001\0000\000.\0001\000\040\000i\000n\000i\000t\000\040\040\024\000\040\000I\000n\000i\000t\000i\000a\000l\000i\000z\000e\000\040\000a\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 552 0 obj +<< /S /GoTo /D (subsection.10.2) >> +% 555 0 obj +(\376\377\0001\0000\000.\0002\000\040\000S\000e\000t\000\040\040\024\000\040\000s\000e\000t\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s) +% 556 0 obj +<< /S /GoTo /D (subsection.10.3) >> +% 559 0 obj +(\376\377\0001\0000\000.\0003\000\040\000b\000u\000i\000l\000d\000\040\040\024\000\040\000B\000u\000i\000l\000d\000s\000\040\000a\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 560 0 obj +<< /S /GoTo /D (subsection.10.4) >> +% 563 0 obj +(\376\377\0001\0000\000.\0004\000\040\000a\000p\000p\000l\000y\000\040\040\024\000\040\000P\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000r\000o\000u\000t\000i\000n\000e) +% 564 0 obj +<< /S /GoTo /D (subsection.10.5) >> +% 567 0 obj +(\376\377\0001\0000\000.\0005\000\040\000d\000e\000s\000c\000r\000\040\040\024\000\040\000P\000r\000i\000n\000t\000s\000\040\000a\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000i\000o\000n\000\040\000o\000f\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 568 0 obj +<< /S /GoTo /D (subsection.10.6) >> +% 571 0 obj 
+(\376\377\0001\0000\000.\0006\000\040\000c\000l\000o\000n\000e\000\040\040\024\000\040\000c\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 572 0 obj +<< /S /GoTo /D (subsection.10.7) >> +% 575 0 obj +(\376\377\0001\0000\000.\0007\000\040\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000\040\000a\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 576 0 obj +<< /S /GoTo /D (subsection.10.8) >> +% 579 0 obj +(\376\377\0001\0000\000.\0008\000\040\000a\000l\000l\000o\000c\000a\000t\000e\000\137\000w\000r\000k\000\040\040\024\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 580 0 obj +<< /S /GoTo /D (subsection.10.9) >> +% 583 0 obj +(\376\377\0001\0000\000.\0009\000\040\000d\000e\000a\000l\000l\000o\000c\000a\000t\000e\000\137\000w\000r\000k\000\040\040\024\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 584 0 obj +<< /S /GoTo /D (section.11) >> +% 587 0 obj +(\376\377\0001\0001\000\040\000I\000t\000e\000r\000a\000t\000i\000v\000e\000\040\000M\000e\000t\000h\000o\000d\000s) +% 588 0 obj +<< /S /GoTo /D (subsection.11.1) >> +% 591 0 obj +(\376\377\0001\0001\000.\0001\000\040\000p\000s\000b\000\137\000k\000r\000y\000l\000o\000v\000\040\000\040\040\024\000\040\000K\000r\000y\000l\000o\000v\000\040\000M\000e\000t\000h\000o\000d\000s\000\040\000D\000r\000i\000v\000e\000r\000\040\000R\000o\000u\000t\000i\000n\000e) +% 592 0 obj +<< /S /GoTo /D (subsection.11.2) >> +% 595 0 obj +(\376\377\0001\0001\000.\0002\000\040\000p\000s\000b\000\137\000r\000i\000c\000h\000a\000r\000d\000s\000o\000n\000\040\000\040\040\024\000\040\000R\000i\000c\000h\000a\000r\000d\000s\000o\000n\000\040\000I\000t\000e\000r\000a\000t\000i\000o\000n\000\040\000D\000r\000i\000v\000e\000r\000\040\000R\000o\000u\000t\000i\000n\000e) +% 596 0 obj +<< /S /GoTo /D (section.12) >> +% 599 0 obj 
+(\376\377\0001\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 600 0 obj +<< /S /GoTo /D (subsection.12.1) >> +% 603 0 obj +(\376\377\0001\0002\000.\0001\000\040\000U\000s\000i\000n\000g\000\040\000t\000h\000e\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s) + +endstream +endobj +682 0 obj +<< +/Length 767 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F75 24.7871 Tf 169.511 628.583 Td [(PSBLAS)-250(3.9.0)-250(User)-55(')55(s)-250(guide)]TJ +ET +q +1 0 0 1 125.3 611.955 cm +0 0 343.711 4.981 re f +Q +BT +/F78 14.3462 Tf 156.541 588.762 Td [(A)-250(r)18(efer)18(ence)-250(guide)-250(for)-250(the)-250(Parallel)-250(Sparse)-250(BLAS)-250(library)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +ET +1 0 0 1 168.637 349.474 cm +q +.42964 0 0 .42964 0 0 cm +q +480 0 0 360 0 0 cm +/Im1 Do +Q +Q +0 g 0 G +0 g 0 G +0 g 0 G +1 0 0 1 -168.637 -349.474 cm +BT +/F75 9.9626 Tf 365.51 270.624 Td [(by)-250(Salvatore)-250(Filippone)]TJ 34.042 -11.956 Td [(Alfredo)-250(Buttari)]TJ -8.847 -11.955 Td [(Fabio)-250(Durastante)]TJ/F84 9.9626 Tf 16.987 -11.955 Td [(June)-250(9th,)-250(2025)]TJ +0 g 0 G +0 g 0 G +ET + +endstream +endobj +679 0 obj +<< +/Type /XObject +/Subtype /Image +/Width 640 +/Height 480 +/BitsPerComponent 8 +/ColorSpace /DeviceRGB +/SMask 689 0 R +/Length 921600 +>> +stream 
+ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþýýýýýýýýýþþþÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýýûûûûûûûûûûûûúúúúúúøøø÷÷÷÷÷÷÷÷÷öööõõõõõõððððððððððððððððððíííùùùþþþÿÿÿÿÿÿþþþùùùæææäääááááááááááááááááááÝÝÝÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÛÛÛÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÔÔÔÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÍÍÍÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌËËËÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÄÄĽ½½½½½½½½½½½½½½½½½ººº¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôþþþÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´
ðððþþþÿÿÿÿÿÿþþþúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿþþþûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿ
ûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···üüüûûýüüþýýþýýþüüþüüþüüþüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿûûýòòøòòøòòøòòøòñøòñøéèóäãñäãñäãñãâðâáïàßîÓÑçÓÑçÓÑçÓÑçÑÏæÑÏæÉÇâ¿Þ¿Þ¿Þûûýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄýýþÑÎæ£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ™”ȔŔŔŔŔŒŽÄ…€½…€½…€½…€½…€½…€½~x¹vpµvpµvpµvpµvpµvpµhb­f`¬f`¬f`¬f`¬f`¬b[ªWP¤WP¤WP¤WP¤WP¤WP¤LDžH@œH@œH@œH@œH@œF>›90”90”90”90”90”90”0') ‹) ‹) ‹) ‹) ‹) ‹„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•Æÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³õõõùùü7.“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúîíö'ŠƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞÿÿÿvpµƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³···ûûüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿üüþ½ºÛƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³òòòýýþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„Ð
Îæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉþþþŸ›ËƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùóòø,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿ|»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüãâ𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üûýÌÊッƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ïïïþþÿJBƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆþýþ«§Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³öö÷ùøû5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿˆÁƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ †àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³úúûêéô$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿpj²ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¾¼ÜýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûýÖÔ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2•ûúýÿÿÿÿÿÿþþþììì³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀýüþ·³Øƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ôôôüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌþþÿš•ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùòñø+"ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüÞÝí…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççÿÿÿ_X¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½üüýÅÃàƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ñññþþþG?›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆþþþ¥¡Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š$Š,#+"$Іƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³÷÷÷ööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$ŠJC¤fa·~Ê—•Ù¨§äµ´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é«ªæ›ÜЇÐxtÃgb¸UO«B;Ÿ) ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿˆƒ¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ2*”gb¸š˜Ú··î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æÓvrÂYT¯=5›†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúûèçò#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒIB£‘ŽÔ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³²ë’Õlh¼H@¡#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ãããÿÿÿkd¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;3™”’׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶îlg»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüýÐÎ僃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„id¹¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtõ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííÿÿÿPH 
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆŒÒ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š—•Ù¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtˆÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³õõõûûý91”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ŒÒ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtà 
†àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎþþÿ”Ńƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒql¾¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ<5›¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtþ¼ÜýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿwq¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ:2•ûúýÿÿÿÿÿÿþþþîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ûúüÜÚì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC<Ÿ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÕÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒʸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï£¢áŽ‹ÒyuÄni¼gb¸d_¶d_¶hc¸rm¿|É‘ŽÔª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿ýüþ¾»Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“×UO«*!Žƒƒƒƒƒƒƒƒƒƒƒƒ) 
G?¡d_¶~Ê©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³òòòýýþB9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIB£¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨åF?¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„;3šid¹—•Ø··î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃÇÅáüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉþþþ œËƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi½¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç/&‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
\V°›™Û¸¸ï¸¸ï¸¸ïxtÃB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùõõú/&ŽƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠˆÐ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïTN«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;4š{w۰êxtàœËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿƒ~¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ žÞ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­è†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰91˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüäã𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»ûûýÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
‡¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ïïïþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì$Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆýýþ¬¨Òƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïrnÀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³öööùøû5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïd_¶„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ’Õ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•ØMF¦…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúíìõ&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsoÀ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨åzvÄLE¥&‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒNH§¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïœÝ{wÅ[U°<5›†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íš˜Û{wÅ[U¯:2™„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€}ɸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ëŽ‹Òid¹D= †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀüüý¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;4š··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê}yÇE>¡„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
†àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³óóóüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„€Ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨årnÀ1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌþþÿš–Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ*!ެ«æ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··îƒÊ1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¾¼ÜýýþÿÿÿÿÿÿþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùòñø+"ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@¡µµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ìoj½†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2•ûúýÿÿÿÿÿÿþþþîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿ~x¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒUO¬¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï›Ü1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúüâàƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒLE¥°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççÿÿÿ`Y©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*“š˜Ú¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç4+”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½üüýÈÅჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…`Z²­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞ 
‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ñññþþþH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰hc¸¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆýýþ¦¢Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„HA¢…‚̵´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³÷÷÷ööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&ŒQJ¨{wÅ¥¤â¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
‹ðï÷üüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿ‰„¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“YS®}yÆŸžß··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï«ªæ„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúûéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$ŠH@¡jeºŠÑ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï?7œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿmf°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…;3šb]´‘ŽÔ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_µƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒPH¡ÿÿÿþþþþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþûûûûûûûûûûûûúúúúúúøøø÷÷÷÷÷÷÷÷÷õõõõõõõõõððððððððððððððððððíííëëëëëëëëëëëëëëëîîîÿÿÿÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµ½½½½½½½½½½½½½½½½½½ÇÇÇüüýÑÏæƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(`Z²¢¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„€Ëƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¦¢ÏÿÿÿçççááááááÝÝÝÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÛÛÛÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÔÔÔÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÍÍÍÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌËËËÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÄÄĽ½½½½½½½½½½½½½½½½½»»»¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþØØØÎÎÎÒÒÒÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÜÜÜÝÝÝÝÝÝÝÝÝÝÝÝÝÝÝßßßããããããããããããããããããèèèééééééééééééééééééïïïïïïïïïïïïïïïïïïñññóóóóóóóóóóóóóóóóóóööö÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷úúúúúúúúúúúúúúúúúúûûûüüüûûûúùûúùûúùûúúûüûüüûüûûýúúüúúüúúüûûüýüýýüýüûýüûýüûýüûýþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿaZ©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„`Z²²²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïš˜Úƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-#üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿýüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýþóòøóòøóòøóòøòñøòñøêéóåäñåäñåäñãâðâáïâáïÕÓèÕÓèÕÓèÕÓèÒÐæÑÏæÌÊã¿Þ¿Þ¿Þ¿Þ¿Þ¿޶³Ø³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö°­Ô£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍš–ɔŔŔŔŔŔŅ€½…€½…€½…€½…€½…€½y¹vpµŠ„¿ýýþ²¯Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒA:ž³²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©åƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿ‡¾WP¤WP¤WP¤WP¤WP¤WP¤OG H@œH@œH@œH@œH@œH@œ91”90”90”90”90”90”3*‘) ‹) ‹) ‹) ‹) ‹) ‹…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÄàÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒid¹¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ìƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿüüýÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* 
‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Œ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßïþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿþþÿIAœƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–’Æÿÿÿ~x¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒœ›Ý¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿýýþª¦Ðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠÑ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯®éƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷ùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅþþþÿÿÿþþþùøü4+‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMF¦MF¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¥¤âƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ˜”ÇÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï›™ÛLE¥„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰úùüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýþéèó$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¡ŸßZT¯ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKD¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¹ÚÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£ 
Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«çrm¿5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰£¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïb]´ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿüüþÕÓ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷ðð÷+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï–”Ø^X±+"ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ)“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï?7œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒØÖéþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿÿÿÿVO¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï–”Øid¹A9ž†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†^X±©©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U¦ÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿýýþ¶³ØƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙØëýýþD<šƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³²ë—”ØxtÃ\V°G?¡5,•#‰ƒƒƒƒƒ(>6›XR®ƒʯ®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëôüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿûûý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë®­è±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG?¡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿþþþ˜“ǃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
…÷÷úöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿüüþññ÷*!Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿÿÿÿ{u¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒwqµÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‹ˆÐƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿüüýßÝî…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸
¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä*!ŽƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊäÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾Ýƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë;3šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKCžþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿüüýÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¼ºÛÿÿÿ^W¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êC<Ÿƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒäãðýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿþþÿG?›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰÷÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\V°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞ6.–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþÿ¤ Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„¼ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“d_¶—•Ø··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ítpÁ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒóòøùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿþþþööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-$`[³“‘Ö¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµµí„€Ë2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŸ›ËÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰G?¡lg»‘Ô²²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞjeº+"ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(‹ûûüñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿýýþéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…<4š`Z²wsÊѣ¢á¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î¢ ß|È\V°2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÁ¾ÝÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿÿÿÿjc®ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* 
‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†2*”HA¢WQ­b]µmi½xtÃxtÃ}yÆ~ÊxtÃxtÃql¾fa·YS®KD¤6-•†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜þþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿüüýÏÍ僃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÛÚìþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿþþþPH 
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb[ªÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅþþþÿÿÿÿÿÿýýþ°­Ôƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒîíõûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿûúý91”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŽ‰ÂÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡ùùüõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿþþÿîíö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¤ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ´±ÖÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿÿÿÿvpµƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷îíö(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ýýýììì³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿüüþÛÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜ÊƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚØëýýþD<šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿýýþ¾»Ûƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ6.–:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™80˜0'’)„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿýýþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶î©©å•“×zvÄ`[³<4š…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzt·ÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþÿŸ›Ëƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êˆ…ÏSLª#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õõùøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿþþÿôôù/&Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì~{È3*“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒwqµÿÿÿ‘ăƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡ÎÿÿÿÏÏϳ³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿ‚}»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î~{È'Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûûüñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿýýþãâ𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æJC£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾ÝƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿÿÿÿe^«ƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïni¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ½ºÛÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿüüþÎÌ䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|É…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰÷÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàÞîþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþJBƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï{wŃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„¼ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿÜÜܳ³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿýýþ«§Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒéèóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒðï÷úúú¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿùøü5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì5-–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘ÆÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿމƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰úùüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿüüþêéô$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG@¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* 
‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒº·ÚÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿqk²ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï}yÇNH§NH§NH§NH§NH§NH§NH§NH§NH§UO«_Y²lh¼ŠÑ¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ80“ýýþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿüüýÖÔ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒ†UO«œÝ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï0'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒììõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÖÔéþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†rm¿¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ[T¦ÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿýýþ¸µÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŒ‰ÑƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒëêôüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï
¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿþþþš•ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒA9¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï/&‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ø÷û÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿþþÿòòø+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïC<Ÿƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷îíö(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¬©ÒÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïSLªƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&üüýïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿüüýßÝî…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒlh¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚØëýýþC;™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒËÉãÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿÿÿÿ_X¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ`[³¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIAþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿüüýÆÄჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒgb¸¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï]W±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒãâðýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþÿH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒtpÁ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïVP¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
…õõúèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒrl³ÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿýýþ¥¡Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒxr¶ÿÿÿ‘ăƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒóòøùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿþþþööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï7/—ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ˜ÊÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿÿÿÿˆƒ¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒni¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾Ýƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Šúúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿýýþéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ6.–³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ½ºÛÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÀ½ÝÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿ
ÿÿÿÿÿÿÿÿle¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ$‰ø÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜þþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿüüýÐÎæƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…XR®¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïHA¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„½ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚÙëþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿÿÿÿPH ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒ"ˆC< 
jeº¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒéèóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ_X¨ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿýýþ³¯Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à©¨å³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïmi¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿúúü:2•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë'ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÈÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŒ‡ÁÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿþþþ”Ńƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïc^µƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …ùùüõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿþþÿïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•Ø…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ²®ÕÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿwq¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ/'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒììõõôù0&ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ýýýîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿüüþÜÚì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ¦¢ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÎæÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿÿÿÿ[S¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢þþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýþ¾¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•Ø/'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒæåñýýý»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿýýþB9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ëgb¸…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¤ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3)7/“7/“7/“7/“7/“>6–SL¢SL¢–‘ÆÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççþþþÿÿÿÿÿÿþþÿ¡Ìƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì{wÅ,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòñ÷ñð÷†¾„¼„¼„¼„¼„¼ˆÁ™•È™•È™•È™•È™•È™•Ȫ¦Ñ­©Ó­©Ó­©Ó­©Ó­©Ó±­Õ¾»Ü¾»Ü¾»Ü¾»Ü¾»Ü¾»ÜÈÆáÍËäÍËäÍËäÍËäÍËäÎÌåÚÙëÚÙëÚÙëÚÙëÚÙëÚÙëàÞîæäñæäñæäñæäñæäñæäñíìõïîöïîöïîöïîöïîöñð÷öõúöõúõôúôôùôôùôôù÷÷ûùùüùøûøøüøøüøøü÷÷ûüüýüüýûûýûûýûûýûûýýýþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýý···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³ÅÅÅÿÿÿÿÿÿþþÿôôù.%Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßhc¸)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–’ÆþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿöõúóòøóòøóòøòñøòñøïïöåäñåäñåäñåäñâáïâáïÛÙëÕÓèÕÓèÕÓèÓÑçÑÏæÑÏæÂ¿Þ¿Þ¿Þ¿Þ¿Þ¿޽ºÛ³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö§£Ï£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ¡Ì”ŔŔŔŔŔŋ†À…€½…€½…€½…€½…€½…€½vpµvpµvpµvpµ½ºÛÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©å‡„Î`[³5-–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëôôóù\U§WP¤WP¤WP¤TM£H@œH@œH@œH@œH@œH@œ@7—90”90”90”90”90”90”*!Œ) ‹) ‹) ‹) ‹) 
‹$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòò÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿ@7—ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï›™Û„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€ËƒÊxtÃrnÀc^µUO«A9&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…½ÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
…ööúäãð!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ®«Ôÿÿÿf`¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿüüýáàï 
†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââþþþÿÿÿÿÿÿÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÍËäþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒQI¡ýýþ­©ÒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ìììÿÿÿÿÿÿÿÿÿýýþ®«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒåãñùøü6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷íìõ'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
œÌÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ)‹ùùü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÂ¿ÞÿÿÿXQ¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜üüý¹¶ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÜÛìüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒf`­þþÿ›—Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññþþþÿÿÿÿÿÿüüþíìõ(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒííöòñø+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþš•ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠÂÿÿÿy¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùÿÿÿÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡÷öúáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ÿÿÿaZ©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒ7/“ûûýÈÅჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÒÐçþþþH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÃÃÃþþþÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTL£ýýþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòööú1(ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ}w¹ÿÿÿŠ…ÀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµöööÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùéèó$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ§£Ïÿÿÿmf°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûûÿÿÿÿÿÿÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+!ŒúùüÕÓ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿQI¡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿýýþ®«ÓƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œýýþ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßîûûý:2•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³ÉÉÉÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿ–‘ƃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒðð÷ïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿys·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰øøûÜÛì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸Úÿÿÿ\T¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”üûýÅÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³æææþþþÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ×ÕéýýþB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿüüþíìõ(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U¦þþþ¢žÌƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿþþþš•ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒëêôõõú0&ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ 
…ööúåãñ!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓÿÿÿga­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûþþþÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(ûûýÎÌ䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒÌÊäþþÿOG ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁþþþÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKCžýýþ¯«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëþþþÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒäãðùùü6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿ‘ŒÃƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòñ÷ííõ'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒž™Êÿÿÿtn´ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(‹ùùü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞþþþÿÿÿÿÿÿýýþ®«ÓƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÁ¾ÝÿÿÿXQ¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜üüýº·ÚƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÛÙìüüþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ_X¨þþþœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõòòø,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ0'’91˜A:žA9:2™3*“(ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŽ‰ÂÿÿÿzºƒƒƒƒƒtpÁxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃni¼lg»d_¶]W±G?¡0(“…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&ŒxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃ[U°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ>6›xtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃMF¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š\V°ƒÊ£¢á¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì£¡
àŽ‹ÒsoÀXR®;4š†ƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡÷÷ûâáï †ƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ìŠÑ`[³,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„̸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+"~{ȳ²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ì‘Õlg» ‡ƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ³°Öÿÿÿb[ªƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ìvrÂ) 
ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï;3™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ­¯®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ûûýÇÅჃƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä80—ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\V°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïtpÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ`Z²¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿüüþíìõ'ŠƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþIAœƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç*!Žƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTMª··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿþþþ™”ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢ýýþ©¥Ðƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï†ƒÍƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*“¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰ª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒçæòùøû4+‘ƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï0(“ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï†ƒÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjeº¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î«ªæ¢¡à®­è··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿ‹†Àƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã0'’0'’0'’0'’0'’2*”C< 
`[³¥¤â¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïb]µƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì&ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒžÞ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïƒÊG@¢'Œƒƒƒƒƒ+"IB£fa·Œ‰Ñ³²ë¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõÿÿÿÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùéèó$ˆƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒ#‰š˜Û¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„̃ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï^X±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&‘¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïOI¨ƒƒƒƒƒƒƒƒƒƒƒƒ!‡KD¤~Ê´³ì¸¸ï*!Žƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡Îÿÿÿnh±ƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¢ 
ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒG@¢¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œa\´$Šƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûûÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹úùüÕÓ脃ƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ†··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA:ž³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒUO¬¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïfa·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿRJ¡ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ„¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–Ùƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVP¬¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¡Ÿßƒ„€Ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk½ƒƒƒƒƒƒƒƒƒƒƒƒƒ`[³¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïmi½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿýýþ¹¶ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œýýþµ²×ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ4+”¸¸ï¸¸
ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï~ʃƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‘ŽÔ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïkfºƒNH§¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä„ƒƒƒƒƒƒƒƒƒƒƒƒXR®¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¥¤â"ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒßÞîûûý;3•ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ~zǸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïWQ­ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ.%‘··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï4+”ƒ†°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG?¡ƒƒƒƒƒƒƒƒƒƒƒƒKD¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜJC¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒib­þþÿ—’ǃƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒ4+”~{ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï«ªæ"ˆƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒgb¸¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï~ʃƒƒƒƒƒƒƒƒƒƒƒ) 
¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã}yÇTMª1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒïî÷ñð÷*!Œƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì˜–Ù˜–Ù˜–Ù˜–Ù˜–Ù žÞ´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïRK©ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¢ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_¶ƒƒƒIB£¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ì#‰ƒƒƒƒƒƒƒƒƒƒƒƒœ›Ý¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯®éÓpk½LE¥&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘Æÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïa\´ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ?7œ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï0'’ƒƒƒ†­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïXR®ƒƒƒƒƒƒƒƒƒƒƒƒWQ­¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïœ›Ýkfº2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿýýþ®«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰øøûÝÜì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸
¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–ÙA9ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒxtø¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï–“׃ƒƒƒƒ{wŸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒ…š˜Ú¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘ÕA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßïþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¹¹¹½½½½½½½½½½½½½½½ÞÞÞÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¹·Ùÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íxtÃ6.–ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†®­è¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`Z²ƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï/'’ƒƒƒƒƒƒƒƒƒƒƒƒ.%‘«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„€Ë#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC;™þþþëëëÇÇÇÇÇÇÇÇÇÉÉÉÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÓÓÓÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÙÙÙÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜááááááááááááááááááãããææææææææææææææææææêêêëëëëëëëëëëëëëëëìììððððððñññðððððððððôôôõõõööö÷÷÷÷÷÷÷÷÷öööúúúúúúûûûûûûûûûûûûýýýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”üüý¿Þƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··îws†ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒPJ¨¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î*!Žƒƒƒƒƒ„©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïid¹ƒƒƒƒƒƒƒƒƒƒƒƒƒ/&‘›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¡Ÿß) 
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊãÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþþþþþþþþþþÿÿÿþþþýýýýýýýýýýýýÿÿÿÿÿÿþþþþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&7/“7/“7/“7/“7/“;2•SL¢SL¢SL¢SL¢SL¢SL¢`Y©mf°mf°mf°mf°mf°mf°‚}¼„¼„¼„¼„¼„¼‹…¿™•È™•È™•È™•È™•È™•ȧ£Ï­©Ó­©Ó­©Ó­©Ó­©Ó®ªÓ¾»ÜðïöýýþC;™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘Ô†ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠˆÐ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘ŽÔƒƒƒƒƒƒƒvr¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï£¢áƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†f`¶°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“׆ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+"ŒòòøþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüüþøøûÍËäÙØêÚÙëÚÙëÚÙëÚÙëÚÙëÞÝíæäñæäñæäñæäñæäñæäñììõïîöïîöïîöïîöïîöðï÷öõúöõúöõùööúööúööúøøûúúüúúüûûýûûýûûýûûýýýþýýþýýþýýýýýýýýýþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿúúü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµµí¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à©¨å¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï}yǃƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) µµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒ?7œ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&‹`[³œÝ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ™Êþþþÿÿÿÿÿÿÿÿÿûûûûûûûûûûûûúúúúúú÷÷÷÷÷÷÷÷÷÷÷÷õõõõõõóóóñññññññññðððððððððìììëëëëëëëëëëëëëëëêêêææææææææææææææææææâââáááááááááááááááàààÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÙÙÙÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÒÒÒÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÐÐÐÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÉÉÉÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÀÀÀ½½½½½½½½½½½½½½½½½½¹¹¹¸¸¸¸¸¸ÎÎÎÿÿÿµ±×ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒ 
‡F?¡Œ‰Ð¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb]´¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í&Œƒƒƒƒƒƒƒ„¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï{wŃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“YT¯„€Ë¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5–ûûýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒoj½¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‡„΃ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŠˆÐƒƒƒƒƒƒƒƒƒql¾¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ,#OI¨tpÁ¡Ÿß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¼¹Úýýþÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒ…£¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í 
‡ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ80˜¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïЇÐd_¶d_¶d_¶d_¶d_¶d_¶d_¶d_¶d_¶|xƸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïSL©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’c^µ©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒUN£ÿÿÿÿÿÿÿÿÿÿÿÿÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒrm¿¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï>6›ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsoÀ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŒ‰Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š•“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïjeºƒƒƒƒƒƒƒƒƒƒƒƒ„ÔÒèüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒ_Y²¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïPI¨ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶î*!Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+”¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïvrƒƒƒƒƒƒƒƒƒƒƒƒuo´ÿÿÿÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ¨¤Ðƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒJC¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸
¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïd_¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïql¾ƒƒƒƒƒƒƒƒƒƒƒ%‰ëêôüüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒ€}ɸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïOI¨ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„̸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒ) b]´"ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¬«ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_µƒƒƒƒƒƒƒƒƒƒƒ‘ÄÿÿÿÿÿÿÿÿÿÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒ) °°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9žƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï;3šƒƒƒƒƒƒ0'’¸¸ï­¬çjeº&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2™¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïWQ­ƒƒƒƒƒƒƒƒƒƒ3*‘øøûÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ)–“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î) 
ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ[U°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïvrƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï´³ì†ƒÍMF¦†ƒƒƒƒƒƒƒƒƒƒƒ)›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï4+”ƒƒƒƒƒƒƒƒƒƒ¯¬Óýýþÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã0'’0'’0'’0'’0'’1)“;3™NH§vr¯®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à›Üƒƒƒƒ—”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­èxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtâ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç†ƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­èˆ…Îf`¶C<Ÿ'Œƒƒƒƒ…5-–hc¸«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¦¥ãƒƒƒƒƒƒƒƒƒƒKDžþþþÿÿÿÿÿÿþþþááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–Ƀƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒƒ3*“¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃZT¯¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïMF¦ƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï¬«æ¢¡à§¦ãµ´ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïsoÀƒƒƒƒƒƒƒƒƒƒËÉãüüýÿÿÿþþþüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸
ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–Ù„ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œµµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‡„΃ƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë-$ƒƒƒƒƒƒƒƒƒib®ÿÿÿÿÿÿÿÿÿÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©å5,•ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒ„§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¦¥ã„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹‰Ñ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í'Œƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï_Y²ƒƒƒƒƒƒƒƒƒ†âáïýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£á2*”ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒD= 
¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒUO«¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï^X±ƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïwsÆ€½ÿÿÿÿÿÿÿÿÿÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êgb¸ ‡ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï91˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰³²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï™—Úƒƒƒƒ&‹Ó¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê^X±ƒƒƒƒƒƒƒƒƒƒ.%Žóóøþþÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­è†ƒÍSL©#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒ"ˆ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†ƒÍ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï6-•ƒƒƒƒƒ'ŒYS®Š‡Ð¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë}yÆ,#ƒƒƒƒƒƒƒƒƒƒƒ¢žÍþþÿÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü'Šƒƒƒƒƒƒjeºni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼d_¶^X±WQ­C< ) 
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼jeºƒ3*“ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼KD¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5›ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼A9ƒƒƒƒƒƒƒƒ„80—^X±}yÆ”’ת©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì™—Ú|xÆTN«"ˆƒƒƒƒƒƒƒƒƒƒƒƒ@7—üüýÿÿÿÿÿÿÿÿÿæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ0'’7/—>6›D= ;4š4+”+"…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÀ½Ýüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U§ÿÿÿÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÛÚìüüýÿÿÿÿÿÿúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzt·ÿÿÿÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Šíìõüüþÿÿÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’ÇþþþÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ8/“ûúýÿÿÿÿÿÿÿÿÿééé³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ´±ÖýýþÿÿÿÿÿÿÿÿÿÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæüüþÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒnh±ÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡çæòýýþÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žõõùþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨¤ÐýýþÿÿÿÿÿÿþþþÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–ɃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒE=šþþÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÄÁßüüýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb[ªÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…àÞîüüþÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzºÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) 
‹ñð÷þþÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒœ˜ÊþþþÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5–ûûýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸Úýýþÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTM£ÿÿÿÿÿÿÿÿÿÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÓÑçüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒtn´ÿÿÿÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰ëêôüüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‘ŒÃÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*‘øøûÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓþþÿÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒJCþþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÊÈâüüþÿÿÿþþþüüü»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒga­ÿÿÿÿÿÿÿÿÿÿÿÿØØØ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†âáïýýþÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…€½ÿÿÿÿÿÿÿÿÿÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-$óóøþþÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¢žÌþþþÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ?6—üüýÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¿½Üýýþÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\T¦ÿÿÿÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ
ƒƒƒƒƒƒƒƒƒ„ÙØêüüþÿÿÿÿÿÿúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒys·ÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&‰íìõüüþÿÿÿÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘ÆÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ8/“ûúýÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ³¯ÕýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÎæüüýÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmf°ÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡çæòýýþÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜ
Ú샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠ…ÀÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žõõúþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ§£ÏþþÿÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒD<šýýþÿÿÿÿÿÿÿÿÿäää³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³îîîýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÂ¿Þüüýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿd]«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰) ‹) ‹) ‹) ‹) 
‹+"Œ90”90”90”90”90”90”A8˜H@œH@œH@œH@œH@œH@œUN£WP¤WP¤WP¤WP¤WP¤\T¦f`¬f`¬f`¬f`¬f`¬f`¬qk²vpµvpµvpµvpµvpµwq¶…€½…€½…€½…€½àßîüüýÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹úúúõôù£ŸÍ£ŸÍ£ŸÍ£ŸÍ¨¤Ð³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö½»Û¿Þ¿Þ¿Þ¿Þ¿ÞÃÀÞÑÏæÑÏæÓÑçÓÑçÓÑçÓÑçÛÚìâáïâáïäãñäãñäãñäãññð÷òñøòòøòòøòòøòòøööúÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþýýþýýþýýþùùüùùüùùüùùüüüýýýþûûýûûþûûþûûþûûýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿþþþôôôµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµ÷÷÷þþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³×××ÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿÿÿÿÿÿÿþþþÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿþþþûûû¹¹¹³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´óóóþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³ÝÝÝþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççþþþÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ãããþþþÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ìììþþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸ûûûÿÿÿÿÿÿþþþÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññþþþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùÿÿÿÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÃÃÃþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³îîîþþþÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµöööÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³×××ÿÿÿÿÿÿÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿÿÿÿèèè³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´óóóþþþÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿÿÿÿþþþÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³æææþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûþþþÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëþþþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞþþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþÆÆÆ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³
³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»ýýýÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿþþþ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸»»»½½½½½½½½½½½½½½½½½½ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÄÄÄÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇËËËÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÍÍÍÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÔÔÔÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÞÞÞááááááááááááááááááäääæææææææææææææææçççëëëëëëëëëëëëëëëëëëîîîððððððþþþÿÿÿÿÿÿþþþüüüúúúúúúûûûûûûûûûûûûþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿþþþýýýýýýýýýþþþÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ +endstream +endobj +689 0 obj +<< +/Type /XObject +/Subtype /Image /Width 
640 /Height 480 /BitsPerComponent 8 @@ -538,7 +759,7 @@ stream ãýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüËXÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ<Áüÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿý¨+üÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿú#’þÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþÿ‰íþýýüüüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýññññððéâââáßßÑÏÏÏÍÌÇ»»»»µN™™™™™ˆˆˆˆˆˆxwwwwwqffffffZUUUUUSDDDDDD;333333$""""" endstream endobj -591 0 obj +692 0 obj << /Length 78 >> @@ -547,569 +768,565 @@ stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 320.07 90.438 Td [(2)]TJ +/F84 9.9626 Tf 320.07 90.438 Td [(2)]TJ 0 g 0 G ET endstream endobj -637 0 obj +738 0 obj << -/Length 15343 +/Length 15190 >> stream 0 g 0 G 0 g 0 G BT -/F51 14.3462 Tf 99.895 706.042 Td [(Contents)]TJ +/F75 14.3462 Tf 99.895 706.042 Td [(Contents)]TJ +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf 0 -22.965 Td [(Preface)]TJ +0 g 0 G + [-30667(1)]TJ 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf 0 -22.894 Td [(1)-1000(Introduction)]TJ + 0 -22.189 Td [(1)-1000(Introduction)]TJ 0 g 0 G - [-26723(1)]TJ + [-26723(2)]TJ 0 0 1 rg 0 0 1 RG - 0 -22.116 Td [(2)-1000(General)-250(overview)]TJ + 0 -22.189 Td [(2)-1000(General)-250(overview)]TJ 0 g 0 G - [-24361(2)]TJ + [-24361(3)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.074 Td [(2.1)-1050(Basic)-250(Nomenclatur)18(e)]TJ +/F84 9.9626 Tf 14.944 -12.118 Td [(2.1)-1050(Basic)-250(Nomenclatur)18(e)]TJ 0 g 0 G [-339(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-2000(3)]TJ + [-2000(4)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.074 Td [(2.2)-1050(Library)-250(contents)]TJ + 0 -12.118 Td 
[(2.2)-1050(Library)-250(contents)]TJ 0 g 0 G [-440(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-2000(4)]TJ + [-2000(5)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.074 Td [(2.3)-1050(Application)-250(str)8(uctur)18(e)]TJ + 0 -12.118 Td [(2.3)-1011(Application)-250(str)8(uctur)18(e)]TJ 0 g 0 G - [-480(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-519(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-2000(6)]TJ + [-2000(7)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 22.914 -12.074 Td [(2.3.1)-1200(User)18(-de\002ned)-250(index)-250(mappings)]TJ + 22.914 -12.118 Td [(2.3.1)-1200(User)18(-de\002ned)-250(index)-250(mappings)]TJ 0 g 0 G [-325(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-2000(8)]TJ + [-2000(9)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -22.914 -12.074 Td [(2.4)-1050(Pr)18(ogramming)-250(model)]TJ + -22.914 -12.117 Td [(2.4)-1050(Pr)18(ogramming)-250(model)]TJ 0 g 0 G [-717(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-2000(8)]TJ + [-2000(9)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -22.116 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ +/F75 9.9626 Tf -14.944 -22.19 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ 0 g 0 G - [-19810(9)]TJ + [-19310(10)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.074 Td [(3.1)-1050(Descriptor)-250(data)-250(str)8(uctur)18(e)]TJ +/F84 9.9626 Tf 14.944 -12.118 Td 
[(3.1)-1050(Descriptor)-250(data)-250(str)8(uctur)18(e)]TJ 0 g 0 G [-369(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-2000(9)]TJ + [-1500(10)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 22.914 -12.074 Td [(3.1.1)-1200(Descriptor)-250(Methods)]TJ + 22.914 -12.117 Td [(3.1.1)-1200(Descriptor)-250(Methods)]TJ 0 g 0 G [-911(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(12)]TJ + [-1500(13)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.074 Td [(3.1.2)-1200(get)]TJ + 0 -12.118 Td [(3.1.2)-1200(get)]TJ ET q -1 0 0 1 183.79 542.523 cm +1 0 0 1 183.79 519.766 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 542.324 Td [(local)]TJ +/F84 9.9626 Tf 186.779 519.567 Td [(local)]TJ ET q -1 0 0 1 208.019 542.523 cm +1 0 0 1 208.019 519.766 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 211.008 542.324 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(r)18(ows)]TJ +/F84 9.9626 Tf 211.008 519.567 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(r)18(ows)]TJ 0 g 0 G [-471(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(12)]TJ + [-1500(13)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -73.255 -12.074 Td [(3.1.3)-1200(get)]TJ + -73.255 -12.118 Td [(3.1.3)-1200(get)]TJ ET q -1 0 0 1 183.79 530.449 cm +1 0 0 1 183.79 507.648 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 530.25 Td [(local)]TJ +/F84 9.9626 Tf 186.779 507.449 Td [(local)]TJ ET q -1 0 0 1 208.019 530.449 cm +1 0 0 1 208.019 507.648 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 211.008 530.25 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ +/F84 9.9626 Tf 211.008 507.449 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ 0 g 0 G 
[-673(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(12)]TJ + [-1500(13)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -73.255 -12.074 Td [(3.1.4)-1200(get)]TJ + -73.255 -12.118 Td [(3.1.4)-1200(get)]TJ ET q -1 0 0 1 183.79 518.375 cm +1 0 0 1 183.79 495.53 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 518.176 Td [(global)]TJ +/F84 9.9626 Tf 186.779 495.331 Td [(global)]TJ ET q -1 0 0 1 214.644 518.375 cm +1 0 0 1 214.644 495.53 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 217.633 518.176 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(r)18(ows)]TJ +/F84 9.9626 Tf 217.633 495.331 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(r)18(ows)]TJ 0 g 0 G [-641(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(12)]TJ + [-1500(13)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -79.88 -12.074 Td [(3.1.5)-1200(get)]TJ + -79.88 -12.118 Td [(3.1.5)-1200(get)]TJ ET q -1 0 0 1 183.79 506.301 cm +1 0 0 1 183.79 483.412 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 506.102 Td [(global)]TJ +/F84 9.9626 Tf 186.779 483.213 Td [(global)]TJ ET q -1 0 0 1 214.644 506.301 cm +1 0 0 1 214.644 483.412 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 217.633 506.102 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ +/F84 9.9626 Tf 217.633 483.213 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ 0 g 0 G [-843(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(13)]TJ + [-1500(14)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -79.88 -12.075 Td [(3.1.6)-1200(get)]TJ + -79.88 -12.118 Td [(3.1.6)-1200(get)]TJ ET q -1 0 0 1 183.79 494.227 cm +1 0 0 1 183.79 471.294 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 494.027 Td [(global)]TJ +/F84 9.9626 Tf 186.779 471.095 Td [(global)]TJ ET q -1 0 0 1 214.644 494.227 cm +1 0 0 1 214.644 471.294 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 217.633 
494.027 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ +/F84 9.9626 Tf 217.633 471.095 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ 0 g 0 G [-999(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(13)]TJ + [-1500(14)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -79.88 -12.074 Td [(3.1.7)-1200(get)]TJ + -79.88 -12.118 Td [(3.1.7)-1200(get)]TJ ET q -1 0 0 1 183.79 482.153 cm +1 0 0 1 183.79 459.176 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 481.953 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ +/F84 9.9626 Tf 186.779 458.977 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ 0 g 0 G [-852(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(13)]TJ + [-1500(14)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -49.026 -12.074 Td [(3.1.8)-1200(Clone)-250(\227)-250(clone)-250(curr)18(ent)-250(object)]TJ + -49.026 -12.118 Td [(3.1.8)-1200(Clone)-250(\227)-250(clone)-250(curr)18(ent)-250(object)]TJ 0 g 0 G [-763(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(14)]TJ + [-1500(15)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.074 Td [(3.1.9)-1200(CNV)-250(\227)-250(convert)-250(internal)-250(storage)-250(format)]TJ + 0 -12.118 Td [(3.1.9)-1200(CNV)-250(\227)-250(convert)-250(internal)-250(storage)-250(format)]TJ 0 g 0 G [-496(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(14)]TJ + [-1500(15)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.074 Td [(3.1.10)-700(psb)]TJ + 0 -12.118 Td [(3.1.10)]TJ 0.98 0 0 1 169.335 422.623 Tm [(psb)]TJ ET q -1 0 0 1 185.952 445.93 cm +1 0 0 1 185.339 422.822 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 188.941 445.731 Td [(cd)]TJ +/F84 9.9626 Tf 0.98 0 0 1 188.328 422.623 Tm [(cd)]TJ ET q -1 0 0 1 200.049 445.93 cm +1 0 0 1 199.226 422.822 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 203.038 445.731 Td 
[(get)]TJ +/F84 9.9626 Tf 0.98 0 0 1 202.215 422.623 Tm [(get)]TJ ET q -1 0 0 1 217.195 445.93 cm +1 0 0 1 216.1 422.822 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 220.184 445.731 Td [(lar)18(ge)]TJ +/F84 9.9626 Tf 0.98 0 0 1 219.089 422.623 Tm [(hash)]TJ ET q -1 0 0 1 242.729 445.93 cm +1 0 0 1 240.073 422.822 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 245.718 445.731 Td [(thr)18(eshold)-190(\227)-190(Get)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -76.084 -11.955 Td [(ping)-250(switch)]TJ +/F84 9.9626 Tf 0.98 0 0 1 243.061 422.623 Tm [(thr)18(eshold)-194(\227)-193(Get)-194(thr)18(eshold)-194(for)-194(index)-193(map-)]TJ 1 0 0 1 169.335 410.668 Tm [(ping)-250(switch)]TJ 0 g 0 G - [-819(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-849(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(14)]TJ + [-1500(15)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -31.881 -12.074 Td [(3.1.11)-700(psb)]TJ + -31.582 -12.118 Td [(3.1.11)]TJ 0.98 0 0 1 169.335 398.55 Tm [(psb)]TJ ET q -1 0 0 1 185.952 421.901 cm +1 0 0 1 185.339 398.749 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 188.941 421.702 Td [(cd)]TJ +/F84 9.9626 Tf 0.98 0 0 1 188.328 398.55 Tm [(cd)]TJ ET q -1 0 0 1 200.049 421.901 cm +1 0 0 1 199.226 398.749 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 203.038 421.702 Td [(set)]TJ +/F84 9.9626 Tf 0.98 0 0 1 202.215 398.55 Tm [(set)]TJ ET q -1 0 0 1 215.88 421.901 cm +1 0 0 1 214.812 398.749 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 218.869 421.702 Td [(lar)18(ge)]TJ +/F84 9.9626 Tf 0.98 0 0 1 217.8 398.55 Tm [(hash)]TJ ET q -1 0 0 1 241.414 421.901 cm +1 0 0 1 238.784 398.749 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 
BT -/F54 9.9626 Tf 244.403 421.702 Td [(thr)18(eshold)-190(\227)-190(Set)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -74.769 -11.955 Td [(ping)-250(switch)]TJ +/F84 9.9626 Tf 0.98 0 0 1 241.773 398.55 Tm [(thr)18(eshold)-234(\227)-234(Set)-234(thr)18(eshold)-234(for)-234(index)-234(map-)]TJ 1 0 0 1 169.335 386.595 Tm [(ping)-250(switch)]TJ 0 g 0 G - [-819(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-849(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(14)]TJ + [-1500(15)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -31.881 -12.074 Td [(3.1.12)-700(get)]TJ + -31.582 -12.118 Td [(3.1.12)-700(get)]TJ ET q -1 0 0 1 183.79 397.872 cm +1 0 0 1 183.79 374.676 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 397.673 Td [(p)]TJ +/F84 9.9626 Tf 186.779 374.477 Td [(p)]TJ ET q -1 0 0 1 193.364 397.872 cm +1 0 0 1 193.364 374.676 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 196.353 397.673 Td [(adjcncy)-250(\227)-250(Get)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ +/F84 9.9626 Tf 196.353 374.477 Td [(adjcncy)-250(\227)-250(Get)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ 0 g 0 G [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(15)]TJ + [-1500(16)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -58.6 -12.074 Td [(3.1.13)-700(set)]TJ + -58.6 -12.118 Td [(3.1.13)-700(set)]TJ ET q -1 0 0 1 182.475 385.798 cm +1 0 0 1 182.475 362.558 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 185.464 385.599 Td [(p)]TJ +/F84 9.9626 Tf 185.464 362.359 Td [(p)]TJ ET q -1 0 0 1 192.049 385.798 cm +1 0 0 1 192.049 362.558 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 195.038 385.599 Td 
[(adjcncy)-250(\227)-250(Set)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ +/F84 9.9626 Tf 195.038 362.359 Td [(adjcncy)-250(\227)-250(Set)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ 0 g 0 G [-272(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(15)]TJ + [-1500(16)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -57.285 -12.074 Td [(3.1.14)-700(fnd)]TJ + -57.285 -12.118 Td [(3.1.14)-700(fnd)]TJ ET q -1 0 0 1 185.434 373.724 cm +1 0 0 1 185.434 350.44 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 188.423 373.524 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(pr)18(ocess)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ +/F84 9.9626 Tf 188.423 350.241 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(pr)18(ocess)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ 0 g 0 G [-361(.)]TJ 0 g 0 G - [-1500(15)]TJ + [-1500(16)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -50.67 -12.074 Td [(3.1.15)-700(Named)-250(Constants)]TJ + -50.67 -12.118 Td [(3.1.15)-700(Named)-250(Constants)]TJ 0 g 0 G [-277(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(16)]TJ + [-1500(17)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -22.914 -12.074 Td [(3.2)-1050(Sparse)-250(Matrix)-250(class)]TJ + -22.914 -12.118 Td [(3.2)-1050(Sparse)-250(Matrix)-250(class)]TJ 0 g 0 G [-719(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(16)]TJ + [-1500(17)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 22.914 -12.074 Td [(3.2.1)-1200(Sparse)-250(Matrix)-250(Methods)]TJ + 22.914 -12.118 Td [(3.2.1)-1200(Sparse)-250(Matrix)-250(Methods)]TJ 0 g 0 G [-938(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(17)]TJ + [-1500(18)]TJ 0 g 0 G 0 0 1 rg 0 0 1 
RG - 0 -12.074 Td [(3.2.2)-1200(get)]TJ + 0 -12.118 Td [(3.2.2)-1200(get)]TJ ET q -1 0 0 1 183.79 325.427 cm +1 0 0 1 183.79 301.968 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 325.228 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +/F84 9.9626 Tf 186.779 301.769 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-286(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(17)]TJ + [-1500(18)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -49.026 -12.074 Td [(3.2.3)-1200(get)]TJ + -49.026 -12.118 Td [(3.2.3)-1200(get)]TJ ET q -1 0 0 1 183.79 313.353 cm +1 0 0 1 183.79 289.851 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 313.154 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +/F84 9.9626 Tf 186.779 289.651 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-670(.)]TJ 0 g 0 G - [-1500(18)]TJ + [-1500(19)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -49.026 -12.074 Td [(3.2.4)-1200(get)]TJ + -49.026 -12.118 Td [(3.2.4)]TJ 1.02 0 0 1 169.634 277.533 Tm [(get)]TJ ET q -1 0 0 1 183.79 301.279 cm +1 0 0 1 184.062 277.733 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 301.08 Td [(nnzer)18(os)-190(\227)-190(Get)-190(number)-190(of)-190(nonzer)18(o)-190(elements)-190(in)-190(a)-190(sparse)]TJ -17.145 -11.955 Td [(matrix)]TJ +/F84 9.9626 Tf 1.02 0 0 1 187.05 277.533 Tm [(nnzer)18(os)-424(\227)-423(Get)-423(number)-424(of)-423(nonzer)18(o)-423(elements)-424(in)-423(a)]TJ 1 0 0 1 169.634 265.578 Tm [(sparse)-250(matrix)]TJ 0 g 0 G - [-839(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 
[-766(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(18)]TJ + [-1500(19)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -31.881 -12.074 Td [(3.2.5)-1200(get)]TJ + -31.881 -12.118 Td [(3.2.5)]TJ 0.983 0 0 1 169.634 253.46 Tm [(get)]TJ ET q -1 0 0 1 183.79 277.25 cm +1 0 0 1 183.56 253.659 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 277.051 Td [(size)-354(\227)-354(Get)-355(maximum)-354(number)-354(of)-354(nonzer)18(o)-354(elements)]TJ -17.145 -11.955 Td [(in)-250(a)-250(sparse)-250(matrix)]TJ +/F84 9.9626 Tf 0.983 0 0 1 186.549 253.46 Tm [(size)-254(\227)-254(Get)-254(maximum)-254(number)-254(of)-254(nonzer)19(o)-254(elements)-254(in)]TJ 1 0 0 1 169.634 241.505 Tm [(a)-250(sparse)-250(matrix)]TJ 0 g 0 G - [-393(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-766(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(18)]TJ + [-1500(19)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -31.881 -12.074 Td [(3.2.6)-1200(sizeof)-206(\227)-205(Get)-206(memory)-205(occupation)-206(in)-205(bytes)-206(of)-205(a)-206(sparse)-205(ma-)]TJ 31.881 -11.956 Td [(trix)]TJ + -31.881 -12.118 Td [(3.2.6)]TJ 0.98 0 0 1 169.634 229.387 Tm [(sizeof)-239(\227)-239(Get)-239(memory)-239(occupation)-239(in)-239(bytes)-239(of)-239(a)-239(sparse)-239(matrix)]TJ 0 g 0 G - [-722(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(19)]TJ + 1 0 0 1 433.644 229.387 Tm [(20)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -31.881 -12.074 Td [(3.2.7)-1200(get)]TJ + 
-295.891 -12.118 Td [(3.2.7)-1200(get)]TJ ET q -1 0 0 1 183.79 229.192 cm +1 0 0 1 183.79 217.468 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 228.992 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ +/F84 9.9626 Tf 186.779 217.269 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ 0 g 0 G [-278(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(19)]TJ + [-1500(20)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -49.026 -12.074 Td [(3.2.8)-1200(is)]TJ + -49.026 -12.118 Td [(3.2.8)-1200(is)]TJ ET q -1 0 0 1 177.355 217.117 cm +1 0 0 1 177.355 205.35 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 180.344 216.918 Td [(bld,)-250(is)]TJ +/F84 9.9626 Tf 180.344 205.151 Td [(bld,)-250(is)]TJ ET q -1 0 0 1 207.541 217.117 cm +1 0 0 1 207.541 205.35 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 210.53 216.918 Td [(upd,)-250(is)]TJ +/F84 9.9626 Tf 210.53 205.151 Td [(upd,)-250(is)]TJ ET q -1 0 0 1 241.314 217.117 cm +1 0 0 1 241.314 205.35 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 244.303 216.918 Td [(asb)-250(\227)-250(Status)-250(check)]TJ +/F84 9.9626 Tf 244.303 205.151 Td [(asb)-250(\227)-250(Status)-250(check)]TJ 0 g 0 G [-569(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(19)]TJ + [-1500(20)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -106.55 -12.074 Td [(3.2.9)-1200(is)]TJ + -106.55 -12.118 Td [(3.2.9)-1200(is)]TJ ET q -1 0 0 1 177.355 205.043 cm +1 0 0 1 177.355 193.233 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 180.344 204.844 Td [(lower)74(,)-250(is)]TJ +/F84 9.9626 Tf 180.344 193.033 Td [(lower)74(,)-250(is)]TJ ET q -1 0 0 1 217.663 205.043 cm +1 0 0 1 217.663 193.233 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 220.652 204.844 Td [(upper)74(,)-250(is)]TJ +/F84 9.9626 Tf 220.652 193.033 Td [(upper)74(,)-250(is)]TJ ET q -1 0 0 1 259.306 205.043 cm +1 0 0 1 259.306 
193.233 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 262.295 204.844 Td [(triangle,)-250(is)]TJ +/F84 9.9626 Tf 262.295 193.033 Td [(triangle,)-250(is)]TJ ET q -1 0 0 1 309.069 205.043 cm +1 0 0 1 309.069 193.233 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 312.058 204.844 Td [(unit)-250(\227)-250(Format)-250(check)]TJ +/F84 9.9626 Tf 312.058 193.033 Td [(unit)-250(\227)-250(Format)-250(check)]TJ 0 g 0 G [-441(.)-500(.)]TJ 0 g 0 G - [-1500(20)]TJ + [-1500(21)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -174.305 -12.074 Td [(3.2.10)-700(cscnv)-250(\227)-250(Convert)-250(to)-250(a)-250(dif)18(fer)18(ent)-250(storage)-250(format)]TJ + -174.305 -12.118 Td [(3.2.10)-700(cscnv)-250(\227)-250(Convert)-250(to)-250(a)-250(dif)18(fer)18(ent)-250(storage)-250(format)]TJ 0 g 0 G [-815(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(20)]TJ + [-1500(21)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.074 Td [(3.2.11)-700(csclip)-250(\227)-250(Reduce)-250(to)-250(a)-250(submatrix)]TJ + 0 -12.118 Td [(3.2.11)-700(csclip)-250(\227)-250(Reduce)-250(to)-250(a)-250(submatrix)]TJ 0 g 0 G [-358(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(21)]TJ + [-1500(22)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.074 Td [(3.2.12)-700(clean)]TJ + 0 -12.118 Td [(3.2.12)-700(clean)]TJ ET q -1 0 0 1 193.106 168.821 cm +1 0 0 1 193.106 156.879 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 196.094 168.622 Td [(zer)18(os)-250(\227)-250(Eliminate)-250(zer)18(o)-250(coef)18(\002cients)]TJ +/F84 9.9626 Tf 196.094 156.679 Td [(zer)18(os)-250(\227)-250(Eliminate)-250(zer)18(o)-250(coef)18(\002cients)]TJ 0 g 0 G [-677(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(21)]TJ + [-1500(22)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -58.341 -12.074 Td [(3.2.13)-700(get)]TJ + -58.341 -12.117 Td [(3.2.13)-700(get)]TJ ET q -1 0 0 1 183.79 156.747 cm +1 0 0 1 183.79 144.761 cm []0 d 0 J 0.398 w 0 0 m 
2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 156.548 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ +/F84 9.9626 Tf 186.779 144.562 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ 0 g 0 G [-870(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(22)]TJ + [-1500(23)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -49.026 -12.074 Td [(3.2.14)-700(clip)]TJ + -49.026 -12.118 Td [(3.2.14)-700(clip)]TJ ET q -1 0 0 1 186.44 144.673 cm +1 0 0 1 186.44 132.643 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 189.429 144.474 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ +/F84 9.9626 Tf 189.429 132.444 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ 0 g 0 G [-309(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(22)]TJ + [-1500(23)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -51.676 -12.074 Td [(3.2.15)-700(tril)-250(\227)-250(Return)-250(the)-250(lower)-250(triangle)]TJ + -51.676 -12.118 Td [(3.2.15)-700(tril)-250(\227)-250(Return)-250(the)-250(lower)-250(triangle)]TJ 0 g 0 G [-292(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(22)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -12.074 Td [(3.2.16)-700(triu)-250(\227)-250(Return)-250(the)-250(upper)-250(triangle)]TJ -0 g 0 G - [-596(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(23)]TJ 0 g 0 G @@ -1120,25278 +1337,31960 @@ ET endstream endobj -404 0 obj -<< -/Type /ObjStm -/N 100 -/First 877 -/Length 7532 ->> -stream -403 0 407 48 408 127 411 175 412 257 415 305 416 361 419 409 420 452 423 494 -424 540 427 587 428 662 431 709 432 797 435 844 436 917 439 964 440 1038 443 1085 -444 1151 447 1198 448 1252 451 1299 452 1380 455 1427 456 1483 459 1530 460 1581 463 1629 -464 1675 467 1723 468 1773 471 1821 472 1871 475 1919 476 1984 479 2032 480 
2097 483 2145 -484 2205 487 2253 488 2298 491 2346 492 2394 495 2436 496 2467 499 2514 500 2594 503 2641 -504 2729 507 2776 508 2863 511 2910 512 3013 515 3055 516 3081 519 3128 520 3231 523 3278 -524 3377 527 3424 528 3525 531 3572 532 3673 535 3720 536 3821 539 3868 540 3971 543 4014 -544 4055 547 4103 548 4160 551 4208 552 4262 555 4310 556 4375 559 4423 560 4500 563 4548 -564 4607 567 4655 568 4706 571 4749 572 4784 575 4832 576 4904 577 4949 579 5078 582 5184 -583 5240 3 5296 580 5350 590 5479 592 5593 589 5650 636 5717 593 6211 594 6357 595 6503 -% 403 0 obj -<< /S /GoTo /D (subsection.6.24) >> -% 407 0 obj -(6.24 psb\137get\137overlap \204 Extract list of overlap elements) -% 408 0 obj -<< /S /GoTo /D (subsection.6.25) >> -% 411 0 obj -(6.25 psb\137sp\137getrow \204 Extract row\(s\) from a sparse matrix) -% 412 0 obj -<< /S /GoTo /D (subsection.6.26) >> -% 415 0 obj -(6.26 psb\137sizeof \204 Memory occupation) -% 416 0 obj -<< /S /GoTo /D (subsection.6.27) >> -% 419 0 obj -(6.27 Sorting utilities \204 ) -% 420 0 obj -<< /S /GoTo /D (section.7) >> -% 423 0 obj -(7 Parallel environment routines) -% 424 0 obj -<< /S /GoTo /D (subsection.7.1) >> -% 427 0 obj -(7.1 psb\137init \204 Initializes PSBLAS parallel environment) -% 428 0 obj -<< /S /GoTo /D (subsection.7.2) >> -% 431 0 obj -(7.2 psb\137info \204 Return information about PSBLAS parallel environment) -% 432 0 obj -<< /S /GoTo /D (subsection.7.3) >> -% 435 0 obj -(7.3 psb\137exit \204 Exit from PSBLAS parallel environment) -% 436 0 obj -<< /S /GoTo /D (subsection.7.4) >> -% 439 0 obj -(7.4 psb\137get\137mpi\137comm \204 Get the MPI communicator) -% 440 0 obj -<< /S /GoTo /D (subsection.7.5) >> -% 443 0 obj -(7.5 psb\137get\137mpi\137rank \204 Get the MPI rank) -% 444 0 obj -<< /S /GoTo /D (subsection.7.6) >> -% 447 0 obj -(7.6 psb\137wtime \204 Wall clock timing) -% 448 0 obj -<< /S /GoTo /D (subsection.7.7) >> -% 451 0 obj -(7.7 psb\137barrier \204 Sinchronization point parallel environment) -% 452 0 obj 
-<< /S /GoTo /D (subsection.7.8) >> -% 455 0 obj -(7.8 psb\137abort \204 Abort a computation) -% 456 0 obj -<< /S /GoTo /D (subsection.7.9) >> -% 459 0 obj -(7.9 psb\137bcast \204 Broadcast data) -% 460 0 obj -<< /S /GoTo /D (subsection.7.10) >> -% 463 0 obj -(7.10 psb\137sum \204 Global sum) -% 464 0 obj -<< /S /GoTo /D (subsection.7.11) >> -% 467 0 obj -(7.11 psb\137max \204 Global maximum) -% 468 0 obj -<< /S /GoTo /D (subsection.7.12) >> -% 471 0 obj -(7.12 psb\137min \204 Global minimum) -% 472 0 obj -<< /S /GoTo /D (subsection.7.13) >> -% 475 0 obj -(7.13 psb\137amx \204 Global maximum absolute value) -% 476 0 obj -<< /S /GoTo /D (subsection.7.14) >> -% 479 0 obj -(7.14 psb\137amn \204 Global minimum absolute value) -% 480 0 obj -<< /S /GoTo /D (subsection.7.15) >> -% 483 0 obj -(7.15 psb\137nrm2 \204 Global 2-norm reduction) -% 484 0 obj -<< /S /GoTo /D (subsection.7.16) >> -% 487 0 obj -(7.16 psb\137snd \204 Send data) -% 488 0 obj -<< /S /GoTo /D (subsection.7.17) >> -% 491 0 obj -(7.17 psb\137rcv \204 Receive data) -% 492 0 obj -<< /S /GoTo /D (section.8) >> -% 495 0 obj -(8 Error handling) -% 496 0 obj -<< /S /GoTo /D (subsection.8.1) >> -% 499 0 obj -(8.1 psb\137errpush \204 Pushes an error code onto the error stack) -% 500 0 obj -<< /S /GoTo /D (subsection.8.2) >> -% 503 0 obj -(8.2 psb\137error \204 Prints the error stack content and aborts execution) -% 504 0 obj -<< /S /GoTo /D (subsection.8.3) >> -% 507 0 obj -(8.3 psb\137set\137errverbosity \204 Sets the verbosity of error messages) -% 508 0 obj -<< /S /GoTo /D (subsection.8.4) >> -% 511 0 obj -(8.4 psb\137set\137erraction \204 Set the type of action to be taken upon error condition) -% 512 0 obj -<< /S /GoTo /D (section.9) >> -% 515 0 obj -(9 Utilities) -% 516 0 obj -<< /S /GoTo /D (subsection.9.1) >> -% 519 0 obj -(9.1 \040hb\137read \204 Read a sparse matrix from a file in the Harwell\205Boeing format) -% 520 0 obj -<< /S /GoTo /D (subsection.9.2) >> -% 523 0 obj -(9.2 hb\137write \204 Write a 
sparse matrix to a file in the Harwell\205Boeing format) -% 524 0 obj -<< /S /GoTo /D (subsection.9.3) >> -% 527 0 obj -(9.3 mm\137mat\137read \204 Read a sparse matrix from a file in the MatrixMarket format) -% 528 0 obj -<< /S /GoTo /D (subsection.9.4) >> -% 531 0 obj -(9.4 mm\137array\137read \204 Read a dense array from a file in the MatrixMarket format) -% 532 0 obj -<< /S /GoTo /D (subsection.9.5) >> -% 535 0 obj -(9.5 mm\137mat\137write \204 Write a sparse matrix to a file in the MatrixMarket format) -% 536 0 obj -<< /S /GoTo /D (subsection.9.6) >> -% 539 0 obj -(9.6 mm\137array\137write \204 Write a dense array from a file in the MatrixMarket format) -% 540 0 obj -<< /S /GoTo /D (section.10) >> -% 543 0 obj -(10 Preconditioner routines) -% 544 0 obj -<< /S /GoTo /D (subsection.10.1) >> -% 547 0 obj -(10.1 init \204 Initialize a preconditioner) -% 548 0 obj -<< /S /GoTo /D (subsection.10.2) >> -% 551 0 obj -(10.2 build \204 Builds a preconditioner) -% 552 0 obj -<< /S /GoTo /D (subsection.10.3) >> -% 555 0 obj -(10.3 apply \204 Preconditioner application routine) -% 556 0 obj -<< /S /GoTo /D (subsection.10.4) >> -% 559 0 obj -(10.4 descr \204 Prints a description of current preconditioner) -% 560 0 obj -<< /S /GoTo /D (subsection.10.5) >> -% 563 0 obj -(10.5 clone \204 clone current preconditioner) -% 564 0 obj -<< /S /GoTo /D (subsection.10.6) >> -% 567 0 obj -(10.6 free \204 Free a preconditioner) -% 568 0 obj -<< /S /GoTo /D (section.11) >> -% 571 0 obj -(11 Iterative Methods) -% 572 0 obj -<< /S /GoTo /D (subsection.11.1) >> -% 575 0 obj -(11.1 psb\137krylov \040\204 Krylov Methods Driver Routine) -% 576 0 obj -<< /S /GoTo /D [577 0 R /Fit] >> -% 577 0 obj -<< -/Type /Page -/Contents 581 0 R -/Resources 580 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Group 579 0 R ->> -% 579 0 obj -%PTEX Group needed for transparent pngs -<> -% 582 0 obj -<< -/D [577 0 R /XYZ 98.895 753.953 null] ->> -% 583 0 obj -<< -/D [577 0 R /XYZ 99.895 716.092 null] ->> 
-% 3 0 obj -<< -/D [577 0 R /XYZ 99.895 716.092 null] ->> -% 580 0 obj -<< -/Font << /F51 584 0 R /F52 585 0 R /F54 586 0 R >> -/XObject << /Im1 578 0 R >> -/ProcSet [ /PDF /Text /ImageC ] ->> -% 590 0 obj -<< -/Type /Page -/Contents 591 0 R -/Resources 589 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R ->> -% 592 0 obj -<< -/D [590 0 R /XYZ 149.705 753.953 null] ->> -% 589 0 obj -<< -/Font << /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 636 0 obj -<< -/Type /Page -/Contents 637 0 R -/Resources 635 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 593 0 R 594 0 R 595 0 R 596 0 R 597 0 R 598 0 R 599 0 R 600 0 R 601 0 R 602 0 R 603 0 R 604 0 R 605 0 R 606 0 R 607 0 R 608 0 R 609 0 R 610 0 R 611 0 R 640 0 R 612 0 R 641 0 R 613 0 R 614 0 R 615 0 R 616 0 R 617 0 R 618 0 R 619 0 R 620 0 R 621 0 R 642 0 R 622 0 R 643 0 R 623 0 R 644 0 R 624 0 R 625 0 R 626 0 R 627 0 R 628 0 R 629 0 R 630 0 R 631 0 R 632 0 R 633 0 R ] ->> -% 593 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 681.973 173.389 691.303] -/A << /S /GoTo /D (section.1) >> ->> -% 594 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 659.857 196.921 669.187] -/A << /S /GoTo /D (section.2) >> ->> -% 595 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 647.803 227.028 657.212] -/A << /S /GoTo /D (subsection.2.1) >> ->> - -endstream -endobj -695 0 obj +794 0 obj << -/Length 15896 +/Length 15859 >> stream 0 g 0 G 0 g 0 G 0 0 1 rg 0 0 1 RG BT -/F54 9.9626 Tf 188.563 706.129 Td [(3.2.17)-700(psb)]TJ +/F84 9.9626 Tf 188.563 706.129 Td [(3.2.16)-700(triu)-250(\227)-250(Return)-250(the)-250(upper)-250(triangle)]TJ +0 g 0 G + [-596(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(24)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -12.08 Td [(3.2.17)-670(psb)]TJ ET q -1 0 0 1 236.762 706.328 cm +1 0 0 1 236.463 694.248 cm []0 d 0 J 0.398 w 0 0 
m 2.989 0 l S Q BT -/F54 9.9626 Tf 239.751 706.129 Td [(set)]TJ +/F84 9.9626 Tf 239.452 694.049 Td [(set)]TJ ET q -1 0 0 1 252.592 706.328 cm +1 0 0 1 252.293 694.248 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 255.581 706.129 Td [(mat)]TJ +/F84 9.9626 Tf 255.282 694.049 Td [(mat)]TJ ET q -1 0 0 1 273.205 706.328 cm +1 0 0 1 272.906 694.248 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 276.194 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ +/F84 9.9626 Tf 275.895 694.049 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ 0 g 0 G - [-829(.)-500(.)-500(.)-500(.)]TJ + [-859(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(24)]TJ + [-1500(25)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -87.631 -12.08 Td [(3.2.18)-700(clone)-250(\227)-250(Clone)-250(curr)18(ent)-250(object)]TJ + -87.332 -12.08 Td [(3.2.18)-700(clone)-250(\227)-250(Clone)-250(curr)18(ent)-250(object)]TJ 0 g 0 G [-763(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(24)]TJ + [-1500(25)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.08 Td [(3.2.19)-700(Named)-250(Constants)]TJ + 0 -12.079 Td [(3.2.19)-700(Named)-250(Constants)]TJ 0 g 0 G [-277(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(24)]TJ + [-1500(25)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -22.914 -12.079 Td [(3.3)-1050(Dense)-250(V)111(ector)-250(Data)-250(Str)8(uctur)18(e)]TJ + -22.914 -12.08 Td [(3.3)-1050(Dense)-250(V)111(ector)-250(Data)-250(Str)8(uctur)18(e)]TJ 0 g 0 G [-491(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(24)]TJ + [-1500(25)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 22.914 -12.08 Td [(3.3.1)-1200(V)111(ector)-250(Methods)]TJ + 22.914 -12.08 Td 
[(3.3.1)-1164(V)111(ector)-250(Methods)]TJ 0 g 0 G - [-535(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-571(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(25)]TJ + [-1500(26)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.08 Td [(3.3.2)-1200(get)]TJ + 0 -12.079 Td [(3.3.2)-1200(get)]TJ ET q -1 0 0 1 234.6 645.93 cm +1 0 0 1 234.6 633.85 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 237.589 645.73 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(dense)-250(vector)]TJ +/F84 9.9626 Tf 237.589 633.651 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(dense)-250(vector)]TJ 0 g 0 G [-690(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(25)]TJ + [-1500(26)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -49.026 -12.079 Td [(3.3.3)-1200(sizeof)-226(\227)-227(Get)-226(memory)-227(occupation)-226(in)-226(bytes)-227(of)-226(a)-227(dense)-226(vector)]TJ + -49.026 -12.08 Td [(3.3.3)]TJ 0.99 0 0 1 220.443 621.571 Tm [(sizeof)-253(\227)-252(Get)-253(memory)-252(occupation)-253(in)-252(bytes)-253(of)-252(a)-253(dense)-252(vector)]TJ 0 g 0 G 0 g 0 G - [-550(25)]TJ + 1 0 0 1 484.453 621.571 Tm [(26)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.08 Td [(3.3.4)-1200(set)-250(\227)-250(Set)-250(contents)-250(of)-250(the)-250(vector)]TJ + -295.89 -12.08 Td [(3.3.4)-1200(set)-250(\227)-250(Set)-250(contents)-250(of)-250(the)-250(vector)]TJ 0 g 0 G [-461(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(26)]TJ + [-1500(27)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - 0 -12.08 Td [(3.3.5)-1200(get)]TJ + 0 -12.079 Td [(3.3.5)-1200(get)]TJ ET q -1 0 0 1 234.6 609.691 cm +1 0 0 1 234.6 597.611 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT 
-/F54 9.9626 Tf 237.589 609.491 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ +/F84 9.9626 Tf 237.589 597.412 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ 0 g 0 G [-770(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(27)]TJ + [-1500(28)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -49.026 -12.079 Td [(3.3.6)-1200(clone)-250(\227)-250(Clone)-250(curr)18(ent)-250(object)]TJ + -49.026 -12.08 Td [(3.3.6)-1200(clone)-250(\227)-250(Clone)-250(curr)18(ent)-250(object)]TJ 0 g 0 G [-763(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(27)]TJ + [-1500(28)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -22.914 -12.08 Td [(3.4)-1050(Pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ 0 g 0 G [-741(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(27)]TJ + [-1500(28)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG 0 -12.08 Td [(3.5)-1050(Heap)-250(data)-250(str)8(uctur)18(e)]TJ 0 g 0 G [-382(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(28)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -22.125 Td [(4)-1000(Computational)-250(routines)]TJ + [-1500(29)]TJ 0 g 0 G - [-21085(29)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.08 Td [(4.1)-1050(psb)]TJ -ET -q -1 0 0 1 204.881 539.246 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 207.87 539.047 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ -0 g 0 G - [-539(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(30)]TJ +/F75 9.9626 Tf -14.944 -22.125 Td [(4)-1000(Computational)-250(routines)]TJ 0 g 0 G + [-21085(30)]TJ 0 0 1 rg 0 0 1 RG - -42.221 
-12.08 Td [(4.2)-1050(psb)]TJ +/F84 9.9626 Tf 14.944 -12.08 Td [(4.1)-1020(psb)]TJ ET q -1 0 0 1 204.881 527.167 cm +1 0 0 1 204.583 527.167 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 526.967 Td [(gedot)-250(\227)-250(Dot)-250(Pr)18(oduct)]TJ +/F84 9.9626 Tf 207.571 526.967 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ 0 g 0 G - [-837(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-569(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(32)]TJ + [-1500(31)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.079 Td [(4.3)-1050(psb)]TJ + -41.922 -12.079 Td [(4.2)-1020(psb)]TJ ET q -1 0 0 1 204.881 515.087 cm +1 0 0 1 204.583 515.087 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 514.888 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Pr)18(oduct)]TJ +/F84 9.9626 Tf 207.571 514.888 Td [(gedot)-250(\227)-250(Dot)-250(Pr)18(oduct)]TJ 0 g 0 G - [-793(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-867(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(34)]TJ + [-1500(33)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(4.4)-1050(psb)]TJ + -41.922 -12.08 Td [(4.3)-1020(psb)]TJ ET q -1 0 0 1 204.881 503.007 cm +1 0 0 1 204.583 503.007 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 502.808 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ +/F84 9.9626 Tf 207.571 502.808 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Pr)18(oduct)]TJ 0 g 0 G - [-868(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 
[-823(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(36)]TJ + [-1500(35)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(4.5)-1050(psb)]TJ + -41.922 -12.08 Td [(4.4)-1020(psb)]TJ ET q -1 0 0 1 204.881 490.928 cm +1 0 0 1 204.583 490.928 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 490.728 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ +/F84 9.9626 Tf 207.571 490.728 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G - [-600(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-898(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(38)]TJ + [-1500(37)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.079 Td [(4.6)-1050(psb)]TJ + -41.922 -12.079 Td [(4.5)-1020(psb)]TJ ET q -1 0 0 1 204.881 478.848 cm +1 0 0 1 204.583 478.848 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 478.649 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ +/F84 9.9626 Tf 207.571 478.649 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ 0 g 0 G - [-438(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-630(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(39)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(4.7)-1050(psb)]TJ + -41.922 -12.08 Td [(4.6)-1020(psb)]TJ ET q -1 0 0 1 204.881 466.768 cm +1 0 0 1 204.583 466.768 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 466.569 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ +/F84 9.9626 Tf 207.571 466.569 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G - [-605(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 
[-468(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(41)]TJ + [-1500(40)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(4.8)-1050(psb)]TJ + -41.922 -12.08 Td [(4.7)-1020(psb)]TJ ET q -1 0 0 1 204.881 454.688 cm +1 0 0 1 204.583 454.688 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 454.489 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ +/F84 9.9626 Tf 207.571 454.489 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G - [-438(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-635(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(43)]TJ + [-1500(42)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(4.9)-1050(psb)]TJ + -41.922 -12.08 Td [(4.8)-1020(psb)]TJ ET q -1 0 0 1 204.881 442.609 cm +1 0 0 1 204.583 442.609 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 442.409 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ +/F84 9.9626 Tf 207.571 442.409 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G - [-655(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-468(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(45)]TJ + [-1500(44)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.079 Td [(4.10)-550(psb)]TJ + -41.922 -12.079 Td [(4.9)-1020(psb)]TJ ET q -1 0 0 1 204.881 430.529 cm +1 0 0 1 204.583 430.529 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 430.33 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ +/F84 9.9626 Tf 207.571 430.33 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G - 
[-841(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-685(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(46)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(4.11)-550(psb)]TJ + -41.922 -12.08 Td [(4.10)-520(psb)]TJ ET q -1 0 0 1 204.881 418.449 cm +1 0 0 1 204.583 418.449 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 418.25 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ +/F84 9.9626 Tf 207.571 418.25 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ 0 g 0 G - [-604(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-871(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(47)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(4.12)-550(psb)]TJ + -41.922 -12.08 Td [(4.11)-520(psb)]TJ ET q -1 0 0 1 204.881 406.37 cm +1 0 0 1 204.583 406.37 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 406.17 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct)]TJ +/F84 9.9626 Tf 207.571 406.17 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ 0 g 0 G - [-491(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-634(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(48)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.079 Td [(4.13)-550(psb)]TJ + -41.922 -12.079 Td [(4.12)-520(psb)]TJ ET q -1 0 0 1 204.881 394.29 cm +1 0 0 1 204.583 394.29 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 394.091 Td [(spsm)-250(\227)-250(T)90(riangular)-250(System)-250(Solve)]TJ +/F84 9.9626 Tf 207.571 394.091 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct)]TJ 0 g 0 G - 
[-945(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-521(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(50)]TJ + [-1500(49)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(4.14)-550(psb)]TJ + -41.922 -12.08 Td [(4.13)-520(psb)]TJ ET q -1 0 0 1 204.881 382.21 cm +1 0 0 1 204.583 382.21 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 382.011 Td [(gemlt)-250(\227)-250(Entrywise)-250(Pr)18(oduct)]TJ +/F84 9.9626 Tf 207.571 382.011 Td [(spsm)-250(\227)-250(T)90(riangular)-250(System)-250(Solve)]TJ 0 g 0 G - [-968(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-975(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(53)]TJ + [-1500(51)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(4.15)-550(psb)]TJ + -41.922 -12.08 Td [(4.14)-520(psb)]TJ ET q -1 0 0 1 204.881 370.131 cm +1 0 0 1 204.583 370.131 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 369.931 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ +/F84 9.9626 Tf 207.571 369.931 Td [(gemlt)-250(\227)-250(Entrywise)-250(Pr)18(oduct)]TJ 0 g 0 G - [-748(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-998(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(55)]TJ + [-1500(54)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.079 Td [(4.16)-550(psb)]TJ + -41.922 -12.079 Td [(4.15)-520(psb)]TJ ET q -1 0 0 1 204.881 358.051 cm +1 0 0 1 204.583 358.051 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 357.852 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ +/F84 9.9626 Tf 207.571 357.852 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ 0 g 0 G - 
[-340(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-778(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(57)]TJ + [-1500(56)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -57.165 -22.126 Td [(5)-1000(Communication)-250(routines)]TJ -0 g 0 G - [-20585(58)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.079 Td [(5.1)-1050(psb)]TJ + -41.922 -12.08 Td [(4.16)-520(psb)]TJ ET q -1 0 0 1 204.881 323.846 cm +1 0 0 1 204.583 345.971 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 323.647 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ +/F84 9.9626 Tf 207.571 345.772 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ 0 g 0 G - [-888(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-370(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(59)]TJ + [-1500(58)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf -56.866 -22.125 Td [(5)-1000(Communication)-250(routines)]TJ 0 g 0 G + [-20585(59)]TJ 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(5.2)-1050(psb)]TJ +/F84 9.9626 Tf 14.944 -12.08 Td [(5.1)-1020(psb)]TJ ET q -1 0 0 1 204.881 311.766 cm +1 0 0 1 204.583 311.766 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 311.567 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ +/F84 9.9626 Tf 207.571 311.567 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ 0 g 0 G - [-553(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-918(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(62)]TJ + [-1500(60)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td 
[(5.3)-1050(psb)]TJ + -41.922 -12.08 Td [(5.2)-1020(psb)]TJ ET q -1 0 0 1 204.881 299.686 cm +1 0 0 1 204.583 299.686 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 299.487 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ +/F84 9.9626 Tf 207.571 299.487 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ 0 g 0 G - [-973(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-583(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(66)]TJ + [-1500(63)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(5.4)-1050(psb)]TJ + -41.922 -12.08 Td [(5.3)-1020(psb)]TJ ET q -1 0 0 1 204.881 287.607 cm +1 0 0 1 204.583 287.607 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 287.407 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ +/F84 9.9626 Tf 207.571 287.407 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ 0 g 0 G - [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-253(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(68)]TJ + [-1500(67)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -57.165 -22.125 Td [(6)-1000(Data)-250(management)-250(routines)]TJ -0 g 0 G - [-19668(70)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.08 Td [(6.1)-1050(psb)]TJ + -41.922 -12.079 Td [(5.4)-1020(psb)]TJ ET q -1 0 0 1 204.881 253.402 cm +1 0 0 1 204.583 275.527 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 253.202 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ +/F84 9.9626 Tf 207.571 275.328 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ 0 g 0 G - [-363(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 
[-997(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(70)]TJ + [-1500(69)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf -56.866 -22.126 Td [(6)-1000(Data)-250(management)-250(routines)]TJ 0 g 0 G + [-19668(71)]TJ 0 0 1 rg 0 0 1 RG - -42.221 -12.079 Td [(6.2)-1050(psb)]TJ +/F84 9.9626 Tf 14.944 -12.079 Td [(6.1)-1020(psb)]TJ ET q -1 0 0 1 204.881 241.322 cm +1 0 0 1 204.583 241.322 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 241.123 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(r)18(outine)]TJ +/F84 9.9626 Tf 207.571 241.123 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G - [-261(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-393(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(74)]TJ + [-1500(71)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(6.3)-1050(psb)]TJ + -41.922 -12.08 Td [(6.2)-1020(psb)]TJ ET q -1 0 0 1 204.881 229.242 cm +1 0 0 1 204.583 229.242 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 229.043 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(r)18(outine)]TJ +/F84 9.9626 Tf 207.571 229.043 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(r)18(outine)]TJ 0 g 0 G - [-718(.)-500(.)-500(.)]TJ + [-291(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(76)]TJ + [-1500(75)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(6.4)-1050(psb)]TJ + -41.922 -12.08 Td [(6.3)-1020(psb)]TJ ET q -1 0 0 1 204.881 217.163 cm +1 0 0 1 204.583 217.163 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 216.963 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ +/F84 9.9626 Tf 207.571 216.963 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(r)18(outine)]TJ 0 g 0 G - [-873(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-748(.)-500(.)-500(.)]TJ 0 g 0 G 
[-1500(77)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.079 Td [(6.5)-1050(psb)]TJ + -41.922 -12.079 Td [(6.4)-1020(psb)]TJ ET q -1 0 0 1 204.881 205.083 cm +1 0 0 1 204.583 205.083 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 204.884 Td [(cdfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(communication)-250(descriptor)]TJ +/F84 9.9626 Tf 207.571 204.884 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G - [-791(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-903(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(78)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(6.6)-1050(psb)]TJ + -41.922 -12.08 Td [(6.5)-1020(psb)]TJ ET q -1 0 0 1 204.881 193.003 cm +1 0 0 1 204.583 193.003 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 192.804 Td [(cdbldext)-250(\227)-250(Build)-250(an)-250(extended)-250(communication)-250(descriptor)]TJ +/F84 9.9626 Tf 207.571 192.804 Td [(cdfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G - [-676(.)]TJ + [-821(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(79)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(6.7)-1050(psb)]TJ + -41.922 -12.08 Td [(6.6)-1020(psb)]TJ ET q -1 0 0 1 204.881 180.923 cm +1 0 0 1 204.583 180.923 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 180.724 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ +/F84 9.9626 Tf 207.571 180.724 Td [(cdbldext)-250(\227)-250(Build)-250(an)-250(extended)-250(communication)-250(descriptor)]TJ 0 g 0 G - [-842(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-706(.)]TJ 0 g 0 G - [-1500(81)]TJ + [-1500(80)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(6.8)-1050(psb)]TJ + -41.922 -12.08 Td [(6.7)-1020(psb)]TJ ET q -1 0 0 1 204.881 168.844 cm +1 0 0 1 204.583 168.844 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S 
Q BT -/F54 9.9626 Tf 207.87 168.644 Td [(spins)-250(\227)-250(Insert)-250(a)-250(set)-250(of)-250(coef)18(\002cients)-250(into)-250(a)-250(sparse)-250(matrix)]TJ +/F84 9.9626 Tf 207.571 168.644 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G - [-625(.)-500(.)-500(.)]TJ + [-872(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(83)]TJ + [-1500(82)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.079 Td [(6.9)-1050(psb)]TJ + -41.922 -12.079 Td [(6.8)-1020(psb)]TJ ET q -1 0 0 1 204.881 156.764 cm +1 0 0 1 204.583 156.764 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 156.565 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(r)18(outine)]TJ +/F84 9.9626 Tf 207.571 156.565 Td [(spins)-250(\227)-250(Insert)-250(a)-250(set)-250(of)-250(coef)18(\002cients)-250(into)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G - [-611(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-655(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(86)]TJ + [-1500(84)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(6.10)-550(psb)]TJ + -41.922 -12.08 Td [(6.9)-1020(psb)]TJ ET q -1 0 0 1 204.881 144.684 cm +1 0 0 1 204.583 144.684 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 144.485 Td [(spfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(sparse)-250(matrix)]TJ +/F84 9.9626 Tf 207.571 144.485 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(r)18(outine)]TJ 0 g 0 G - [-520(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-641(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(88)]TJ + [-1500(87)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.08 Td [(6.11)-550(psb)]TJ + -41.922 -12.08 Td [(6.10)-520(psb)]TJ ET q -1 0 0 1 204.881 132.605 cm +1 0 0 1 204.583 132.605 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 
Tf 207.87 132.405 Td [(sprn)-250(\227)-250(Reinit)-250(sparse)-250(matrix)-250(str)8(uctur)18(e)-250(for)-250(psblas)-250(r)18(outines.)]TJ +/F84 9.9626 Tf 207.571 132.405 Td [(spfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G - [-725(.)]TJ + [-550(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(89)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG - -42.221 -12.079 Td [(6.12)-550(psb)]TJ + -41.922 -12.079 Td [(6.11)-520(psb)]TJ ET q -1 0 0 1 204.881 120.525 cm +1 0 0 1 204.583 120.525 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 120.326 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ +/F84 9.9626 Tf 207.571 120.326 Td [(sprn)-250(\227)-250(Reinit)-250(sparse)-250(matrix)-250(str)8(uctur)18(e)-250(for)-250(psblas)-250(r)18(outines.)]TJ 0 g 0 G - [-330(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-755(.)]TJ 0 g 0 G [-1500(90)]TJ 0 g 0 G 0 g 0 G - 111.791 -29.888 Td [(ii)]TJ + 112.09 -29.888 Td [(ii)]TJ 0 g 0 G ET endstream endobj -741 0 obj +605 0 obj << -/Length 18214 +/Type /ObjStm +/N 100 +/First 903 +/Length 13873 >> stream -0 g 0 G -0 g 0 G +604 0 608 48 609 240 612 288 613 442 616 490 617 649 620 692 621 864 624 907 +625 1001 628 1044 629 1138 632 1181 633 1305 636 1349 637 1493 640 1537 641 1656 644 1700 +645 1819 648 1863 649 1997 652 2041 653 2155 656 2199 657 2348 660 2392 661 2541 664 2585 +665 2754 668 2798 669 2947 672 2991 673 3135 676 3179 677 3348 678 3393 680 3522 683 3628 +684 3684 3 3740 681 3794 691 3923 693 4037 690 4094 737 4161 694 4647 695 4794 696 4940 +697 5086 698 5238 699 5390 700 5541 701 5698 702 5850 703 5996 704 6148 705 6305 706 6462 +707 6619 708 6776 709 6933 710 7090 711 7247 712 7404 713 7561 741 7719 714 7876 742 8034 +715 8191 716 8349 717 8507 718 8665 719 8823 720 8975 721 9132 722 9288 723 9445 743 9602 +724 9758 744 9915 
725 10071 726 10228 727 10385 728 10542 729 10699 730 10856 731 11014 732 11172 +733 11330 734 11488 739 11644 740 11700 736 11756 793 11836 735 12338 745 12496 746 12654 747 12812 +% 604 0 obj +<< /S /GoTo /D (subsection.12.2) >> +% 608 0 obj +(\376\377\0001\0002\000.\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s\000'\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000s) +% 609 0 obj +<< /S /GoTo /D (subsection.12.3) >> +% 612 0 obj +(\376\377\0001\0002\000.\0003\000\040\000C\000P\000U\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 613 0 obj +<< /S /GoTo /D (subsection.12.4) >> +% 616 0 obj +(\376\377\0001\0002\000.\0004\000\040\000C\000U\000D\000A\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 617 0 obj +<< /S /GoTo /D (section.13) >> +% 620 0 obj +(\376\377\0001\0003\000\040\000C\000U\000D\000A\000\040\000E\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t\000\040\000R\000o\000u\000t\000i\000n\000e\000s) +% 621 0 obj +<< /S /GoTo /D (section*.7) >> +% 624 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000i\000n\000i\000t) +% 625 0 obj +<< /S /GoTo /D (section*.8) >> +% 628 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000e\000x\000i\000t) +% 629 0 obj +<< /S /GoTo /D (section*.9) >> +% 632 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000D\000e\000v\000i\000c\000e\000S\000y\000n\000c) +% 633 0 obj +<< /S /GoTo /D (section*.10) >> +% 636 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000g\000e\000t\000D\000e\000v\000i\000c\000e\000C\000o\000u\000n\000t) +% 637 0 obj +<< /S /GoTo /D (section*.11) >> +% 640 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000g\000e\000t\000D\000e\000v\000i\000c\000e) +% 641 0 obj +<< /S /GoTo /D (section*.12) >> +% 644 0 obj 
+(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000s\000e\000t\000D\000e\000v\000i\000c\000e) +% 645 0 obj +<< /S /GoTo /D (section*.13) >> +% 648 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000D\000e\000v\000i\000c\000e\000H\000a\000s\000U\000V\000A) +% 649 0 obj +<< /S /GoTo /D (section*.14) >> +% 652 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000W\000a\000r\000p\000S\000i\000z\000e) +% 653 0 obj +<< /S /GoTo /D (section*.15) >> +% 656 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000u\000l\000t\000i\000P\000r\000o\000c\000e\000s\000s\000o\000r\000s) +% 657 0 obj +<< /S /GoTo /D (section*.16) >> +% 660 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000a\000x\000T\000h\000r\000e\000a\000d\000s\000P\000e\000r\000M\000P) +% 661 0 obj +<< /S /GoTo /D (section*.17) >> +% 664 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000a\000x\000R\000e\000g\000i\000s\000t\000e\000r\000P\000e\000r\000B\000l\000o\000c\000k) +% 665 0 obj +<< /S /GoTo /D (section*.18) >> +% 668 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000e\000m\000o\000r\000y\000C\000l\000o\000c\000k\000R\000a\000t\000e) +% 669 0 obj +<< /S /GoTo /D (section*.19) >> +% 672 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000e\000m\000o\000r\000y\000B\000u\000s\000W\000i\000d\000t\000h) +% 673 0 obj +<< /S /GoTo /D (section*.20) >> +% 676 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000e\000m\000o\000r\000y\000P\000e\000a\000k\000B\000a\000n\000d\000w\000i\000d\000t\000h) +% 677 0 obj +<< /S /GoTo /D [678 0 R /Fit] >> +% 678 0 obj +<< +/Type /Page +/Contents 682 0 R +/Resources 681 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 688 0 R +/Group 680 0 R +>> +% 680 0 obj +%PTEX Group needed for transparent pngs +<> +% 683 0 obj +<< +/D [678 0 R /XYZ 98.895 753.953 null] +>> +% 684 0 obj +<< +/D [678 0 
R /XYZ 99.895 716.092 null] +>> +% 3 0 obj +<< +/D [678 0 R /XYZ 99.895 716.092 null] +>> +% 681 0 obj +<< +/Font << /F75 685 0 R /F78 686 0 R /F84 687 0 R >> +/XObject << /Im1 679 0 R >> +/ProcSet [ /PDF /Text /ImageC ] +>> +% 691 0 obj +<< +/Type /Page +/Contents 692 0 R +/Resources 690 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 688 0 R +>> +% 693 0 obj +<< +/D [691 0 R /XYZ 149.705 753.953 null] +>> +% 690 0 obj +<< +/Font << /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 737 0 obj +<< +/Type /Page +/Contents 738 0 R +/Resources 736 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 688 0 R +/Annots [ 694 0 R 695 0 R 696 0 R 697 0 R 698 0 R 699 0 R 700 0 R 701 0 R 702 0 R 703 0 R 704 0 R 705 0 R 706 0 R 707 0 R 708 0 R 709 0 R 710 0 R 711 0 R 712 0 R 713 0 R 741 0 R 714 0 R 742 0 R 715 0 R 716 0 R 717 0 R 718 0 R 719 0 R 720 0 R 721 0 R 722 0 R 723 0 R 743 0 R 724 0 R 744 0 R 725 0 R 726 0 R 727 0 R 728 0 R 729 0 R 730 0 R 731 0 R 732 0 R 733 0 R 734 0 R ] +>> +% 694 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 681.902 134.097 691.232] +/A << /S /GoTo /D (section*.2) >> +>> +% 695 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 659.713 173.389 669.043] +/A << /S /GoTo /D (section.1) >> +>> +% 696 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 637.524 196.921 646.854] +/A << /S /GoTo /D (section.2) >> +>> +% 697 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 625.426 227.028 634.835] +/A << /S /GoTo /D (subsection.2.1) >> +>> +% 698 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 610.658 211.078 622.717] +/A << /S /GoTo /D (subsection.2.2) >> +>> +% 699 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 598.54 232.706 610.599] +/A << /S /GoTo /D (subsection.2.3) >> +>> +% 700 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect 
[136.757 586.422 301.886 598.481] +/A << /S /GoTo /D (subsubsection.2.3.1) >> +>> +% 701 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 574.304 230.734 586.364] +/A << /S /GoTo /D (subsection.2.4) >> +>> +% 702 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 554.745 242.261 564.075] +/A << /S /GoTo /D (section.3) >> +>> +% 703 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 539.997 249.144 552.056] +/A << /S /GoTo /D (subsection.3.1) >> +>> +% 704 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 527.879 258.689 539.939] +/A << /S /GoTo /D (subsubsection.3.1.1) >> +>> +% 705 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 515.761 360.207 527.821] +/A << /S /GoTo /D (subsubsection.3.1.2) >> +>> +% 706 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 503.643 350.723 515.703] +/A << /S /GoTo /D (subsubsection.3.1.3) >> +>> +% 707 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 491.525 373.457 503.585] +/A << /S /GoTo /D (subsubsection.3.1.4) >> +>> +% 708 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 479.407 363.973 491.467] +/A << /S /GoTo /D (subsubsection.3.1.5) >> +>> +% 709 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 467.289 384.834 479.349] +/A << /S /GoTo /D (subsubsection.3.1.6) >> +>> +% 710 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 455.171 356.411 467.037] +/A << /S /GoTo /D (subsubsection.3.1.7) >> +>> +% 711 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 443.053 297.523 455.113] +/A << /S /GoTo /D (subsubsection.3.1.8) >> +>> +% 712 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 430.935 345.014 442.995] +/A << 
/S /GoTo /D (subsubsection.3.1.9) >> +>> +% 713 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 418.817 444.603 430.877] +/A << /S /GoTo /D (subsubsection.3.1.10) >> +>> +% 741 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 406.862 221.947 418.922] +/A << /S /GoTo /D (subsubsection.3.1.10) >> +>> +% 714 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 394.744 444.603 406.804] +/A << /S /GoTo /D (subsubsection.3.1.11) >> +>> +% 742 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 382.789 221.947 394.849] +/A << /S /GoTo /D (subsubsection.3.1.11) >> +>> +% 715 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 370.671 358.404 382.731] +/A << /S /GoTo /D (subsubsection.3.1.12) >> +>> +% 716 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 358.553 354.718 370.613] +/A << /S /GoTo /D (subsubsection.3.1.13) >> +>> +% 717 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 346.435 413.607 358.495] +/A << /S /GoTo /D (subsubsection.3.1.14) >> +>> +% 718 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 336.967 250.062 346.377] +/A << /S /GoTo /D (subsubsection.3.1.15) >> +>> +% 719 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 322.199 223.242 334.259] +/A << /S /GoTo /D (subsection.3.2) >> +>> +% 720 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 310.081 273.364 322.141] +/A << /S /GoTo /D (subsubsection.3.2.1) >> +>> +% 721 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 297.964 399.41 310.023] +/A << /S /GoTo /D (subsubsection.3.2.2) >> +>> +% 722 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 285.846 410.528 297.905] +/A << /S /GoTo /D 
(subsubsection.3.2.3) >> +>> +% 723 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 273.728 444.603 285.787] +/A << /S /GoTo /D (subsubsection.3.2.4) >> +>> +% 743 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 261.772 230.246 273.528] +/A << /S /GoTo /D (subsubsection.3.2.4) >> +>> +% 724 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 249.655 444.603 261.714] +/A << /S /GoTo /D (subsubsection.3.2.5) >> +>> +% 744 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 237.699 237.718 249.455] +/A << /S /GoTo /D (subsubsection.3.2.5) >> +>> +% 725 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 225.581 429.161 237.641] +/A << /S /GoTo /D (subsubsection.3.2.6) >> +>> +% 726 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 213.464 384.545 225.523] +/A << /S /GoTo /D (subsubsection.3.2.7) >> +>> +% 727 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 201.346 329.343 213.405] +/A << /S /GoTo /D (subsubsection.3.2.8) >> +>> +% 728 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 189.228 405.337 201.287] +/A << /S /GoTo /D (subsubsection.3.2.9) >> +>> +% 729 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 177.11 371.724 189.169] +/A << /S /GoTo /D (subsubsection.3.2.10) >> +>> +% 730 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 164.992 309.029 177.051] +/A << /S /GoTo /D (subsubsection.3.2.11) >> +>> +% 731 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 155.524 350.683 164.933] +/A << /S /GoTo /D (subsubsection.3.2.12) >> +>> +% 732 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 140.756 303.929 152.815] +/A << /S /GoTo /D 
(subsubsection.3.2.13) >> +>> +% 733 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 128.638 324.462 140.698] +/A << /S /GoTo /D (subsubsection.3.2.14) >> +>> +% 734 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 116.52 309.687 128.58] +/A << /S /GoTo /D (subsubsection.3.2.15) >> +>> +% 739 0 obj +<< +/D [737 0 R /XYZ 98.895 753.953 null] +>> +% 740 0 obj +<< +/D [737 0 R /XYZ 99.895 723.975 null] +>> +% 736 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 793 0 obj +<< +/Type /Page +/Contents 794 0 R +/Resources 792 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 688 0 R +/Annots [ 735 0 R 745 0 R 746 0 R 747 0 R 748 0 R 749 0 R 750 0 R 751 0 R 752 0 R 753 0 R 754 0 R 755 0 R 756 0 R 757 0 R 758 0 R 759 0 R 760 0 R 761 0 R 762 0 R 763 0 R 764 0 R 765 0 R 766 0 R 767 0 R 768 0 R 769 0 R 770 0 R 771 0 R 772 0 R 773 0 R 774 0 R 775 0 R 776 0 R 777 0 R 778 0 R 779 0 R 780 0 R 781 0 R 782 0 R 783 0 R 784 0 R 785 0 R 786 0 R 787 0 R 788 0 R 789 0 R 790 0 R ] +>> +% 735 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 702.323 364.939 714.383] +/A << /S /GoTo /D (subsubsection.3.2.16) >> +>> +% 745 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 690.243 437.039 702.303] +/A << /S /GoTo /D (subsubsection.3.2.17) >> +>> +% 746 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 678.164 348.332 690.223] +/A << /S /GoTo /D (subsubsection.3.2.18) >> +>> +% 747 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 668.734 300.871 678.144] +/A << /S /GoTo /D (subsubsection.3.2.19) >> +>> + +endstream +endobj +842 0 obj +<< +/Length 18653 +>> +stream +0 g 0 G +0 g 0 G +0 0 1 rg 0 0 1 RG +BT +/F84 9.9626 Tf 114.839 706.129 Td [(6.12)-520(psb)]TJ +ET +q +1 0 0 1 153.773 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 
156.762 706.129 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-360(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(91)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(6.13)-520(psb)]TJ +ET +q +1 0 0 1 153.773 694.237 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 694.038 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(r)18(outine)]TJ +0 g 0 G + [-441(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(93)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(6.14)-520(psb)]TJ +ET +q +1 0 0 1 153.773 682.146 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 681.947 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-406(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(95)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(6.15)-520(psb)]TJ +ET +q +1 0 0 1 153.773 670.055 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 669.856 Td [(gefr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-788(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(96)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(6.16)-520(psb)]TJ +ET +q +1 0 0 1 153.773 657.964 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 657.765 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-831(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(97)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(6.17)-520(psb)]TJ +ET +q +1 0 0 1 153.773 645.873 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 645.674 Td [(glob)]TJ +ET +q +1 0 0 
1 176.747 645.873 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 179.735 645.674 Td [(to)]TJ +ET +q +1 0 0 1 189.02 645.873 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 192.009 645.674 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ +0 g 0 G + [-457(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(98)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -77.17 -12.091 Td [(6.18)-520(psb)]TJ +ET +q +1 0 0 1 153.773 633.782 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 633.583 Td [(loc)]TJ +ET +q +1 0 0 1 170.122 633.782 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 173.11 633.583 Td [(to)]TJ +ET +q +1 0 0 1 182.395 633.782 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 185.384 633.583 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ +0 g 0 G + [-996(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(100)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -70.545 -12.091 Td [(6.19)-520(psb)]TJ +ET +q +1 0 0 1 153.773 621.691 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 621.492 Td [(is)]TJ +ET +q +1 0 0 1 164.483 621.691 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 167.472 621.492 Td [(owned)-250(\227)]TJ +0 g 0 G + [-1165(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(101)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -52.633 -12.091 Td [(6.20)-520(psb)]TJ +ET +q +1 0 0 1 153.773 609.6 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 609.401 Td [(owned)]TJ +ET +q +1 0 0 1 187.765 609.6 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 190.754 609.401 Td [(index)-250(\227)]TJ +0 g 0 G + 
[-901(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(102)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -75.915 -12.091 Td [(6.21)-520(psb)]TJ +ET +q +1 0 0 1 153.773 597.509 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 597.31 Td [(is)]TJ +ET +q +1 0 0 1 164.483 597.509 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 167.472 597.31 Td [(local)-250(\227)]TJ +0 g 0 G + [-645(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(103)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -52.633 -12.091 Td [(6.22)-520(psb)]TJ +ET +q +1 0 0 1 153.773 585.418 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 585.219 Td [(local)]TJ +ET +q +1 0 0 1 178.002 585.418 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 180.991 585.219 Td [(index)-250(\227)]TJ +0 g 0 G + [-1131(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(104)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -66.152 -12.091 Td [(6.23)-520(psb)]TJ +ET +q +1 0 0 1 153.773 573.327 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 573.128 Td [(get)]TJ +ET +q +1 0 0 1 170.919 573.327 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 173.907 573.128 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ +0 g 0 G + [-857(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(105)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -59.068 -12.091 Td [(6.24)-520(psb)]TJ +ET +q +1 0 0 1 153.773 561.236 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 561.037 Td [(get)]TJ +ET 
+q +1 0 0 1 170.919 561.236 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 173.907 561.037 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ +0 g 0 G + [-545(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(106)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -59.068 -12.091 Td [(6.25)-520(psb)]TJ +ET +q +1 0 0 1 153.773 549.145 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 548.946 Td [(sp)]TJ +ET +q +1 0 0 1 167.571 549.145 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 170.56 548.946 Td [(getr)18(ow)-250(\227)-250(Extract)-250(r)18(ow\050s\051)-250(fr)18(om)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G + [-701(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(107)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -55.721 -12.091 Td [(6.26)-520(psb)]TJ +ET +q +1 0 0 1 153.773 537.054 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 536.855 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ +0 g 0 G + [-281(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(109)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(6.27)-550(Sorting)-250(utilities)-250(\227)]TJ +0 g 0 G + [-1157(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(110)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf -14.944 -22.145 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ +0 g 0 G + [-17835(112)]TJ +0 0 1 rg 0 0 1 RG +/F84 9.9626 Tf 14.944 -12.091 Td [(7.1)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 490.728 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 490.528 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ +0 g 0 G + [-796(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 
G + [-1000(113)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.2)]TJ 0.98 0 0 1 137.455 478.437 Tm [(psb)]TJ +ET +q +1 0 0 1 153.459 478.637 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 0.98 0 0 1 156.448 478.437 Tm [(info)-218(\227)-219(Return)-218(information)-219(abou)1(t)-219(PSBLAS)-218(parallel)-219(envir)19(onment)]TJ +0 g 0 G +0 g 0 G + 1 0 0 1 428.663 478.437 Tm [(114)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -313.824 -12.091 Td [(7.3)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 466.545 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 466.346 Td [(exit)-250(\227)-250(Exit)-250(fr)18(om)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ +0 g 0 G + [-853(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(115)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.4)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 454.454 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 454.255 Td [(get)]TJ +ET +q +1 0 0 1 170.919 454.454 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 173.907 454.255 Td [(mpi)]TJ +ET +q +1 0 0 1 192.189 454.454 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 195.177 454.255 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ +0 g 0 G + [-645(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(116)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -80.338 -12.091 Td [(7.5)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 442.363 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 442.164 Td [(get)]TJ +ET +q +1 0 0 1 170.919 442.363 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 173.907 442.164 Td [(mpi)]TJ +ET +q +1 0 0 1 192.189 442.363 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 195.177 442.164 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ +0 g 0 G + [-528(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + 
[-1000(117)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -80.338 -12.091 Td [(7.6)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 430.272 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 430.073 Td [(wtime)-250(\227)-250(W)92(all)-250(clock)-250(timing)]TJ +0 g 0 G + [-529(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(118)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.7)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 418.181 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 417.982 Td [(barrier)-250(\227)-250(Sinchr)18(onization)-250(point)-250(parallel)-250(envir)18(onment)]TJ +0 g 0 G + [-933(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(119)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.8)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 406.09 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 405.891 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ +0 g 0 G + [-976(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(120)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.9)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 393.999 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 393.8 Td [(bcast)-250(\227)-250(Br)18(oadcast)-250(data)]TJ +0 g 0 G + [-769(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(121)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.10)-520(psb)]TJ +ET +q +1 0 0 1 153.773 381.908 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 381.709 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ +0 g 0 G + 
[-278(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(123)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.11)-520(psb)]TJ +ET +q +1 0 0 1 153.773 369.817 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 369.618 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ +0 g 0 G + [-640(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(125)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.12)-520(psb)]TJ +ET +q +1 0 0 1 153.773 357.726 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 357.527 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ +0 g 0 G + [-926(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(127)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.13)-520(psb)]TJ +ET +q +1 0 0 1 153.773 345.635 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 345.436 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ +0 g 0 G + [-730(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(129)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.14)-520(psb)]TJ +ET +q +1 0 0 1 153.773 333.544 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 333.345 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ +0 g 0 G + [-807(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(131)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.15)-520(psb)]TJ +ET +q +1 0 0 1 153.773 321.453 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 321.254 Td 
[(nrm2)-250(\227)-250(Global)-250(2-norm)-250(r)18(eduction)]TJ +0 g 0 G + [-740(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(133)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.16)-520(psb)]TJ +ET +q +1 0 0 1 153.773 309.362 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 309.163 Td [(snd)-250(\227)-250(Send)-250(data)]TJ +0 g 0 G + [-541(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(135)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(7.17)-520(psb)]TJ +ET +q +1 0 0 1 153.773 297.271 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 297.072 Td [(r)18(cv)-250(\227)-250(Receive)-250(data)]TJ +0 g 0 G + [-314(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(136)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf -56.867 -22.144 Td [(8)-1000(Error)-250(handling)]TJ +0 g 0 G + [-24750(137)]TJ +0 0 1 rg 0 0 1 RG +/F84 9.9626 Tf 14.944 -12.091 Td [(8.1)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 263.036 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 262.837 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(err)18(or)-250(code)-250(onto)-250(the)-250(err)18(or)-250(stack)]TJ +0 g 0 G + [-764(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(139)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(8.2)-1020(psb)]TJ +ET +q +1 0 0 1 153.773 250.945 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 250.746 Td [(err)18(or)-250(\227)-250(Prints)-250(the)-250(err)18(or)-250(stack)-250(content)-250(and)-250(aborts)-250(execution)]TJ +0 g 0 G +0 g 0 G + [-1411(140)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.923 -12.091 Td [(8.3)-1020(psb)]TJ +ET 
+q +1 0 0 1 153.773 238.854 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 156.762 238.655 Td [(set)]TJ +ET +q +1 0 0 1 169.604 238.854 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 172.592 238.655 Td [(errverbosity)-250(\227)-250(Sets)-250(the)-250(verbosity)-250(of)-250(err)18(or)-250(messages)]TJ +0 g 0 G + [-283(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(141)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -57.753 -12.091 Td [(8.4)]TJ 0.994 0 0 1 137.455 226.564 Tm [(psb)]TJ +ET +q +1 0 0 1 153.679 226.763 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 0.994 0 0 1 156.668 226.564 Tm [(set)]TJ +ET +q +1 0 0 1 169.436 226.763 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 0.994 0 0 1 172.425 226.564 Tm [(erraction)-251(\227)-251(Set)-252(the)-251(type)-251(of)-251(action)-251(to)-252(b)1(e)-252(taken)-251(upon)-251(err)18(or)]TJ 1 0 0 1 137.753 214.608 Tm [(condition)]TJ +0 g 0 G + [-481(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(142)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf -37.858 -22.144 Td [(9)-1000(Utilities)]TJ +0 g 0 G + [-27780(143)]TJ +0 0 1 rg 0 0 1 RG +/F84 9.9626 Tf 14.944 -12.091 Td [(9.1)]TJ 0.98 0 0 1 137.753 180.373 Tm [(hb)]TJ +ET +q +1 0 0 1 149.432 180.572 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 0.98 0 0 1 152.421 180.373 Tm [(r)18(ead)-209(\227)-209(Read)-210(a)-209(sparse)-210(matr)1(ix)-210(fr)19(om)-210(a)-209(\002le)-210(in)-209(the)-209(Harwell\226Boeing)]TJ 1 0 0 1 137.753 168.418 Tm [(format)]TJ +0 g 0 G + [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(144)]TJ +0 g 0 
G +0 0 1 rg 0 0 1 RG + -22.914 -12.091 Td [(9.2)]TJ 0.99 0 0 1 137.753 156.327 Tm [(hb)]TJ +ET +q +1 0 0 1 149.546 156.526 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 0.99 0 0 1 152.534 156.327 Tm [(write)-252(\227)-251(W)75(rite)-252(a)-251(sparse)-252(matrix)-251(to)-252(a)-251(\002le)-252(in)-251(the)-252(Harwell\226Boeing)]TJ 1 0 0 1 137.753 144.372 Tm [(format)]TJ +0 g 0 G + [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(145)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -12.091 Td [(9.3)]TJ 1.014 0 0 1 137.753 132.281 Tm [(mm)]TJ +ET +q +1 0 0 1 156.191 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 1.014 0 0 1 159.18 132.281 Tm [(mat)]TJ +ET +q +1 0 0 1 177.042 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 1.014 0 0 1 180.031 132.281 Tm [(r)18(ead)-246(\227)-245(Read)-246(a)-245(sparse)-245(matrix)-246(fr)18(om)-245(a)-246(\002le)-245(in)-246(the)-245(Matrix-)]TJ 1 0 0 1 137.753 120.326 Tm [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(146)]TJ +0 g 0 G +0 g 0 G + 129.649 -29.888 Td [(iii)]TJ +0 g 0 G +ET + +endstream +endobj +885 0 obj +<< +/Length 14708 +>> +stream +0 g 0 G +0 g 0 G +0 0 1 rg 0 0 1 RG +BT +/F84 9.9626 Tf 165.649 706.129 Td [(9.4)]TJ 1.02 0 0 1 188.563 706.129 Tm [(mm)]TJ +ET +q +1 0 0 1 207.106 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 1.02 0 0 1 210.095 706.129 Tm [(array)]TJ +ET +q +1 0 0 1 234.533 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 1.02 0 0 1 237.521 706.129 Tm 
[(r)18(ead)-247(\227)-247(Read)-247(a)-247(dense)-247(array)-247(fr)18(om)-247(a)-247(\002le)-246(in)-247(the)-247(Matrix-)]TJ 1 0 0 1 188.563 694.174 Tm [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(147)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -11.955 Td [(9.5)]TJ 0.98 0 0 1 188.563 682.219 Tm [(mm)]TJ +ET +q +1 0 0 1 206.403 682.418 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 0.98 0 0 1 209.392 682.219 Tm [(mat)]TJ +ET +q +1 0 0 1 226.675 682.418 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 0.98 0 0 1 229.664 682.219 Tm [(write)-234(\227)-234(W)76(rite)-234(a)-234(sparse)-234(matrix)-234(to)-234(a)-234(\002le)-234(in)-234(the)-234(MatrixMar)19(-)]TJ 1 0 0 1 188.563 670.263 Tm [(ket)-250(format)]TJ +0 g 0 G + [-856(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(148)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -11.955 Td [(9.6)-1050(mm)]TJ +ET +q +1 0 0 1 206.755 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 209.743 658.308 Td [(array)]TJ +ET +q +1 0 0 1 233.713 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 236.702 658.308 Td [(write)-249(\227)-249(W)74(rite)-249(a)-249(dense)-250(array)-249(fr)18(om)-249(a)-249(\002le)-249(in)-249(the)-249(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(149)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf -37.858 
-21.918 Td [(10)-500(Preconditioner)-250(routines)]TJ +0 g 0 G + [-20696(151)]TJ +0 0 1 rg 0 0 1 RG +/F84 9.9626 Tf 14.944 -11.955 Td [(10.1)-550(init)-250(\227)-250(Initialize)-250(a)-250(pr)18(econditioner)]TJ +0 g 0 G + [-772(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(152)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.2)-550(Set)-250(\227)-250(set)-250(pr)18(econditioner)-250(parameters)]TJ +0 g 0 G + [-609(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(153)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.3)-550(build)-250(\227)-250(Builds)-250(a)-250(pr)18(econditioner)]TJ +0 g 0 G + [-970(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(155)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.4)-550(apply)-250(\227)-250(Pr)18(econditioner)-250(application)-250(r)18(outine)]TJ +0 g 0 G + [-421(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(157)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.956 Td [(10.5)-550(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(curr)18(ent)-250(pr)18(econditioner)]TJ +0 g 0 G + [-350(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(158)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.6)-550(clone)-250(\227)-250(clone)-250(curr)18(ent)-250(pr)18(econditioner)]TJ +0 g 0 G + [-260(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(159)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.7)-550(fr)18(ee)-250(\227)-250(Fr)18(ee)-250(a)-250(pr)18(econditioner)]TJ +0 g 0 G + 
[-341(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(160)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.8)-550(allocate)]TJ +ET +q +1 0 0 1 222.804 528.993 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 225.793 528.794 Td [(wrk)-250(\227)-250(pr)18(econditioner)]TJ +0 g 0 G + [-878(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(161)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -60.144 -11.955 Td [(10.9)-550(deallocate)]TJ +ET +q +1 0 0 1 233.663 517.038 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 236.652 516.839 Td [(wrk)-250(\227)-250(pr)18(econditioner)]TJ +0 g 0 G + [-538(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(162)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf -85.947 -21.918 Td [(11)-500(Iterative)-250(Methods)]TJ +0 g 0 G + [-23362(163)]TJ +0 0 1 rg 0 0 1 RG +/F84 9.9626 Tf 14.944 -11.955 Td [(11.1)-520(psb)]TJ +ET +q +1 0 0 1 204.583 483.165 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 207.571 482.966 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ +0 g 0 G + [-746(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(164)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -41.922 -11.955 Td [(11.2)-520(psb)]TJ +ET +q +1 0 0 1 204.583 471.21 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 207.571 471.011 Td [(richar)18(dson)-250(\227)-250(Richar)18(dson)-250(Iteration)-250(Driver)-250(Routine)]TJ +0 g 0 G + [-839(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(167)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf -56.866 -21.918 Td [(12)-500(Extensions)]TJ +0 g 0 G + [-26557(170)]TJ +0 0 1 rg 0 0 1 RG +/F84 9.9626 Tf 14.944 -11.955 Td 
[(12.1)-550(Using)-250(the)-250(extensions)]TJ +0 g 0 G + [-783(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(170)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.956 Td [(12.2)-550(Extensions')-250(Data)-250(Str)8(uctur)18(es)]TJ +0 g 0 G + [-797(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(171)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(12.3)-550(CPU-class)-250(extensions)]TJ +0 g 0 G + [-544(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(171)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(12.4)-550(CUDA-class)-250(extensions)]TJ +0 g 0 G + [-346(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(178)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F75 9.9626 Tf -14.944 -21.918 Td [(13)-500(CUDA)-250(Environment)-250(Routines)]TJ +0 g 0 G + [-17779(179)]TJ +0 0 1 rg 0 0 1 RG +/F84 9.9626 Tf 14.944 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 367.598 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 367.399 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 367.598 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 367.399 Td [(init)]TJ +0 g 0 G + [-304(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(179)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 355.643 cm +[]0 d 0 J 0.398 w 0 0 m 
2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 355.444 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 355.643 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 355.444 Td [(exit)]TJ +0 g 0 G + [-932(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(179)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 343.688 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 343.489 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 343.688 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 343.489 Td [(DeviceSync)]TJ +0 g 0 G + [-405(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(180)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 331.733 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 331.534 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 331.733 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 331.534 Td [(getDeviceCount)]TJ +0 g 0 G + [-635(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(180)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.956 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 319.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 319.578 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 319.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 319.578 Td [(getDevice)]TJ +0 g 0 G + 
[-401(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(180)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 307.823 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 307.623 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 307.823 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 307.623 Td [(setDevice)]TJ +0 g 0 G + [-533(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(180)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 295.867 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 295.668 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 295.867 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 295.668 Td [(DeviceHasUV)111(A)]TJ +0 g 0 G + [-839(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(180)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 283.912 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 283.713 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 283.912 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 283.713 Td [(W)92(arpSize)]TJ +0 g 0 G + [-595(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(180)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 271.957 cm +[]0 d 0 J 
0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 271.758 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 271.957 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 271.758 Td [(MultiPr)18(ocessors)]TJ +0 g 0 G + [-674(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(180)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 260.002 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 259.803 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 260.002 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 259.803 Td [(MaxThr)18(eadsPerMP)]TJ +0 g 0 G + [-718(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(180)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.956 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 248.047 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 247.847 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 248.047 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 247.847 Td [(MaxRegisterPerBlock)]TJ +0 g 0 G + [-538(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(181)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 236.092 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 235.892 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 236.092 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 235.892 Td [(MemoryClockRate)]TJ +0 g 0 G + [-970(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + 
[-1000(181)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 224.136 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.956 223.937 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 224.136 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 210.042 223.937 Td [(MemoryBusW)55(idth)]TJ +0 g 0 G + [-346(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(181)]TJ +0 g 0 G 0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 212.181 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F54 9.9626 Tf 114.839 706.129 Td [(6.13)-550(psb)]TJ +/F84 9.9626 Tf 184.956 211.982 Td [(cuda)]TJ ET q -1 0 0 1 154.072 706.328 cm +1 0 0 1 207.053 212.181 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 157.061 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(r)18(outine)]TJ +/F84 9.9626 Tf 210.042 211.982 Td [(MemoryPeakBandwidth)]TJ +0 g 0 G + [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-411(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-1000(181)]TJ +0 g 0 G +0 g 0 G + 108.254 -121.544 Td [(iv)]TJ +0 g 0 G +ET + +endstream +endobj +796 0 obj +<< +/Type /ObjStm +/N 100 +/First 926 +/Length 16423 +>> +stream +748 0 749 151 750 307 751 464 752 621 753 778 754 935 755 1092 756 1244 757 1396 +758 1543 759 1695 760 1847 761 1999 762 2151 763 2303 764 2455 765 2607 766 2759 767 2911 +768 3062 769 3215 770 3368 771 3521 772 3674 773 3827 774 3980 775 4127 776 4279 777 4431 +778 4583 779 4735 780 4882 781 5034 782 5186 783 5338 784 5490 785 5642 786 5794 787 5946 +788 6098 789 6250 790 6401 795 6551 792 6608 841 6688 791 7190 797 7343 798 7496 799 7649 +800 7800 801 7953 802 8106 803 8258 804 8411 805 8564 806 
8717 807 8870 808 9023 809 9176 +810 9325 811 9478 812 9631 813 9777 814 9928 815 10080 816 10230 817 10381 818 10533 819 10685 +820 10837 821 10989 822 11141 823 11294 824 11446 825 11599 826 11750 827 11903 828 12056 829 12209 +830 12362 831 12508 832 12660 833 12807 834 12959 844 13111 835 13262 836 13408 845 13560 837 13711 +846 13863 838 14014 847 14166 843 14315 840 14371 884 14451 839 14889 887 15041 848 15193 888 15345 +% 748 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 656.654 313.682 665.87] +/A << /S /GoTo /D (subsection.3.3) >> +>> +% 749 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 644.575 290.47 653.984] +/A << /S /GoTo /D (subsubsection.3.3.1) >> +>> +% 750 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 629.845 446.194 641.905] +/A << /S /GoTo /D (subsubsection.3.3.2) >> +>> +% 751 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 617.765 479.958 629.825] +/A << /S /GoTo /D (subsubsection.3.3.3) >> +>> +% 752 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 608.336 358.813 617.745] +/A << /S /GoTo /D (subsubsection.3.3.4) >> +>> +% 753 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 593.606 415.509 605.666] +/A << /S /GoTo /D (subsubsection.3.3.5) >> +>> +% 754 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 581.526 348.332 593.586] +/A << /S /GoTo /D (subsubsection.3.3.6) >> +>> +% 755 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 572.097 318.663 581.506] +/A << /S /GoTo /D (subsection.3.4) >> +>> +% 756 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 557.367 277.409 569.426] +/A << /S /GoTo /D (subsection.3.5) >> +>> +% 757 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [149.709 535.481 
275.386 547.201] +/A << /S /GoTo /D (section.4) >> +>> +% 758 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 523.162 380.152 535.221] +/A << /S /GoTo /D (subsection.4.1) >> +>> +% 759 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 511.082 302.465 523.142] +/A << /S /GoTo /D (subsection.4.2) >> +>> +% 760 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 499.002 362.678 511.062] +/A << /S /GoTo /D (subsection.4.3) >> +>> +% 761 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 486.923 354.459 498.982] +/A << /S /GoTo /D (subsection.4.4) >> +>> +% 762 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 474.843 379.545 486.903] +/A << /S /GoTo /D (subsection.4.5) >> +>> +% 763 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 462.763 328.856 474.823] +/A << /S /GoTo /D (subsection.4.6) >> +>> +% 764 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 450.684 394.439 462.743] +/A << /S /GoTo /D (subsection.4.7) >> +>> +% 765 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 438.604 328.856 450.663] +/A << /S /GoTo /D (subsection.4.8) >> +>> +% 766 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 426.524 393.941 438.584] +/A << /S /GoTo /D (subsection.4.9) >> +>> +% 767 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 414.444 362.2 426.504] +/A << /S /GoTo /D (subsection.4.10) >> +>> +% 768 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 402.365 386.977 414.424] +/A << /S /GoTo /D (subsection.4.11) >> +>> +% 769 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 390.285 425.462 402.345] +/A << /S /GoTo /D (subsection.4.12) >> +>> +% 770 0 obj +<< +/Type 
/Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 378.205 353.692 390.265] +/A << /S /GoTo /D (subsection.4.13) >> +>> +% 771 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 366.126 331.047 378.185] +/A << /S /GoTo /D (subsection.4.14) >> +>> +% 772 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 354.046 333.239 366.106] +/A << /S /GoTo /D (subsection.4.15) >> +>> +% 773 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 341.966 337.303 354.026] +/A << /S /GoTo /D (subsection.4.16) >> +>> +% 774 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [149.709 322.471 280.368 331.622] +/A << /S /GoTo /D (section.5) >> +>> +% 775 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 307.761 361.732 319.821] +/A << /S /GoTo /D (subsection.5.1) >> +>> +% 776 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 295.681 312.766 307.741] +/A << /S /GoTo /D (subsection.5.2) >> +>> +% 777 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 283.602 375.828 295.661] +/A << /S /GoTo /D (subsection.5.3) >> +>> +% 778 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 271.522 375.888 283.582] +/A << /S /GoTo /D (subsection.5.4) >> +>> +% 779 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [149.709 249.556 289.504 261.177] +/A << /S /GoTo /D (section.6) >> +>> +% 780 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 237.317 411.793 249.377] +/A << /S /GoTo /D (subsection.6.1) >> +>> +% 781 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 225.237 427.753 237.297] +/A << /S /GoTo /D (subsection.6.2) >> +>> +% 782 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 213.158 445.616 
225.217] +/A << /S /GoTo /D (subsection.6.3) >> +>> +% 783 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 201.078 406.712 213.138] +/A << /S /GoTo /D (subsection.6.4) >> +>> +% 784 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 188.998 400.057 201.058] +/A << /S /GoTo /D (subsection.6.5) >> +>> +% 785 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 176.918 460.978 188.978] +/A << /S /GoTo /D (subsection.6.6) >> +>> +% 786 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 164.839 354.718 176.898] +/A << /S /GoTo /D (subsection.6.7) >> +>> +% 787 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 152.759 446.543 164.819] +/A << /S /GoTo /D (subsection.6.8) >> +>> +% 788 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 140.679 386.907 152.739] +/A << /S /GoTo /D (subsection.6.9) >> +>> +% 789 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 128.6 342.982 140.659] +/A << /S /GoTo /D (subsection.6.10) >> +>> +% 790 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 116.52 460.49 128.58] +/A << /S /GoTo /D (subsection.6.11) >> +>> +% 795 0 obj +<< +/D [793 0 R /XYZ 149.705 753.953 null] +>> +% 792 0 obj +<< +/Font << /F84 687 0 R /F75 685 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 841 0 obj +<< +/Type /Page +/Contents 842 0 R +/Resources 840 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 688 0 R +/Annots [ 791 0 R 797 0 R 798 0 R 799 0 R 800 0 R 801 0 R 802 0 R 803 0 R 804 0 R 805 0 R 806 0 R 807 0 R 808 0 R 809 0 R 810 0 R 811 0 R 812 0 R 813 0 R 814 0 R 815 0 R 816 0 R 817 0 R 818 0 R 819 0 R 820 0 R 821 0 R 822 0 R 823 0 R 824 0 R 825 0 R 826 0 R 827 0 R 828 0 R 829 0 R 830 0 R 831 0 R 832 0 R 833 0 R 834 0 R 844 0 R 835 0 R 836 0 R 845 0 R 837 0 R 846 0 R 838 0 R 847 0 R ] +>> +% 791 0 obj 
+<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 702.323 301.537 714.383] +/A << /S /GoTo /D (subsection.6.12) >> +>> +% 797 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 690.232 330.618 702.292] +/A << /S /GoTo /D (subsection.6.13) >> +>> +% 798 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 678.141 308.551 690.201] +/A << /S /GoTo /D (subsection.6.14) >> +>> +% 799 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 666.05 289.802 678.11] +/A << /S /GoTo /D (subsection.6.15) >> +>> +% 800 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 653.959 386.507 666.019] +/A << /S /GoTo /D (subsection.6.16) >> +>> +% 801 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 641.868 367.818 653.928] +/A << /S /GoTo /D (subsection.6.17) >> +>> +% 802 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 629.777 369.92 641.837] +/A << /S /GoTo /D (subsection.6.18) >> +>> +% 803 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 617.686 213.818 629.746] +/A << /S /GoTo /D (subsection.6.19) >> +>> +% 804 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 605.595 231.391 617.655] +/A << /S /GoTo /D (subsection.6.20) >> +>> +% 805 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 593.504 204.054 605.564] +/A << /S /GoTo /D (subsection.6.21) >> +>> +% 806 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 581.413 221.628 593.473] +/A << /S /GoTo /D (subsection.6.22) >> +>> +% 807 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 569.322 378.777 581.382] +/A << /S /GoTo /D (subsection.6.23) >> +>> +% 808 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect 
[113.843 557.231 359.469 569.291] +/A << /S /GoTo /D (subsection.6.24) >> +>> +% 809 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 545.14 372.86 557.2] +/A << /S /GoTo /D (subsection.6.25) >> +>> +% 810 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 533.049 287.381 545.109] +/A << /S /GoTo /D (subsection.6.26) >> +>> +% 811 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 520.958 221.369 533.017] +/A << /S /GoTo /D (subsection.6.27) >> +>> +% 812 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 501.444 251.974 510.774] +/A << /S /GoTo /D (section.7) >> +>> +% 813 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 486.723 364.44 498.782] +/A << /S /GoTo /D (subsection.7.1) >> +>> +% 814 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 474.632 429.161 486.691] +/A << /S /GoTo /D (subsection.7.2) >> +>> +% 815 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 462.541 363.873 474.6] +/A << /S /GoTo /D (subsection.7.3) >> +>> +% 816 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 450.45 358.473 462.509] +/A << /S /GoTo /D (subsection.7.4) >> +>> +% 817 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 438.359 307.336 450.418] +/A << /S /GoTo /D (subsection.7.5) >> +>> +% 818 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 426.267 277.438 438.327] +/A << /S /GoTo /D (subsection.7.6) >> +>> +% 819 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 414.176 392.963 426.236] +/A << /S /GoTo /D (subsection.7.7) >> +>> +% 820 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 402.085 287.928 414.145] +/A << /S /GoTo /D (subsection.7.8) >> +>> +% 821 0 obj 
+<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 389.994 260.104 402.054] +/A << /S /GoTo /D (subsection.7.9) >> +>> +% 822 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 377.903 242.579 389.963] +/A << /S /GoTo /D (subsection.7.10) >> +>> +% 823 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 365.812 268.86 377.872] +/A << /S /GoTo /D (subsection.7.11) >> +>> +% 824 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 353.721 266.011 365.781] +/A << /S /GoTo /D (subsection.7.12) >> +>> +% 825 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 341.63 335.211 353.69] +/A << /S /GoTo /D (subsection.7.13) >> +>> +% 826 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 329.539 334.444 341.599] +/A << /S /GoTo /D (subsection.7.14) >> +>> +% 827 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 317.448 305.224 329.508] +/A << /S /GoTo /D (subsection.7.15) >> +>> +% 828 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 305.357 232.488 317.417] +/A << /S /GoTo /D (subsection.7.16) >> +>> +% 829 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 293.266 242.221 305.326] +/A << /S /GoTo /D (subsection.7.17) >> +>> +% 830 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 271.281 183.083 283.082] +/A << /S /GoTo /D (section.8) >> +>> +% 831 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 259.031 387.175 271.091] +/A << /S /GoTo /D (subsection.8.1) >> +>> +% 832 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 246.94 415.598 259] +/A << /S /GoTo /D (subsection.8.2) >> +>> +% 833 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 
234.849 399.439 246.909] +/A << /S /GoTo /D (subsection.8.3) >> +>> +% 834 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 222.758 444.603 234.818] +/A << /S /GoTo /D (subsection.8.4) >> +>> +% 844 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 213.453 180.781 222.862] +/A << /S /GoTo /D (subsection.8.4) >> +>> +% 835 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 191.289 152.896 200.619] +/A << /S /GoTo /D (section.9) >> +>> +% 836 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 176.568 444.603 188.627] +/A << /S /GoTo /D (subsection.9.1) >> +>> +% 845 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 167.262 168.468 176.672] +/A << /S /GoTo /D (subsection.9.1) >> +>> +% 837 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 152.521 444.603 164.581] +/A << /S /GoTo /D (subsection.9.2) >> +>> +% 846 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 143.216 168.468 152.626] +/A << /S /GoTo /D (subsection.9.2) >> +>> +% 838 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 128.475 444.603 140.535] +/A << /S /GoTo /D (subsection.9.3) >> +>> +% 847 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 119.17 202.859 128.58] +/A << /S /GoTo /D (subsection.9.3) >> +>> +% 843 0 obj +<< +/D [841 0 R /XYZ 98.895 753.953 null] +>> +% 840 0 obj +<< +/Font << /F84 687 0 R /F75 685 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 884 0 obj +<< +/Type /Page +/Contents 885 0 R +/Resources 883 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 688 0 R +/Annots [ 839 0 R 887 0 R 848 0 R 888 0 R 849 0 R 889 0 R 850 0 R 851 0 R 852 0 R 853 0 R 854 0 R 855 0 R 856 0 R 857 0 R 858 0 R 859 0 R 860 0 R 861 0 R 862 0 R 863 0 R 864 0 R 865 0 R 866 0 R 867 0 R 868 0 R 869 0 R 870 0 R 871 0 R 872 0 
R 873 0 R 874 0 R 875 0 R 876 0 R 877 0 R 878 0 R 879 0 R 880 0 R 881 0 R 882 0 R ] +>> +% 839 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 702.323 495.412 714.383] +/A << /S /GoTo /D (subsection.9.4) >> +>> +% 887 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [149.709 693.018 253.668 702.428] +/A << /S /GoTo /D (subsection.9.4) >> +>> +% 848 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 678.413 495.412 690.472] +/A << /S /GoTo /D (subsection.9.5) >> +>> +% 888 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [149.709 669.108 235.327 678.517] +/A << /S /GoTo /D (subsection.9.5) >> +>> + +endstream +endobj +893 0 obj +<< +/Length 4149 +>> +stream 0 g 0 G - [-1500(92)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(6.14)-550(psb)]TJ -ET -q -1 0 0 1 154.072 694.234 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q BT -/F54 9.9626 Tf 157.061 694.035 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ +/F75 14.3462 Tf 99.895 705.784 Td [(Preface)]TJ/F84 9.9626 Tf 0.98 0 0 1 99.587 683.082 Tm [(This)-249(manual)-250(describes)-249(the)-250(main)-250(featur)19(es)-250(of)-249(PSBLAS,)-250(a)-249(library)-250(for)-249(parallel)-250(sparse)]TJ 1 0 0 1 99.895 671.127 Tm [(computations)-250(that)-250(has)-250(been)-250(developed)-250(over)-250(a)-250(number)-250(of)-250(years.)]TJ 1.01 0 0 1 114.839 659.172 Tm [(Our)-248(work)-248(has)-248(been)-247(mainly)-248(devoted)-248(to)-248(pr)18(oviding)-248(a)-248(foundational)-248(toolkit)-247(on)]TJ 1.02 0 0 1 99.477 647.217 Tm [(which)-264(many)-265(algorithms)-264(can)-264(be)-265(implemented;)-274(the)-264(toolkit)-264(has)-265(pr)18(oven)-264(its)-265(ef)18(fec-)]TJ 1.02 0 0 1 99.895 635.261 Tm [(tiveness)-269(and)-268(\003exibility)-269(in)-268(many)-269(ways.)-374(The)-268(PSBLAS)-269(component)-268(deals)-269(mostly)]TJ 1.02 0 0 1 99.477 623.306 Tm 
[(with)-285(the)-286(computational)-285(kernels)-286(and)-285(envir)17(onment)-285(handling;)-306(it)-285(supports)-286(com-)]TJ 1.017 0 0 1 99.596 611.351 Tm [(putations)-245(on)-246(normal)-245(CPUs,)-246(including)-245(the)-245(usage)-246(of)-245(OpenMP)-246(for)-245(parallellizing)]TJ 1 0 0 1 99.895 599.396 Tm [(acr)18(oss)-250(multiple)-250(cor)18(es.)]TJ 1.02 0 0 1 114.839 587.441 Tm [(This)-283(foundational)-284(package)-283(pr)17(ovides)-283(linear)-283(solvers)-284(and)-283(some)-284(very)-283(simple)]TJ 1.019 0 0 1 99.596 575.486 Tm [(pr)18(econditioners;)-247(the)-246(companion)-246(package)-246(AMG4PSBLAS)-247(exp)1(lor)17(es)-246(how)-246(to)-246(use)]TJ 0.981 0 0 1 99.895 563.53 Tm [(the)-256(base)-256(toolkit)-256(to)-256(build)-256(much)-256(mor)18(e)-256(sophisticated)-256(pr)19(econditioners)-256(which)-256(can)-256(be)]TJ 1 0 0 1 99.596 551.575 Tm [(plugged)-250(seamlessly)-250(into)-250(the)-250(base)-250(solvers.)]TJ 0.98 0 0 1 114.839 539.62 Tm [(The)-253(softwar)19(e)-253(ar)18(chite)1(ctur)18(e)-253(allows)-252(us)-253(to)-253(of)19(f)-1(er)-252(support)-253(for)-252(many)-253(alternatives)-253(in)]TJ 1.005 0 0 1 99.895 527.665 Tm [(the)-249(implementation,)-250(including)-249(usage)-249(of)-250(heter)18(ogeneous)-249(platforms,)-249(and)-250(compu-)]TJ 1.02 0 0 1 99.895 515.71 Tm [(tations)-245(performed)-245(on)-245(GPUs)-246(thr)18(ouh)-245(CUDA.)-245(Ther)17(e)-245(is)-245(support)-245(for)-245(GPU)-245(compu-)]TJ 1.007 0 0 1 99.895 503.755 Tm [(tations)-247(thr)17(ough)-247(OpenACC,)-247(but)-248(it)-247(is)-248(at)-247(this)-248(time)-247(a)-248(highly)-247(experimental)-247(version;)]TJ 1.02 0 0 1 99.477 491.799 Tm [(we)-281(plan)-281(to)-281(also)-281(look)-281(at)-281(using)-281(accelerato)1(rs)-281(thr)17(ough)-281(OpenMP)-281(as)-281(support)-280(fr)17(om)]TJ 1 0 0 1 99.895 479.844 Tm [(compilers)-250(impr)18(oves.)]TJ 1.02 0 0 1 114.839 467.889 Tm 
[(The)-272(pr)17(oject)-272(is)-272(lead)-272(by)-272(Salvator)17(e)-272(Filippone;)-285(a)-273(numb)1(er)-273(of)-272(people)-272(have)-272(been)]TJ 1.02 0 0 1 99.895 455.934 Tm [(contributing)-317(to)-317(this)-318(package)-317(over)-317(the)-317(years;)-353(contributors)-318(in)-317(r)18(oughly)-317(r)17(everse)]TJ 1 0 0 1 99.895 443.979 Tm [(chr)18(onological)-250(or)18(der:)]TJ 14.944 -11.955 Td [(Luca)-250(Pep)]TJ 41.494 0.025 Td [(\036)]TJ -0.727 -0.025 Td [(e)-250(Sciarria)]TJ -40.767 -11.956 Td [(Theophane)-250(Loloum)]TJ 0 -11.955 Td [(Dimitri)-250(W)92(alther)]TJ 0 -11.955 Td [(Andea)-250(Di)-250(Iorio)]TJ 0 -11.955 Td [(Stefano)-250(Petrilli)]TJ 0 -11.955 Td [(Sor)18(en)-250(Rasmussen)]TJ 0 -11.955 Td [(Zaak)-250(Beekman)]TJ 0 -11.956 Td [(Ambra)-250(Abdullahi)-250(Hassan)]TJ 0 -11.955 Td [(Pasqua)-250(D'Ambra)]TJ 0 -11.955 Td [(Daniela)-250(di)-250(Sera\002no)]TJ 0 -11.955 Td [(Michele)-250(Martone)]TJ 0 -11.955 Td [(Michele)-250(Colajanni)]TJ 0 -11.955 Td [(Fabio)-250(Cerioni)]TJ 0 -11.956 Td [(Stefano)-250(Maiolatesi)]TJ 0 -11.955 Td [(Dario)-250(Pascucci)]TJ 0 g 0 G - [-376(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1500(94)]TJ + 243.12 -21.918 Td [(Salvator)18(e)-250(Filippone)]TJ 18.729 -11.955 Td [(Alfr)18(edo)-250(Buttari)]TJ -8.847 -11.955 Td [(Fabio)-250(Durastante)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(6.15)-550(psb)]TJ -ET -q -1 0 0 1 154.072 682.14 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 681.941 Td [(gefr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(dense)-250(matrix)]TJ + -98.581 -128.385 Td [(1)]TJ 0 g 0 G - [-758(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +ET + +endstream +endobj +907 0 obj +<< +/Length 9122 +>> +stream 0 g 0 G - [-1500(95)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(6.16)-550(psb)]TJ -ET -q -1 0 0 1 154.072 670.046 cm -[]0 
d 0 J 0.398 w 0 0 m 2.989 0 l S -Q BT -/F54 9.9626 Tf 157.061 669.846 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ +/F75 14.3462 Tf 150.705 705.784 Td [(1)-1000(Introduction)]TJ/F84 9.9626 Tf 1.009 0 0 1 150.396 682.871 Tm [(The)-247(PSBLAS)-247(library)110(,)-247(developed)-246(with)-247(the)-247(aim)-247(to)-247(facilitate)-247(the)-246(parallelization)-247(of)]TJ 0.98 0 0 1 150.705 670.916 Tm [(computationally)-246(intensive)-247(scienti\002c)-246(applications,)-248(is)-246(designed)-247(to)-246(addr)18(ess)-246(parallel)]TJ 1.02 0 0 1 150.705 658.961 Tm [(implementation)-250(of)-250(iterative)-251(solvers)-250(for)-250(sparse)-250(linear)-250(systems)-251(thr)18(ough)-250(the)-250(dis-)]TJ 0.98 0 0 1 150.705 647.006 Tm [(tributed)-232(memory)-232(paradigm.)-309(It)-232(includes)-232(r)18(outines)-232(for)-232(multiplying)-232(sparse)-232(matrices)]TJ 1.02 0 0 1 150.705 635.051 Tm [(by)-305(dense)-305(matrices,)-320(solving)-305(block)-305(diagonal)-305(systems)-306(wit)1(h)-306(triangular)-305(diagonal)]TJ 1.02 0 0 1 150.705 623.095 Tm [(entries,)-366(pr)18(epr)18(ocessing)-342(sparse)-342(matrices,)-365(and)-342(contains)-342(additional)-341(r)17(outines)-341(for)]TJ 1.015 0 0 1 150.705 611.14 Tm [(dense)-245(matrix)-244(operations.)-305(The)-245(curr)18(ent)-245(imple)1(mentation)-245(of)-245(PSBLAS)-244(addr)17(esses)-244(a)]TJ 1 0 0 1 150.705 599.185 Tm [(distributed)-250(memory)-250(execution)-250(model)-250(operating)-250(with)-250(message)-250(passing.)]TJ 1.013 0 0 1 165.649 587.121 Tm [(The)-247(PSBLAS)-246(library)-247(version)-246(3)-247(is)-247(impleme)1(nted)-247(in)-247(the)-246(Fortran)-247(2008)-246([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 461.514 587.121 Tm [(17)]TJ 0 g 0 G - [-801(.)-500(.)-500(.)-500(.)]TJ + 1.013 0 0 1 471.477 587.121 Tm [(])-247(pr)18(o-)]TJ 1.009 0 0 1 150.705 575.166 Tm 
[(gramming)-247(language,)-248(with)-247(r)18(euse)-248(and/or)-247(adaptation)-248(of)-247(existing)-248(Fort)1(ran)-248(77)-247(and)]TJ 1 0 0 1 150.705 563.211 Tm [(Fortran)-250(95)-250(softwar)18(e,)-250(plus)-250(a)-250(handful)-250(of)-250(C)-250(r)18(outines.)]TJ 1.02 0 0 1 165.649 551.147 Tm [(The)-348(use)-347(of)-348(Fortran)-348(2008)-348(of)18(fers)-347(a)-348(number)-348(of)-347(advantages)-348(over)-348(Fortran)-348(95,)]TJ 1.02 0 0 1 150.705 539.192 Tm [(mostly)-323(in)-324(the)-323(handling)-324(of)-323(r)17(equir)18(ements)-323(for)-324(evolution)-323(and)-324(adaptation)-323(of)-324(the)]TJ 0.995 0 0 1 150.705 527.237 Tm [(library)-252(to)-252(new)-251(computing)-252(ar)18(chitectur)18(es)-252(and)-252(integration)-252(of)-251(new)-252(algorithms.)-314(For)]TJ 1.011 0 0 1 150.705 515.281 Tm [(a)-247(detailed)-247(discussion)-247(of)-248(our)-247(design)-247(see)-247([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 327.361 515.281 Tm [(11)]TJ 0 g 0 G - [-1500(96)]TJ + 1.011 0 0 1 337.324 515.281 Tm [(];)-247(other)-247(works)-247(discussing)-248(advanced)]TJ 0.98 0 0 1 150.406 503.326 Tm [(pr)18(ogramming)-202(in)-203(Fortran)-202(2008)-203(include)-202([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 316.278 503.326 Tm [(21)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(6.17)-550(psb)]TJ -ET -q -1 0 0 1 154.072 657.951 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 657.752 Td [(glob)]TJ -ET -q -1 0 0 1 177.046 657.951 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 180.034 657.752 Td [(to)]TJ -ET -q -1 0 0 1 189.319 657.951 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 192.308 657.752 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ + 0.98 0 0 1 326.241 503.326 Tm [(,)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 330.659 503.326 Tm [(19)]TJ 0 g 0 G - [-427(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 0.98 0 0 1 340.622 503.326 Tm 
[(];)-220(suf)18(\002cient)-203(support)-202(for)-203(Fortran)-202(2008)]TJ 1.018 0 0 1 150.705 491.371 Tm [(is)-245(now)-245(available)-245(fr)17(om)-245(many)-245(compilers,)-245(including)-245(r)17(ecent)-245(versions)-245(of)-245(the)-245(GNU)]TJ 0.985 0 0 1 150.705 479.416 Tm [(Fortran)-254(compiler)-253(fr)18(om)-254(the)-254(Fr)19(ee)-254(Softwar)18(e)-254(Foundation,)-254(and)-253(the)-254(FLANG)-254(compiler)]TJ 1 0 0 1 150.705 467.461 Tm [(fr)18(om)-250(the)-250(LL)92(VM)-250(pr)18(oject.)]TJ 0.98 0 0 1 165.649 455.397 Tm [(Pr)18(evious)-234(appr)19(oaches)-234(have)-234(been)-234(based)-234(on)-234(mixing)-234(Fortran)-234(95,)-239(with)-234(its)-234(support)]TJ 1.02 0 0 1 150.705 443.442 Tm [(for)-260(object-based)-260(design,)-264(with)-260(other)-260(languages;)-267(these)-260(have)-260(been)-260(advocated)-260(by)]TJ 1.016 0 0 1 150.705 431.486 Tm [(a)-246(number)-246(of)-246(authors,)-247(e.g.)-246([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 268.108 431.486 Tm [(16)]TJ 0 g 0 G - [-1500(97)]TJ + 1.016 0 0 1 278.071 431.486 Tm [(].)-305(Mor)17(eover)73(,)-246(the)-246(Fortran)-246(95)-246(facilities)-247(for)-246(dynamic)]TJ 1.006 0 0 1 150.705 419.531 Tm [(memory)-248(management)-248(and)-247(interface)-248(overloading)-248(gr)18(eatly)-248(enhance)-247(the)-248(usability)]TJ 1.02 0 0 1 150.705 407.576 Tm [(of)-287(the)-287(PSBLAS)-287(subr)17(outines.)-430(In)-287(this)-287(way)109(,)-298(the)-287(library)-287(can)-288(take)-287(car)18(e)-287(of)-288(r)8(untime)]TJ 1.02 0 0 1 150.705 395.621 Tm [(memory)-248(r)18(equir)17(ements)-248(that)-248(ar)18(e)-249(qui)1(te)-249(dif)18(\002cult)-248(or)-248(even)-248(impossible)-248(to)-248(pr)17(edict)-248(at)]TJ 1 0 0 1 150.705 383.666 Tm [(implementation)-250(or)-250(compilation)-250(time.)]TJ 14.944 -12.064 Td [(The)-249(pr)18(esentation)-250(of)-249(the)-249(PSBLAS)-250(library)-249(follows)-250(the)-249(general)-249(str)8(uctur)18(e)-250(of)-249(the)]TJ 0.98 0 0 1 150.406 359.647 Tm 
[(pr)18(oposal)-255(for)-255(s)1(erial)-255(Sparse)-255(BLAS)-255([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 292.915 359.647 Tm [(8)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -77.469 -12.094 Td [(6.18)-550(psb)]TJ -ET -q -1 0 0 1 154.072 645.857 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 645.658 Td [(loc)]TJ -ET -q -1 0 0 1 170.42 645.857 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 173.409 645.658 Td [(to)]TJ -ET -q -1 0 0 1 182.694 645.857 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 185.683 645.658 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ + 0.98 0 0 1 297.896 359.647 Tm [(,)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 302.826 359.647 Tm [(9)]TJ 0 g 0 G - [-966(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 0.98 0 0 1 307.808 359.647 Tm [(],)-255(which)-255(in)-255(its)-255(turn)-255(is)-255(based)-255(on)-255(the)-254(pr)18(oposal)]TJ 1 0 0 1 150.705 347.691 Tm [(for)-250(BLAS)-250(on)-250(dense)-250(matrices)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(15)]TJ 0 g 0 G - [-1500(99)]TJ + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-250(5)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -70.844 -12.094 Td [(6.19)-550(psb)]TJ -ET -q -1 0 0 1 154.072 633.763 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 633.564 Td [(is)]TJ -ET -q -1 0 0 1 164.782 633.763 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 167.771 633.564 Td [(owned)-250(\227)]TJ + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-250(6)]TJ 0 g 0 G - [-1135(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [(].)]TJ 1.016 0 0 1 165.649 335.627 Tm [(The)-245(applicability)-245(of)-246(sparse)-245(iterative)-245(solvers)-245(to)-245(many)-245(dif)17(f)1(er)17(ent)-245(ar)18(eas)-245(causes)]TJ 0.98 0 0 1 150.705 323.672 Tm 
[(some)-251(terminology)-252(pr)19(oblems)-252(because)-251(the)-252(same)-251(concept)-251(may)-252(be)-251(denoted)-252(thr)19(ough)]TJ 1.013 0 0 1 150.705 311.717 Tm [(dif)18(fer)18(ent)-248(names)-247(depending)-247(on)-248(the)-247(application)-248(ar)18(ea.)-308(The)-247(PSBLAS)-247(featur)17(es)-247(pr)18(e-)]TJ 1.02 0 0 1 150.705 299.762 Tm [(sented)-289(in)-290(this)-289(document)-290(will)-289(be)-290(discussed)-289(r)17(eferring)-289(to)-290(a)-289(\002nite)-290(dif)18(fer)18(ence)-289(dis-)]TJ 1.012 0 0 1 150.705 287.807 Tm [(cr)18(etization)-248(of)-248(a)-248(Partial)-248(Dif)18(fer)18(ential)-248(Equation)-248(\050PDE\051.)-247(However)73(,)-248(the)-248(scope)-248(of)-248(the)]TJ 1.02 0 0 1 150.705 275.852 Tm [(library)-249(is)-249(wider)-249(than)-249(that:)-312(for)-250(exampl)1(e,)-251(it)-249(can)-249(be)-249(applied)-249(to)-249(\002nite)-249(element)-249(dis-)]TJ 0.98 0 0 1 150.705 263.896 Tm [(cr)18(etizations)-240(of)-241(PDEs,)-243(and)-241(even)-241(to)-240(dif)18(fer)18(ent)-241(classes)-240(of)-241(pr)18(oblems)-240(such)-241(as)-241(nonlinear)]TJ 1 0 0 1 150.705 251.941 Tm [(optimization,)-250(for)-250(example)-250(in)-250(optimal)-250(contr)18(ol)-250(pr)18(oblems.)]TJ 1.02 0 0 1 165.649 239.877 Tm [(The)-346(design)-346(of)-345(a)-346(solver)-346(for)-346(sparse)-346(linear)-346(systems)-345(is)-346(driven)-346(by)-346(many)-346(con-)]TJ 1.006 0 0 1 150.705 227.922 Tm [(\003icting)-250(objectives,)-249(such)-250(as)-250(limiting)-249(occupation)-250(of)-250(storage)-249(r)18(esour)18(ces,)-250(exploiting)]TJ 1.013 0 0 1 150.705 215.967 Tm [(r)18(egularities)-247(in)-247(the)-247(input)-247(data,)-247(exploi)1(ting)-247(har)17(dwar)18(e)-247(characteristics)-247(of)-246(the)-247(paral-)]TJ 1.02 0 0 1 150.705 204.012 Tm [(lel)-293(platform.)-447(T)90(o)-292(achieve)-293(an)-293(optimal)-293(communication)-292(to)-293(computation)-293(ratio)-293(on)]TJ 1.02 0 0 1 150.705 192.057 Tm 
[(distributed)-322(memory)-322(machines)-323(it)-322(is)-322(essential)-322(to)-323(ke)1(ep)-323(the)]TJ/F78 9.9626 Tf 1.02 0 0 1 407.18 192.057 Tm [(data)-322(locality)]TJ/F84 9.9626 Tf 1.02 0 0 1 461.316 192.057 Tm [(as)-322(high)]TJ 1.02 0 0 1 150.705 180.101 Tm [(as)-270(possible;)-283(this)-270(can)-270(be)-270(done)-271(thr)18(ough)-270(an)-270(appr)17(opriate)-270(data)-270(allocation)-270(strategy)109(.)]TJ 1.02 0 0 1 150.396 168.146 Tm [(The)-264(choice)-265(of)-264(the)-265(pr)18(econditioner)-264(is)-265(another)-264(very)-265(important)-264(factor)-265(that)-264(af)18(fects)]TJ 1.02 0 0 1 150.705 156.191 Tm [(ef)18(\002ciency)-247(of)-248(the)-248(impleme)1(nted)-248(application.)-311(Optimal)-248(data)-247(distribution)-248(r)18(equir)18(e-)]TJ 1.014 0 0 1 150.705 144.236 Tm [(ments)-247(for)-246(a)-247(given)-247(pr)18(econditioner)-247(may)-246(con\003ict)-247(with)-247(distribution)-246(r)17(equir)18(ements)]TJ 1.02 0 0 1 150.705 132.281 Tm [(of)-310(the)-311(r)18(est)-310(of)-311(the)-310(solver)72(.)-499(Finding)-311(the)-310(optimal)-310(trade-of)17(f)-310(may)-310(be)-310(very)-311(dif)18(\002cult)]TJ 1.016 0 0 1 150.705 120.326 Tm [(because)-246(it)-245(is)-246(application)-245(dependent.)-305(Possible)-246(solutions)-246(to)-245(these)-246(pr)18(oblems)-246(and)]TJ 0 g 0 G - [-1000(100)]TJ + 1 0 0 1 320.07 90.438 Tm [(2)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -52.932 -12.094 Td [(6.20)-550(psb)]TJ -ET -q -1 0 0 1 154.072 621.669 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 621.47 Td [(owned)]TJ ET -q -1 0 0 1 188.064 621.669 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 191.053 621.47 Td [(index)-250(\227)]TJ -0 g 0 G - [-871(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + +endstream +endobj +925 0 obj +<< +/Length 5870 +>> +stream 0 g 0 G - [-1000(101)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG 
- -76.214 -12.095 Td [(6.21)-550(psb)]TJ -ET -q -1 0 0 1 154.072 609.575 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 609.375 Td [(is)]TJ -ET -q -1 0 0 1 164.782 609.575 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q BT -/F54 9.9626 Tf 167.771 609.375 Td [(local)-250(\227)]TJ -0 g 0 G - [-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F84 9.9626 Tf 1.02 0 0 1 99.895 706.129 Tm [(other)-274(important)-274(inputs)-275(to)-274(the)-274(development)-274(of)-275(the)-274(PSBLAS)-274(softwar)18(e)-275(package)]TJ 1.015 0 0 1 99.895 694.174 Tm [(have)-245(come)-246(fr)18(om)-245(an)-245(established)-246(experience)-245(in)-245(applying)-245(the)-246(PSBLAS)-245(solvers)-245(to)]TJ 1 0 0 1 99.895 682.219 Tm [(computational)-250(\003uid)-250(dynamics)-250(applications.)]TJ/F75 14.3462 Tf 0 -33.474 Td [(2)-1000(General)-250(overview)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.587 626.043 Tm [(The)-390(PSBLAS)-390(library)-390(is)-391(designed)-390(to)-390(handle)-390(the)-391(implementation)-390(of)-390(iterative)]TJ 1.02 0 0 1 99.895 614.088 Tm [(solvers)-294(fo)1(r)-294(sparse)-293(linear)-294(systems)-293(on)-294(distributed)-293(memory)-294(parallel)-293(computers.)]TJ 1.02 0 0 1 99.587 602.133 Tm [(The)-332(system)-332(coef)18(\002cient)-332(matrix)]TJ/F78 9.9626 Tf 1 0 0 1 237.557 602.133 Tm [(A)]TJ/F84 9.9626 Tf 1.02 0 0 1 248.248 602.133 Tm [(must)-332(be)-332(squar)18(e;)-376(it)-332(may)-332(be)-332(r)18(eal)-332(or)-332(complex,)]TJ 1.02 0 0 1 99.895 590.178 Tm [(nonsymmetric,)-250(and)-249(its)-249(sparsit)1(y)-249(pattern)-249(needs)-249(not)-249(to)-249(be)-249(symmetric.)-315(The)-249(serial)]TJ 1.02 0 0 1 99.895 578.223 Tm [(computation)-246(part)1(s)-246(ar)18(e)-246(based)-245(on)-246(the)-246(serial)-245(sparse)-246(BLAS,)-245(so)-246(that)-245(any)-246(extension)]TJ 1.02 0 0 1 99.895 
566.267 Tm [(made)-355(to)-355(the)-356(data)-355(str)8(uctur)18(es)-355(of)-356(the)-355(serial)-355(kernels)-355(is)-355(available)-355(to)-356(the)-355(parallel)]TJ 1.015 0 0 1 99.616 554.312 Tm [(version.)-305(The)-245(overall)-245(design)-245(and)-245(parallelization)-245(strategy)-245(have)-245(been)-245(in\003uenced)]TJ 1.02 0 0 1 99.895 542.357 Tm [(by)-281(the)-281(str)8(uctur)18(e)-281(of)-281(the)-281(ScaLAP)90(ACK)-281(parallel)-281(library)109(.)-412(The)-281(layer)18(ed)-281(str)8(uctur)17(e)-281(of)]TJ 1 0 0 1 99.895 530.402 Tm [(the)-251(PSBLAS)-250(library)-251(is)-251(shown)-250(in)-251(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G - [-1000(102)]TJ + [(;)-251(lower)-251(layers)-251(of)-250(the)-251(library)-251(indicate)-250(an)]TJ 1.005 0 0 1 99.895 518.447 Tm [(encapsulation)-247(r)18(elationship)-247(with)-247(upper)-247(layers.)-308(The)-247(ongoing)-247(discussion)-247(focuses)]TJ 1.018 0 0 1 99.895 506.491 Tm [(on)-245(the)-244(Fortran)-244(2008)-245(layer)-244(immediately)-245(below)-244(the)-245(application)-244(layer)72(.)-304(The)-244(serial)]TJ 0.98 0 0 1 99.596 494.536 Tm [(parts)-246(of)-245(the)-246(computation)-246(on)-245(each)-246(pr)18(ocess)-245(ar)18(e)-246(executed)-245(thr)18(ough)-246(calls)-245(to)-246(the)-246(serial)]TJ 0.98 0 0 1 99.895 482.581 Tm [(sparse)-243(BLAS)-243(subr)18(outines.)-312(In)-243(a)-243(similar)-243(way)113(,)-245(the)-243(inter)18(-pr)19(ocess)-243(message)-243(exchanges)]TJ 1 0 0 1 99.895 470.626 Tm [(ar)18(e)-251(encapsulated)-251(in)-251(an)-251(applicaiton)-251(layer)-251(that)-251(has)-251(been)-251(str)18(ongly)-251(inspir)18(ed)-251(by)-251(the)]TJ 0.987 0 0 1 99.895 458.671 Tm [(Basic)-251(Linear)-252(Algebra)-251(Communication)-251(Subr)18(outines)-251(\050BLACS\051)-252(library)-251([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 396.344 458.671 Tm [(7)]TJ 0 g 0 G + 0.987 0 0 1 401.325 458.671 Tm [(].)-313(Usually)]TJ 0.98 0 0 1 99.895 446.716 Tm 
[(ther)18(e)-240(is)-241(no)-241(need)-240(to)-241(deal)-241(dir)19(ectly)-241(with)-240(MPI;)-241(however)75(,)-243(in)-241(some)-240(cases,)-244(MPI)-241(r)19(outines)]TJ 0.98 0 0 1 99.895 434.76 Tm [(ar)18(e)-199(used)-199(di)1(r)18(ectly)-199(to)-199(impr)18(ove)-198(ef)18(\002ciency)113(.)-298(For)-199(further)-199(details)-198(on)-199(our)-199(communication)]TJ 1 0 0 1 99.895 422.805 Tm [(layer)-250(see)-250(Sec.)]TJ 0 0 1 rg 0 0 1 RG - -52.932 -12.094 Td [(6.22)-550(psb)]TJ -ET -q -1 0 0 1 154.072 597.481 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 597.281 Td [(local)]TJ -ET -q -1 0 0 1 178.301 597.481 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 181.29 597.281 Td [(index)-250(\227)]TJ + [-250(7)]TJ +0 g 0 G + [(.)]TJ 0 g 0 G - [-1101(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(103)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -66.451 -12.094 Td [(6.23)-550(psb)]TJ ET +1 0 0 1 207.727 270.025 cm q -1 0 0 1 154.072 585.386 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 585.187 Td [(get)]TJ -ET +.65 0 0 .65 0 0 cm q -1 0 0 1 171.217 585.386 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 0 0 cm +/Im2 Do Q +Q +0 g 0 G +1 0 0 1 -207.727 -270.025 cm BT -/F54 9.9626 Tf 174.206 585.187 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ +/F84 9.9626 Tf 165.575 238.145 Td [(Figur)18(e)-250(1:)-310(PSBLAS)-250(library)-250(components)-250(hierar)18(chy)111(.)]TJ 0 g 0 G - [-827(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(104)]TJ + 1.02 0 0 1 114.839 215.967 Tm [(The)-316(type)-315(of)-316(linear)-316(system)-315(matrices)-316(that)-316(we)-315(addr)17(ess)-315(typically)-316(arise)-315(in)-316(the)]TJ 0.98 0 0 1 99.895 204.012 Tm 
[(numerical)-223(solution)-223(of)-223(PDEs;)-234(in)-223(such)-223(a)-223(context,)-229(it)-223(is)-224(ne)1(cessary)-224(to)-223(pay)-223(special)-223(atten-)]TJ 0.998 0 0 1 99.895 192.057 Tm [(tion)-250(t)1(o)-250(the)-249(str)8(uctur)18(e)-250(of)-249(the)-250(pr)18(oblem)-249(fr)18(om)-250(which)-249(the)-250(application)-249(originates.)-310(The)]TJ 0.98 0 0 1 99.895 180.101 Tm [(nonzer)18(o)-213(pattern)-214(of)-214(a)-214(matrix)-214(arising)-213(fr)18(om)-214(the)-214(discr)19(etization)-214(of)-214(a)-214(PDE)-213(is)-214(in\003uenced)]TJ 1.001 0 0 1 99.895 168.146 Tm [(by)-249(various)-248(factors,)-249(such)-249(as)-249(the)-248(shape)-249(of)-249(the)-248(domain,)-249(the)-249(discr)18(etization)-248(strategy)110(,)]TJ 1.02 0 0 1 99.895 156.191 Tm [(and)-263(the)-262(equation/unknown)-263(or)18(dering.)-357(The)-263(matrix)-263(itself)-262(can)-263(be)-263(interpr)18(eted)-263(as)]TJ 1 0 0 1 99.895 144.236 Tm [(the)-250(adjacency)-250(matrix)-250(of)-250(the)-250(graph)-250(associated)-250(with)-250(the)-250(discr)18(etization)-250(mesh.)]TJ 1.02 0 0 1 114.839 132.281 Tm [(The)-380(distribution)-380(of)-380(the)-380(coef)18(\002cient)-380(matrix)-379(for)-380(the)-380(linear)-380(system)-380(is)-380(based)]TJ 1.02 0 0 1 99.895 120.326 Tm [(on)-272(the)-271(\223owner)-272(computes\224)-271(r)7(ule:)-357(the)-271(variable)-272(associated)-271(to)-272(each)-272(mes)1(h)-272(point)-272(is)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -59.367 -12.094 Td [(6.24)-550(psb)]TJ -ET -q -1 0 0 1 154.072 573.292 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 573.093 Td [(get)]TJ -ET -q -1 0 0 1 171.217 573.292 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 573.093 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ + 1 0 0 1 269.26 90.438 Tm [(3)]TJ 0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +ET + +endstream +endobj +921 0 obj +<< +/Type /XObject +/Subtype /Form 
+/FormType 1 +/PTEX.FileName (./figures/psblas.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 928 0 R +/BBox [0 0 197 215] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 929 0 R +>>/Font << /R8 930 0 R>> +>> +/Length 898 +/Filter /FlateDecode +>> +stream +xœµVM7 ½ëWèÖ4€Y‘ú>&@[HMl ‡¢câu·;{þý’#QvvÝKÐÂÏ{#=RüÐð«u€ÖɯÿOóÓÇl÷g³°öã¯ýá´7_ ¶çþ7ìÛ // ¢Ãl7÷¦É E„ŒŽ,E(%’ÝÌ«õîô°íúq{:ï~Üüe0ƒsµØÀíæ³yõöý›µ¼ûyc>Ÿ(Ú¿M, +¯ ¼”ìá3›"dQžÙ7_Û³ËXm2‚0— +¼î(:HhU3vܪÅ“Aª f6è=A΃AŸÄ!9`³Ç +PwæAt_UOÏ¡OæO³æl8f¢ãPHÄY¥æ«‡åÕÌ+k(*«¦XÁI䓉uyQ¥ +Åá ¢ï¹Éø*Þcv ( ñÌÿôÚÇJÊBt«FpÍ©vvË×VþÕ‘” +„rÓ‘û×&@^ +éÿ2!)Õš\‘Œ÷¼ža®8Î7L¤ÂõŸÇ )]aôà¥`;¡vç ãp½ªba=WY³¨Hj.8‰µ2Rt%ùÁ` R=EÝ 4¢{4_póXÕFwt{Š[Íf.óÈv¾†Ê‘yÎÌÆGÞ§Áxq€Èúš‹ú‚c—++SJëgÅ¡Br5XUäÛFZYMv8™\øH”˜M!‚¢%rl9ª§¸Ù›Œbõh¾0ÝgÔ3©EÅ-:º‹¼i¾>9ÎßÑò…/aëUËv±Òu‹ß`ú®›Lk„¡ÀtÕ—ßö¼E÷õ¶Ž¿áæsù—¼0ȽýßI +~·œíÕÏãÎh§ÀP‰ó+‰I‰Ë`ä8Äwsâ~L¥¼–U9‰–2ó¢ß”ÇM®F‘ +ÁóÕÞ²þ-ÃÉu1 ƒéuÄóŠ +}¦öqëXË|Vb´A—Ó6QƒŠY¡8Õ†% +Ûæ“Ž=…åËE²A5) +í}óiV<\îrãDÝ âÖß7x¤U +«Ͻ'ƒÇ›ÇÇùaÚ>=|9Êh±Büuæ¹£$îËLËàq·–ÑÃÊŸ5×k^Þ½;>íN÷Ûi™ZŠ\V+9D£­8îNËLÓG™÷×»~0+¾’”àŠ'¢ˆ±íúmw>o÷;{·=ŸŽûEý—»a¥ÃѲîîÝâ8SË4Â%ÕÇ¥_¾œžNÛ#OαéƒùüÐ +endstream +endobj +932 0 obj +<< +/Filter /FlateDecode +/Subtype /Type1C +/Length 2887 +>> +stream +xœe–iXSWÇo É=Z¤-iʦ µ­m]‘Ö*ÕVYÔZQU (! 
KBI ,A8,!, Ö„M‚T¥*(¶ÖQ¬ ÖœÇn:Jg9—¹|˜‹vž~˜/÷¹÷žåyß÷ÿ?¿÷Ð0‡%Fs ¤ðÓ׋SbE‹ßë±b ±’IÑü?æw0VbôåБšV82ÑÔ«Èò2*x£Óhr}›¿X’•&HHÌð|÷ppø{k×®ûóÏ&ÏãYÿñ à§ Dž«©_(–¤ðEÛ<ý©ÙB¡à„g‚0K’˜îÇ[\+ä'{î‰Xæù®ÿ{žÞ^^›ÖSÍ)Ç¥éžÏ#÷< öñ ô æ'H…±iÿ?‚a˜›¯ÈO,IÛž±76ðÄ~~|P‚ Dèéåý>†­ÂbobAØ!,{ Åck0?l=€mÀvc{°MاØgØØ~Œ†½Š±°×0wªx˜ÆÇîѲiW—ì]ÒA~Êá‡}¿2ü&&ÎŒg^Æ·g YúêÒK-=½ ,Ó'æã¡E oXhóafö±rMMⲌ¸îšÉ$ãrö‘«s²£‹Ýä(ÊŒGgm†ò:‘{-e sp­A¢O…*7˜¦È+rT…/¨Q áȆic8'¡‰p¿DûvÌÐë‰UìÊòšÒZlÕÊ(.YŠÃà\UDP¢DQ¡4@;@§ðÏGš =œkîå’|ª‘‹“âLx”.ÇÏ”…ÿ»ì{T¨Ø·‡ó/(‹Êä†f»9Ío„fy1Û'7;#ÆÒÎ}„pVRÃü6Ë$Çg´†\è ‚pè«Vo= XIrôW|N5õ²w[Fàçp8k@Ø‘b•>«36Õ´6–°­ÙÔ7莼'H_Žù˜ý7xVkWRËÇÓ­'ÝáaY܉Œä¬ãù{!ˆRUwsÑ e“¾ÚÄf²„ŸŒöþ8Œ·žKUEc#¼m´ŽPÝtTNİÉ×½Ö’ÒãçÕÈ9?ýâ —OÈ׸j1{æÊZr%É8¶gg\|뀌+˾§À“ÑkSj/2¥Aј…R4Ý„ÓUç»3HúÈ…¥­&V²«Kõ°‚SzÍîB"Ãssƒ +kµiqÖ±F|g©º~ Ð&¼7©U3Zör@o£Wv=^ÂK;ÌãÞÇ ËËÕ¡ +7´f'»ó²ýìs;H@Ò£vú ¶wrœÐ#Jã-´{3t=áή+¯.5@ÐVÍã’å8Œ,(Ø—CiW`Â÷”+Lp )Þ}æ¼µ²Z«©åÔ«j ´šl=2‹ V”Åý ×–…IÔAr7'â hF“ƒ¨ºŸRôïs.¬D"e³‘2ÏÙ–¥Üù6JcÚt•VZÁD جlØÚ˜'Ñ›Œ“u_Žß)4ªà.°° ‡[sÕ;´”U{Ìø'ª:x z”†€á¦iα,êfç ×Ï-Ö:ôtƒ]„V0r™ÅÅy%%EЭjÊru€ÕP%•V¤zlŽðé>1ÍHêÍlH‡B7^²8B 46Ë9²¶¼õ8È&½ŒL}MEUV`5¬+n¡Äh(´Y‹=î5që|zÏA;—ľ×+Û Õm Ëvv¼SàÝBÕ¸K3F8v;‘–Ü?ãšEUD›us5Ìš’òBNA^a^†]X³¹…Zwx²´¸¼°n~A^dn錹r®¿u¬‡£®•¥)4R说~ÉE§qÖì3梌æy3 ástêÓØp§:gËIªTç͸OyvüÔ©ù¦Î8SÌäy9>ûâ,9Ašñà µÕùò÷è݇.¬âÀ×l…*G«„ £ n˜‹Âq8¦íP¶Jí1æ ÖûFfX2[Z›,Í%º=·¸º¤ +êµ»yàœUt˜s'7ìWäÇð¥™*L¬ˆÀaÞäð`ÓØ$‡e ×5Ê=NÁ&Co?5Dnüž …ÚÕ7 'a«Øøx‡wºQ¬öû ]£\K¾L™vÂÜÕßT§5rêS šZÌ-v^ûÁýa’£©ÜT^~BÉ6à“üyƒ ýrsüño4QFÌ­…wºœðhh@çaÿÛÊF4 +´ÇÐ0»½¸¡¤NË5gZ¯vÛ'á×pHÕ/é81üm=…Œ&«Ò7¿FýŸsaK¾fGÁ‹ân/W.š%$dV½ÞŠ<®ykMP$‰gªJË2ž3W4ŽÞ· —6”tZg£Ùn¡ìûVH"\Ù§?Á3¡²G»‡þšÜÝ‹ºhLø^]~Í"KÎ""Œ-ÉÈ‹›3l-ͶŒÕô¾PÙ¥‚Xœ§!Õ3Ö +"d~+[Æ,ÊÈÍU@ TT(ª«§.>¾*Ù#òI¢DaV, ‡Û†CCÐmþØñ6~µZ'ƒéàÓÈ£~Þ—S$'ƒÙI&2š™¬@s©¡ÎäQ ŠÌÔNÊþü„?~ðûw¡×I×)Øax\ê¿62$Oèåt%×¥ÖQíâyú´_gèæy:»’Y[j(­‚ »FEõÎTV©µT®¹&<°"§^Ä/xQY¸‚¯Ù­psBå”+TíóÎí´Ö§Èø”ŽôTj¡0VŸx(„Oâ\ +IZ¹t4¤w!}^€gì½—ú€h9@±^è Ò…s2•=3B2H>ÉçynÞÌû7ŠC‰#ˆþhñÚï-whèÇGtBŒV²Õºø`uÞ1èFæ3ÑvðøçÓ÷à´ÛoÝ34\z"ž“,P&+š´®¿Ú»oB03¼å㣶xsÉÝä!FáŽ/†m'~¼êŒn?àßvaÝDëÑ÷ì+°¥¸5ïšôt0Ü ¶æ+ÉÔ·ÆsRz…t¥J–xFzëþƒöÁQîè`û¼/dŠº2›åž™j@7GÛú.º?Üwyc$O‘œÀI‘(S¥ME®ƒßŒô| Áõ^`‚2!-›’"VîIN™"Ì‚B-ÌÎe³/uV9:Î6:.ǰÿMɪH +endstream +endobj 
+938 0 obj +<< +/Length 9368 +>> +stream 0 g 0 G - [-1000(105)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -59.367 -12.094 Td [(6.25)-550(psb)]TJ -ET -q -1 0 0 1 154.072 561.198 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 560.999 Td [(sp)]TJ -ET -q -1 0 0 1 167.87 561.198 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q BT -/F54 9.9626 Tf 170.859 560.999 Td [(getr)18(ow)-250(\227)-250(Extract)-250(r)18(ow\050s\051)-250(fr)18(om)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G - [-671(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(106)]TJ +/F84 9.9626 Tf 1.02 0 0 1 150.705 706.129 Tm [(assigned)-295(to)-294(a)-295(pr)17(oce)1(ss)-295(that)-295(will)-295(own)-294(the)-295(corr)18(esponding)-295(r)18(ow)-295(in)-295(the)-295(coef)18(\002cient)]TJ 1.02 0 0 1 150.705 694.174 Tm [(matrix)-247(and)-246(will)-247(carry)-247(out)-247(all)-246(r)17(elated)-247(computations.)-308(This)-247(allocation)-247(strategy)-247(is)]TJ 0.98 0 0 1 150.705 682.219 Tm [(equivalent)-251(to)-251(a)-251(partition)-252(of)-251(the)-251(discr)19(etization)-252(mesh)-251(into)]TJ/F78 9.9626 Tf 0.98 0 0 1 390.229 682.219 Tm [(sub-domains)]TJ/F84 9.9626 Tf 0.98 0 0 1 440.314 682.219 Tm [(.)-315(Our)-251(library)]TJ 1.02 0 0 1 150.705 670.263 Tm [(supports)-297(any)-298(distribution)-297(that)-297(keeps)-298(together)-297(the)-297(coef)17(\002)1(cients)-297(of)-298(each)-297(matrix)]TJ 1.02 0 0 1 150.705 658.308 Tm [(r)18(ow;)-366(ther)18(e)-326(ar)18(e)-325(no)-326(other)-325(constraints)-325(on)-326(the)-325(variable)-326(assignment)1(.)-545(This)-326(choice)]TJ 1.02 0 0 1 150.705 646.353 Tm [(is)-276(consistent)-275(with)-276(simple)-276(data)-276(dist)1(ributions)-276(such)-276(as)]TJ/F145 9.9626 Tf 1 0 0 1 383.652 646.353 Tm [(CYCLIC\050N\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 433.526 646.353 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 453.532 646.353 Tm [(BLOCK)]TJ/F84 9.9626 Tf 1.02 0 0 1 479.684 646.353 Tm [(,)-276(as)]TJ 1.02 0 0 1 150.286 634.398 Tm 
[(well)-256(as)-256(completely)-256(arbitrary)-256(assignments)-256(of)-256(equation)-256(indices)-256(to)-256(pr)18(ocesses.)-337(In)]TJ 1.002 0 0 1 150.406 622.443 Tm [(particular)-250(it)-250(is)-251(consistent)-250(with)-250(the)-250(usage)-250(of)-251(graph)-250(partitioning)-250(tools)-250(commonly)]TJ 1.02 0 0 1 150.705 610.488 Tm [(available)-288(in)-288(the)-289(literatur)18(e,)-299(e.g.)-434(METIS)-288([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 326.838 610.488 Tm [(14)]TJ 0 g 0 G + 1.02 0 0 1 336.801 610.488 Tm [(].)-433(Dense)-289(vectors)-288(conform)-288(to)-288(sparse)]TJ 0.98 0 0 1 150.705 598.532 Tm [(matrices,)-228(that)-221(is,)-229(the)-221(entries)-221(of)-222(a)-221(vector)-221(follow)-222(the)-221(same)-222(distribution)-221(of)-221(the)-222(matrix)]TJ 1 0 0 1 150.705 586.577 Tm [(r)18(ows.)]TJ 1.02 0 0 1 165.649 573.93 Tm [(W)90(e)-290(assume)-290(that)-291(the)-290(sparse)-290(matrix)-290(is)-291(built)-290(in)-290(parallel,)-302(wher)18(e)-290(each)-291(pr)18(ocess)]TJ 1.002 0 0 1 150.705 561.974 Tm [(generates)-249(its)-249(own)-249(portion.)-310(W)92(e)-249(never)-249(r)18(equir)18(e)-249(that)-250(the)-249(entir)18(e)-249(matrix)-249(be)-249(available)]TJ 0.98 0 0 1 150.705 550.019 Tm [(on)-244(a)-244(single)-244(node.)-313(However)76(,)-247(i)1(t)-245(is)-244(possible)-244(to)-244(hold)-244(the)-244(entir)19(e)-244(matrix)-244(in)-245(on)1(e)-245(pr)19(ocess)]TJ 0.98 0 0 1 150.705 538.064 Tm [(and)-198(distribute)-199(it)-198(explicitly)]TJ 0 0 1 rg 0 0 1 RG - -56.02 -12.094 Td [(6.26)-550(psb)]TJ -ET -q -1 0 0 1 154.072 549.104 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 548.905 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ +/F84 7.5716 Tf 1 0 0 1 260.936 541.68 Tm [(1)]TJ 0 g 0 G - [-251(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F84 9.9626 Tf 0.98 0 0 1 265.22 538.064 Tm 
[(,)-210(even)-198(though)-199(the)-198(r)18(esulting)-198(memory)-199(bottleneck)-198(would)]TJ 1 0 0 1 150.705 526.109 Tm [(make)-250(this)-250(option)-250(unattractive)-250(in)-250(most)-250(cases.)]TJ/F75 11.9552 Tf 0 -33.074 Td [(2.1)-1000(Basic)-250(Nomenclature)]TJ/F84 9.9626 Tf 1.02 0 0 1 150.705 472.73 Tm [(Our)-254(computational)-254(model)-255(implies)-254(that)-254(the)-254(data)-255(alloc)1(ation)-255(on)-254(the)-254(parallel)-254(dis-)]TJ 0.981 0 0 1 150.705 460.774 Tm [(tributed)-253(memory)-254(machine)-254(is)-253(guided)-254(by)-253(the)-254(str)8(uctur)19(e)-254(of)-253(the)-254(physical)-253(model,)-254(and)]TJ 1 0 0 1 150.705 448.819 Tm [(speci\002cally)-250(by)-250(the)-250(discr)18(etization)-250(mesh)-250(of)-250(the)-250(PDE.)]TJ 1.02 0 0 1 165.649 436.172 Tm [(Each)-341(point)-341(of)-341(the)-341(discr)18(etization)-341(mesh)-341(will)-341(have)-341(\050at)-341(least\051)-341(one)-341(associated)]TJ 1.02 0 0 1 150.705 424.216 Tm [(equation/variable,)-353(and)-332(ther)18(efor)17(e)-331(one)-332(index.)-564(W)91(e)-332(say)-332(that)-332(point)]TJ/F78 9.9626 Tf 1 0 0 1 440.497 424.216 Tm [(i)]TJ 1.02 0 0 1 446.831 424.216 Tm [(depends)]TJ/F84 9.9626 Tf 1.02 0 0 1 482.953 424.216 Tm [(on)]TJ 1.02 0 0 1 150.406 412.261 Tm [(point)]TJ/F78 9.9626 Tf 1 0 0 1 177.509 412.261 Tm [(j)]TJ/F84 9.9626 Tf 1.02 0 0 1 183.194 412.261 Tm [(if)-275(the)-274(equation)-275(for)-274(a)-275(variable)-275(associate)1(d)-275(with)]TJ/F78 9.9626 Tf 1 0 0 1 386.696 412.261 Tm [(i)]TJ/F84 9.9626 Tf 1.02 0 0 1 392.451 412.261 Tm [(contains)-275(a)-274(term)-275(in)]TJ/F78 9.9626 Tf 1 0 0 1 476.753 412.261 Tm [(j)]TJ/F84 9.9626 Tf 1.02 0 0 1 479.647 412.261 Tm [(,)-282(or)]TJ 1.02 0 0 1 150.705 400.306 Tm [(equivalently)-262(if)]TJ/F78 9.9626 Tf 1 0 0 1 218.767 400.306 Tm [(a)]TJ/F78 7.5716 Tf 4.589 -1.96 Td [(i)-67(j)]TJ/F190 10.3811 Tf 8.519 1.96 Td [(6)]TJ/F192 10.3811 Tf 0.249 0 Td [(=)]TJ/F84 9.9626 Tf 1.02 0 0 1 243.403 400.306 Tm 
[(0.)-354(After)-262(the)-262(partition)-262(of)-262(the)-262(discr)18(etization)-262(mesh)-262(into)]TJ/F78 9.9626 Tf 1.02 0 0 1 478.378 400.306 Tm [(sub-)]TJ 1.02 0 0 1 150.705 388.351 Tm [(domains)]TJ/F84 9.9626 Tf 1.02 0 0 1 188.452 388.351 Tm [(assigned)-326(to)-325(the)-326(parallel)-325(pr)17(ocesses,)-345(we)-326(classify)-326(the)-325(points)-326(of)-325(a)-326(given)]TJ 1 0 0 1 150.705 376.396 Tm [(sub-domain)-250(as)-250(following.)]TJ 0 g 0 G - [-1000(108)]TJ +/F75 9.9626 Tf 0 -22.003 Td [(Internal.)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(6.27)-550(Sorting)-250(utilities)-250(\227)]TJ +/F84 9.9626 Tf 1.02 0 0 1 194.311 354.393 Tm [(An)-313(internal)-313(point)-312(of)-313(a)-313(given)-313(domain)]TJ/F78 9.9626 Tf 1.02 0 0 1 363.064 354.393 Tm [(depends)]TJ/F84 9.9626 Tf 1.02 0 0 1 398.994 354.393 Tm [(only)-313(on)-313(points)-312(of)-313(the)]TJ 1.02 0 0 1 175.611 342.438 Tm [(same)-387(domain.)-729(If)-387(all)-387(points)-387(of)-386(a)-387(domain)-387(ar)17(e)-386(assigned)-387(to)-387(one)-387(pr)18(ocess,)]TJ 0.991 0 0 1 175.611 330.483 Tm [(then)-252(a)-252(computational)-251(step)-252(\050e.g.,)-252(a)-252(matrix-vector)-252(pr)19(oduct\051)-252(of)-252(the)-252(equations)]TJ 1.02 0 0 1 175.611 318.527 Tm [(associated)-369(with)-368(the)-369(internal)-368(points)-369(r)18(equir)18(es)-369(no)-368(data)-369(items)-368(fr)17(om)-368(other)]TJ 1 0 0 1 175.611 306.572 Tm [(domains)-250(and)-250(no)-250(communications.)]TJ 0 g 0 G - [-1157(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F75 9.9626 Tf -24.906 -22.695 Td [(Boundary)92(.)]TJ 0 g 0 G - [-1000(109)]TJ +/F84 9.9626 Tf 0.98 0 0 1 201.713 283.877 Tm [(A)-237(point)-238(of)-237(a)-237(given)-237(domain)-238(is)-237(a)-237(boundary)-237(point)-238(if)-237(it)]TJ/F78 9.9626 Tf 0.98 0 0 1 420.258 283.877 Tm [(depends)]TJ/F84 9.9626 Tf 0.98 0 0 1 
454.042 283.877 Tm [(on)-237(points)]TJ 1 0 0 1 175.611 271.922 Tm [(belonging)-250(to)-250(other)-250(domains.)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -22.149 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ +/F75 9.9626 Tf -24.906 -22.695 Td [(Halo.)]TJ 0 g 0 G - [-17835(111)]TJ +/F84 9.9626 Tf 1.02 0 0 1 179.925 249.227 Tm [(A)-357(halo)-357(point)-356(for)-357(a)-357(given)-357(domain)-357(is)-357(a)-357(point)-356(belonging)-357(to)-357(another)-357(do-)]TJ 1.009 0 0 1 175.611 237.271 Tm [(main)-248(such)-247(that)-247(ther)17(e)-247(is)-248(a)-247(boundary)-248(point)-247(which)]TJ/F78 9.9626 Tf 1.009 0 0 1 388.252 237.271 Tm [(depends)]TJ/F84 9.9626 Tf 1.009 0 0 1 423.138 237.271 Tm [(on)-247(it.)-308(Whenever)]TJ 1.02 0 0 1 175.313 225.316 Tm [(performing)-297(a)-297(computational)-297(step,)-311(such)-297(as)-297(a)-297(matrix-vector)-297(pr)17(oduct,)-310(the)]TJ 1.012 0 0 1 175.333 213.361 Tm [(values)-247(associated)-247(with)-247(halo)-248(points)-247(ar)18(e)-247(r)18(equested)-248(fr)18(om)-247(other)-247(domains.)-307(A)]TJ 1.005 0 0 1 175.611 201.406 Tm [(boundary)-248(point)-248(of)-248(a)-247(given)-248(domain)-248(is)-248(usually)-248(a)-248(halo)-247(point)-248(for)-248(some)-248(other)]TJ 0.999 0 0 1 175.611 189.451 Tm [(domain)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.094 Td [(7.1)-1050(psb)]TJ +/F84 7.5716 Tf 1 0 0 1 209.58 193.067 Tm [(2)]TJ +0 g 0 G +/F84 9.9626 Tf 0.999 0 0 1 213.864 189.451 Tm [(;)-250(ther)18(efor)18(e)-249(the)-249(car)18(dinality)-250(of)-249(the)-250(boundary)-249(points)-249(set)-250(determines)]TJ 1 0 0 1 175.611 177.496 Tm [(the)-250(amount)-250(of)-250(data)-250(sent)-250(to)-250(other)-250(domains.)]TJ +0 g 0 G ET q -1 0 0 1 154.072 502.766 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 150.705 168.389 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S Q BT -/F54 9.9626 Tf 157.061 502.567 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ +/F84 5.9776 
Tf 161.564 161.427 Td [(1)]TJ/F84 7.9701 Tf 3.487 -2.893 Td [(In)-250(our)-250(pr)18(ototype)-250(implementation)-250(we)-250(pr)18(ovide)-250(sample)-250(scatter/gather)-250(r)18(outines.)]TJ/F84 5.9776 Tf -3.487 -6.922 Td [(2)]TJ/F84 7.9701 Tf 1.02 0 0 1 164.804 148.719 Tm [(This)-350(is)-351(the)-350(normal)-350(situation)-350(when)-351(the)-350(pattern)-350(of)-350(the)-351(sparse)-350(matrix)-350(is)-350(symmetric,)-377(which)-350(is)]TJ 1.02 0 0 1 150.705 139.255 Tm [(equivalent)-244(to)-245(say)-244(that)-244(the)-245(interaction)-244(between)-244(two)-245(variables)-244(is)-245(r)18(ecipr)18(ocal.)-304(If)-244(the)-245(matrix)-244(pattern)-244(is)]TJ 1.02 0 0 1 150.705 129.79 Tm [(non-symmetric)-249(we)-249(may)-249(have)-249(one-way)-249(interactions,)-251(and)-249(these)-249(could)-249(cause)-249(a)-249(situation)-249(in)-249(which)-249(a)]TJ 1 0 0 1 150.705 120.326 Tm [(boundary)-250(point)-250(is)-250(not)-250(a)-250(halo)-250(point)-250(for)-250(its)-250(neighbour)74(.)]TJ 0 g 0 G - [-766(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(112)]TJ +/F84 9.9626 Tf 169.365 -29.888 Td [(4)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(7.2)-1050(psb)]TJ ET -q -1 0 0 1 154.072 490.672 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 490.472 Td [(info)-264(\227)-264(Return)-264(information)-264(abou)1(t)-264(PSBLAS)-264(parallel)-264(envir)18(on-)]TJ -19.308 -11.955 Td [(ment)]TJ + +endstream +endobj +951 0 obj +<< +/Length 5241 +>> +stream 0 g 0 G - [-930(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(113)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -12.094 Td [(7.3)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 466.622 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q BT -/F54 9.9626 Tf 157.061 466.423 Td 
[(exit)-250(\227)-250(Exit)-250(fr)18(om)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ +/F75 9.9626 Tf 99.895 706.129 Td [(Overlap.)]TJ 0 g 0 G - [-823(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F84 9.9626 Tf 1.006 0 0 1 144.059 706.129 Tm [(An)-248(overlap)-248(point)-248(is)-248(a)-248(boundary)-249(point)-248(assigned)-248(to)-248(multiple)-248(domains.)]TJ 0.983 0 0 1 124.413 694.174 Tm [(Any)-254(ope)1(ration)-254(that)-253(involves)-254(an)-253(overlap)-254(point)-253(has)-254(to)-253(be)-254(r)19(eplicated)-254(for)-253(each)]TJ 1 0 0 1 124.802 682.219 Tm [(assignment.)]TJ 0.98 0 0 1 99.895 663.276 Tm [(Overlap)-232(points)-233(do)-232(not)-233(usually)-232(exist)-233(in)-232(the)-233(basic)-232(data)-233(distributions;)-240(however)-232(they)]TJ 0.989 0 0 1 99.895 651.321 Tm [(ar)18(e)-251(a)-252(featur)19(e)-252(of)-251(Domain)-252(Decomposi)1(tion)-252(Schwarz)-251(pr)18(econditioners)-251(which)-252(ar)18(e)-251(the)]TJ 1 0 0 1 99.895 639.365 Tm [(subject)-250(of)-250(r)18(elated)-250(r)18(esear)18(ch)-250(work)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(4)]TJ 0 g 0 G - [-1000(114)]TJ + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-250(3)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.4)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 454.528 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 454.329 Td [(get)]TJ -ET -q -1 0 0 1 171.217 454.528 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 454.329 Td [(mpi)]TJ -ET -q -1 0 0 1 192.487 454.528 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 195.476 454.329 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ + [(].)]TJ 0.995 0 0 1 114.839 627.41 Tm [(W)92(e)-251(denote)-251(the)-251(sets)-251(of)-251(internal,)-251(boundary)-251(and)-251(halo)-251(points)-251(for)-251(a)-251(given)-251(subdo-)]TJ 1.001 0 0 1 99.895 615.455 Tm [(main)-250(by)]TJ/F190 10.3811 Tf 1 0 0 1 138.553 615.455 Tm [(I)]TJ/F84 9.9626 Tf 1.001 0 0 1 145.084 615.455 Tm 
[(,)]TJ/F190 10.3811 Tf 1 0 0 1 150.19 615.455 Tm [(B)]TJ/F84 9.9626 Tf 1.001 0 0 1 159.926 615.455 Tm [(and)]TJ/F190 10.3811 Tf 1 0 0 1 179.422 615.455 Tm [(H)]TJ/F84 9.9626 Tf 1.001 0 0 1 188.394 615.455 Tm [(.)-310(Each)-249(subdomain)-249(is)-250(assigned)-249(to)-250(one)-249(pr)18(ocess;)-250(each)-249(pr)18(ocess)]TJ 1.02 0 0 1 99.895 603.5 Tm [(usually)-286(owns)-285(one)-286(subdomain,)-295(although)-286(the)-285(user)-286(may)-286(choose)-285(to)-286(assign)-285(mor)17(e)]TJ 1.02 0 0 1 99.895 591.545 Tm [(than)-257(one)-257(subdomain)-257(to)-257(a)-257(pr)18(ocess.)-340(If)-257(each)-257(pr)18(ocess)]TJ/F78 9.9626 Tf 1 0 0 1 322.219 591.545 Tm [(i)]TJ/F84 9.9626 Tf 1.02 0 0 1 327.794 591.545 Tm [(owns)-257(one)-257(subdomain,)-260(the)]TJ 0.983 0 0 1 99.895 579.589 Tm [(number)-255(of)-255(r)19(ows)-255(in)-255(the)-255(local)-255(sparse)-255(matrix)-255(is)]TJ/F190 10.3811 Tf 1 0 0 1 292.255 579.589 Tm [(j)-24(I)]TJ/F78 7.5716 Tf 8.944 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F192 10.3811 Tf 5.067 0 Td [(+)]TJ/F190 10.3811 Tf 10.258 0 Td [(j)-24(B)]TJ/F78 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F84 9.9626 Tf 0.983 0 0 1 335.387 579.589 Tm [(,)-255(and)-255(the)-255(number)-255(of)-254(local)]TJ 0.982 0 0 1 99.895 567.634 Tm [(columns)-254(\050i.e.)-316(those)-254(for)-255(which)-254(ther)18(e)-254(exists)-254(at)-255(least)-254(one)-254(non-zer)18(o)-255(ent)1(ry)-255(in)-254(the)-255(local)]TJ 1 0 0 1 99.895 555.679 Tm [(r)18(ows\051)-250(is)]TJ/F190 10.3811 Tf 37.275 0 Td [(j)-24(I)]TJ/F78 7.5716 Tf 8.944 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F192 10.3811 Tf 5.065 0 Td [(+)]TJ/F190 10.3811 Tf 10.256 0 Td [(j)-24(B)]TJ/F78 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F192 10.3811 Tf 5.065 0 Td [(+)]TJ/F190 10.3811 Tf 10.256 0 Td [(j)-24(H)]TJ/F78 7.5716 Tf 12.052 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F84 9.9626 Tf 3.004 0 Td [(.)]TJ 0 g 0 G - 
[-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(115)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -80.637 -12.094 Td [(7.5)-1050(psb)]TJ ET +1 0 0 1 171.652 541.675 cm q -1 0 0 1 154.072 442.434 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 442.235 Td [(get)]TJ -ET +0 -1 1 0 0 0 cm q -1 0 0 1 171.217 442.434 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 442.235 Td [(mpi)]TJ -ET +.65 0 0 .65 0 0 cm q -1 0 0 1 192.487 442.434 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 0 0 cm +/Im3 Do Q +Q +Q +0 g 0 G +1 0 0 1 -171.652 -541.675 cm BT -/F54 9.9626 Tf 195.476 442.235 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ +/F84 9.9626 Tf 209.993 335.398 Td [(Figur)18(e)-250(2:)-310(Point)-250(class\002cation.)]TJ 0 g 0 G - [-498(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(116)]TJ + 0.98 0 0 1 114.839 311.71 Tm [(This)-211(classi\002cation)-211(of)-211(mesh)-211(points)-211(guides)-211(the)-211(naming)-211(scheme)-212(t)1(hat)-212(we)-211(adopted)]TJ 0.982 0 0 1 99.895 299.754 Tm [(in)-256(the)-255(library)-256(internals)-255(and)-256(in)-255(the)-256(data)-256(str)9(uctur)18(es.)-319(W)94(e)-256(explicitly)-255(note)-256(that)-255(\223Halo\224)]TJ 1 0 0 1 99.596 287.799 Tm [(points)-250(ar)18(e)-250(also)-250(often)-250(called)-250(\223ghost\224)-250(points)-250(in)-250(the)-250(literatur)18(e.)]TJ/F75 11.9552 Tf 0.299 -28.902 Td [(2.2)-1000(Library)-250(contents)]TJ/F84 9.9626 Tf -0.308 -18.964 Td [(The)-250(PSBLAS)-250(library)-250(consists)-250(of)-250(various)-250(classes)-250(of)-250(subr)18(outines:)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -80.637 -12.094 Td [(7.6)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 430.34 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 430.141 Td [(wtime)-250(\227)-250(W)92(all)-250(clock)-250(timing)]TJ +/F75 9.9626 Tf 0.308 -18.943 Td [(Computational)-250(routines)]TJ 0 g 0 G 
- [-499(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F84 9.9626 Tf 113.723 0 Td [(comprising:)]TJ 0 g 0 G - [-1000(117)]TJ + -77.917 -19.434 Td [(\225)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(7.7)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 418.246 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 418.046 Td [(barrier)-250(\227)-250(Sinchr)18(onization)-250(point)-250(parallel)-250(envir)18(onment)]TJ + [-500(Sparse)-250(matrix)-250(by)-250(dense)-250(matrix)-250(pr)18(oduct;)]TJ 0 g 0 G - [-903(.)-500(.)-500(.)]TJ + 0 -15.449 Td [(\225)]TJ 0 g 0 G - [-1000(118)]TJ + [-500(Sparse)-250(triangular)-250(systems)-250(solution)-250(for)-250(block)-250(diagonal)-250(matrices;)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.8)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 406.151 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 405.952 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ + 0 -15.449 Td [(\225)]TJ 0 g 0 G - [-946(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-464(V)111(ector)-250(and)-250(matrix)-250(norms;)]TJ 0 g 0 G - [-1000(119)]TJ + 0 -15.449 Td [(\225)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.9)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 394.057 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 393.858 Td [(bcast)-250(\227)-250(Br)18(oadcast)-250(data)]TJ + [-500(Dense)-250(matrix)-250(sums;)]TJ 0 g 0 G - [-739(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 0 -15.449 Td [(\225)]TJ 0 g 0 G - [-1000(120)]TJ + [-500(Dot)-250(pr)18(oducts.)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.10)-550(psb)]TJ -ET -q -1 0 0 1 154.072 381.963 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 
381.764 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ +/F75 9.9626 Tf -35.806 -19.434 Td [(Communication)-250(routines)]TJ 0 g 0 G - [-998(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F84 9.9626 Tf 118.704 0 Td [(handling)-250(halo)-250(and)-250(overlap)-250(communications;)]TJ 0 g 0 G - [-1000(122)]TJ + 50.661 -29.888 Td [(5)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.11)-550(psb)]TJ ET -q -1 0 0 1 154.072 369.869 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 369.67 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ + +endstream +endobj +948 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/points.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 954 0 R +/BBox [0 0 274 308] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 955 0 R +>>/Font << /R8 956 0 R>> +>> +/Length 1397 +/Filter /FlateDecode +>> +stream +xœÝYËŽ5Ý÷Wô’ q±Ën»ï6 +– 󣄠¹3ÊBü=®§«æ±bA‡HŒ}Ï©c·í²»?­é”ׄÿäïÍeùö]_?ü¾¤Ó©d êwßGüðaù´d"®òçæ²¾¾ä}ÍíëÕûe4­ß ,äýÔ×sÿ»º,_ýx÷Ç/w×·¯®~[¾»ZÞ.ø›Œ1¸ð™âuóâ¯ïÿ¼ûùúáoO*žþx/þÃõí½Î22Tø<ᜇd†&Âoî/×ïV˜âÿõèCê1V^õd¨æõãR ¬Û9ŸÎç¶^–ºµÓ¾ÍšÚýÝz¦zõ¯7‹!€S®ûjì§”êJÚR¿–ðWZSöN•m˜´ ide«3çûfyÿõROÛú×|J_F¿~]~z2ò–}×òVÐÕämë¦Î€sQ<I<³¦uiüd¸r͵9.Ö¤¢ÆR’ÉÑãY~ОÐCÑÝ¥Ÿ}öçÙ^â<3LA ‰c‹YÒ¶®ôçY¯qž&mCÙØâÌû懣ç—Ñ#|H–_rƧšÇÒ³,wš0s>}yüÇ5ÒNóË p%U¤ –ðW@E’§$§•|¡pxõE`&ÆøåU ™¤ó«›%AÝIUÍ0Gš]ý‘&ûÖM’ î Jšx÷¬…T.ù)~¼C²8˜}~‚­ÛÍWÛ¢íÁvKÑö¶K,8ÛÍ—&†`[C*—ü¨ONÔÇs­ƒ ½m‚ê ò9؆Áu¶!×`{P9¦m‚êKI7oÛB*—ü¨O샹~ñ̳·Ç'­¡Á^ÝIaÏvRy!œzw'ó¤`Íx"0.Ѥb'…iÄù|ùÌs¼žP:-%X/[´^º“#Àa°há…dÞPÓY/)Z‡Ýqˆ&-VŠÖ½ON¬Çtnƒ®G±À¹ÍY–& é›Ë’וB¿Ìœ¤¡¹M…ÁnngäŽ%¤Ò#ØœÃÉÙÇ‚"d;’Àô)ùÃ(˜\X‹³Ž¥²£0}Z¡pø#`Ó†Sò‹%Hvt§Ð̧f£`ú`-Î+”ÐŽQ4ó9ƒ…Ç,x›O/,îf,z»âißn«ªÝìv«$½úæ-ÜŒå`?›“禩™|,ˆ7cïó™;Ìñº@!osõé]Ц?ݲta0€yýÒ¥¤Zdy›«OïRÜ<%9­äƒ€[}拇ú6m8uõIPžþhǃf>m))…YÞæê“ Ò<%9­äƒ€[}ækçÿÜæ“WO’rõ= A} £ Ñ0'Ë 9‘S,irêÕ÷+\_ã­uâÝ¿›ÑÆE?æóé{¦ƒÙÇá'È‹ÎB#4_²$&†`[–’qq‘‘&/> Mõ5^_'†`[Bý˜OõºÖÁ–%©¡ ª/]07o[šqq ’&/M 
Íõ5^_'nÞ¶†4.ú1Ÿ6ØsýÜ¥%]Š!ƒCÞgVe@Ù–‹’…$)š5-ƒÃØ5}‡ä²?ÖLg+‡ |>{é>hO‘jøX5~,ê>–0àxÕ},1’š¬ác ”ø±ŠûX€5‹ûXb$3òø³ Ú…t¡í¡=Å>tpº8Õ‡’Ô$iÎ>´-ö¡Ç%ÀšTÔXJR#ÞgL¼í“-J/0®jãȶw.Þâªick£Z,”Ô¤š^”Ñk·ì«éUÝ ‹¯WjÇ‚µÛçƒ.ÁºUE³zÉgýãPˆ,é"›Ñe±ûÌ‹:t˜!*%~ Ö *«QÊÒ@emPMÓ1:¾Þ’àX¼÷(˜®4æ ¤Nƒ¾]þÎJ¦' +endstream +endobj +963 0 obj +<< +/Length 5321 +>> +stream 0 g 0 G - [-610(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(124)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(7.12)-550(psb)]TJ -ET -q -1 0 0 1 154.072 357.775 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q BT -/F54 9.9626 Tf 157.061 357.575 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ +/F75 9.9626 Tf 150.705 706.129 Td [(Data)-250(management)-250(and)-250(auxiliary)-250(routines)]TJ 0 g 0 G - [-896(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F84 9.9626 Tf 190.375 0 Td [(including:)]TJ 0 g 0 G - [-1000(126)]TJ + -154.569 -24.208 Td [(\225)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.13)-550(psb)]TJ -ET -q -1 0 0 1 154.072 345.681 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 345.481 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ + [-500(Parallel)-250(envir)18(onment)-250(management)]TJ +0 g 0 G + 0 -18.081 Td [(\225)]TJ 0 g 0 G - [-700(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-500(Communication)-250(descriptors)-250(allocation;)]TJ 0 g 0 G - [-1000(128)]TJ + 0 -18.082 Td [(\225)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.14)-550(psb)]TJ -ET -q -1 0 0 1 154.072 333.586 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 333.387 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ + [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(allocation;)]TJ 0 g 0 G - 
[-777(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 0 -18.081 Td [(\225)]TJ 0 g 0 G - [-1000(130)]TJ + [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(build)-250(and)-250(update;)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.15)-550(psb)]TJ -ET -q -1 0 0 1 154.072 321.492 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 321.293 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(r)18(eduction)]TJ + 0 -18.082 Td [(\225)]TJ 0 g 0 G - [-710(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-500(Sparse)-250(matrix)-250(and)-250(data)-250(distribution)-250(pr)18(epr)18(ocessing.)]TJ 0 g 0 G - [-1000(132)]TJ +/F75 9.9626 Tf -35.806 -24.207 Td [(Preconditioner)-250(routines)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.16)-550(psb)]TJ -ET -q -1 0 0 1 154.072 309.398 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 309.199 Td [(snd)-250(\227)-250(Send)-250(data)]TJ 0 g 0 G - [-511(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 0 -24.208 Td [(Iterative)-250(methods)]TJ 0 g 0 G - [-1000(134)]TJ +/F84 9.9626 Tf 0.995 0 0 1 235.656 561.18 Tm [(a)-250(subset)-251(of)-250(classical)-251(and)-250(Krylov)-251(subspace)-250(iterative)-251(methods)]TJ 1.007 0 0 1 150.396 538.043 Tm [(The)-247(following)-247(naming)-246(scheme)-247(has)-247(been)-247(adopted)-247(for)-246(all)-247(the)-247(symbols)-247(internally)]TJ 1 0 0 1 150.705 526.088 Tm [(de\002ned)-250(in)-250(the)-250(PSBLAS)-250(softwar)18(e)-250(package:)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.17)-550(psb)]TJ -ET -q -1 0 0 1 154.072 297.304 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 297.105 Td [(r)18(cv)-250(\227)-250(Receive)-250(data)]TJ + 13.888 -23.137 Td [(\225)]TJ 0 g 0 G - 
[-284(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-500(all)-250(symbols)-250(\050i.e.)-310(subr)18(outine)-250(names,)-250(data)-250(types...\051)-310(ar)18(e)-250(pr)18(e\002xed)-250(by)]TJ/F145 9.9626 Tf 294.183 0 Td [(psb_)]TJ 0 g 0 G - [-1000(135)]TJ +/F84 9.9626 Tf -294.183 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(all)-250(data)-250(type)-250(names)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F145 9.9626 Tf 166.604 0 Td [(_type)]TJ +0 g 0 G +/F84 9.9626 Tf -166.604 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(all)-250(constants)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F145 9.9626 Tf 135.59 0 Td [(_)]TJ +0 g 0 G +/F84 9.9626 Tf -135.59 -24.208 Td [(\225)]TJ +0 g 0 G + 1.013 0 0 1 175.611 430.327 Tm [(all)-246(top-level)-246(subr)18(outine)-246(names)-246(follow)-246(the)-246(r)8(ule)]TJ/F145 9.9626 Tf 1 0 0 1 381.038 430.327 Tm [(psb_xxname)]TJ/F84 9.9626 Tf 1.013 0 0 1 435.824 430.327 Tm [(wher)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 466.072 430.327 Tm [(xx)]TJ/F84 9.9626 Tf 1.013 0 0 1 479.015 430.327 Tm [(can)]TJ 1 0 0 1 175.611 418.372 Tm [(be)-250(either:)]TJ +0 g 0 G +/F75 9.9626 Tf 11.956 -24.208 Td [(\226)]TJ 0 g 0 G +/F145 9.9626 Tf 9.962 0 Td [(ge)]TJ/F84 9.9626 Tf 10.461 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(dense)-250(data,)]TJ +0 g 0 G +/F75 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ +0 g 0 G +/F145 9.9626 Tf 9.962 0 Td [(sp)]TJ/F84 9.9626 Tf 10.461 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(sparse)-250(data,)]TJ +0 g 0 G +/F75 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ +0 g 0 G +/F145 9.9626 Tf 9.962 0 Td [(cd)]TJ/F84 9.9626 Tf 10.461 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(communication)-250(descriptor)-250(\050see)]TJ 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -57.166 -22.15 Td [(8)-1000(Error)-250(handling)]TJ + [-250(3)]TJ 0 g 0 G - [-24750(136)]TJ + 
[(\051.)]TJ 0.981 0 0 1 175.611 333.794 Tm [(For)-254(example)-255(the)]TJ/F145 9.9626 Tf 1 0 0 1 247.896 333.794 Tm [(psb_geins)]TJ/F84 9.9626 Tf 0.981 0 0 1 294.969 333.794 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 299.899 333.794 Tm [(psb_spins)]TJ/F84 9.9626 Tf 0.981 0 0 1 349.458 333.794 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 368.491 333.794 Tm [(psb_cdins)]TJ/F84 9.9626 Tf 0.981 0 0 1 418.05 333.794 Tm [(perform)-254(the)-255(same)]TJ 1.02 0 0 1 175.611 321.839 Tm [(action)-359(\050see)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.094 Td [(8.1)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 263.06 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 262.861 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(err)18(or)-250(code)-250(onto)-250(the)-250(err)18(or)-250(stack)]TJ + [-360(6)]TJ 0 g 0 G - [-734(.)-500(.)-500(.)-500(.)]TJ + [(\051)-359(on)-359(dense)-359(matrices,)-388(sparse)-359(matrices)-360(an)1(d)-360(communication)]TJ 1.02 0 0 1 175.611 309.883 Tm [(descriptors)-308(r)18(espectively)109(.)-492(Interface)-307(overloading)-308(allows)-308(the)-307(usage)-308(of)-308(the)]TJ 1 0 0 1 175.611 297.928 Tm [(same)-250(subr)18(outine)-250(names)-250(for)-250(both)-250(r)18(eal)-250(and)-250(complex)-250(data.)]TJ 0.996 0 0 1 150.705 274.791 Tm [(In)-252(the)-252(descriptio)1(n)-252(of)-252(the)-252(subr)18(outines,)-252(ar)18(guments)-251(or)-252(ar)18(gument)-252(entries)-252(ar)18(e)-251(classi-)]TJ 1 0 0 1 150.705 262.836 Tm [(\002ed)-250(as:)]TJ 0 g 0 G - [-1000(138)]TJ +/F75 9.9626 Tf 0 -23.137 Td [(global)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(8.2)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 250.966 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 250.767 Td [(err)18(or)-250(\227)-250(Prints)-250(the)-250(err)18(or)-250(stack)-250(content)-250(and)-250(aborts)-250(execution)]TJ +/F84 9.9626 Tf 0.98 0 0 1 184.468 239.699 Tm 
[(For)-223(input)-223(ar)19(guments,)-230(the)-223(value)-223(must)-222(be)-223(the)-223(same)-223(on)-223(all)-223(pr)18(ocesses)-222(partici-)]TJ 0.98 0 0 1 175.313 227.744 Tm [(pating)-220(in)-219(the)-220(subr)18(outine)-219(call;)-232(for)-220(output)-219(ar)18(guments)-220(the)-219(value)-220(is)-220(guaranteed)]TJ 1 0 0 1 175.611 215.789 Tm [(to)-250(be)-250(the)-250(same.)]TJ 0 g 0 G +/F75 9.9626 Tf -24.906 -24.208 Td [(local)]TJ 0 g 0 G - [-1381(139)]TJ +/F84 9.9626 Tf 26.56 0 Td [(Each)-250(pr)18(ocess)-250(has)-250(its)-250(own)-250(value\050s\051)-250(independently)111(.)]TJ -26.869 -23.137 Td [(T)92(o)-250(\002nish)-250(our)-250(general)-250(description,)-250(we)-250(de\002ne)-250(a)-250(version)-250(string)-250(with)-250(the)-250(constant)]TJ/F145 9.9626 Tf 122.476 -24.059 Td [(psb_version_string_)]TJ/F84 9.9626 Tf -122.586 -24.059 Td [(whose)-250(curr)18(ent)-250(value)-250(is)]TJ/F145 9.9626 Tf 101.857 0 Td [(3.9.0)]TJ +0 g 0 G +/F84 9.9626 Tf 67.927 -29.888 Td [(6)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(8.3)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 238.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 238.673 Td [(set)]TJ ET -q -1 0 0 1 169.902 238.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +968 0 obj +<< +/Length 9648 +>> +stream +0 g 0 G +0 g 0 G BT -/F54 9.9626 Tf 172.891 238.673 Td [(errverbosity)-250(\227)-250(Sets)-250(the)-250(verbosity)-250(of)-250(err)18(or)-250(messages)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(2.3)-1000(Application)-250(structure)]TJ/F84 9.9626 Tf 0.999 0 0 1 99.587 686.748 Tm [(The)-249(main)-249(underlying)-250(principle)-249(of)-249(the)-249(PSBLAS)-250(library)-249(is)-249(that)-249(the)-249(library)-250(objects)]TJ 0.999 0 0 1 99.895 674.792 Tm [(ar)18(e)-251(cr)18(eated)-251(and)-251(exist)-251(with)-251(r)18(efer)18(ence)-251(to)-251(a)-251(discr)18(etized)-251(space)-251(to)-251(which)-251(ther)18(e)-251(corr)18(e-)]TJ 0.98 0 0 1 99.895 662.837 Tm 
[(sponds)-255(an)-254(index)-255(space)-255(and)-254(a)-255(matrix)-255(sparsity)-254(pattern.)-316(As)-255(an)-255(example,)-255(consider)-254(a)]TJ 1.015 0 0 1 99.895 650.882 Tm [(cell-center)18(ed)-246(\002nite-volume)-246(discr)18(etization)-246(of)-246(the)-246(Navier)18(-Stokes)-246(equations)-245(on)-246(a)]TJ 0.98 0 0 1 99.895 638.927 Tm [(simulation)-233(domain;)-240(the)-233(index)-233(space)-232(1)]TJ 1 0 0 1 261.657 638.927 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.2 0 Td [(n)]TJ/F84 9.9626 Tf 0.98 0 0 1 282.794 638.927 Tm [(is)-233(isomorphic)-233(to)-232(the)-233(set)-233(of)-233(cell)-233(centers,)]TJ 1.02 0 0 1 99.477 626.972 Tm [(wher)18(eas)-332(the)-332(pattern)-332(of)-332(the)-332(associated)-332(linear)-332(system)-332(matrix)-332(is)-332(isomorphic)-332(to)]TJ 1.014 0 0 1 99.895 615.017 Tm [(the)-246(adjacency)-247(gr)1(aph)-247(imposed)-246(on)-246(the)-246(discr)17(etization)-246(mesh)-246(by)-246(the)-247(discr)18(etization)]TJ 1 0 0 1 99.895 603.061 Tm [(stencil.)]TJ 1.02 0 0 1 114.839 590.891 Tm [(Thus)-298(the)-297(\002rst)-298(or)18(der)-298(of)-297(business)-298(is)-298(to)-297(establish)-298(an)-298(i)1(ndex)-298(space,)-311(and)-297(this)-298(is)]TJ 0.989 0 0 1 99.895 578.936 Tm [(done)-253(with)-253(a)-253(call)-253(to)]TJ/F145 9.9626 Tf 1 0 0 1 182.801 578.936 Tm [(psb_cdall)]TJ/F84 9.9626 Tf 0.989 0 0 1 232.368 578.936 Tm [(in)-253(which)-253(we)-253(specify)-253(the)-253(size)-253(of)-253(the)-253(index)-253(space)]TJ/F78 9.9626 Tf 1 0 0 1 437.943 578.936 Tm [(n)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.895 566.98 Tm [(and)-250(the)-249(allocation)-249(of)-250(the)-249(elements)-250(of)-249(the)-250(index)-249(space)-250(to)-249(the)-250(various)-249(pr)17(ocesses)]TJ 1 0 0 1 99.895 555.025 Tm [(making)-250(up)-250(the)-250(MPI)-250(\050virtual\051)-250(parallel)-250(machine.)]TJ 0.987 0 0 1 114.839 542.855 Tm 
[(The)-254(index)-255(space)-254(is)-254(partitioned)-254(among)-255(pr)19(ocesses,)-255(and)-254(this)-254(cr)18(eates)-255(a)-254(mapping)]TJ 1.02 0 0 1 99.895 530.899 Tm [(fr)18(om)-313(the)-312(\223global\224)-313(numbering)-312(1)]TJ 1 0 0 1 243.306 530.899 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.2 0 Td [(n)]TJ/F84 9.9626 Tf 1.02 0 0 1 265.345 530.899 Tm [(to)-312(a)-313(numbering)-312(\223local\224)-313(to)-312(each)-312(pr)17(ocess;)]TJ 1.02 0 0 1 99.895 518.944 Tm [(each)-357(pr)17(ocess)]TJ/F78 9.9626 Tf 1 0 0 1 161.071 518.944 Tm [(i)]TJ/F84 9.9626 Tf 1.02 0 0 1 167.665 518.944 Tm [(will)-357(own)-357(a)-358(certain)-357(subset)-357(1)]TJ 1 0 0 1 294.248 518.944 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.2 0 Td [(n)]TJ/F84 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F78 5.9776 Tf 17.537 -1.649 Td [(i)]TJ/F84 9.9626 Tf 1.02 0 0 1 333.423 518.944 Tm [(,)-385(each)-357(element)-358(of)-357(which)]TJ 1.004 0 0 1 99.895 506.989 Tm [(corr)18(esponds)-250(to)-249(a)-250(certain)-249(element)-250(of)-249(1)]TJ 1 0 0 1 263.855 506.989 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.2 0 Td [(n)]TJ/F84 9.9626 Tf 1.004 0 0 1 282.719 506.989 Tm [(.)-310(The)-250(user)-250(does)-249(not)-250(set)-249(explicitly)-250(this)]TJ 0.987 0 0 1 99.895 495.034 Tm [(mapping;)-253(when)-254(the)-253(application)-253(needs)-254(to)-253(indicate)-253(to)-254(which)-253(element)-253(of)-254(the)-253(index)]TJ 1.02 0 0 1 99.895 483.079 Tm [(space)-263(a)-262(certain)-263(item)-263(is)-262(r)17(elated,)-267(such)-262(as)-263(the)-263(r)18(ow)-263(and)-263(colu)1(mn)-263(index)-263(of)-263(a)-262(matrix)]TJ 1.011 0 0 1 99.895 471.124 Tm [(coef)18(\002cient,)-246(it)-247(does)-246(so)-246(in)-246(the)-246(\223global\224)-247(numbering,)-246(and)-246(the)-246(library)-247(will)-246(translate)]TJ 1 0 0 1 99.895 459.168 Tm [(into)-250(the)-250(appr)18(opriate)-250(\223local\224)-250(numbering.)]TJ 0.98 0 0 1 114.839 446.998 Tm [(For)-242(a)-242(given)-241(index)-242(space)-242(1)]TJ 1 0 0 1 225.057 
446.998 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.2 0 Td [(n)]TJ/F84 9.9626 Tf 0.98 0 0 1 246.282 446.998 Tm [(ther)18(e)-241(ar)18(e)-242(many)-242(possible)-242(associat)1(ed)-242(topologies,)]TJ 0.98 0 0 1 99.895 435.043 Tm [(i.e.)-308(many)-230(dif)18(fer)18(ent)-230(discr)19(etization)-230(stencils;)-239(thus)-230(the)-230(description)-230(of)-230(the)-230(index)-230(space)]TJ 1.011 0 0 1 99.895 423.087 Tm [(is)-247(not)-247(completed)-248(until)-247(the)-247(user)-247(has)-247(de\002ned)-248(a)-247(sparsity)-247(pattern,)-247(either)-247(explicitly)]TJ 0.98 0 0 1 99.895 411.132 Tm [(thr)18(ough)]TJ/F145 9.9626 Tf 1 0 0 1 137.076 411.132 Tm [(psb_cdins)]TJ/F84 9.9626 Tf 0.98 0 0 1 186.458 411.132 Tm [(or)-237(imp)1(licitly)-237(thr)18(ough)]TJ/F145 9.9626 Tf 1 0 0 1 279.084 411.132 Tm [(psb_spins)]TJ/F84 9.9626 Tf 0.98 0 0 1 326.157 411.132 Tm [(.)-310(The)-237(descriptor)-236(is)-237(\002nalized)]TJ 1.02 0 0 1 99.477 399.177 Tm [(with)-294(a)-294(call)-293(to)]TJ/F145 9.9626 Tf 1 0 0 1 161.528 399.177 Tm [(psb_cdasb)]TJ/F84 9.9626 Tf 1.02 0 0 1 211.587 399.177 Tm [(and)-294(a)-294(sparse)-293(matrix)-294(with)-294(a)-294(call)-294(to)]TJ/F145 9.9626 Tf 1 0 0 1 366.134 399.177 Tm [(psb_spasb)]TJ/F84 9.9626 Tf 1.02 0 0 1 413.207 399.177 Tm [(.)-450(After)]TJ/F145 9.9626 Tf 1 0 0 1 99.895 387.222 Tm [(psb_cdasb)]TJ/F84 9.9626 Tf 1.001 0 0 1 149.464 387.222 Tm [(each)-250(pr)18(ocess)]TJ/F78 9.9626 Tf 1 0 0 1 207.364 387.222 Tm [(i)]TJ/F84 9.9626 Tf 1.001 0 0 1 212.823 387.222 Tm [(will)-250(have)-250(de\002ned)-251(a)-250(set)-250(of)-250(\223halo\224)-250(\050or)-251(\223ghost\224\051)-250(indices)]TJ/F78 9.9626 Tf 1 0 0 1 100.02 375.267 Tm [(n)]TJ/F84 9.9626 Tf 5.664 -1.495 Td [(r)18(ow)]TJ/F78 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F192 10.3811 Tf 4.654 3.143 Td [(+)]TJ/F84 9.9626 Tf 0.98 0 0 1 137.825 375.267 Tm [(1)]TJ 1 0 0 1 144.492 375.267 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.2 0 Td [(n)]TJ/F84 9.9626 Tf 5.664 -3.831 Td [(col)]TJ/F78 5.9776 Tf 12.795 
-1.648 Td [(i)]TJ/F84 9.9626 Tf 0.98 0 0 1 178.925 375.267 Tm [(,)-239(denoting)-235(elements)-235(of)-235(the)-235(index)-235(space)-235(that)-235(ar)18(e)]TJ/F78 9.9626 Tf 0.98 0 0 1 379.735 375.267 Tm [(not)]TJ/F84 9.9626 Tf 0.98 0 0 1 395.044 375.267 Tm [(assigned)-235(to)]TJ 1.01 0 0 1 99.596 361.477 Tm [(pr)18(ocess)]TJ/F78 9.9626 Tf 1 0 0 1 135.289 361.477 Tm [(i)]TJ/F84 9.9626 Tf 1.01 0 0 1 138.252 361.477 Tm [(;)-247(however)-246(the)-247(variables)-246(associated)-247(with)-246(them)-247(ar)18(e)-247(needed)-246(to)-247(complete)]TJ 1.02 0 0 1 99.895 349.522 Tm [(computations)-284(associated)-284(with)-285(the)-284(sparse)-284(matrix)]TJ/F78 9.9626 Tf 1 0 0 1 318.756 349.522 Tm [(A)]TJ/F84 9.9626 Tf 1.02 0 0 1 326.074 349.522 Tm [(,)-294(and)-284(thus)-284(they)-285(have)-284(to)-284(be)]TJ 1.02 0 0 1 99.895 337.567 Tm [(fetched)-303(fr)18(om)-304(\050neighbouring\051)-303(pr)18(ocesses.)-478(The)-303(descriptor)-303(of)-303(the)-303(index)-304(space)-303(is)]TJ 1.007 0 0 1 99.895 325.612 Tm [(built)-249(exactly)-248(for)-249(the)-249(purpose)-249(of)-248(pr)17(op)1(erly)-249(sequencing)-249(the)-249(communication)-248(steps)]TJ 1 0 0 1 99.895 313.656 Tm [(r)18(equir)18(ed)-250(to)-250(achieve)-250(this)-250(objective.)]TJ 0.987 0 0 1 114.839 301.486 Tm [(A)-253(simple)-253(application)-254(str)8(u)1(ctur)18(e)-253(will)-254(walk)-253(thr)18(ough)-253(the)-253(index)-253(space)-253(allocation,)]TJ 1 0 0 1 99.895 289.531 Tm [(matrix/vector)-250(cr)18(eation)-250(and)-250(linear)-250(system)-250(solution)-250(as)-250(follows:)]TJ 0 g 0 G - [-253(.)-500(.)-500(.)]TJ + 12.454 -20.572 Td [(1.)]TJ 0 g 0 G - [-1000(140)]TJ + [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F145 9.9626 Tf 171.464 0 Td [(psb_init)]TJ/F84 9.9626 Tf 41.843 0 Td [(;)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -58.052 -12.095 Td [(8.4)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 226.778 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 226.578 Td [(set)]TJ -ET -q 
-1 0 0 1 169.902 226.778 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 172.891 226.578 Td [(erraction)-232(\227)-231(Set)-232(the)-231(type)-232(of)-231(action)-232(to)-232(b)1(e)-232(taken)-232(upon)-231(err)18(or)]TJ -35.138 -11.955 Td [(condition)]TJ + -213.307 -20.787 Td [(2.)]TJ 0 g 0 G - [-481(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F145 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ/F84 9.9626 Tf 47.073 0 Td [(;)]TJ 0 g 0 G - [-1000(141)]TJ + -177.562 -20.787 Td [(3.)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -37.858 -22.149 Td [(9)-1000(Utilities)]TJ + 0.983 0 0 1 124.413 227.385 Tm [(Allocate)-254(sparse)-255(matrix)-254(and)-254(dense)-255(vectors)-254(with)]TJ/F145 9.9626 Tf 1 0 0 1 326.635 227.385 Tm [(psb_spall)]TJ/F84 9.9626 Tf 0.983 0 0 1 376.199 227.385 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 395.269 227.385 Tm [(psb_geall)]TJ/F84 9.9626 Tf 0.983 0 0 1 442.342 227.385 Tm [(;)]TJ 0 g 0 G - [-27780(142)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.095 Td [(9.1)-1450(hb)]TJ -ET -q -1 0 0 1 153.644 180.579 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 156.633 180.38 Td [(r)18(ead)-400(\227)-400(Read)-400(a)-400(sparse)-400(matrix)-400(fr)18(om)-400(a)-400(\002le)-400(in)-400(the)-400(Harwell\226)]TJ -18.88 -11.956 Td [(Boeing)-250(format)]TJ + 1 0 0 1 112.349 206.597 Tm [(4.)]TJ 0 g 0 G - [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 1.02 0 0 1 124.802 206.597 Tm [(Loop)-265(over)-265(all)-266(local)-265(r)18(ows,)-270(generate)-266(matrix)-265(and)-265(vector)-265(entries,)-270(and)-266(insert)]TJ 1 0 0 1 124.802 194.642 Tm 
[(them)-250(with)]TJ/F145 9.9626 Tf 47.85 0 Td [(psb_spins)]TJ/F84 9.9626 Tf 49.564 0 Td [(and)]TJ/F145 9.9626 Tf 19.357 0 Td [(psb_geins)]TJ 0 g 0 G - [-1000(143)]TJ +/F84 9.9626 Tf -129.224 -20.787 Td [(5.)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -12.094 Td [(9.2)-1050(hb)]TJ -ET -q -1 0 0 1 149.659 156.529 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 152.647 156.33 Td [(write)-226(\227)-226(W)74(rite)-226(a)-226(sparse)-226(matrix)-226(to)-226(a)-225(\002le)-226(in)-226(the)-226(Harwell\226Boeing)]TJ -14.894 -11.955 Td [(format)]TJ + [-461(Assemble)-250(the)-250(various)-250(entities:)]TJ 0 g 0 G - [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 17.773 -20.787 Td [(\050a\051)]TJ 0 g 0 G - [-1000(144)]TJ +/F145 9.9626 Tf 16.598 0 Td [(psb_cdasb)]TJ/F84 9.9626 Tf 47.073 0 Td [(,)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -12.094 Td [(9.3)-1050(mm)]TJ -ET -q -1 0 0 1 155.945 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 158.934 132.281 Td [(mat)]TJ -ET -q -1 0 0 1 176.558 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 179.546 132.281 Td [(r)18(ead)-265(\227)-265(Read)-265(a)-265(sparse)-265(matrix)-265(fr)18(om)-265(a)-265(\002le)-265(in)-265(the)-265(Matrix-)]TJ -41.793 -11.955 Td [(Market)-250(format)]TJ + -64.199 -16.371 Td [(\050b\051)]TJ 0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F145 9.9626 Tf 17.126 0 Td [(psb_spasb)]TJ/F84 9.9626 Tf 47.073 0 Td [(,)]TJ 0 g 0 G - [-1000(145)]TJ + -63.113 -16.371 Td [(\050c\051)]TJ 0 g 0 G +/F145 9.9626 Tf 16.04 0 Td [(psb_geasb)]TJ/F84 9.9626 Tf 47.073 0 Td [(;)]TJ 0 g 0 G - 129.649 -29.888 Td [(iii)]TJ 
+ 75.467 -29.888 Td [(7)]TJ 0 g 0 G ET endstream endobj -645 0 obj +890 0 obj << /Type /ObjStm /N 100 -/First 927 -/Length 16599 ->> -stream -596 0 597 152 598 304 599 459 600 611 601 754 602 906 603 1063 604 1220 605 1377 -606 1532 607 1689 608 1846 609 2003 610 2160 611 2313 640 2471 612 2626 641 2784 613 2941 -614 3099 615 3257 616 3415 617 3573 618 3724 619 3881 620 4037 621 4194 642 4351 622 4507 -643 4664 623 4819 644 4976 624 5132 625 5289 626 5446 627 5603 628 5761 629 5917 630 6075 -631 6233 632 6391 633 6549 638 6704 639 6760 635 6816 694 6896 634 7398 646 7556 647 7714 -648 7872 649 8023 650 8180 651 8337 652 8493 653 8650 654 8807 655 8964 656 9116 657 9268 -658 9414 659 9566 660 9718 661 9870 662 10022 663 10174 664 10326 665 10478 666 10630 667 10781 -668 10934 669 11087 670 11240 671 11393 672 11546 673 11699 674 11852 675 11999 676 12149 677 12301 -678 12453 679 12605 680 12752 681 12904 682 13056 683 13208 684 13360 685 13512 686 13664 687 13816 -688 13968 689 14120 690 14273 691 14424 696 14575 693 14632 740 14712 692 15214 697 15367 698 15519 -% 596 0 obj +/First 913 +/Length 13454 +>> +stream +849 0 889 152 850 304 851 449 852 602 853 755 854 908 855 1061 856 1214 857 1367 +858 1520 859 1673 860 1825 861 1972 862 2123 863 2276 864 2423 865 2576 866 2729 867 2882 +868 3035 869 3183 870 3331 871 3479 872 3627 873 3775 874 3924 875 4073 876 4222 877 4371 +878 4520 879 4669 880 4818 881 4967 882 5116 886 5264 883 5321 892 5401 7 5515 891 5569 +906 5649 895 5855 896 6005 897 6155 898 6306 899 6460 900 6611 901 6761 902 6910 903 7058 +904 7206 11 7354 905 7410 924 7503 928 7653 929 7894 930 7936 931 8322 918 8622 919 8768 +920 8916 926 9059 15 9115 927 9170 923 9226 937 9347 922 9497 934 9645 935 9793 939 9941 +19 9998 943 10054 944 10111 936 10168 950 10303 954 10445 955 10559 956 10601 946 10670 947 10818 +952 10965 953 11021 23 11077 949 11132 962 11281 959 11423 960 11570 964 11717 961 11774 967 11868 +969 11982 27 12038 970 12093 971 12149 972 
12205 973 12261 974 12317 975 12373 976 12429 977 12485 +% 849 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 633.079 211.078 645.138] -/A << /S /GoTo /D (subsection.2.2) >> +/Rect [164.653 654.503 495.412 666.562] +/A << /S /GoTo /D (subsection.9.6) >> >> -% 597 0 obj +% 889 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 621.004 233.094 633.064] -/A << /S /GoTo /D (subsection.2.3) >> +/Rect [149.709 645.197 253.668 654.607] +/A << /S /GoTo /D (subsection.9.6) >> >> -% 598 0 obj +% 850 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 608.93 301.886 620.99] -/A << /S /GoTo /D (subsubsection.2.3.1) >> +/Rect [149.709 623.26 274.28 632.59] +/A << /S /GoTo /D (section.10) >> >> -% 599 0 obj +% 851 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 596.856 230.734 608.916] -/A << /S /GoTo /D (subsection.2.4) >> +/Rect [164.653 608.674 333.298 620.734] +/A << /S /GoTo /D (subsection.10.1) >> >> -% 600 0 obj +% 852 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 577.37 242.261 586.7] -/A << /S /GoTo /D (section.3) >> +/Rect [164.653 596.719 349.866 608.779] +/A << /S /GoTo /D (subsection.10.2) >> >> -% 601 0 obj +% 853 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 562.666 249.144 574.726] -/A << /S /GoTo /D (subsection.3.1) >> +/Rect [164.653 584.764 331.326 596.824] +/A << /S /GoTo /D (subsection.10.3) >> >> -% 602 0 obj +% 854 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 550.592 258.689 562.652] -/A << /S /GoTo /D (subsubsection.3.1.1) >> +/Rect [164.653 572.809 381.626 584.869] +/A << /S /GoTo /D (subsection.10.4) >> >> -% 603 0 obj +% 855 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 538.518 360.207 550.578] -/A << /S /GoTo /D (subsubsection.3.1.2) >> +/Rect [164.653 560.854 427.165 572.913] +/A << /S 
/GoTo /D (subsection.10.5) >> >> -% 604 0 obj +% 856 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 526.444 350.723 538.504] -/A << /S /GoTo /D (subsubsection.3.1.3) >> +/Rect [164.653 548.899 353.343 560.958] +/A << /S /GoTo /D (subsection.10.6) >> >> -% 605 0 obj +% 857 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 514.37 373.457 526.43] -/A << /S /GoTo /D (subsubsection.3.1.4) >> +/Rect [164.653 536.943 315.177 549.003] +/A << /S /GoTo /D (subsection.10.7) >> >> -% 606 0 obj +% 858 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 502.296 363.973 514.355] -/A << /S /GoTo /D (subsubsection.3.1.5) >> +/Rect [164.653 524.988 324.771 537.048] +/A << /S /GoTo /D (subsection.10.8) >> >> -% 607 0 obj +% 859 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 490.222 384.834 502.281] -/A << /S /GoTo /D (subsubsection.3.1.6) >> +/Rect [164.653 513.033 335.63 525.093] +/A << /S /GoTo /D (subsection.10.9) >> >> -% 608 0 obj +% 860 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 478.148 356.411 490.013] -/A << /S /GoTo /D (subsubsection.3.1.7) >> +/Rect [149.709 493.745 247.72 503.075] +/A << /S /GoTo /D (section.11) >> >> -% 609 0 obj +% 861 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 466.074 297.523 478.133] -/A << /S /GoTo /D (subsubsection.3.1.8) >> +/Rect [164.653 479.16 393.332 491.22] +/A << /S /GoTo /D (subsection.11.1) >> >> -% 610 0 obj +% 862 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 454 345.014 466.059] -/A << /S /GoTo /D (subsubsection.3.1.9) >> +/Rect [164.653 467.205 429.766 479.265] +/A << /S /GoTo /D (subsection.11.2) >> >> -% 611 0 obj +% 863 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 441.925 444.603 453.985] -/A << /S /GoTo /D (subsubsection.3.1.10) >> +/Rect [149.709 447.917 215.89 
457.068] +/A << /S /GoTo /D (section.12) >> >> -% 640 0 obj +% 864 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 429.97 222.246 442.03] -/A << /S /GoTo /D (subsubsection.3.1.10) >> +/Rect [164.653 433.332 280.885 445.392] +/A << /S /GoTo /D (subsection.12.1) >> >> -% 612 0 obj +% 865 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 417.896 444.603 429.956] -/A << /S /GoTo /D (subsubsection.3.1.11) >> +/Rect [164.653 424.027 310.634 433.242] +/A << /S /GoTo /D (subsection.12.2) >> >> -% 641 0 obj +% 866 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 405.941 222.246 418.001] -/A << /S /GoTo /D (subsubsection.3.1.11) >> +/Rect [164.653 412.072 283.267 421.481] +/A << /S /GoTo /D (subsection.12.3) >> >> -% 613 0 obj +% 867 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 393.867 358.404 405.927] -/A << /S /GoTo /D (subsubsection.3.1.12) >> +/Rect [164.653 400.116 292.711 409.526] +/A << /S /GoTo /D (subsection.12.4) >> >> -% 614 0 obj +% 868 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 381.793 354.718 393.853] -/A << /S /GoTo /D (subsubsection.3.1.13) >> +/Rect [149.709 378.179 303.341 387.329] +/A << /S /GoTo /D (section.13) >> >> -% 615 0 obj +% 869 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 369.719 413.607 381.778] -/A << /S /GoTo /D (subsubsection.3.1.14) >> +/Rect [164.653 363.593 225.882 375.653] +/A << /S /GoTo /D (section*.7) >> >> -% 616 0 obj +% 870 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 360.295 250.062 369.704] -/A << /S /GoTo /D (subsubsection.3.1.15) >> +/Rect [164.653 351.638 227.098 363.698] +/A << /S /GoTo /D (section*.8) >> >> -% 617 0 obj +% 871 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 345.571 223.242 357.63] -/A << /S /GoTo /D (subsection.3.2) >> +/Rect [164.653 
339.683 262.236 351.743] +/A << /S /GoTo /D (section*.9) >> >> -% 618 0 obj +% 872 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 333.497 273.364 345.556] -/A << /S /GoTo /D (subsubsection.3.2.1) >> +/Rect [164.653 327.728 282.36 339.788] +/A << /S /GoTo /D (section*.10) >> >> -% 619 0 obj +% 873 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 321.423 399.41 333.482] -/A << /S /GoTo /D (subsubsection.3.2.2) >> +/Rect [164.653 315.773 254.803 327.832] +/A << /S /GoTo /D (section*.11) >> >> -% 620 0 obj +% 874 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 309.348 410.528 321.408] -/A << /S /GoTo /D (subsubsection.3.2.3) >> +/Rect [164.653 303.818 253.488 315.877] +/A << /S /GoTo /D (section*.12) >> >> -% 621 0 obj +% 875 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 297.274 444.603 309.334] -/A << /S /GoTo /D (subsubsection.3.2.4) >> +/Rect [164.653 291.862 280.328 303.922] +/A << /S /GoTo /D (section*.13) >> >> -% 642 0 obj +% 876 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 287.969 199.631 297.075] -/A << /S /GoTo /D (subsubsection.3.2.4) >> +/Rect [164.653 279.907 252.871 291.967] +/A << /S /GoTo /D (section*.14) >> >> -% 622 0 obj +% 877 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 273.245 444.603 285.305] -/A << /S /GoTo /D (subsubsection.3.2.5) >> +/Rect [164.653 267.952 281.971 280.012] +/A << /S /GoTo /D (section*.15) >> >> -% 643 0 obj +% 878 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 261.29 248.906 273.046] -/A << /S /GoTo /D (subsubsection.3.2.5) >> +/Rect [164.653 255.997 296.477 268.057] +/A << /S /GoTo /D (section*.16) >> >> -% 623 0 obj +% 879 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 249.216 444.603 261.276] -/A << /S /GoTo /D (subsubsection.3.2.6) >> +/Rect [164.653 
244.042 305.742 256.101] +/A << /S /GoTo /D (section*.17) >> >> -% 644 0 obj +% 880 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 239.911 185.853 249.016] -/A << /S /GoTo /D (subsubsection.3.2.6) >> +/Rect [164.653 232.087 293.966 244.146] +/A << /S /GoTo /D (section*.18) >> >> -% 624 0 obj +% 881 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 225.187 384.545 237.246] -/A << /S /GoTo /D (subsubsection.3.2.7) >> +/Rect [164.653 220.131 292.711 232.191] +/A << /S /GoTo /D (section*.19) >> >> -% 625 0 obj +% 882 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 213.113 329.343 225.172] -/A << /S /GoTo /D (subsubsection.3.2.8) >> +/Rect [164.653 208.176 319.55 220.236] +/A << /S /GoTo /D (section*.20) >> >> -% 626 0 obj +% 886 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 201.038 405.337 213.098] -/A << /S /GoTo /D (subsubsection.3.2.9) >> +/D [884 0 R /XYZ 149.705 753.953 null] >> -% 627 0 obj +% 883 0 obj +<< +/Font << /F84 687 0 R /F75 685 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 892 0 obj +<< +/Type /Page +/Contents 893 0 R +/Resources 891 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 894 0 R +>> +% 7 0 obj +<< +/D [892 0 R /XYZ 99.895 723.717 null] +>> +% 891 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 906 0 obj +<< +/Type /Page +/Contents 907 0 R +/Resources 905 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 894 0 R +/Annots [ 895 0 R 896 0 R 897 0 R 898 0 R 899 0 R 900 0 R 901 0 R 902 0 R 903 0 R 904 0 R ] +>> +% 895 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 188.964 371.724 201.024] -/A << /S /GoTo /D (subsubsection.3.2.10) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [460.518 586.065 472.473 595.071] +/A << /S /GoTo /D (cite.metcalf) >> >> -% 628 0 obj +% 896 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 176.89 309.029 188.95] 
-/A << /S /GoTo /D (subsubsection.3.2.11) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [326.365 514.225 338.32 523.231] +/A << /S /GoTo /D (cite.Sparse03) >> >> -% 629 0 obj +% 897 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 167.466 350.683 176.876] -/A << /S /GoTo /D (subsubsection.3.2.12) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [315.282 502.27 327.237 511.276] +/A << /S /GoTo /D (cite.DesPat:11) >> >> -% 630 0 obj +% 898 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 152.742 303.929 164.802] -/A << /S /GoTo /D (subsubsection.3.2.13) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [329.663 502.171 341.618 511.276] +/A << /S /GoTo /D (cite.RouXiaXu:11) >> >> -% 631 0 obj +% 899 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 140.668 324.462 152.728] -/A << /S /GoTo /D (subsubsection.3.2.14) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [267.112 430.331 279.067 439.436] +/A << /S /GoTo /D (cite.machiels) >> >> -% 632 0 obj +% 900 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 128.594 309.687 140.654] -/A << /S /GoTo /D (subsubsection.3.2.15) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [291.919 358.491 298.893 367.447] +/A << /S /GoTo /D (cite.sblas97) >> >> -% 633 0 obj +% 901 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 116.52 314.13 128.58] -/A << /S /GoTo /D (subsubsection.3.2.16) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [301.83 358.491 308.804 367.447] +/A << /S /GoTo /D (cite.sblas02) >> >> -% 638 0 obj +% 902 0 obj << -/D [636 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [277.498 346.536 289.453 355.642] +/A << /S /GoTo /D (cite.BLAS1) >> >> -% 639 0 obj +% 903 0 obj << -/D [636 0 R /XYZ 99.895 723.975 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [292.442 346.536 299.416 355.492] +/A << /S /GoTo /D (cite.BLAS2) >> >> -% 635 0 obj +% 904 
0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [302.405 346.536 309.379 355.492] +/A << /S /GoTo /D (cite.BLAS3) >> >> -% 694 0 obj +% 11 0 obj << -/Type /Page -/Contents 695 0 R -/Resources 693 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 634 0 R 646 0 R 647 0 R 648 0 R 649 0 R 650 0 R 651 0 R 652 0 R 653 0 R 654 0 R 655 0 R 656 0 R 657 0 R 658 0 R 659 0 R 660 0 R 661 0 R 662 0 R 663 0 R 664 0 R 665 0 R 666 0 R 667 0 R 668 0 R 669 0 R 670 0 R 671 0 R 672 0 R 673 0 R 674 0 R 675 0 R 676 0 R 677 0 R 678 0 R 679 0 R 680 0 R 681 0 R 682 0 R 683 0 R 684 0 R 685 0 R 686 0 R 687 0 R 688 0 R 689 0 R 690 0 R 691 0 R ] +/D [906 0 R /XYZ 150.705 716.092 null] >> -% 634 0 obj +% 905 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 702.323 437.338 714.383] -/A << /S /GoTo /D (subsubsection.3.2.17) >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R >> +/ProcSet [ /PDF /Text ] >> -% 646 0 obj +% 924 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 690.243 348.332 702.303] -/A << /S /GoTo /D (subsubsection.3.2.18) >> +/Type /Page +/Contents 925 0 R +/Resources 923 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 894 0 R +/Annots [ 918 0 R 919 0 R 920 0 R ] >> -% 647 0 obj +% 928 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 680.814 300.871 690.223] -/A << /S /GoTo /D (subsubsection.3.2.19) >> +/Producer (GPL Ghostscript 9.04) +/CreationDate (D:20111215145523+01'00') +/ModDate (D:20111215145523+01'00') +/Title (psblas.fig) +/Creator (fig2dev Version 3.2 Patchlevel 5d) +/Author (sfilippo@donald \(Salvatore Filippone\)) >> -% 648 0 obj +% 929 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 668.734 313.682 677.95] -/A << /S /GoTo /D (subsection.3.3) >> +/Type /ExtGState +/OPM 1 >> -% 649 0 obj +% 930 0 obj << -/Type /Annot -/Subtype 
/Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 656.654 290.829 666.064] -/A << /S /GoTo /D (subsubsection.3.3.1) >> +/BaseFont /JEJNJE+Times-Roman +/FontDescriptor 931 0 R +/Type /Font +/FirstChar 32 +/LastChar 116 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 500 0 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 722 667 0 0 0 556 0 0 333 0 0 611 889 0 0 556 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 444 0 444 0 444 333 500 0 278 0 0 278 0 500 500 500 0 333 389 278] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 650 0 obj +% 931 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 641.925 446.194 653.984] -/A << /S /GoTo /D (subsubsection.3.3.2) >> +/Type /FontDescriptor +/FontName /JEJNJE+Times-Roman +/FontBBox [ 0 -218 863 683] +/Flags 32 +/Ascent 683 +/CapHeight 676 +/Descent -218 +/ItalicAngle 0 +/StemV 129 +/MissingWidth 500 +/XHeight 460 +/CharSet (/A/B/F/I/L/M/P/S/a/c/e/f/g/i/l/n/o/p/r/s/space/t/three/two/zero) +/FontFile3 932 0 R >> -% 651 0 obj +% 918 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 629.845 479.97 641.905] -/A << /S /GoTo /D (subsubsection.3.3.3) >> +/Rect [267.789 526.596 274.763 538.656] +/A << /S /GoTo /D (figure.1) >> >> -% 652 0 obj +% 919 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 620.415 358.813 629.825] -/A << /S /GoTo /D (subsubsection.3.3.4) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [395.348 457.615 402.322 466.471] +/A << /S /GoTo /D (cite.BLACS) >> >> -% 653 0 obj +% 920 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 605.686 415.509 617.745] -/A << /S /GoTo /D (subsubsection.3.3.5) >> +/Rect [159.182 419 166.156 431.059] +/A << /S /GoTo /D (section.7) >> >> -% 654 0 obj +% 926 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 593.606 348.332 605.666] -/A << /S /GoTo /D (subsubsection.3.3.6) >> +/D [924 0 R /XYZ 98.895 753.953 null] >> -% 655 0 obj +% 15 0 obj << -/Type /Annot 
-/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 584.176 318.663 593.586] -/A << /S /GoTo /D (subsection.3.4) >> +/D [924 0 R /XYZ 99.895 663.868 null] >> -% 656 0 obj +% 927 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 569.446 277.409 581.506] -/A << /S /GoTo /D (subsection.3.5) >> +/D [924 0 R /XYZ 99.895 260.062 null] >> -% 657 0 obj +% 923 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 547.56 275.386 559.281] -/A << /S /GoTo /D (section.4) >> +/Font << /F84 687 0 R /F75 685 0 R /F78 686 0 R >> +/XObject << /Im2 921 0 R >> +/ProcSet [ /PDF /Text ] >> -% 658 0 obj +% 937 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 535.241 380.451 547.301] -/A << /S /GoTo /D (subsection.4.1) >> +/Type /Page +/Contents 938 0 R +/Resources 936 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 894 0 R +/Annots [ 922 0 R 934 0 R 935 0 R ] >> -% 659 0 obj +% 922 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 523.162 302.763 535.221] -/A << /S /GoTo /D (subsection.4.2) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [325.842 609.432 337.797 618.438] +/A << /S /GoTo /D (cite.METIS) >> >> -% 660 0 obj +% 934 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 511.082 362.977 523.142] -/A << /S /GoTo /D (subsection.4.3) >> +/Rect [259.94 534.258 266.216 547.962] +/A << /S /GoTo /D (Hfootnote.1) >> >> -% 661 0 obj +% 935 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 499.002 354.758 511.062] -/A << /S /GoTo /D (subsection.4.4) >> +/Rect [208.583 185.645 214.86 199.235] +/A << /S /GoTo /D (Hfootnote.2) >> >> -% 662 0 obj +% 939 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 486.923 379.844 498.982] -/A << /S /GoTo /D (subsection.4.5) >> +/D [937 0 R /XYZ 149.705 753.953 null] >> -% 663 0 obj +% 19 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 
0 0]/H/I/C[1 0 0] -/Rect [164.653 474.843 329.154 486.903] -/A << /S /GoTo /D (subsection.4.6) >> +/D [937 0 R /XYZ 150.705 504.866 null] >> -% 664 0 obj +% 943 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 462.763 394.738 474.823] -/A << /S /GoTo /D (subsection.4.7) >> +/D [937 0 R /XYZ 165.051 167.999 null] >> -% 665 0 obj +% 944 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 450.684 329.154 462.743] -/A << /S /GoTo /D (subsection.4.8) >> +/D [937 0 R /XYZ 165.051 158.184 null] >> -% 666 0 obj +% 936 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 438.604 394.24 450.663] -/A << /S /GoTo /D (subsection.4.9) >> +/Font << /F84 687 0 R /F78 686 0 R /F145 940 0 R /F75 685 0 R /F190 941 0 R /F192 942 0 R >> +/ProcSet [ /PDF /Text ] >> -% 667 0 obj +% 950 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 426.524 362.499 438.584] -/A << /S /GoTo /D (subsection.4.10) >> +/Type /Page +/Contents 951 0 R +/Resources 949 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 894 0 R +/Annots [ 946 0 R 947 0 R ] >> -% 668 0 obj +% 954 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 414.444 387.276 426.504] -/A << /S /GoTo /D (subsection.4.11) >> +/Producer (ESP Ghostscript 815.03) +/CreationDate (D:20070123225315) +/ModDate (D:20070123225315) >> -% 669 0 obj +% 955 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 402.365 425.761 414.424] -/A << /S /GoTo /D (subsection.4.12) >> +/Type /ExtGState +/OPM 1 >> -% 670 0 obj +% 956 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 390.285 353.991 402.345] -/A << /S /GoTo /D (subsection.4.13) >> +/BaseFont /Times-Roman +/Type /Font +/Subtype /Type1 >> -% 671 0 obj +% 946 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 378.205 331.346 390.265] -/A << /S /GoTo /D 
(subsection.4.14) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [244.163 638.309 251.137 647.315] +/A << /S /GoTo /D (cite.2007c) >> >> -% 672 0 obj +% 947 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 366.126 333.538 378.185] -/A << /S /GoTo /D (subsection.4.15) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [254.125 638.21 261.099 647.166] +/A << /S /GoTo /D (cite.2007d) >> >> -% 673 0 obj +% 952 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 354.046 337.602 366.106] -/A << /S /GoTo /D (subsection.4.16) >> +/D [950 0 R /XYZ 98.895 753.953 null] >> -% 674 0 obj +% 953 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 334.551 280.368 343.701] -/A << /S /GoTo /D (section.5) >> +/D [950 0 R /XYZ 99.895 353.614 null] >> -% 675 0 obj +% 23 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 319.841 362.031 331.9] -/A << /S /GoTo /D (subsection.5.1) >> +/D [950 0 R /XYZ 99.895 270.035 null] >> -% 676 0 obj +% 949 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 307.761 313.065 319.821] -/A << /S /GoTo /D (subsection.5.2) >> +/Font << /F75 685 0 R /F84 687 0 R /F190 941 0 R /F78 686 0 R /F192 942 0 R >> +/XObject << /Im3 948 0 R >> +/ProcSet [ /PDF /Text ] >> -% 677 0 obj +% 962 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 295.681 376.127 307.741] -/A << /S /GoTo /D (subsection.5.3) >> +/Type /Page +/Contents 963 0 R +/Resources 961 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 894 0 R +/Annots [ 959 0 R 960 0 R ] >> -% 678 0 obj +% 959 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 283.602 376.187 295.661] -/A << /S /GoTo /D (subsection.5.4) >> +/Rect [455.548 354.196 462.522 366.255] +/A << /S /GoTo /D (section.3) >> >> -% 679 0 obj +% 960 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 261.636 289.504 
273.257] +/Rect [226.669 318.033 233.743 330.093] /A << /S /GoTo /D (section.6) >> >> -% 680 0 obj +% 964 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 249.397 412.092 261.456] -/A << /S /GoTo /D (subsection.6.1) >> +/D [962 0 R /XYZ 149.705 753.953 null] >> -% 681 0 obj +% 961 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 237.317 428.052 249.377] -/A << /S /GoTo /D (subsection.6.2) >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 682 0 obj +% 967 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 225.237 445.915 237.297] -/A << /S /GoTo /D (subsection.6.3) >> +/Type /Page +/Contents 968 0 R +/Resources 966 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 978 0 R >> -% 683 0 obj +% 969 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 213.158 407.011 225.217] -/A << /S /GoTo /D (subsection.6.4) >> +/D [967 0 R /XYZ 98.895 753.953 null] >> -% 684 0 obj +% 27 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 201.078 400.356 213.138] -/A << /S /GoTo /D (subsection.6.5) >> +/D [967 0 R /XYZ 99.895 716.092 null] >> -% 685 0 obj +% 970 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 188.998 461.277 201.058] -/A << /S /GoTo /D (subsection.6.6) >> +/D [967 0 R /XYZ 99.895 282.521 null] >> -% 686 0 obj +% 971 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 176.918 355.017 188.978] -/A << /S /GoTo /D (subsection.6.7) >> +/D [967 0 R /XYZ 99.895 261.733 null] >> -% 687 0 obj +% 972 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 164.839 446.841 176.898] -/A << /S /GoTo /D (subsection.6.8) >> +/D [967 0 R /XYZ 99.895 240.946 null] >> -% 688 0 obj +% 973 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 152.759 387.206 164.819] -/A 
<< /S /GoTo /D (subsection.6.9) >> +/D [967 0 R /XYZ 99.895 220.159 null] >> -% 689 0 obj +% 974 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 140.679 343.281 152.739] -/A << /S /GoTo /D (subsection.6.10) >> +/D [967 0 R /XYZ 99.895 188.012 null] >> -% 690 0 obj +% 975 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 128.6 460.789 140.659] -/A << /S /GoTo /D (subsection.6.11) >> +/D [967 0 R /XYZ 99.895 167.072 null] >> -% 691 0 obj +% 976 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 116.52 352.646 128.58] -/A << /S /GoTo /D (subsection.6.12) >> +/D [967 0 R /XYZ 99.895 148.646 null] >> -% 696 0 obj +% 977 0 obj << -/D [694 0 R /XYZ 149.705 753.953 null] +/D [967 0 R /XYZ 99.895 132.275 null] >> -% 693 0 obj + +endstream +endobj +983 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> -/ProcSet [ /PDF /Text ] +/Length 8991 >> -% 740 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F84 9.9626 Tf 163.158 706.129 Td [(6.)]TJ +0 g 0 G + 0.984 0 0 1 175.611 706.129 Tm [(Choose)-254(the)-254(pr)19(econditioner)-254(to)-254(be)-254(u)1(sed)-254(with)]TJ/F145 9.9626 Tf 1 0 0 1 362.392 706.129 Tm [(prec%init)]TJ/F84 9.9626 Tf 0.984 0 0 1 411.953 706.129 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 431.037 706.129 Tm [(prec%set)]TJ/F84 9.9626 Tf 0.984 0 0 1 472.88 706.129 Tm [(,)-254(and)]TJ 1 0 0 1 175.611 694.174 Tm [(build)-250(it)-250(with)]TJ/F145 9.9626 Tf 57.275 0 Td [(prec%build)]TJ +0 0 1 rg 0 0 1 RG +/F84 7.5716 Tf 52.303 3.616 Td [(3)]TJ +0 g 0 G +/F84 9.9626 Tf 4.284 -3.616 Td [(;)]TJ +0 g 0 G + -126.315 -18.137 Td [(7.)]TJ +0 g 0 G + 0.98 0 0 1 175.611 676.037 Tm [(Call)-204(one)-203(of)-204(the)-204(iterative)-203(drivers)-204(with)-204(the)-203(method)-204(of)-204(choice,)-214(e.g.)]TJ/F145 9.9626 Tf 1 0 0 1 442.374 676.037 Tm [(psb_krylov)]TJ/F84 9.9626 Tf -267.181 -11.955 Td [(with)]TJ/F145 9.9626 Tf 22.744 0 Td [(bicgstab)]TJ/F84 9.9626 Tf 41.843 0 Td [(.)]TJ 
-89.384 -16.347 Td [(This)-250(is)-250(the)-250(str)8(uctur)18(e)-250(of)-250(the)-250(sample)-250(pr)18(ograms)-250(in)-250(the)-250(dir)18(ectory)]TJ/F145 9.9626 Tf 266.418 0 Td [(test/pargen/)]TJ/F84 9.9626 Tf 62.764 0 Td [(.)]TJ 0.98 0 0 1 165.649 635.78 Tm [(For)-227(a)-227(simulation)-227(in)-227(which)-227(the)-226(same)-227(discr)18(etization)-227(mesh)-227(is)-227(used)-227(over)-227(multiple)]TJ 1 0 0 1 150.705 623.824 Tm [(time)-250(steps,)-250(the)-250(following)-250(str)8(uctur)18(e)-250(may)-250(be)-250(mor)18(e)-250(appr)18(opriate:)]TJ +0 g 0 G + 12.453 -16.347 Td [(1.)]TJ +0 g 0 G + [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F145 9.9626 Tf 171.465 0 Td [(psb_init)]TJ +0 g 0 G +/F84 9.9626 Tf -171.465 -18.136 Td [(2.)]TJ +0 g 0 G + [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F145 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ +0 g 0 G +/F84 9.9626 Tf -130.489 -18.137 Td [(3.)]TJ +0 g 0 G + 0.98 0 0 1 175.611 571.204 Tm [(Loop)-224(over)-225(the)-224(topology)-224(of)-224(the)-225(discr)19(etization)-225(mesh)-224(and)-224(build)-224(the)-225(descriptor)]TJ 1 0 0 1 175.193 559.249 Tm [(with)]TJ/F145 9.9626 Tf 22.744 0 Td [(psb_cdins)]TJ/F84 9.9626 Tf 47.074 0 Td [(;)]TJ +0 g 0 G + -81.853 -18.136 Td [(4.)]TJ +0 g 0 G + [-461(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F145 9.9626 Tf 143.998 0 Td [(psb_cdasb)]TJ/F84 9.9626 Tf 47.073 0 Td [(;)]TJ +0 g 0 G + -191.071 -18.136 Td [(5.)]TJ +0 g 0 G + 1.02 0 0 1 175.223 522.977 Tm [(Allocate)-407(the)-408(sparse)-407(matrices)-408(and)-407(dense)-408(vectors)-407(with;)]TJ/F145 9.9626 Tf 1 0 0 1 425.998 522.977 Tm [(psb_spall)]TJ/F84 9.9626 Tf 1.02 0 0 1 477.212 522.977 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 175.611 511.021 Tm [(psb_geall)]TJ/F84 9.9626 Tf 47.074 0 Td [(;)]TJ +0 g 0 G + -59.527 -18.136 Td [(6.)]TJ +0 g 0 G + [-500(Loop)-250(over)-250(the)-250(time)-250(steps:)]TJ +0 g 0 G + 17.774 -18.136 Td [(\050a\051)]TJ +0 g 0 G + 1.02 0 0 1 197.529 474.749 Tm 
[(If)-260(after)-261(\002rst)-260(time)-260(step,)-264(r)18(einitialize)-261(the)-260(sparse)-260(matrix)-260(with)]TJ/F145 9.9626 Tf 1 0 0 1 451.278 474.749 Tm [(psb_sprn)]TJ/F84 9.9626 Tf 1.02 0 0 1 493.121 474.749 Tm [(;)]TJ 1 0 0 1 197.529 462.794 Tm [(also)-250(zer)18(o)-250(out)-250(the)-250(dense)-250(vectors;)]TJ +0 g 0 G + -17.125 -14.152 Td [(\050b\051)]TJ +0 g 0 G + 1.02 0 0 1 197.529 448.642 Tm [(Loop)-358(o)1(ver)-358(the)-357(mesh,)-386(generate)-357(the)-358(coef)18(\002cients)-357(and)-358(insert/update)]TJ 1 0 0 1 197.529 436.687 Tm [(them)-250(with)]TJ/F145 9.9626 Tf 47.85 0 Td [(psb_spins)]TJ/F84 9.9626 Tf 49.564 0 Td [(and)]TJ/F145 9.9626 Tf 19.357 0 Td [(psb_geins)]TJ/F84 9.9626 Tf 47.073 0 Td [(;)]TJ +0 g 0 G + -179.883 -14.151 Td [(\050c\051)]TJ +0 g 0 G + [-461(Assemble)-250(with)]TJ/F145 9.9626 Tf 83.834 0 Td [(psb_spasb)]TJ/F84 9.9626 Tf 49.564 0 Td [(and)]TJ/F145 9.9626 Tf 19.357 0 Td [(psb_geasb)]TJ/F84 9.9626 Tf 47.073 0 Td [(;)]TJ +0 g 0 G + -201.492 -14.151 Td [(\050d\051)]TJ +0 g 0 G +0 g 0 G + 1.315 -14.152 Td [(\050e\051)]TJ +0 g 0 G + 0.98 0 0 1 197.529 394.233 Tm [(Choose)-245(the)-246(pr)18(econditione)1(r)-246(to)-245(be)-246(used)-245(with)]TJ/F145 9.9626 Tf 1 0 0 1 382.982 394.233 Tm [(prec%init)]TJ/F84 9.9626 Tf 0.98 0 0 1 432.452 394.233 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 451.378 394.233 Tm [(prec%set)]TJ/F84 9.9626 Tf 0.98 0 0 1 493.22 394.233 Tm [(,)]TJ 1 0 0 1 197.529 382.278 Tm [(and)-250(build)-250(it)-250(with)]TJ/F145 9.9626 Tf 76.632 0 Td [(prec%build)]TJ/F84 9.9626 Tf 52.303 0 Td [(;)]TJ +0 g 0 G + -143.869 -14.151 Td [(\050f\051)]TJ +0 g 0 G + 1.02 0 0 1 197.529 368.127 Tm [(Call)-416(one)-415(of)-416(the)-416(it)1(erative)-416(drivers)-416(with)-415(the)-416(method)-416(of)-415(choice,)-458(e.g.)]TJ/F145 9.9626 Tf 1 0 0 1 197.529 356.172 Tm [(psb_krylov)]TJ/F84 9.9626 Tf 54.794 0 Td [(with)]TJ/F145 9.9626 Tf 22.745 0 Td [(bicgstab)]TJ/F84 9.9626 Tf 41.842 0 Td [(.)]TJ 0.98 0 0 1 150.396 338.035 Tm 
[(The)-247(insertion)-247(r)19(outines)-247(will)-247(be)-247(called)-247(as)-247(many)-247(times)-247(as)-247(needed;)-249(they)-247(only)-247(need)-247(to)]TJ 1.02 0 0 1 150.705 326.08 Tm [(be)-245(called)-245(on)-245(the)-245(data)-245(that)-245(is)-246(actually)-245(allocated)-245(to)-245(the)-245(curr)18(ent)-245(pr)17(ocess,)-245(i.e.)-304(each)]TJ 1 0 0 1 150.406 314.125 Tm [(pr)18(ocess)-250(generates)-250(its)-250(own)-250(data.)]TJ 0.981 0 0 1 165.649 302.17 Tm [(In)-256(principle)-255(ther)18(e)-256(is)-255(no)-256(speci\002c)-255(or)18(der)-256(in)-255(the)-256(calls)-256(to)]TJ/F145 9.9626 Tf 1 0 0 1 386.226 302.17 Tm [(psb_spins)]TJ/F84 9.9626 Tf 0.981 0 0 1 433.299 302.17 Tm [(,)-256(nor)-256(is)-255(ther)18(e)-255(a)]TJ 0.997 0 0 1 150.705 290.215 Tm [(r)18(equir)18(ement)-251(to)-251(build)-251(a)-251(matrix)-251(r)18(ow)-251(in)-251(its)-251(entir)18(ety)-251(befor)18(e)-251(calling)-251(the)-251(r)18(outine;)-251(this)]TJ 1.02 0 0 1 150.705 278.26 Tm [(allows)-288(the)-288(application)-289(p)1(r)17(ogrammer)-288(to)-288(walk)-288(thr)17(ough)-288(the)-288(discr)18(etization)-289(mesh)]TJ 1.02 0 0 1 150.705 266.304 Tm [(element)-265(by)-265(element,)-271(gen)1(erating)-266(the)-265(main)-265(part)-265(of)-265(a)-266(given)-265(matrix)-265(r)18(ow)-265(but)-266(also)]TJ 1 0 0 1 150.705 254.349 Tm [(contributions)-250(to)-250(the)-250(r)18(ows)-250(corr)18(esponding)-250(to)-250(neighbouring)-250(elements.)]TJ 1.02 0 0 1 165.649 242.394 Tm [(Fr)18(om)-284(a)-283(functional)-283(point)-284(of)-283(view)-284(it)-283(is)-283(even)-284(possible)-283(to)-284(execut)1(e)-284(one)-283(call)-284(for)]TJ 0.988 0 0 1 150.705 230.439 Tm [(each)-252(nonzer)18(o)-252(coef)18(\002cient;)-252(however)-252(this)-252(would)-252(have)-252(a)-252(substantial)-252(computational)]TJ 0.98 0 0 1 150.705 218.484 Tm 
[(over)18(head.)-306(It)-224(is)-225(ther)19(efor)18(e)-224(advisable)-225(to)-224(pack)-225(a)-224(certain)-225(amount)-224(of)-225(data)-224(into)-225(each)-224(call)]TJ 0.988 0 0 1 150.705 206.529 Tm [(to)-253(the)-254(insertion)-253(r)18(outine,)-253(say)-253(touching)-254(on)-253(a)-253(few)-254(tens)-253(of)-253(r)18(ows;)-254(the)-253(best)-253(performng)]TJ 1.009 0 0 1 150.426 194.573 Tm [(value)-247(would)-248(depend)-247(on)-248(both)-247(the)-248(ar)18(chitectur)18(e)-248(of)-247(the)-248(computer)-247(being)-248(used)-247(and)]TJ 1.02 0 0 1 150.705 182.618 Tm [(on)-333(the)-333(pr)18(oblem)-333(str)8(uctur)18(e.)-567(At)-333(the)-333(opposite)-332(extr)17(eme,)-355(it)-332(would)-333(be)-333(possible)-333(to)]TJ 0.996 0 0 1 150.705 170.663 Tm [(generate)-250(the)-251(entir)18(e)-250(part)-251(of)-250(a)-250(coef)18(\002cient)-251(matrix)-250(r)18(esiding)-251(on)-250(a)-250(pr)18(ocess)-251(and)-250(pass)-251(it)]TJ 0.981 0 0 1 150.705 158.708 Tm [(in)-255(a)-255(single)-254(call)-255(to)]TJ/F145 9.9626 Tf 1 0 0 1 225.645 158.708 Tm [(psb_spins)]TJ/F84 9.9626 Tf 0.981 0 0 1 272.718 158.708 Tm [(;)-255(this,)-255(however)76(,)-255(would)-255(entail)-254(a)-255(doubling)-255(of)-255(memory)]TJ 1 0 0 1 150.705 146.753 Tm [(occupation,)-250(and)-250(thus)-250(would)-250(be)-250(almost)-250(always)-250(far)-250(fr)18(om)-250(optimal.)]TJ +0 g 0 G +ET +q +1 0 0 1 150.705 139.555 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +Q +BT +/F84 5.9776 Tf 161.564 132.683 Td [(3)]TJ/F84 7.9701 Tf 0.981 0 0 1 164.804 129.79 Tm [(The)-255(subr)18(outine)-256(sty)1(le)]TJ/F215 7.9701 Tf 1 0 0 1 237.517 129.79 Tm [(psb)]TJ +ET +q +1 0 0 1 250.728 129.989 cm +[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +Q +BT +/F215 7.9701 Tf 253.269 129.79 Td [(precinit)]TJ/F84 7.9701 Tf 0.981 0 0 1 289.14 129.79 Tm [(and)]TJ/F215 7.9701 Tf 1 0 0 1 304.374 129.79 Tm [(psb)]TJ +ET +q +1 0 0 1 317.585 129.989 cm +[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +Q +BT +/F215 7.9701 Tf 320.125 129.79 Td 
[(precbld)]TJ/F84 7.9701 Tf 0.981 0 0 1 351.762 129.79 Tm [(ar)18(e)-255(still)-256(support)1(ed)-256(for)-255(backwar)18(d)-256(compati)1(-)]TJ 1 0 0 1 150.705 120.326 Tm [(bility)]TJ +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 169.365 -29.888 Td [(8)]TJ +0 g 0 G +ET + +endstream +endobj +1004 0 obj << -/Type /Page -/Contents 741 0 R -/Resources 739 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 692 0 R 697 0 R 698 0 R 699 0 R 700 0 R 701 0 R 702 0 R 703 0 R 704 0 R 705 0 R 706 0 R 707 0 R 708 0 R 709 0 R 710 0 R 711 0 R 712 0 R 713 0 R 743 0 R 714 0 R 715 0 R 716 0 R 717 0 R 718 0 R 719 0 R 720 0 R 721 0 R 722 0 R 723 0 R 724 0 R 725 0 R 726 0 R 727 0 R 728 0 R 729 0 R 730 0 R 731 0 R 732 0 R 733 0 R 744 0 R 734 0 R 735 0 R 745 0 R 736 0 R 746 0 R 737 0 R 747 0 R ] +/Length 7843 >> -% 692 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 99.895 706.129 Td [(2.3.1)-1000(User)18(-de\002ned)-250(index)-250(mappings)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.895 687.165 Tm [(PSBLAS)-250(supports)-249(user)18(-de\002ned)-250(global)-249(to)-250(local)-250(i)1(ndex)-250(mappings,)-251(subject)-249(to)-250(the)]TJ 1 0 0 1 99.895 675.21 Tm [(constraints)-250(outlined)-250(in)-250(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-250(2.3)]TJ +0 g 0 G + [(:)]TJ +0 g 0 G + 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-469(The)-250(set)-250(of)-250(indices)-250(owned)-250(locally)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)-250(1)-179(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 294.185 0 Td [(n)]TJ/F84 9.9626 Tf 5.664 -1.495 Td [(r)18(ow)]TJ/F78 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F84 9.9626 Tf 2.774 3.143 Td [(;)]TJ +0 g 0 G + -320.16 -19.926 Td [(2.)]TJ +0 g 0 G + [-469(The)-250(set)-250(of)-250(halo)-250(points)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)]TJ/F78 9.9626 Tf 227.977 0 Td [(n)]TJ/F84 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F78 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F192 10.3811 Tf 4.836 3.142 Td [(+)]TJ/F84 9.9626 Tf 10.131 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F78 9.9626 Tf 
19.967 0 Td [(n)]TJ/F84 9.9626 Tf 5.664 -3.83 Td [(col)]TJ/F78 5.9776 Tf 12.795 -1.649 Td [(i)]TJ/F84 9.9626 Tf 2.774 5.479 Td [(;)]TJ 1.016 0 0 1 99.895 613.6 Tm [(but)-246(otherwise)-247(the)-246(mapping)-246(is)-247(arbit)1(rary)109(.)-306(The)-246(user)-247(application)-246(is)-246(r)17(esponsible)-246(to)]TJ 1.009 0 0 1 99.895 601.644 Tm [(ensur)18(e)-247(consistency)-247(of)-246(this)-247(mapping;)-247(some)-247(err)18(ors)-247(may)-246(be)-247(caught)-247(by)-247(the)-246(library)110(,)]TJ 0.994 0 0 1 99.895 589.689 Tm [(but)-253(this)-252(is)-253(not)-252(guaranteed.)-315(The)-253(application)-252(str)8(uctur)18(e)-253(to)-252(support)-253(this)-252(usage)-253(is)-252(as)]TJ 1 0 0 1 99.895 577.734 Tm [(follows:)]TJ +0 g 0 G + 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 124.802 557.809 Tm [(Initialize)-194(index)-194(space)-194(with)]TJ/F145 9.9626 Tf 1 0 0 1 238.285 557.809 Tm [(psb_cdall\050ictx,desc,info,vl=vl,lidx=lidx\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 124.503 545.854 Tm [(passing)-205(the)-205(vectors)]TJ/F145 9.9626 Tf 1 0 0 1 208.066 545.854 Tm [(vl\050:\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 236.218 545.854 Tm [(containing)-205(the)-205(set)-205(of)-205(global)-205(indices)-205(owned)-205(by)-205(the)]TJ 1 0 0 1 124.802 533.898 Tm [(curr)18(ent)-250(pr)18(ocess)-250(and)]TJ/F145 9.9626 Tf 89.105 0 Td [(lidx\050:\051)]TJ/F84 9.9626 Tf 39.103 0 Td [(containing)-250(the)-250(corr)18(esponding)-250(local)-250(indices;)]TJ +0 g 0 G + -140.661 -19.925 Td [(2.)]TJ +0 g 0 G + 0.98 0 0 1 124.413 513.973 Tm [(Add)-241(the)-241(halo)-241(points)]TJ/F145 9.9626 Tf 1 0 0 1 212.672 513.973 Tm [(ja\050:\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 241.177 513.973 Tm [(and)-241(their)-241(associated)-241(local)-241(indices)]TJ/F145 9.9626 Tf 1 0 0 1 384.793 513.973 Tm [(lidx\050:\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 423.758 513.973 Tm [(with)]TJ 1 0 0 1 124.802 502.018 Tm [(a\050some\051)-250(call\050s\051)-250(to)]TJ/F145 9.9626 Tf 77.07 0 Td 
[(psb_cdins\050nz,ja,desc,info,lidx=lidx\051)]TJ/F84 9.9626 Tf 188.292 0 Td [(;)]TJ +0 g 0 G + -277.815 -19.925 Td [(3.)]TJ +0 g 0 G + [-461(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F145 9.9626 Tf 143.998 0 Td [(psb_cdasb)]TJ/F84 9.9626 Tf 47.073 0 Td [(;)]TJ +0 g 0 G + -191.071 -19.926 Td [(4.)]TJ +0 g 0 G + 0.98 0 0 1 124.802 462.167 Tm [(Build)-206(the)-207(sparse)-206(matrices)-207(and)-206(vectors,)-216(optionally)-207(making)-206(use)-206(in)]TJ/F145 9.9626 Tf 1 0 0 1 396.533 462.167 Tm [(psb_spins)]TJ/F84 9.9626 Tf 1.02 0 0 1 124.802 450.212 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 144.844 450.212 Tm [(psb_geins)]TJ/F84 9.9626 Tf 1.02 0 0 1 194.755 450.212 Tm [(of)-279(the)]TJ/F145 9.9626 Tf 1 0 0 1 223.457 450.212 Tm [(local)]TJ/F84 9.9626 Tf 1.02 0 0 1 252.446 450.212 Tm [(ar)18(gument)-280(specifying)-279(that)-279(the)-279(indices)-279(in)]TJ/F145 9.9626 Tf 1 0 0 1 431.851 450.212 Tm [(ia)]TJ/F84 9.9626 Tf 1.02 0 0 1 442.311 450.212 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 438.257 Tm [(ja)]TJ/F84 9.9626 Tf 12.951 0 Td [(and)]TJ/F145 9.9626 Tf 19.358 0 Td [(irw)]TJ/F84 9.9626 Tf 15.691 0 Td [(,)-250(r)18(espectively)111(,)-250(ar)18(e)-250(alr)18(eady)-250(local)-250(indices.)]TJ/F75 11.9552 Tf -72.907 -29.133 Td [(2.4)-1000(Programming)-250(model)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.587 390.16 Tm [(The)-266(PSBLAS)-265(librarary)-266(is)-266(based)-265(on)-266(the)-266(Single)-266(Pr)18(ogram)-266(Multiple)-265(Data)-266(\050SPMD\051)]TJ 1.008 0 0 1 99.596 378.205 Tm [(pr)18(ogramming)-250(model:)-310(each)-250(pr)18(ocess)-250(participating)-249(in)-250(the)-250(computation)-249(performs)]TJ 1 0 0 1 99.895 366.25 Tm [(the)-250(same)-250(actions)-250(on)-250(a)-250(chunk)-250(of)-250(data.)-310(Parallelism)-250(is)-250(thus)-250(data-driven.)]TJ 1.019 0 0 1 114.839 354.295 Tm [(Because)-246(of)-246(this)-246(str)8(uctur)17(e,)-246(many)-246(subr)18(outines)-246(coor)18(dinate)-246(their)-246(action)-247(acr)18(oss)]TJ 1.02 0 0 1 99.895 342.34 Tm 
[(the)-265(various)-264(pr)18(ocesses,)-270(thus)-264(pr)17(oviding)-264(an)-265(implicit)-264(synchr)17(onization)-264(point,)-270(and)]TJ 1.02 0 0 1 99.895 330.384 Tm [(ther)18(efor)17(e)]TJ/F78 9.9626 Tf 1.02 0 0 1 143.363 330.384 Tm [(must)]TJ/F84 9.9626 Tf 1.02 0 0 1 167.293 330.384 Tm [(be)-299(called)-299(simultaneously)-299(by)-298(all)-299(pr)17(ocesses)-299(participat)1(ing)-299(in)-299(the)]TJ 0.98 0 0 1 99.895 318.429 Tm [(computation.)-306(This)-225(is)-225(certainly)-225(tr)9(ue)-225(for)-225(the)-225(data)-225(allocation)-225(and)-224(assembly)-225(r)18(outines,)]TJ 1 0 0 1 99.895 306.474 Tm [(for)-250(all)-250(the)-250(computational)-250(r)18(outines)-250(and)-250(for)-250(some)-250(of)-250(the)-250(tools)-250(r)18(outines.)]TJ 1.02 0 0 1 114.839 294.519 Tm [(However)-269(ther)17(e)-269(ar)18(e)-270(many)-269(cases)-269(wher)18(e)-270(no)-269(synchr)18(onization,)-276(and)-269(indeed)-269(no)]TJ 1.02 0 0 1 99.895 282.564 Tm [(communication)-282(among)-281(pr)17(ocesses,)-290(is)-282(implied;)-300(for)-282(instance,)-290(all)-282(the)-282(r)18(outines)-282(in)]TJ 1.02 0 0 1 99.895 270.609 Tm [(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-377(3)]TJ +0 g 0 G + [-377(ar)17(e)-377(only)-377(acting)-377(on)-378(the)-377(local)-377(data)-377(str)8(uctur)17(es,)-410(and)-377(thus)-377(may)-378(b)1(e)-378(called)]TJ 1.02 0 0 1 99.895 258.653 Tm [(independently)109(.)-657(The)-362(most)-363(important)-363(case)-362(is)-363(that)-363(of)-362(the)-363(coef)18(\002cient)-363(insertion)]TJ 0.98 0 0 1 99.895 246.698 Tm [(r)18(outines:)-296(since)-215(the)-215(number)-216(of)-215(coef)19(\002)-1(cients)-215(in)-215(the)-215(sparse)-216(and)-215(dense)-215(matrices)-215(varies)]TJ 0.996 0 0 1 99.895 234.743 Tm [(among)-252(the)-251(pr)18(ocessors,)-252(and)-252(since)-252(the)-251(user)-252(is)-252(fr)18(ee)-251(to)-252(choose)-252(an)-251(arbitrary)-252(or)18(der)-252(in)]TJ 1 0 0 1 99.895 222.788 Tm 
[(builiding)-250(the)-250(matrix)-250(entries,)-250(these)-250(r)18(outines)-250(cannot)-250(imply)-250(a)-250(synchr)18(onization.)]TJ 14.944 -11.955 Td [(Thr)18(oughout)-250(this)-250(user)-74('s)-250(guide)-250(each)-250(subr)18(outine)-250(will)-250(be)-250(clearly)-250(indicated)-250(as:)]TJ +0 g 0 G +/F75 9.9626 Tf -14.944 -19.926 Td [(Synchronous:)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 167.143 190.907 Tm [(must)-200(be)-200(called)-200(simultaneously)-200(by)-200(all)-199(the)-200(pr)18(ocesses)-200(in)-200(the)-200(r)19(elevant)]TJ 1 0 0 1 124.802 178.952 Tm [(communication)-250(context;)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.925 Td [(Asynchronous:)]TJ +0 g 0 G +/F84 9.9626 Tf 73.335 0 Td [(may)-250(be)-250(called)-250(in)-250(a)-250(totally)-250(independent)-250(manner)74(.)]TJ +0 g 0 G + 96.03 -68.589 Td [(9)]TJ +0 g 0 G +ET + +endstream +endobj +1015 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 702.323 330.917 714.383] -/A << /S /GoTo /D (subsection.6.13) >> +/Length 8766 >> -% 697 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 14.3462 Tf 150.705 705.784 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ/F84 9.9626 Tf 1.015 0 0 1 150.705 682.693 Tm [(In)-246(this)-246(chapter)-246(we)-247(illustrate)-246(the)-246(data)-246(str)8(uctur)18(es)-246(used)-247(for)-246(de\002nition)-246(of)-246(r)18(outines)]TJ 1.02 0 0 1 150.705 670.737 Tm [(interfaces.)-430(They)-287(include)-287(data)-287(str)8(uctur)18(es)-287(for)-287(sparse)-287(matrices,)-298(communication)]TJ 1 0 0 1 150.705 658.782 Tm [(descriptors)-250(and)-250(pr)18(econditioners.)]TJ 0.999 0 0 1 165.649 646.626 Tm [(All)-251(the)-251(data)-252(types)-251(and)-251(the)-251(basic)-251(subr)18(outine)-252(interfaces)-251(r)18(elated)-251(to)-251(descriptors)]TJ 1.02 0 0 1 150.705 634.671 Tm [(and)-296(sparse)-297(matrices)-296(ar)17(e)-296(de\002ned)-297(in)-296(the)-297(module)]TJ/F145 9.9626 Tf 1 0 0 1 364.369 634.671 Tm [(psb_base_mod)]TJ/F84 9.9626 Tf 
1.02 0 0 1 427.133 634.671 Tm [(;)-322(this)-297(will)-296(have)]TJ 1.02 0 0 1 150.705 622.716 Tm [(to)-335(be)-335(included)-335(by)-335(every)-335(user)-336(subr)18(outine)-335(that)-335(makes)-335(use)-335(of)-335(the)-336(lib)1(rary)108(.)-574(The)]TJ 1 0 0 1 150.406 610.76 Tm [(pr)18(econditioners)-250(ar)18(e)-250(de\002ned)-250(in)-250(the)-250(module)]TJ/F145 9.9626 Tf 187.993 0 Td [(psb_prec_mod)]TJ/F84 9.9626 Tf 1.02 0 0 1 165.649 598.604 Tm [(Integer)73(,)-362(r)17(eal)-338(and)-339(complex)-338(data)-339(types)-338(ar)17(e)-338(parametrized)-339(with)-338(a)-339(kind)-338(type)]TJ 1 0 0 1 150.705 586.649 Tm [(de\002ned)-250(in)-250(the)-250(library)-250(as)-250(follows:)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -20.528 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 566.32 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 170.889 566.121 Td [(spk)]TJ +ET +q +1 0 0 1 188.084 566.32 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F84 9.9626 Tf 1.02 0 0 1 196.055 566.121 Tm [(Kind)-361(parameter)-362(for)-361(short)-362(pr)18(ecision)-362(r)18(eal)-362(and)-361(complex)-362(data;)-419(corr)17(e-)]TJ 1 0 0 1 175.611 554.166 Tm [(sponds)-250(to)-250(a)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F145 9.9626 Tf 52.902 0 Td [(REAL)]TJ +0 g 0 G +/F84 9.9626 Tf 23.412 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(4)-250(bytes;)]TJ +0 g 0 G +/F75 9.9626 Tf -101.22 -20.73 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 533.635 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 170.889 533.436 Td [(dpk)]TJ +ET +q +1 0 0 1 189.748 533.635 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F84 9.9626 Tf 1.02 0 0 1 197.718 533.436 Tm [(Kind)-376(parameter)-375(for)-376(long)-376(pr)18(ecision)-375(r)17(eal)-375(and)-376(complex)-376(data;)-441(corr)18(e-)]TJ 1 0 0 1 175.611 521.481 Tm [(sponds)-250(to)-250(a)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F145 9.9626 Tf 52.902 0 Td [(DOUBLE)-525(PRECISION)]TJ +0 g 0 G +/F84 9.9626 Tf 86.176 0 Td 
[(declaration)-250(and)-250(is)-250(normally)-250(8)-250(bytes;)]TJ +0 g 0 G +/F75 9.9626 Tf -163.984 -20.73 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 500.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 170.889 500.751 Td [(mpk)]TJ +ET +q +1 0 0 1 192.518 500.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F84 9.9626 Tf 200.488 500.751 Td [(Kind)-250(parameter)-250(for)-250(4-bytes)-250(integer)-250(data,)-250(as)-250(is)-250(always)-250(used)-250(by)-250(MPI;)]TJ +0 g 0 G +/F75 9.9626 Tf -49.783 -20.729 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 480.221 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 170.889 480.022 Td [(epk)]TJ +ET +q +1 0 0 1 188.642 480.221 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F84 9.9626 Tf 1.02 0 0 1 196.613 480.022 Tm [(Kind)-311(parameter)-312(for)-311(8-bytes)-311(integer)-311(data,)-328(as)-312(is)-311(always)-311(used)-312(by)-311(the)]TJ/F145 9.9626 Tf 1 0 0 1 175.611 468.067 Tm [(sizeof)]TJ/F84 9.9626 Tf 33.873 0 Td [(methods;)]TJ +0 g 0 G +/F75 9.9626 Tf -58.779 -20.73 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 447.537 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 170.889 447.337 Td [(ipk)]TJ +ET +q +1 0 0 1 186.979 447.537 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F84 9.9626 Tf 1.02 0 0 1 194.949 447.337 Tm [(Kind)-336(parameter)-336(for)-336(\223local\224)-336(integer)-336(indices)-336(and)-336(data;)-382(with)-336(default)]TJ 1 0 0 1 175.611 435.382 Tm [(build)-250(options)-250(this)-250(is)-250(a)-250(4)-250(bytes)-250(integer;)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -20.729 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 414.852 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 170.889 414.653 Td [(lpk)]TJ +ET +q +1 0 0 1 186.979 414.852 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F84 9.9626 Tf 1.02 0 0 1 194.949 414.653 Tm 
[(Kind)-266(parameter)-266(for)-266(\223global\224)-266(integer)-266(indices)-266(and)-266(data;)-277(with)-266(default)]TJ 1 0 0 1 175.611 402.698 Tm [(build)-250(options)-250(this)-250(is)-250(an)-250(8)-250(bytes)-250(integer;)]TJ 1.017 0 0 1 150.396 382.169 Tm [(The)-246(integer)-246(kinds)-246(for)-246(local)-246(and)-246(global)-246(indices)-246(can)-246(be)-246(chosen)-246(at)-246(con\002gur)18(e)-246(time)]TJ 1.02 0 0 1 150.705 370.214 Tm [(to)-273(hold)-273(4)-273(or)-273(8)-273(bytes,)-280(with)-273(the)-273(global)-273(indices)-273(at)-273(least)-273(as)-273(lar)18(ge)-273(as)-273(the)-273(local)-273(ones.)]TJ 1.002 0 0 1 150.396 358.259 Tm [(T)92(ogether)-249(with)-249(the)-250(classes)-249(attributes)-249(we)-249(also)-249(discuss)-249(their)-249(methods.)-310(Most)-249(meth-)]TJ 1.016 0 0 1 150.705 346.304 Tm [(ods)-245(detailed)-245(her)18(e)-245(only)-245(act)-245(on)-245(the)-244(local)-245(variable,)-245(i.e.)-305(their)-245(action)-245(is)-245(pur)18(ely)-245(local)]TJ 1.02 0 0 1 150.705 334.349 Tm [(and)-346(asynchr)18(onous)-346(unless)-346(otherwise)-346(stated.)-607(The)-346(list)-346(of)-346(methods)-346(her)17(e)-346(is)-346(not)]TJ 1.006 0 0 1 150.705 322.393 Tm [(completely)-247(exhaustive;)-248(many)-248(methods,)-247(especially)-248(those)-247(that)-248(alt)1(er)-248(the)-247(contents)]TJ 0.984 0 0 1 150.705 310.438 Tm [(of)-253(the)-254(various)-253(objects,)-253(ar)18(e)-253(usually)-254(not)-253(needed)-253(by)-254(the)-253(end-user)75(,)-253(and)-254(ther)19(efor)18(e)-253(ar)18(e)]TJ 1 0 0 1 150.705 298.483 Tm [(described)-250(in)-250(the)-250(developer)-74('s)-250(documentation.)]TJ/F75 11.9552 Tf 0 -30.277 Td [(3.1)-1000(Descriptor)-250(data)-250(structure)]TJ/F84 9.9626 Tf 1.02 0 0 1 150.316 248.853 Tm [(All)-387(the)-386(general)-387(matrix)-387(informations)-386(and)-387(elements)-387(to)-387(be)-386(exchanged)-387(among)]TJ 1.02 0 0 1 150.406 236.897 Tm 
[(pr)18(ocesses)-247(ar)18(e)-247(stor)18(ed)-247(within)-246(a)-247(data)-246(str)7(uctur)18(e)-246(of)-247(the)-247(type)]TJ/F145 9.9626 Tf 1 0 0 1 397.584 236.897 Tm [(psb)]TJ +ET +q +1 0 0 1 413.902 237.097 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 417.041 236.897 Td [(desc)]TJ +ET +q +1 0 0 1 438.59 237.097 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 441.728 236.897 Td [(type)]TJ/F84 9.9626 Tf 1.02 0 0 1 462.649 236.897 Tm [(.)-308(Every)]TJ 0.98 0 0 1 150.705 224.942 Tm [(str)8(uctur)19(e)-237(of)-237(this)-237(type)-237(is)-238(as)1(sociated)-237(with)-238(a)-237(discr)19(etization)-237(pattern)-237(and)-237(enables)-237(data)]TJ 0.997 0 0 1 150.705 212.987 Tm [(communications)-252(and)-252(other)-253(operations)-252(that)-252(ar)18(e)-252(necessary)-253(for)-252(implementing)-252(the)]TJ 1 0 0 1 150.426 201.032 Tm [(various)-250(algorithms)-250(of)-250(inter)18(est)-250(to)-250(us.)]TJ 1.007 0 0 1 165.649 188.876 Tm [(The)-249(data)-248(str)8(uctur)17(e)-248(itself)]TJ/F145 9.9626 Tf 1 0 0 1 273.186 188.876 Tm [(psb_desc_type)]TJ/F84 9.9626 Tf 1.007 0 0 1 343.676 188.876 Tm [(can)-249(be)-248(tr)17(eated)-248(as)-249(an)-249(opaque)-248(object)]TJ 1.02 0 0 1 150.705 176.921 Tm [(handled)-278(via)-279(the)-278(tools)-278(r)17(outines)-278(of)-278(Sec.)]TJ +0 0 1 rg 0 0 1 RG + [-279(6)]TJ +0 g 0 G + [-278(or)-279(t)1(he)-279(query)-278(r)17(outines)-278(detailed)-278(below;)]TJ 1 0 0 1 150.705 164.965 Tm [(nevertheless)-250(we)-250(include)-250(her)18(e)-250(a)-250(description)-250(for)-250(the)-250(curious)-250(r)18(eader)74(.)]TJ 0.99 0 0 1 165.649 152.809 Tm [(First)-252(we)-252(describe)-252(the)]TJ/F145 9.9626 Tf 1 0 0 1 258.128 152.809 Tm [(psb_indx_map)]TJ/F84 9.9626 Tf 0.99 0 0 1 323.379 152.809 Tm [(type.)-313(This)-252(is)-252(a)-252(data)-252(str)8(uctur)18(e)-252(that)-252(keeps)]TJ 1 0 0 1 150.705 140.854 Tm 
[(track)-250(of)-250(a)-250(certain)-250(number)-250(of)-250(basic)-250(issues)-250(such)-250(as:)]TJ +0 g 0 G + 13.888 -20.528 Td [(\225)]TJ +0 g 0 G + [-469(The)-250(value)-250(of)-250(the)-250(communication)-250(context;)]TJ +0 g 0 G + 152.986 -29.888 Td [(10)]TJ +0 g 0 G +ET + +endstream +endobj +1023 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 690.229 308.85 702.289] -/A << /S /GoTo /D (subsection.6.14) >> +/Length 6419 >> -% 698 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F84 9.9626 Tf 113.783 706.129 Td [(\225)]TJ +0 g 0 G + 0.995 0 0 1 124.493 706.129 Tm [(The)-252(nu)1(mber)-252(of)-251(indices)-252(in)-251(the)-252(index)-251(space,)-252(i.e.)-312(global)-252(number)-251(of)-252(r)18(ows)-251(and)]TJ 1 0 0 1 124.802 694.174 Tm [(columns)-250(of)-250(a)-250(sparse)-250(matrix;)]TJ +0 g 0 G + -11.019 -20.409 Td [(\225)]TJ +0 g 0 G + [-469(The)-250(local)-250(set)-250(of)-250(indices,)-250(including:)]TJ +0 g 0 G +/F75 9.9626 Tf 22.974 -20.408 Td [(\226)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(number)-250(of)-250(local)-250(indices)-250(\050and)-250(local)-250(r)18(ows\051;)]TJ +0 g 0 G +/F75 9.9626 Tf -9.654 -16.182 Td [(\226)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(number)-250(of)-250(halo)-250(indices)-250(\050and)-250(ther)18(efor)18(e)-250(local)-250(columns\051;)]TJ +0 g 0 G +/F75 9.9626 Tf -9.654 -16.181 Td [(\226)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(global)-250(indices)-250(corr)18(esponding)-250(to)-250(the)-250(local)-250(ones.)]TJ 1.02 0 0 1 99.587 600.585 Tm [(Ther)18(e)-249(ar)17(e)-248(many)-249(dif)17(f)1(er)17(ent)-249(schemes)-248(for)-249(storing)-249(these)-249(data;)-251(ther)18(efor)17(e)-249(t)1(her)17(e)-249(ar)18(e)-249(a)]TJ 1.02 0 0 1 99.895 588.63 Tm [(number)-244(of)-244(types)-244(ext)1(ending)-244(the)-244(base)-244(one,)-244(and)-244(the)-244(descriptor)-244(str)8(uctur)18(e)-244(holds)-244(a)]TJ 0.988 0 0 1 99.596 576.675 Tm 
[(polymorphic)-252(object)-251(whose)-252(dynamic)-252(type)-252(can)-252(be)-251(any)-252(of)-252(the)-252(extended)-251(types.)-314(The)]TJ 1 0 0 1 99.895 564.72 Tm [(methods)-250(associated)-250(with)-250(this)-250(data)-250(type)-250(answer)-250(the)-250(following)-250(queries:)]TJ +0 g 0 G + 13.888 -20.288 Td [(\225)]TJ +0 g 0 G + 0.98 0 0 1 124.802 544.432 Tm [(For)-222(a)-222(given)-222(set)-222(of)-222(local)-222(indices,)-229(\002nd)-222(the)-222(corr)18(esponding)-222(indices)-222(in)-222(the)-222(global)]TJ 1 0 0 1 124.802 532.477 Tm [(numbering;)]TJ +0 g 0 G + -11.019 -20.408 Td [(\225)]TJ +0 g 0 G + 1.02 0 0 1 124.802 512.069 Tm [(For)-306(a)-306(given)-305(set)-306(of)-306(global)-306(indices,)-321(\002nd)-305(the)-306(corr)18(esponding)-306(indices)-306(in)-306(the)]TJ 1 0 0 1 124.802 500.114 Tm [(local)-250(numbering,)-250(if)-250(any)111(,)-250(or)-250(r)18(eturn)-250(an)-250(invalid)]TJ +0 g 0 G + -11.019 -20.409 Td [(\225)]TJ +0 g 0 G + [-461(Add)-250(a)-250(global)-250(index)-250(to)-250(the)-250(set)-250(of)-250(halo)-250(indices;)]TJ +0 g 0 G + 0 -20.408 Td [(\225)]TJ +0 g 0 G + [-500(Find)-250(the)-250(pr)18(ocess)-250(owner)-250(of)-250(each)-250(member)-250(of)-250(a)-250(set)-250(of)-250(global)-250(indices.)]TJ 1.019 0 0 1 99.507 439.009 Tm [(All)-246(methods)-246(but)-246(the)-246(last)-247(ar)18(e)-246(pur)18(ely)-247(l)1(ocal;)-247(the)-246(last)-246(method)-246(potentially)-246(r)17(equir)18(es)]TJ 0.98 0 0 1 99.895 427.054 Tm [(communication)-202(among)-201(pr)18(ocesses,)-212(and)-201(thus)-202(is)-202(a)-201(synchr)18(onous)-201(method.)-299(The)-201(choice)]TJ 0.996 0 0 1 99.895 415.099 Tm [(of)-251(a)-250(speci\002c)-251(dynamic)-251(type)-250(for)-251(the)-251(index)-250(map)-251(is)-251(made)-250(at)-251(the)-251(time)-250(the)-251(descriptor)]TJ 1 0 0 1 99.895 403.144 Tm [(is)-250(initially)-250(allocated,)-250(accor)18(ding)-250(to)-250(the)-250(mode)-250(of)-250(initialization)-250(\050see)-250(also)]TJ +0 0 1 rg 0 0 1 RG + 
[-250(6)]TJ +0 g 0 G + [(\051.)]TJ 14.944 -12.076 Td [(The)-250(descriptor)-250(contents)-250(ar)18(e)-250(as)-250(follows:)]TJ +0 g 0 G +/F75 9.9626 Tf -14.944 -20.288 Td [(indxmap)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 144.886 370.78 Tm [(A)-194(polymorphic)-194(variable)-194(of)-193(a)-194(type)-194(that)-194(is)-194(any)-194(extension)-194(of)-193(the)-194(indx)]TJ +ET +q +1 0 0 1 422.112 370.98 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 0.98 0 0 1 425.1 370.78 Tm [(map)]TJ 1 0 0 1 124.802 358.825 Tm [(type)-250(described)-250(above.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -32.363 Td [(halo)]TJ +ET +q +1 0 0 1 120.418 326.661 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.407 326.462 Td [(index)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 153.454 326.462 Tm [(A)-283(list)-283(of)-283(the)-283(halo)-283(and)-283(boundary)-283(elements)-283(for)-283(the)-283(curr)17(ent)-283(pr)18(ocess)]TJ 0.999 0 0 1 124.802 314.507 Tm [(to)-250(be)-249(exchanged)-250(with)-249(other)-250(pr)18(ocesses;)-250(for)-249(each)-250(pr)18(ocesses)-249(with)-250(which)-250(it)-249(is)]TJ 1 0 0 1 124.802 302.551 Tm [(necessary)-250(to)-250(communicate:)]TJ +0 g 0 G + 9.465 -20.408 Td [(1.)]TJ +0 g 0 G + [-500(Pr)18(ocess)-250(identi\002er;)]TJ +0 g 0 G + 0 -16.182 Td [(2.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.181 Td [(3.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.182 Td [(4.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ +0 g 0 G + 0 -16.182 Td [(5.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -20.408 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.409 Td [(ext)]TJ +ET +q +1 0 0 1 113.773 176.799 cm +[]0 d 0 J 0.398 w 0 0 m 
2.989 0 l S +Q +BT +/F75 9.9626 Tf 116.762 176.599 Td [(index)]TJ +0 g 0 G +/F84 9.9626 Tf 0.988 0 0 1 146.809 176.599 Tm [(A)-253(list)-253(of)-253(element)-253(indices)-253(to)-252(be)-253(exchanged)-253(to)-253(implement)-253(the)-253(mapping)]TJ 1 0 0 1 124.802 164.644 Tm [(between)-250(a)-250(base)-250(descriptor)-250(and)-250(a)-250(descriptor)-250(with)-250(overlap.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.408 Td [(ovrlap)]TJ +ET +q +1 0 0 1 129.833 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 132.822 132.281 Td [(index)]TJ +0 g 0 G +/F84 9.9626 Tf 1.005 0 0 1 162.869 132.281 Tm [(A)-249(list)-249(of)-249(the)-248(overlap)-249(elements)-249(for)-249(the)-249(curr)18(ent)-249(pr)18(ocess,)-249(or)18(ganized)]TJ 1 0 0 1 124.802 120.326 Tm [(in)-250(gr)18(oups)-250(like)-250(the)-250(pr)18(evious)-250(vector:)]TJ +0 g 0 G + 141.968 -29.888 Td [(11)]TJ +0 g 0 G +ET + +endstream +endobj +1034 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 678.135 290.101 690.194] -/A << /S /GoTo /D (subsection.6.15) >> +/Length 7288 >> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F84 9.9626 Tf 185.076 706.129 Td [(1.)]TJ +0 g 0 G + [-500(Pr)18(ocess)-250(identi\002er;)]TJ +0 g 0 G + 0 -16.693 Td [(2.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.694 Td [(3.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.693 Td [(4.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ +0 g 0 G + 0 -16.693 Td [(5.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 
g 0 G +/F75 9.9626 Tf -24.906 -21.431 Td [(ovr)]TJ +ET +q +1 0 0 1 166.256 596.693 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 169.245 596.494 Td [(mst)]TJ +ET +q +1 0 0 1 186.44 596.693 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 189.429 596.494 Td [(idx)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 208.408 596.494 Tm [(A)-235(list)-236(to)-235(r)18(etr)1(ieve)-236(the)-235(value)-235(of)-236(each)-235(overlap)-235(element)-236(fr)19(om)-236(t)1(he)-236(r)19(espec-)]TJ 1 0 0 1 175.611 584.538 Tm [(tive)-250(master)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -21.431 Td [(ovrlap)]TJ +ET +q +1 0 0 1 180.642 551.351 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 183.631 551.152 Td [(elem)]TJ +0 g 0 G +/F84 9.9626 Tf 27.118 0 Td [(For)-250(all)-250(overlap)-250(points)-250(belonging)-250(to)-250(th)-250(ecurr)18(ent)-250(pr)18(ocess:)]TJ +0 g 0 G + -25.673 -21.431 Td [(1.)]TJ +0 g 0 G + [-500(Overlap)-250(point)-250(index;)]TJ +0 g 0 G + 0 -16.693 Td [(2.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(pr)18(ocesses)-250(sharing)-250(that)-250(overlap)-250(points;)]TJ +0 g 0 G + 0 -16.694 Td [(3.)]TJ +0 g 0 G + [-500(Index)-250(of)-250(a)-250(\223master)-74(\224)-250(pr)18(ocess:)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(an)-250(allocatable)-250(integer)-250(array)-250(of)-250(rank)-250(two.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -21.431 Td [(bnd)]TJ +ET +q +1 0 0 1 169.564 453.671 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 172.553 453.472 Td [(elem)]TJ +0 g 0 G +/F84 9.9626 Tf 0.995 0 0 1 199.282 453.472 Tm [(A)-252(list)-251(of)-252(all)-252(boundary)-251(points,)-252(i.e.)-313(points)-251(that)-252(have)-252(a)-251(connection)-252(with)]TJ 1 0 0 1 175.611 441.517 Tm [(other)-250(pr)18(ocesses.)]TJ 1.02 0 0 1 150.396 420.462 Tm 
[(The)-350(Fortran)-350(2003)-350(declaration)-350(for)]TJ/F145 9.9626 Tf 1 0 0 1 302.443 420.462 Tm [(psb_desc_type)]TJ/F84 9.9626 Tf 1.02 0 0 1 373.993 420.462 Tm [(str)8(uctur)18(es)-350(is)-350(as)-350(follows:)-514(A)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 150.705 294.955 cm +0 0 343.711 104.608 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 153.694 388.902 Td [(type)]TJ +0 g 0 G + [-525(psb_desc_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 18.829 -10.959 Td [(class)]TJ +0 g 0 G + [(\050psb_indx_map\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(indxmap)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_halo_index)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.958 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_ext_index)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_ovrlap_index)]TJ +0.73 0.73 0.73 rg 0.73 0.73 
0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_ovr_mst_idx)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(ovrlap_elem\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(bnd_elem\050:\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -18.829 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_desc_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 150.705 259.801 Tm [(Listing)-350(1:)-513(The)-350(PSBLAS)-350(de\002ned)-350(data)-349(type)-350(that)-350(contains)-350(the)-350(communication)]TJ 1 0 0 1 150.705 247.846 Tm [(descriptor)74(.)]TJ 1.02 0 0 1 150.705 222.587 Tm [(communication)-253(descriptor)-253(associated)-254(with)-253(a)-253(sparse)-254(matr)1(ix)-254(has)-253(a)-253(state,)-256(which)]TJ 1 0 0 1 150.705 210.632 Tm [(can)-250(take)-250(the)-250(following)-250(values:)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.054 Td [(Build:)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 183.631 189.578 Tm 
[(State)-350(enter)18(ed)-350(after)-349(the)-350(\002rst)-349(allocation,)-376(and)-350(befor)18(e)-350(the)-349(\002rst)-350(assembly;)]TJ 1.02 0 0 1 175.611 177.622 Tm [(in)-344(this)-344(state)-343(it)-344(is)-344(possible)-344(to)-344(add)-343(communication)-344(r)17(equir)18(ements)-344(among)]TJ 1 0 0 1 175.611 165.667 Tm [(dif)18(fer)18(ent)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -21.431 Td [(Assembled:)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 209.086 144.236 Tm [(State)-261(enter)18(ed)-261(after)-261(the)-261(assembly;)-269(computations)-261(u)1(sing)-261(the)-261(associ-)]TJ 1.02 0 0 1 175.611 132.281 Tm [(ated)-250(sparse)-250(matrix,)-251(such)-250(as)-250(matrix-vector)-249(pr)17(oducts,)-251(ar)18(e)-250(only)-250(possible)-250(in)]TJ 1 0 0 1 175.611 120.326 Tm [(this)-250(state.)]TJ +0 g 0 G + 141.968 -29.888 Td [(12)]TJ +0 g 0 G +ET endstream endobj -762 0 obj +1050 0 obj << -/Length 4437 +/Length 5149 >> stream 0 g 0 G 0 g 0 G -0 0 1 rg 0 0 1 RG BT -/F54 9.9626 Tf 165.649 706.129 Td [(9.4)-1050(mm)]TJ +/F75 9.9626 Tf 99.895 706.129 Td [(3.1.1)-1000(Descriptor)-250(Methods)]TJ 0 -19 Td [(3.1.2)-1000(get)]TJ ET q -1 0 0 1 206.755 706.328 cm +1 0 0 1 144.219 687.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 209.743 706.129 Td [(array)]TJ +/F75 9.9626 Tf 147.208 687.129 Td [(local)]TJ ET q -1 0 0 1 233.713 706.328 cm +1 0 0 1 169.384 687.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 236.702 706.129 Td [(r)18(ead)-281(\227)-281(Read)-281(a)-281(dense)-281(array)-281(fr)18(om)-281(a)-281(\002le)-281(in)-281(the)-281(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ +/F75 9.9626 Tf 172.373 687.129 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(rows)]TJ 0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - 
[-1000(146)]TJ +/F145 9.9626 Tf -72.478 -19 Td [(nr)-525(=)-525(desc%get_local_rows\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -20.001 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(desc)]TJ +0 g 0 G +/F84 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G +/F75 9.9626 Tf -77.918 -33.929 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 172.363 540.269 Tm [(The)-249(number)-249(of)-249(local)-249(r)18(ows,)-250(i.e.)-316(t)1(he)-249(number)-249(of)-249(r)17(ows)-249(owned)-249(by)]TJ 0.98 0 0 1 124.802 528.314 Tm [(the)-211(curr)18(ent)-211(pr)19(ocess;)-226(as)-211(explained)-211(in)]TJ 0 0 1 rg 0 0 1 RG - -22.914 -11.955 Td [(9.5)-1050(mm)]TJ + [-211(1)]TJ +0 g 0 G + [(,)-220(it)-211(is)-211(equal)-211(to)]TJ/F190 10.3811 Tf 1 0 0 1 339.88 528.314 Tm [(j)-24(I)]TJ/F78 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F192 10.3811 Tf 4.667 0 Td [(+)]TJ/F190 10.3811 Tf 9.858 0 Td [(j)-24(B)]TJ/F78 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F84 9.9626 Tf 0.98 0 0 1 382.212 528.314 Tm [(.)-302(The)-211(r)19(eturned)]TJ 1 0 0 1 124.523 516.359 Tm [(value)-250(is)-250(speci\002c)-250(to)-250(the)-250(calling)-250(pr)18(ocess.)]TJ/F75 9.9626 Tf -24.628 -27.247 Td [(3.1.3)-1000(get)]TJ ET q -1 0 0 1 206.755 682.418 cm +1 0 0 1 144.219 489.311 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 209.743 682.219 Td [(mat)]TJ +/F75 9.9626 Tf 147.208 489.112 Td [(local)]TJ ET q -1 0 0 1 227.367 682.418 cm +1 0 0 1 169.384 489.311 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 230.356 682.219 Td 
[(write)-333(\227)-333(W)74(rite)-334(a)-333(sparse)-333(matrix)-333(to)-333(a)-334(\002le)-333(in)-333(the)-333(Matrix-)]TJ -41.793 -11.956 Td [(Market)-250(format)]TJ +/F75 9.9626 Tf 172.373 489.112 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ 0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(147)]TJ +/F145 9.9626 Tf -72.478 -19 Td [(nc)-525(=)-525(desc%get_local_cols\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ 0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -20 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.001 Td [(desc)]TJ +0 g 0 G +/F84 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.918 -33.929 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 172.363 342.253 Tm [(The)-239(number)-239(of)-238(local)-239(cols,)-242(i.e.)-311(the)-239(number)-239(of)-238(indices)-239(used)-239(by)-239(the)]TJ 0.98 0 0 1 124.802 330.298 Tm [(curr)18(ent)-256(pr)19(ocess,)-257(including)-256(both)-256(local)-256(and)-256(halo)-257(indices;)-256(as)-256(explained)-257(in)]TJ 0 0 1 rg 0 0 1 RG - -22.914 -11.955 Td [(9.6)-1050(mm)]TJ + [-256(1)]TJ +0 g 0 G + [(,)-256(it)]TJ 1.017 0 0 1 124.802 318.342 Tm [(is)-246(equal)-245(to)]TJ/F190 10.3811 Tf 1 0 0 1 173.122 318.342 Tm [(j)-24(I)]TJ/F78 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F192 10.3811 Tf 5.063 0 Td [(+)]TJ/F190 10.3811 Tf 10.254 0 Td [(j)-24(B)]TJ/F78 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F192 10.3811 Tf 5.064 0 Td [(+)]TJ/F190 10.3811 Tf 10.253 0 Td [(j)-24(H)]TJ/F78 
7.5716 Tf 12.052 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F84 9.9626 Tf 1.017 0 0 1 246.489 318.342 Tm [(.)-305(The)-245(r)17(eturned)-245(value)-246(is)-245(speci\002c)-246(to)-245(the)-246(calling)]TJ 1 0 0 1 124.503 306.387 Tm [(pr)18(ocess.)]TJ/F75 9.9626 Tf -24.608 -27.247 Td [(3.1.4)-1000(get)]TJ ET q -1 0 0 1 206.755 658.507 cm +1 0 0 1 144.219 279.339 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 209.743 658.308 Td [(array)]TJ +/F75 9.9626 Tf 147.208 279.14 Td [(global)]TJ ET q -1 0 0 1 233.713 658.507 cm +1 0 0 1 176.587 279.339 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 236.702 658.308 Td [(write)-234(\227)-234(W)74(rite)-234(a)-234(dense)-234(array)-234(fr)18(om)-234(a)-234(\002le)-234(in)-234(the)-234(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ +/F75 9.9626 Tf 179.576 279.14 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(rows)]TJ 0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(148)]TJ +/F145 9.9626 Tf -79.681 -19 Td [(nr)-525(=)-525(desc%get_global_rows\050\051)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -37.858 -21.918 Td [(10)-500(Preconditioner)-250(routines)]TJ +/F75 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ 0 g 0 G - [-20696(150)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -11.955 Td [(10.1)-550(init)-250(\227)-250(Initialize)-250(a)-250(pr)18(econditioner)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-772(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F75 9.9626 Tf -29.44 -20 Td [(On)-250(Entry)]TJ 0 g 0 G - [-1000(151)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.2)-550(build)-250(\227)-250(Builds)-250(a)-250(pr)18(econditioner)]TJ + 0 -20.001 Td [(desc)]TJ 0 g 0 G - 
[-970(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F84 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G - [-1000(152)]TJ +/F75 9.9626 Tf -77.918 -33.929 Td [(On)-250(Return)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.3)-550(apply)-250(\227)-250(Pr)18(econditioner)-250(application)-250(r)18(outine)]TJ 0 g 0 G - [-421(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ + 0 -20 Td [(Function)-250(value)]TJ 0 g 0 G - [-1000(154)]TJ +/F84 9.9626 Tf 1.02 0 0 1 172.363 132.281 Tm [(The)-314(number)-314(of)-314(global)-314(r)17(ows,)-331(i.e.)-511(the)-314(size)-314(of)-314(the)-314(global)-314(index)]TJ 1 0 0 1 124.802 120.326 Tm [(space.)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.4)-550(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(curr)18(ent)-250(pr)18(econditioner)]TJ + 141.968 -29.888 Td [(13)]TJ 0 g 0 G - [-350(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +ET + +endstream +endobj +1055 0 obj +<< +/Length 4489 +>> +stream 0 g 0 G - [-1000(155)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.956 Td [(10.5)-550(clone)-250(\227)-250(clone)-250(curr)18(ent)-250(pr)18(econditioner)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(3.1.5)-1000(get)]TJ +ET +q +1 0 0 1 195.029 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 198.017 706.129 Td [(global)]TJ +ET +q +1 0 0 1 227.397 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 230.386 706.129 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ 0 g 0 G - [-260(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(156)]TJ +/F145 9.9626 Tf -79.681 -18.974 Td [(nr)-525(=)-525(desc%get_global_cols\050\051)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG 
- 0 -11.955 Td [(10.6)-550(fr)18(ee)-250(\227)-250(Fr)18(ee)-250(a)-250(pr)18(econditioner)]TJ +/F75 9.9626 Tf 0 -21.935 Td [(T)90(ype:)]TJ 0 g 0 G - [-341(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-1000(157)]TJ +/F75 9.9626 Tf -29.439 -19.947 Td [(On)-250(Entry)]TJ 0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -21.918 Td [(11)-500(Iterative)-250(Methods)]TJ 0 g 0 G - [-23362(158)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -11.955 Td [(11.1)-550(psb)]TJ + 0 -19.947 Td [(desc)]TJ +0 g 0 G +/F84 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.917 -33.889 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 0.998 0 0 1 223.173 559.535 Tm [(The)-252(number)-251(of)-252(global)-251(cols;)-252(usually)-252(this)-251(is)-252(equal)-252(to)-251(the)-252(number)]TJ 1 0 0 1 175.611 547.58 Tm [(of)-250(global)-250(r)18(ows.)]TJ/F75 9.9626 Tf -24.906 -27.172 Td [(3.1.6)-1000(get)]TJ +ET +q +1 0 0 1 195.029 520.607 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 198.017 520.408 Td [(global)]TJ +ET +q +1 0 0 1 227.397 520.607 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 230.386 520.408 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -79.681 -18.974 Td [(myidx)-525(=)-525(desc%get_global_indices\050[owned]\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.947 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(desc)]TJ +0 g 0 G +/F84 9.9626 Tf 24.896 0 Td 
[(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -90.978 -31.902 Td [(owned)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 186.68 383.793 Tm [(Choose)-281(if)-282(you)-281(only)-282(want)-281(owned)-281(indices)-282(\050)]TJ/F145 9.9626 Tf 1 0 0 1 372.731 383.793 Tm [(owned)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.true.)]TJ/F84 9.9626 Tf 1.02 0 0 1 435.495 383.793 Tm [(\051)-281(or)-282(also)-281(halo)]TJ 1 0 0 1 175.611 371.838 Tm [(indices)-250(\050)]TJ/F145 9.9626 Tf 36.912 0 Td [(owned)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.false.)]TJ/F84 9.9626 Tf 67.994 0 Td [(\051.)-310(Scope:)]TJ/F75 9.9626 Tf 40.328 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -167.121 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(;)-250(default:)]TJ/F145 9.9626 Tf 41.873 0 Td [(.true.)]TJ/F84 9.9626 Tf 31.382 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -163.127 -33.89 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 1.011 0 0 1 223.173 306.046 Tm [(The)-248(global)-249(indi)1(ces,)-249(r)18(eturned)-248(as)-249(an)-248(allocatable)-248(integer)-249(array)-248(of)]TJ 1 0 0 1 175.611 294.091 Tm [(kind)]TJ/F145 9.9626 Tf 22.815 0 Td [(psb_lpk_)]TJ/F84 9.9626 Tf 44.333 0 Td [(and)-250(rank)-250(1.)]TJ/F75 9.9626 Tf -92.054 -27.171 Td [(3.1.7)-1000(get)]TJ +ET +q +1 0 0 1 195.029 267.119 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 198.017 266.92 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -47.312 -18.975 Td [(ctxt)-525(=)-525(desc%get_context\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 
29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.947 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(desc)]TJ +0 g 0 G +/F84 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.917 -33.889 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 72.468 0 Td [(The)-250(communication)-250(context.)]TJ +0 g 0 G + 94.406 -29.888 Td [(14)]TJ +0 g 0 G +ET + +endstream +endobj +1059 0 obj +<< +/Length 5019 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 99.895 706.129 Td [(3.1.8)-1000(Clone)-250(\227)-250(clone)-250(current)-250(object)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%clone\050descout,info\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -20.597 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.598 Td [(desc)]TJ +0 g 0 G +/F84 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.918 -34.377 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.597 Td [(descout)]TJ +0 g 0 G +/F84 9.9626 Tf 39.452 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +0 g 0 G +/F75 9.9626 Tf -39.452 -20.597 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F75 9.9626 Tf -23.801 -28.097 Td [(3.1.9)-1000(CNV)-250(\227)-250(convert)-250(internal)-250(storage)-250(format)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%cnv\050mold\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -20.597 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 
0 G + 0 -20.597 Td [(desc)]TJ +0 g 0 G +/F84 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.918 -32.553 Td [(mold)]TJ +0 g 0 G +/F84 9.9626 Tf 28.782 0 Td [(the)-250(desir)18(ed)-250(integer)-250(storage)-250(format.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ 0.98 0 0 1 124.802 356.277 Tm [(Speci\002ed)-212(as:)-295(a)-212(object)-212(of)-212(type)-213(der)1(ived)-213(fr)19(om)-212(\050integer\051)]TJ/F145 9.9626 Tf 1 0 0 1 344.16 356.277 Tm [(psb)]TJ +ET +q +1 0 0 1 360.479 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.617 356.277 Td [(T)]TJ +ET +q +1 0 0 1 369.475 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 372.613 356.277 Td [(base)]TJ +ET +q +1 0 0 1 394.162 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 397.301 356.277 Td [(vect)]TJ +ET +q +1 0 0 1 418.849 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 421.988 356.277 Td [(type)]TJ/F84 9.9626 Tf 0.98 0 0 1 442.909 356.277 Tm [(.)]TJ 0.98 0 0 1 99.587 333.687 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 118.361 333.687 Tm [(mold)]TJ/F84 9.9626 Tf 0.98 0 0 1 141.713 333.687 Tm [(ar)18(guments)-249(may)-248(be)-249(employed)-249(to)-249(interface)-249(with)-249(special)-249(devices,)-250(such)-249(as)]TJ 1 0 0 1 99.895 321.732 Tm [(GPUs)-250(and)-250(other)-250(accelerators.)]TJ/F75 9.9626 Tf 0 -28.096 Td [(3.1.10)]TJ 0.98 0 0 1 134.765 293.636 Tm [(psb)]TJ +ET +q +1 0 0 1 151.628 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 0.98 0 0 1 154.617 293.636 Tm [(cd)]TJ +ET +q +1 0 0 1 165.515 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 0.98 0 0 1 168.504 293.636 Tm [(get)]TJ +ET +q +1 0 0 1 182.663 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT 
+/F75 9.9626 Tf 0.98 0 0 1 185.652 293.636 Tm [(hash)]TJ +ET +q +1 0 0 1 207.397 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 0.98 0 0 1 210.386 293.636 Tm [(threshold)-207(\227)-206(Get)-207(threshold)-207(for)-207(index)-207(mapping)-206(switch)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf 1 0 0 1 99.895 274.346 Tm [(ith)-525(=)-525(psb_cd_get_hash_threshold\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.421 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -20.598 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.597 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 72.468 0 Td [(The)-250(curr)18(ent)-250(value)-250(for)-250(the)-250(size)-250(thr)18(eshold.)]TJ/F75 9.9626 Tf -72.468 -28.096 Td [(3.1.11)]TJ 0.98 0 0 1 134.765 182.634 Tm [(psb)]TJ +ET +q +1 0 0 1 151.628 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 0.98 0 0 1 154.617 182.634 Tm [(cd)]TJ +ET +q +1 0 0 1 165.515 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 0.98 0 0 1 168.504 182.634 Tm [(set)]TJ +ET +q +1 0 0 1 181.569 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 0.98 0 0 1 184.558 182.634 Tm [(hash)]TJ ET q -1 0 0 1 204.881 519.031 cm +1 0 0 1 206.303 182.833 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 518.831 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ +/F75 9.9626 Tf 0.98 0 0 1 209.292 182.634 Tm [(threshold)-254(\227)-255(Set)-254(threshold)-255(for)-254(index)-255(mapping)-254(switch)]TJ 0 g 0 G - [-716(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G - [-1000(159)]TJ +/F145 9.9626 Tf 1 0 0 1 99.895 163.345 Tm [(call)-525(psb_cd_set_hash_threshold\050ith\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ 0 g 0 G - 110.426 
-428.393 Td [(iv)]TJ +0 g 0 G +/F84 9.9626 Tf 166.875 -29.888 Td [(15)]TJ 0 g 0 G ET endstream endobj -779 0 obj +1064 0 obj << -/Length 8044 +/Length 5826 >> stream 0 g 0 G 0 g 0 G -BT -/F51 14.3462 Tf 99.895 705.784 Td [(1)-1000(Introduction)]TJ/F54 9.9626 Tf 0 -22.913 Td [(The)-272(PSBLAS)-271(library)111(,)-277(developed)-272(with)-272(t)1(he)-272(aim)-272(to)-271(facilitate)-272(the)-272(parallelization)-271(of)]TJ 0 -11.955 Td [(computationally)-348(intensive)-347(scienti\002c)-348(applications,)-372(is)-347(designed)-348(to)-348(addr)18(ess)-347(par)18(-)]TJ 0 -11.955 Td [(allel)-282(implementation)-283(of)-282(iterative)-282(solvers)-283(for)-282(sparse)-282(linear)-283(systems)-282(thr)18(ough)-282(the)]TJ 0 -11.955 Td [(distributed)-232(memory)-232(paradigm.)-304(It)-232(includes)-233(r)18(outines)-232(for)-232(multiplying)-232(sparse)-232(ma-)]TJ 0 -11.955 Td [(trices)-211(by)-211(dense)-211(matrices,)-219(solving)-211(block)-211(diagonal)-211(systems)-211(with)-211(triangular)-211(diago-)]TJ 0 -11.956 Td [(nal)-229(entries,)-233(pr)18(epr)18(ocessing)-228(sparse)-229(matrices,)-233(and)-228(contains)-229(additional)-229(r)18(outines)-228(for)]TJ 0 -11.955 Td [(dense)-292(matrix)-292(operations.)-436(The)-292(curr)18(ent)-292(implementation)-292(of)-292(PSBLAS)-292(addr)18(esses)-292(a)]TJ 0 -11.955 Td [(distributed)-250(memory)-250(execution)-250(model)-250(operating)-250(with)-250(message)-250(passing.)]TJ 14.944 -12.064 Td [(The)-267(PSBLAS)-267(library)-268(version)-267(3)-267(is)-267(implemented)-267(in)-267(the)-268(Fortran)-267(2003)-267([)]TJ -1 0 0 rg 1 0 0 RG - [(17)]TJ -0 g 0 G - [(])-267(pr)18(o-)]TJ -14.944 -11.955 Td [(gramming)-278(language,)-284(with)-277(r)18(euse)-278(and/or)-278(adaptation)-277(of)-278(existing)-277(Fortran)-278(77)-277(and)]TJ 0 -11.955 Td [(Fortran)-250(95)-250(softwar)18(e,)-250(plus)-250(a)-250(handful)-250(of)-250(C)-250(r)18(outines.)]TJ 14.944 -12.064 Td 
[(The)-391(use)-392(of)-391(Fortran)-392(2003)-391(of)18(fers)-392(a)-391(number)-391(of)-392(advantages)-391(over)-392(Fortran)-391(95,)]TJ -14.944 -11.955 Td [(mostly)-385(in)-385(the)-385(handling)-385(of)-385(r)18(equir)18(ements)-385(for)-385(evolution)-385(and)-385(adaptation)-385(of)-385(the)]TJ 0 -11.956 Td [(library)-431(to)-432(new)-431(computing)-432(ar)18(chitectur)18(es)-431(and)-432(integration)-431(of)-432(new)-431(algorithms.)]TJ 0 -11.955 Td [(For)-365(a)-365(detail)1(ed)-365(discussion)-365(of)-365(our)-364(design)-365(see)-365([)]TJ -1 0 0 rg 1 0 0 RG - [(11)]TJ -0 g 0 G - [(];)-422(other)-365(works)-364(discussing)-365(ad-)]TJ 0 -11.955 Td [(vanced)-213(pr)18(ogramming)-214(in)-213(Fortran)-213(2003)-213(include)-214([)]TJ -1 0 0 rg 1 0 0 RG - [(1)]TJ 0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-213(18)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(ith)]TJ 0 g 0 G - [(];)-225(suf)18(\002cient)-214(support)-213(for)-213(For)18(-)]TJ 0 -11.955 Td [(tran)-315(2003)-314(is)-315(now)-314(available)-315(fr)18(om)-314(many)-315(compilers,)-331(including)-314(the)-315(GNU)-314(Fortran)]TJ 0 -11.955 Td [(compiler)-250(fr)18(om)-250(the)-250(Fr)18(ee)-250(Softwar)18(e)-250(Foundation)-250(\050as)-250(of)-250(version)-250(4.8\051.)]TJ 14.944 -12.064 Td [(Pr)18(evious)-311(appr)18(oaches)-312(have)-311(been)-311(based)-311(on)-312(mixing)-311(Fortran)-311(95,)-327(with)-311(its)-311(sup-)]TJ -14.944 -11.955 Td [(port)-249(for)-249(object-based)-249(design,)-249(with)-249(other)-249(languages;)-249(these)-249(have)-249(been)-249(advocated)]TJ 0 -11.956 Td [(by)-346(a)-346(number)-346(of)-347(authors,)-370(e.g.)-346([)]TJ -1 0 0 rg 1 0 0 RG - [(16)]TJ +/F84 9.9626 Tf 17.703 0 Td [(the)-250(new)-250(thr)18(eshold)-250(for)-250(communication)-250(descriptors.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td 
[(.)]TJ -66.072 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(gr)18(eater)-250(than)-250(zer)18(o.)]TJ 1.02 0 0 1 150.396 634.849 Tm [(This)-247(thr)17(eshold)-247(guides)-247(the)-248(library)-247(into)-247(using)-248(a)-247(list)-248(based)-247(or)-247(a)-248(hash-table)-247(based)]TJ 1.02 0 0 1 150.705 622.893 Tm [(descriptor)-277(for)-276(global)-277(to)-277(local)-276(index)-277(conversion;)-292(if)-277(the)-277(size)-276(of)-277(the)-277(gl)1(obal)-277(index)]TJ 1.02 0 0 1 150.705 610.938 Tm [(space)-351(is)-351(below)-351(this)-351(thr)18(eshold,)-377(a)-351(list)-351(based)-351(str)8(uctur)17(e)-351(is)-351(used,)-377(if)-351(it)-351(is)-351(above)-351(a)]TJ 0.986 0 0 1 150.705 598.983 Tm [(hash-table)-252(based)-252(str)8(uctur)18(e)-252(is)-253(used.)-314(Note:)-313(the)-253(thr)19(eshold)-253(value)-252(is)-252(only)-252(queried)-253(by)]TJ 1.02 0 0 1 150.705 587.028 Tm [(the)-259(library)-259(at)-259(the)-260(time)-259(a)-259(call)-259(to)]TJ/F145 9.9626 Tf 1 0 0 1 288.208 587.028 Tm [(psb_cdall)]TJ/F84 9.9626 Tf 1.02 0 0 1 337.915 587.028 Tm [(is)-259(executed,)-263(ther)18(efor)17(e)-259(changing)-259(the)]TJ 1.017 0 0 1 150.705 575.073 Tm [(thr)18(eshold)-245(has)-244(no)-244(ef)18(fect)-244(on)-245(communication)-244(descriptors)-244(that)-245(have)-244(alr)18(eady)-244(been)]TJ 1 0 0 1 150.705 563.118 Tm [(initialized.)-310(Mor)18(eover)-250(the)-250(thr)18(eshold)-250(must)-250(have)-250(the)-250(same)-250(value)-250(on)-250(all)-250(pr)18(ocesses.)]TJ/F75 9.9626 Tf 0 -29.334 Td [(3.1.12)-1000(get)]TJ +ET +q +1 0 0 1 200.01 533.983 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 202.999 533.784 Td [(p)]TJ +ET +q +1 0 0 1 209.684 533.983 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 212.672 533.784 Td [(adjcncy)-250(\227)-250(Get)-250(process)-250(adjacency)-250(list)]TJ 0 g 0 G - 
[(].)-598(Mor)18(eover)74(,)-371(the)-346(Fortran)-346(95)-346(facilities)-346(for)-346(dy-)]TJ 0 -11.955 Td [(namic)-411(memory)-410(management)-411(and)-410(interface)-411(overloading)-410(gr)18(eatly)-411(enhance)-410(the)]TJ 0 -11.955 Td [(usability)-397(of)-398(the)-397(PSBLAS)-398(subr)18(outines.)-752(In)-398(this)-397(way)111(,)-434(the)-398(library)-397(can)-398(take)-397(car)18(e)]TJ 0 -11.955 Td [(of)-267(r)8(untime)-266(memory)-267(r)18(equir)18(ements)-266(that)-267(ar)18(e)-266(quite)-267(dif)18(\002cult)-267(or)-266(even)-267(impossible)-266(to)]TJ 0 -11.955 Td [(pr)18(edict)-250(at)-250(implementation)-250(or)-250(compilation)-250(time.)]TJ 14.944 -12.064 Td [(The)-249(pr)18(esentation)-250(of)-249(the)-250(PSBLAS)-249(library)-249(follows)-250(the)-249(general)-249(str)8(uctur)18(e)-250(of)-249(the)]TJ -14.944 -11.955 Td [(pr)18(oposal)-207(for)-206(serial)-207(Sparse)-207(BLAS)-207([)]TJ -1 0 0 rg 1 0 0 RG - [(8)]TJ 0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-206(9)]TJ +/F145 9.9626 Tf -61.967 -19.711 Td [(list)-525(=)-1050(desc%get_p_adjcncy\050\051)]TJ 0 g 0 G - [(],)-216(which)-206(in)-207(its)-207(turn)-206(is)-207(based)-207(on)-207(t)1(he)-207(pr)18(oposal)]TJ 0 -11.956 Td [(for)-250(BLAS)-250(on)-250(dense)-250(matrices)-250([)]TJ -1 0 0 rg 1 0 0 RG - [(15)]TJ +/F75 9.9626 Tf 0 -23.074 Td [(T)90(ype:)]TJ 0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-250(5)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-250(6)]TJ +/F75 9.9626 Tf -29.439 -21.467 Td [(On)-250(Return)]TJ 0 g 0 G - [(].)]TJ 14.944 -12.063 Td [(The)-297(applicability)-297(of)-298(sparse)-297(iterative)-297(solvers)-297(to)-297(many)-298(dif)18(fer)18(ent)-297(ar)18(eas)-297(causes)]TJ -14.944 -11.956 Td [(some)-190(terminology)-190(pr)18(oblems)-190(because)-190(the)-190(same)-190(concept)-190(may)-190(be)-190(denoted)-190(thr)18(ough)]TJ 0 -11.955 Td 
[(dif)18(fer)18(ent)-271(names)-271(depending)-272(on)-271(the)-271(application)-271(ar)18(ea.)-374(The)-271(PSBLAS)-271(featur)18(es)-271(pr)18(e-)]TJ 0 -11.955 Td [(sented)-332(in)-333(this)-332(document)-332(will)-333(be)-332(discussed)-332(r)18(eferring)-333(to)-332(a)-333(\002ni)1(te)-333(dif)18(fer)18(ence)-332(dis-)]TJ 0 -11.955 Td [(cr)18(etization)-284(of)-285(a)-284(Partial)-285(Dif)18(fer)18(ential)-284(Equation)-284(\050PDE\051.)-285(However)74(,)-293(the)-284(scope)-285(of)-284(the)]TJ 0 -11.955 Td [(library)-283(is)-283(wider)-283(than)-284(that:)-376(for)-283(example,)-291(it)-283(can)-284(be)-283(applied)-283(to)-283(\002nite)-283(element)-283(dis-)]TJ 0 -11.956 Td [(cr)18(etizations)-267(of)-267(PDEs,)-271(and)-267(even)-266(to)-267(dif)18(fer)18(ent)-267(classes)-267(of)-267(pr)18(oblems)-267(such)-266(as)-267(nonlin-)]TJ 0 -11.955 Td [(ear)-250(optimization,)-250(for)-250(example)-250(in)-250(optimal)-250(contr)18(ol)-250(pr)18(oblems.)]TJ 14.944 -12.064 Td [(The)-383(design)-383(of)-383(a)-383(solver)-383(for)-384(sparse)-383(linear)-383(systems)-383(is)-383(driven)-383(by)-383(many)-383(con-)]TJ -14.944 -11.955 Td [(\003icting)-271(objectives,)-277(such)-272(as)-271(limiting)-271(occupation)-272(of)-271(storage)-271(r)18(esour)18(ces,)-277(exploiting)]TJ 0 -11.955 Td [(r)18(egularities)-274(in)-274(the)-275(input)-274(data,)-280(exploiting)-274(har)18(dwar)18(e)-275(characteristi)1(cs)-275(of)-274(the)-274(paral-)]TJ 0 -11.955 Td [(lel)-350(platform.)-610(T)92(o)-350(achieve)-350(an)-350(optimal)-350(communication)-350(to)-350(computation)-350(ratio)-350(on)]TJ 0 -11.955 Td [(distributed)-379(memory)-378(machines)-379(it)-378(is)-379(essential)-379(to)-378(keep)-379(the)]TJ/F52 9.9626 Tf 256.501 0 Td [(data)-379(locality)]TJ/F54 9.9626 Tf 54.198 0 Td [(as)-379(high)]TJ -310.699 -11.956 Td [(as)-315(possible;)-346(this)-315(can)-314(be)-315(done)-315(thr)18(ough)-314(an)-315(appr)18(opriate)-314(data)-315(allocation)-314(strategy)111(.)]TJ 0 
-11.955 Td [(The)-323(choice)-323(of)-324(the)-323(pr)18(econditioner)-323(is)-323(another)-323(very)-324(important)-323(factor)-323(that)-323(af)18(fects)]TJ 0 -11.955 Td [(ef)18(\002ciency)-300(of)-300(the)-300(im)1(plemented)-300(application.)-460(Optimal)-300(data)-299(distribution)-300(r)18(equir)18(e-)]TJ 0 -11.955 Td [(ments)-300(for)-299(a)-300(given)-300(pr)18(econditioner)-299(may)-300(con\003ict)-300(with)-300(distribution)-299(r)18(equir)18(ements)]TJ 0 -11.955 Td [(of)-356(the)-356(r)18(est)-356(of)-357(the)-356(solver)74(.)-628(Finding)-356(the)-357(o)1(ptimal)-357(trade-of)18(f)-356(may)-356(be)-356(very)-356(dif)18(\002cult)]TJ 0 -11.955 Td [(because)-292(it)-291(is)-292(application)-291(dependent.)-435(Possible)-292(solutions)-291(to)-292(these)-292(pr)18(oblems)-291(and)]TJ 0 -11.956 Td [(other)-342(important)-342(inputs)-342(to)-342(the)-342(development)-342(of)-341(the)-342(PSBLAS)-342(softwar)18(e)-342(package)]TJ 0 g 0 G - 169.365 -29.887 Td [(1)]TJ + 0 -21.467 Td [(Function)-250(value)]TJ 0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 223.173 448.065 Tm [(The)-252(curr)18(ent)-252(list)-252(of)-253(adjacent)-252(pr)19(ocesses,)-253(i.e.)-316(pr)19(ocesses)-253(w)1(ith)-253(which)]TJ 1 0 0 1 175.611 436.11 Tm [(the)-250(curr)18(ent)-250(one)-250(has)-250(to)-250(exchange)-250(halo)-250(data.)]TJ/F75 9.9626 Tf -24.906 -29.334 Td [(3.1.13)-1000(set)]TJ ET - -endstream -endobj -798 0 obj -<< -/Length 5269 ->> -stream +q +1 0 0 1 198.894 406.975 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 201.883 406.776 Td [(p)]TJ +ET +q +1 0 0 1 208.568 406.975 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 211.557 406.776 Td [(adjcncy)-250(\227)-250(Set)-250(process)-250(adjacency)-250(list)]TJ 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 150.705 706.129 Td [(have)-292(come)-291(fr)18(om)-292(an)-292(established)-291(experience)-292(in)-292(applying)-291(the)-292(PSBLAS)-291(solvers)-292(to)]TJ 0 -11.955 Td 
[(computational)-250(\003uid)-250(dynamics)-250(applications.)]TJ/F51 14.3462 Tf 0 -33.474 Td [(2)-1000(General)-250(overview)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-190(PSBLAS)-190(library)-190(is)-190(designed)-190(to)-190(handle)-190(the)-190(implementation)-190(of)-190(iterative)-190(solvers)]TJ 0 -11.955 Td [(for)-275(sparse)-275(linear)-275(systems)-275(on)-275(distributed)-275(memory)-274(parallel)-275(computers.)-385(The)-275(sys-)]TJ 0 -11.955 Td [(tem)-307(coef)18(\002cient)-308(matrix)]TJ/F52 9.9626 Tf 100.571 0 Td [(A)]TJ/F54 9.9626 Tf 10.381 0 Td [(must)-307(be)-308(squar)18(e;)-336(it)-308(may)-307(be)-308(r)18(eal)-307(or)-307(complex,)-322(nonsym-)]TJ -110.952 -11.955 Td [(metric,)-301(and)-291(its)-291(sparsity)-291(pattern)-291(needs)-291(not)-291(to)-291(be)-291(symmetric.)-433(The)-291(serial)-291(compu-)]TJ 0 -11.955 Td [(tation)-240(parts)-239(ar)18(e)-240(based)-240(on)-239(the)-240(serial)-240(sparse)-239(BLAS,)-240(so)-240(that)-239(any)-240(extension)-239(made)-240(to)]TJ 0 -11.956 Td [(the)-258(data)-258(str)8(uctur)18(es)-259(of)-258(the)-258(serial)-258(kernels)-258(is)-259(available)-258(to)-258(the)-258(parallel)-258(version.)-335(The)]TJ 0 -11.955 Td [(overall)-294(design)-294(and)-294(parallelization)-294(strategy)-294(have)-294(been)-294(in\003uenced)-294(by)-294(the)-294(str)8(uc-)]TJ 0 -11.955 Td [(tur)18(e)-306(of)-307(the)-306(ScaLAP)92(ACK)-306(parallel)-307(library)111(.)-479(The)-306(layer)18(ed)-306(str)8(uctur)18(e)-306(of)-307(the)-306(PSBLAS)]TJ 0 -11.955 Td [(library)-349(is)-349(shown)-348(in)-349(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-349(1)]TJ +/F145 9.9626 Tf -60.852 -19.711 Td [(call)-525(desc%set_p_adjcncy\050list\051)]TJ 0 g 0 G - [(;)-398(lower)-349(layers)-349(of)-349(the)-349(library)-349(in)1(dicate)-349(an)-349(encapsu-)]TJ 0 -11.955 Td [(lation)-314(r)18(elationship)-314(with)-313(upper)-314(layers.)-502(The)-314(ongoing)-314(discussion)-313(focuses)-314(on)-314(the)]TJ 0 -11.955 Td 
[(Fortran)-244(2003)-244(layer)-245(immediately)-244(below)-244(the)-244(application)-244(layer)74(.)-308(The)-245(serial)-244(parts)-244(of)]TJ 0 -11.956 Td [(the)-230(computation)-230(on)-230(each)-230(pr)18(ocess)-230(ar)18(e)-230(executed)-230(thr)18(ough)-230(calls)-230(to)-230(the)-230(serial)-230(sparse)]TJ 0 -11.955 Td [(BLAS)-307(subr)18(outines.)-482(In)-307(a)-307(similar)-308(way)111(,)-321(the)-307(inter)18(-pr)18(ocess)-308(message)-307(exchanges)-307(ar)18(e)]TJ 0 -11.955 Td [(encapsulated)-244(in)-243(an)-244(applicaiton)-244(layer)-243(that)-244(has)-244(been)-243(str)18(ongly)-244(inspir)18(ed)-244(by)-243(the)-244(Ba-)]TJ 0 -11.955 Td [(sic)-314(Linear)-313(Algebra)-314(Communication)-313(Subr)18(outines)-314(\050BLACS\051)-314(library)-313([)]TJ -1 0 0 rg 1 0 0 RG - [(7)]TJ +/F75 9.9626 Tf 0 -23.074 Td [(T)90(ype:)]TJ 0 g 0 G - [(].)-501(Usually)]TJ 0 -11.955 Td [(ther)18(e)-315(is)-314(no)-315(need)-315(to)-314(deal)-315(dir)18(ectly)-314(with)-315(MPI;)-315(however)74(,)-330(in)-315(some)-315(cases,)-331(MPI)-314(r)18(ou-)]TJ 0 -11.955 Td [(tines)-219(ar)18(e)-219(used)-218(dir)18(ectly)-219(to)-219(impr)18(ove)-219(ef)18(\002ciency)111(.)-299(For)-219(further)-219(details)-219(on)-218(our)-219(commu-)]TJ 0 -11.956 Td [(nication)-250(layer)-250(see)-250(Sec.)]TJ -0 0 1 rg 0 0 1 RG - [-250(7)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [(.)]TJ +/F75 9.9626 Tf -29.439 -21.467 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -21.467 Td [(list)]TJ 0 g 0 G +/F84 9.9626 Tf 19.357 0 Td [(the)-250(list)-250(of)-250(adjacent)-250(pr)18(ocesses.)]TJ 5.549 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td 
[(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F145 9.9626 Tf 250.21 0 Td [(psb_ipk_)]TJ/F84 9.9626 Tf 41.842 0 Td [(.)]TJ 1.02 0 0 1 150.705 249.777 Tm [(Note:)-521(this)-354(method)-353(can)-354(be)-353(called)-354(after)-354(a)-353(call)-354(to)]TJ/F145 9.9626 Tf 1 0 0 1 367.056 249.777 Tm [(psb_cdall)]TJ/F84 9.9626 Tf 1.02 0 0 1 417.722 249.777 Tm [(and)-354(befor)18(e)-354(a)-353(call)]TJ 1.02 0 0 1 150.705 237.822 Tm [(to)]TJ/F145 9.9626 Tf 1 0 0 1 162.313 237.822 Tm [(psb_cdasb)]TJ/F84 9.9626 Tf 1.02 0 0 1 209.387 237.822 Tm [(.)-380(The)-270(user)-270(is)-271(specifying)-270(her)17(e)-270(some)-270(knowledge)-271(about)-270(which)-270(pr)17(o-)]TJ 0.986 0 0 1 150.705 225.866 Tm [(cesses)-255(ar)19(e)-255(topological)-255(neighbours)-254(of)-255(the)-254(curr)18(ent)-255(pr)18(oce)1(ss.)-318(The)-255(availability)-254(of)-255(this)]TJ 1 0 0 1 150.705 213.911 Tm [(information)-250(may)-250(speed)-250(up)-250(the)-250(execution)-250(of)-250(the)-250(assembly)-250(call)]TJ/F145 9.9626 Tf 269.655 0 Td [(psb_cdasb)]TJ/F84 9.9626 Tf 47.073 0 Td [(.)]TJ/F75 9.9626 Tf -316.728 -29.333 Td [(3.1.14)-1000(fnd)]TJ ET -1 0 0 1 258.536 281.98 cm -q -.65 0 0 .65 0 0 cm q -1 0 0 1 0 0 cm -/Im2 Do -Q +1 0 0 1 202.221 184.777 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G -1 0 0 1 -258.536 -281.98 cm BT -/F54 9.9626 Tf 216.385 250.1 Td [(Figur)18(e)-250(1:)-310(PSBLAS)-250(library)-250(components)-250(hierar)18(chy)111(.)]TJ +/F75 9.9626 Tf 205.21 184.578 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(process)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -54.505 -19.711 Td [(call)-525(desc%fnd_owner\050idx,iprc,info\051)]TJ 0 g 0 G +/F75 9.9626 Tf 0 -23.074 Td [(T)90(ype:)]TJ 0 g 0 G - -50.736 -22.178 Td [(The)-370(type)-369(of)-370(linear)-369(system)-370(matrices)-370(that)-369(we)-370(addr)18(ess)-369(typically)-370(arise)-370(in)-369(the)]TJ -14.944 -11.955 Td 
[(numerical)-260(solution)-261(of)-260(PDEs;)-266(in)-260(such)-260(a)-261(context,)-263(it)-260(is)-261(necessary)-260(to)-260(pay)-261(special)-260(at-)]TJ 0 -11.955 Td [(tention)-297(to)-298(the)-297(str)8(uctur)18(e)-298(of)-297(the)-298(pr)18(oblem)-297(fr)18(om)-298(which)-297(the)-298(application)-297(originates.)]TJ 0 -11.955 Td [(The)-277(nonzer)18(o)-276(pattern)-277(of)-277(a)-276(matrix)-277(arising)-277(fr)18(om)-276(the)-277(discr)18(etization)-276(of)-277(a)-277(PDE)-276(is)-277(in-)]TJ 0 -11.956 Td [(\003uenced)-232(by)-232(various)-231(factors,)-236(such)-232(as)-232(the)-231(shape)-232(of)-232(the)-232(domain,)-235(the)-232(discr)18(etization)]TJ 0 -11.955 Td [(strategy)111(,)-313(and)-301(the)-300(equation/unknown)-301(or)18(dering.)-461(The)-301(matrix)-301(it)1(self)-301(can)-301(be)-300(inter)18(-)]TJ 0 -11.955 Td [(pr)18(eted)-291(as)-291(the)-291(adjacency)-291(matrix)-292(of)-291(the)-291(graph)-291(associated)-291(with)-291(the)-291(discr)18(etization)]TJ 0 -11.955 Td [(mesh.)]TJ 14.944 -11.955 Td [(The)-308(distribution)-308(of)-308(the)-309(coef)18(\002cient)-308(matrix)-308(for)-308(the)-308(linear)-309(system)-308(is)-308(based)-308(on)]TJ -14.944 -11.955 Td [(the)-314(\223owner)-314(computes\224)-314(r)8(ule:)-438(the)-314(variable)-314(associated)-314(to)-314(each)-314(mesh)-314(point)-314(is)-314(as-)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - 169.365 -29.888 Td [(2)]TJ +/F75 9.9626 Tf -29.828 -21.467 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 166.874 -29.888 Td [(16)]TJ 0 g 0 G ET endstream endobj -794 0 obj -<< -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/psblas.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 800 0 R -/BBox [0 0 197 215] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 801 0 R ->>/Font << /R8 802 0 R>> ->> -/Length 898 -/Filter /FlateDecode ->> -stream -xœµVM7 ½ëWèÖ4€Y‘ú>&@[HMl ‡¢câu·;{þý’#QvvÝKÐÂÏ{#=RüÐð«u€ÖɯÿOóÓÇl÷g³°öã¯ýá´7_ ¶çþ7ìÛ // ¢Ãl7÷¦É 
E„ŒŽ,E(%’ÝÌ«õîô°íúq{:ï~Üüe0ƒsµØÀíæ³yõöý›µ¼ûyc>Ÿ(Ú¿M, -¯ ¼”ìá3›"dQžÙ7_Û³ËXm2‚0— -¼î(:HhU3vܪÅ“Aª f6è=A΃AŸÄ!9`³Ç -PwæAt_UOÏ¡OæO³æl8f¢ãPHÄY¥æ«‡åÕÌ+k(*«¦XÁI䓉uyQ¥ -Åá ¢ï¹Éø*Þcv ( ñÌÿôÚÇJÊBt«FpÍ©vvË×VþÕ‘” -„rÓ‘û×&@^ -éÿ2!)Õš\‘Œ÷¼ža®8Î7L¤ÂõŸÇ )]aôà¥`;¡vç ãp½ªba=WY³¨Hj.8‰µ2Rt%ùÁ` R=EÝ 4¢{4_póXÕFwt{Š[Íf.óÈv¾†Ê‘yÎÌÆGÞ§Áxq€Èúš‹ú‚c—++SJëgÅ¡Br5XUäÛFZYMv8™\øH”˜M!‚¢%rl9ª§¸Ù›Œbõh¾0ÝgÔ3©EÅ-:º‹¼i¾>9ÎßÑò…/aëUËv±Òu‹ß`ú®›Lk„¡ÀtÕ—ßö¼E÷õ¶Ž¿áæsù—¼0ȽýßI -~·œíÕÏãÎh§ÀP‰ó+‰I‰Ë`ä8Äwsâ~L¥¼–U9‰–2ó¢ß”ÇM®F‘ -ÁóÕÞ²þ-ÃÉu1 ƒéuÄóŠ -}¦öqëXË|Vb´A—Ó6QƒŠY¡8Õ†% -Ûæ“Ž=…åËE²A5) -í}óiV<\îrãDÝ âÖß7x¤U -«Ͻ'ƒÇ›ÇÇùaÚ>=|9Êh±Büuæ¹£$îËLËàq·–ÑÃÊŸ5×k^Þ½;>íN÷Ûi™ZŠ\V+9D£­8îNËLÓG™÷×»~0+¾’”àŠ'¢ˆ±íúmw>o÷;{·=ŸŽûEý—»a¥ÃѲîîÝâ8SË4Â%ÕÇ¥_¾œžNÛ#OαéƒùüÐ -endstream -endobj -804 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 2887 ->> -stream -xœe–iXSWÇo É=Z¤-iʦ µ­m]‘Ö*ÕVYÔZQU (! KBI ,A8,!, Ö„M‚T¥*(¶ÖQ¬ ÖœÇn:Jg9—¹|˜‹vž~˜/÷¹÷žåyß÷ÿ?¿÷Ð0‡%Fs ¤ðÓ׋SbE‹ßë±b ±’IÑü?æw0VbôåБšV82ÑÔ«Èò2*x£Óhr}›¿X’•&HHÌð|÷ppø{k×®ûóÏ&ÏãYÿñ à§ Dž«©_(–¤ðEÛ<ý©ÙB¡à„g‚0K’˜îÇ[\+ä'{î‰Xæù®ÿ{žÞ^^›ÖSÍ)Ç¥éžÏ#÷< öñ ô æ'H…±iÿ?‚a˜›¯ÈO,IÛž±76ðÄ~~|P‚ Dèéåý>†­ÂbobAØ!,{ Åck0?l=€mÀvc{°MاØgØØ~Œ†½Š±°×0wªx˜ÆÇîѲiW—ì]ÒA~Êá‡}¿2ü&&ÎŒg^Æ·g YúêÒK-=½ ,Ó'æã¡E oXhóafö±rMMⲌ¸îšÉ$ãrö‘«s²£‹Ýä(ÊŒGgm†ò:‘{-e sp­A¢O…*7˜¦È+rT…/¨Q áȆic8'¡‰p¿DûvÌÐë‰UìÊòšÒZlÕÊ(.YŠÃà\UDP¢DQ¡4@;@§ðÏGš =œkîå’|ª‘‹“âLx”.ÇÏ”…ÿ»ì{T¨Ø·‡ó/(‹Êä†f»9Ío„fy1Û'7;#ÆÒÎ}„pVRÃü6Ë$Çg´†\è ‚pè«Vo= XIrôW|N5õ²w[Fàçp8k@Ø‘b•>«36Õ´6–°­ÙÔ7莼'H_Žù˜ý7xVkWRËÇÓ­'ÝáaY܉Œä¬ãù{!ˆRUwsÑ e“¾ÚÄf²„ŸŒöþ8Œ·žKUEc#¼m´ŽPÝtTNİÉ×½Ö’ÒãçÕÈ9?ýâ —OÈ׸j1{æÊZr%É8¶gg\|뀌+˾§À“ÑkSj/2¥Aј…R4Ý„ÓUç»3HúÈ…¥­&V²«Kõ°‚SzÍîB"Ãssƒ -kµiqÖ±F|g©º~ Ð&¼7©U3Zör@o£Wv=^ÂK;ÌãÞÇ ËËÕ¡ -7´f'»ó²ýìs;H@Ò£vú ¶wrœÐ#Jã-´{3t=áή+¯.5@ÐVÍã’å8Œ,(Ø—CiW`Â÷”+Lp )Þ}æ¼µ²Z«©åÔ«j ´šl=2‹ V”Åý ×–…IÔAr7'â hF“ƒ¨ºŸRôïs.¬D"e³‘2ÏÙ–¥Üù6JcÚt•VZÁD جlØÚ˜'Ñ›Œ“u_Žß)4ªà.°° ‡[sÕ;´”U{Ìø'ª:x z”†€á¦iα,êfç ×Ï-Ö:ôtƒ]„V0r™ÅÅy%%EЭjÊru€ÕP%•V¤zlŽðé>1ÍHêÍlH‡B7^²8B 46Ë9²¶¼õ8È&½ŒL}MEUV`5¬+n¡Äh(´Y‹=î5që|zÏA;—ľ×+Û Õm Ëvv¼SàÝBÕ¸K3F8v;‘–Ü?ãšEUD›us5Ìš’òBNA^a^†]X³¹…Zwx²´¸¼°n~A^dn錹r®¿u¬‡£®•¥)4R说~ÉE§qÖì3梌æy3 
ástêÓØp§:gËIªTç͸OyvüÔ©ù¦Î8SÌäy9>ûâ,9Ašñà µÕùò÷è݇.¬âÀ×l…*G«„ £ n˜‹Âq8¦íP¶Jí1æ ÖûFfX2[Z›,Í%º=·¸º¤ -êµ»yàœUt˜s'7ìWäÇð¥™*L¬ˆÀaÞäð`ÓØ$‡e ×5Ê=NÁ&Co?5Dnüž …ÚÕ7 'a«Øøx‡wºQ¬öû ]£\K¾L™vÂÜÕßT§5rêS šZÌ-v^ûÁýa’£©ÜT^~BÉ6à“üyƒ ýrsüño4QFÌ­…wºœðhh@çaÿÛÊF4 -´ÇÐ0»½¸¡¤NË5gZ¯vÛ'á×pHÕ/é81üm=…Œ&«Ò7¿FýŸsaK¾fGÁ‹ân/W.š%$dV½ÞŠ<®ykMP$‰gªJË2ž3W4ŽÞ· —6”tZg£Ùn¡ìûVH"\Ù§?Á3¡²G»‡þšÜÝ‹ºhLø^]~Í"KÎ""Œ-ÉÈ‹›3l-ͶŒÕô¾PÙ¥‚Xœ§!Õ3Ö -"d~+[Æ,ÊÈÍU@ TT(ª«§.>¾*Ù#òI¢DaV, ‡Û†CCÐmþØñ6~µZ'ƒéàÓÈ£~Þ—S$'ƒÙI&2š™¬@s©¡ÎäQ ŠÌÔNÊþü„?~ðûw¡×I×)Øax\ê¿62$Oèåt%×¥ÖQíâyú´_gèæy:»’Y[j(­‚ »FEõÎTV©µT®¹&<°"§^Ä/xQY¸‚¯Ù­psBå”+TíóÎí´Ö§Èø”ŽôTj¡0VŸx(„Oâ\ -IZ¹t4¤w!}^€gì½—ú€h9@±^è Ò…s2•=3B2H>ÉçynÞÌû7ŠC‰#ˆþhñÚï-whèÇGtBŒV²Õºø`uÞ1èFæ3ÑvðøçÓ÷à´ÛoÝ34\z"ž“,P&+š´®¿Ú»oB03¼å㣶xsÉÝä!FáŽ/†m'~¼êŒn?àßvaÝDëÑ÷ì+°¥¸5ïšôt0Ü ¶æ+ÉÔ·ÆsRz…t¥J–xFzëþƒöÁQîè`û¼/dŠº2›åž™j@7GÛú.º?Üwyc$O‘œÀI‘(S¥ME®ƒßŒô| Áõ^`‚2!-›’"VîIN™"Ì‚B-ÌÎe³/uV9:Î6:.ǰÿMɪH -endstream -endobj -810 0 obj +1071 0 obj << -/Length 8252 +/Length 8300 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F54 9.9626 Tf 99.895 706.129 Td [(signed)-263(to)-264(a)-263(pr)18(ocess)-263(that)-263(will)-264(own)-263(the)-263(corr)18(esponding)-263(r)18(ow)-263(in)-264(the)-263(coef)18(\002cient)-263(ma-)]TJ 0 -11.955 Td [(trix)-406(and)-406(will)-406(carry)-405(out)-406(all)-406(r)18(elated)-406(computations.)-778(This)-406(all)1(ocation)-406(strategy)-406(is)]TJ 0 -11.955 Td [(equivalent)-353(to)-353(a)-353(partition)-353(of)-353(the)-353(discr)18(etization)-353(mesh)-353(in)1(to)]TJ/F52 9.9626 Tf 253.543 0 Td [(sub-domains)]TJ/F54 9.9626 Tf 51.107 0 Td [(.)-619(Our)-353(li-)]TJ -304.65 -11.956 Td [(brary)-220(supports)-220(any)-220(distribution)-220(that)-220(keeps)-220(together)-220(the)-220(coef)18(\002cients)-220(of)-220(each)-220(ma-)]TJ 0 -11.955 Td [(trix)-244(r)18(ow;)-245(ther)18(e)-244(ar)18(e)-243(no)-244(other)-243(constraints)-244(on)-243(the)-243(variable)-244(assignment.)-308(This)-243(choice)]TJ 0 -11.955 Td [(is)-324(consistent)-324(with)-324(simple)-324(data)-325(distributions)-324(such)-324(as)]TJ/F59 9.9626 
Tf 232.237 0 Td [(CYCLIC\050N\051)]TJ/F54 9.9626 Tf 50.302 0 Td [(and)]TJ/F59 9.9626 Tf 20.095 0 Td [(BLOCK)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)-324(as)]TJ -328.786 -11.955 Td [(well)-310(as)-309(completely)-310(arbitrary)-310(assignments)-309(of)-310(equation)-310(indices)-309(to)-310(pr)18(ocesses.)-489(In)]TJ 0 -11.955 Td [(particular)-250(it)-250(is)-251(consistent)-250(with)-250(the)-250(usage)-250(of)-251(graph)-250(partitioning)-250(tools)-250(commonly)]TJ 0 -11.956 Td [(available)-333(in)-332(the)-333(literatur)18(e,)-353(e.g.)-558(METIS)-332([)]TJ -1 0 0 rg 1 0 0 RG - [(14)]TJ +/F75 9.9626 Tf 99.895 706.129 Td [(idx)]TJ 0 g 0 G - [(].)-558(Dense)-333(ve)1(ctors)-333(conform)-333(to)-332(sparse)]TJ 0 -11.955 Td [(matrices,)-257(that)-255(is,)-257(the)-255(entries)-255(of)-256(a)-255(vector)-255(follow)-256(the)-255(same)-255(distribution)-256(of)-255(the)-255(ma-)]TJ 0 -11.955 Td [(trix)-250(r)18(ows.)]TJ 14.944 -12.648 Td [(W)92(e)-343(assume)-344(that)-343(the)-344(sparse)-343(matrix)-343(is)-344(built)-343(in)-344(parallel,)-366(wher)18(e)-344(each)-343(pr)18(ocess)]TJ -14.944 -11.955 Td [(generates)-254(its)-254(own)-255(portion.)-322(W)92(e)-255(never)-254(r)18(equir)18(e)-254(that)-254(the)-255(entir)18(e)-254(matrix)-254(be)-254(available)]TJ 0 -11.955 Td [(on)-288(a)-288(single)-288(node.)-423(However)74(,)-298(it)-287(is)-288(possible)-288(to)-288(hold)-288(the)-288(entir)18(e)-288(matrix)-287(in)-288(one)-288(pr)18(o-)]TJ 0 -11.955 Td [(cess)-241(and)-242(distribute)-241(it)-241(explicitly)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 133.807 3.616 Td [(1)]TJ +/F84 9.9626 Tf 19.368 0 Td [(the)-250(list)-250(of)-250(global)-250(indices)-250(for)-250(which)-250(we)-250(need)-250(the)-250(owning)-250(pr)18(ocesses.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.956 Td 
[(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F145 9.9626 Tf 250.209 0 Td [(psb_lpk_)]TJ/F84 9.9626 Tf 41.843 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 4.284 -3.616 Td [(,)-243(even)-241(though)-242(the)-241(r)18(esulting)-241(memory)-241(bottleneck)]TJ -138.091 -11.955 Td [(would)-250(make)-250(this)-250(option)-250(unattractive)-250(in)-250(most)-250(cases.)]TJ/F51 11.9552 Tf 0 -33.074 Td [(2.1)-1000(Basic)-250(Nomenclature)]TJ/F54 9.9626 Tf 0 -20.306 Td [(Our)-301(computational)-301(model)-301(implies)-301(that)-301(the)-301(data)-301(al)1(location)-301(on)-301(the)-301(parallel)-301(dis-)]TJ 0 -11.955 Td [(tributed)-370(memory)-369(machine)-370(is)-370(guided)-370(by)-369(the)-370(str)8(uctur)18(e)-370(of)-370(the)-369(physical)-370(model,)]TJ 0 -11.955 Td [(and)-250(speci\002cally)-250(by)-250(the)-250(discr)18(etization)-250(mesh)-250(of)-250(the)-250(PDE.)]TJ 14.944 -12.648 Td [(Each)-400(point)-400(of)-400(the)-399(discr)18(etization)-400(mesh)-400(will)-400(have)-400(\050at)-400(least)1(\051)-400(one)-400(associated)]TJ -14.944 -11.955 Td [(equation/variable,)-416(and)-384(ther)18(efor)18(e)-383(one)-383(index.)-710(W)92(e)-383(say)-383(that)-384(point)]TJ/F52 9.9626 Tf 289.765 0 Td [(i)-403(depends)]TJ/F54 9.9626 Tf 42.709 0 Td [(on)]TJ -332.474 -11.955 Td [(point)]TJ/F52 9.9626 Tf 26.955 0 Td [(j)]TJ/F54 9.9626 Tf 6.004 0 Td [(if)-312(the)-312(equation)-312(for)-312(a)-312(variable)-313(associated)-312(with)]TJ/F52 9.9626 Tf 202.502 0 Td [(i)]TJ/F54 9.9626 Tf 6.074 0 Td [(contains)-312(a)-312(term)-312(in)]TJ/F52 9.9626 Tf 84.153 0 Td [(j)]TJ/F54 9.9626 Tf 2.894 0 Td [(,)-328(or)]TJ -328.582 -11.955 Td [(equivalently)-291(if)]TJ/F52 9.9626 Tf 67.321 0 Td [(a)]TJ/F52 7.5716 Tf 4.59 -1.96 Td [(i)-67(j)]TJ/F83 10.3811 Tf 8.967 1.96 Td [(6)]TJ/F85 10.3811 Tf 0.249 0 Td [(=)]TJ/F54 9.9626 Tf 11.726 0 Td 
[(0.)-434(After)-292(the)-291(partition)-292(of)-291(the)-292(discr)18(etization)-291(mesh)-292(into)]TJ/F52 9.9626 Tf 233.514 0 Td [(sub-)]TJ -326.367 -11.955 Td [(domains)]TJ/F54 9.9626 Tf 37.559 0 Td [(assigned)-381(to)-381(the)-381(parallel)-381(pr)18(ocesses,)-413(we)-381(classify)-381(the)-381(points)-381(of)-381(a)-381(given)]TJ -37.559 -11.955 Td [(sub-domain)-250(as)-250(following.)]TJ +/F75 9.9626 Tf -316.959 -20.539 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.003 Td [(Internal.)]TJ 0 g 0 G -/F54 9.9626 Tf 43.995 0 Td [(An)-359(internal)-359(poi)1(nt)-359(of)-359(a)-359(given)-359(domain)]TJ/F52 9.9626 Tf 168.65 0 Td [(depends)]TJ/F54 9.9626 Tf 35.684 0 Td [(only)-359(on)-359(points)-358(of)-359(the)]TJ -223.422 -11.955 Td [(same)-264(domain.)-351(If)-264(all)-264(points)-264(of)-264(a)-264(domain)-263(ar)18(e)-264(assigned)-264(to)-264(one)-264(pr)18(ocess,)-267(then)]TJ 0 -11.956 Td [(a)-196(computational)-196(step)-195(\050e.g.,)-207(a)-196(matrix-vector)-196(pr)18(oduct\051)-196(of)-195(the)-196(equations)-196(asso-)]TJ 0 -11.955 Td [(ciated)-214(with)-213(the)-214(internal)-214(points)-214(r)18(equir)18(es)-213(no)-214(data)-214(items)-214(fr)18(om)-213(other)-214(domains)]TJ 0 -11.955 Td [(and)-250(no)-250(communications.)]TJ + 0 -20.54 Td [(iprc)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.695 Td [(Boundary)92(.)]TJ +/F84 9.9626 Tf 22.685 0 Td [(the)-250(list)-250(of)-250(pr)18(ocesses)-250(owning)-250(the)-250(indices)-250(in)]TJ/F145 9.9626 Tf 184.994 0 Td [(idx)]TJ/F84 9.9626 Tf 15.691 0 Td [(.)]TJ -198.463 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 569.408 Tm 
[(Speci\002ed)-345(as:)-504(an)-345(allocatable)-345(one-dimensional)-345(array)-345(of)-345(integers)-346(of)-345(kind)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 557.453 Tm [(psb_ipk_)]TJ/F84 9.9626 Tf 41.843 0 Td [(.)]TJ 0.98 0 0 1 99.895 534.921 Tm [(Note:)-304(this)-231(method)-231(may)-231(or)-230(may)-231(not)-231(actually)-231(r)19(equir)18(e)-231(communications,)-235(depending)]TJ 0.98 0 0 1 99.895 522.966 Tm [(on)-217(the)-216(exact)-217(internal)-217(data)-217(storage;)-229(given)-217(that)-216(the)-217(choice)-217(of)-217(st)1(orage)-217(may)-217(be)-217(alter)19(ed)]TJ 0.98 0 0 1 99.895 511.011 Tm [(by)-250(r)8(untime)-250(parameters,)-251(it)-249(is)-250(necessary)-250(for)-250(safety)-250(that)-250(this)-250(method)-250(is)-249(called)-250(by)-250(all)]TJ 1 0 0 1 99.596 499.056 Tm [(pr)18(ocesses.)]TJ/F75 9.9626 Tf 0.299 -28.015 Td [(3.1.15)-1000(Named)-250(Constants)]TJ 0 g 0 G -/F54 9.9626 Tf 51.397 0 Td [(A)-192(point)-191(of)-192(a)-192(given)-191(domain)-192(is)-192(a)-191(boundary)-192(point)-192(if)-191(it)]TJ/F52 9.9626 Tf 217.552 0 Td [(depends)]TJ/F54 9.9626 Tf 34.019 0 Td [(on)-192(points)]TJ -278.061 -11.955 Td [(belonging)-250(to)-250(other)-250(domains.)]TJ + 0 -19.261 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 451.979 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 451.78 Td [(none)]TJ +ET +q +1 0 0 1 143.372 451.979 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G -/F51 9.9626 Tf -24.907 -22.696 Td [(Halo.)]TJ +BT +/F84 9.9626 Tf 151.342 451.78 Td [(Generic)-250(no-op;)]TJ 0 g 0 G -/F54 9.9626 Tf 29.609 0 Td [(A)-389(halo)-389(point)-389(for)-389(a)-389(given)-389(domain)-389(i)1(s)-389(a)-389(point)-389(belonging)-389(to)-389(another)-389(do-)]TJ -4.702 -11.955 Td [(main)-267(such)-267(that)-266(ther)18(e)-267(is)-267(a)-267(boundary)-267(point)-266(which)]TJ/F52 9.9626 Tf 212.474 0 Td [(depends)]TJ/F54 9.9626 Tf 34.767 0 Td [(on)-267(it.)-360(Whenever)]TJ -247.241 -11.955 Td 
[(performing)-360(a)-361(computational)-360(step,)-388(such)-361(as)-360(a)-361(matrix-vector)-360(pr)18(oduct,)-388(the)]TJ 0 -11.955 Td [(values)-274(associated)-273(with)-274(halo)-274(points)-274(ar)18(e)-274(r)18(equested)-273(fr)18(om)-274(other)-274(domains.)-381(A)]TJ 0 -11.955 Td [(boundary)-259(point)-258(of)-259(a)-258(given)-259(domain)-258(is)-259(usually)-258(a)-259(halo)-258(point)-259(for)-259(some)-258(other)]TJ 0 -11.956 Td [(domain)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 34.002 3.617 Td [(2)]TJ +/F75 9.9626 Tf -51.447 -20.539 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 431.44 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 431.241 Td [(root)]TJ +ET +q +1 0 0 1 138.949 431.44 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G -/F54 9.9626 Tf 4.284 -3.617 Td [(;)-238(ther)18(efor)18(e)-232(the)-233(car)18(dinality)-232(of)-232(the)-232(boundary)-233(p)1(oints)-233(set)-232(denotes)-232(the)]TJ -38.286 -11.955 Td [(amount)-250(of)-250(data)-250(sent)-250(to)-250(other)-250(domains.)]TJ +BT +/F84 9.9626 Tf 146.919 431.241 Td [(Default)-250(r)18(oot)-250(pr)18(ocess)-250(for)-250(br)18(oadcast)-250(and)-250(scatter)-250(operations;)]TJ 0 g 0 G +/F75 9.9626 Tf -47.024 -20.54 Td [(psb)]TJ ET q -1 0 0 1 99.895 168.389 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +1 0 0 1 117.091 410.9 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 410.701 Td [(nohalo)]TJ +ET +q +1 0 0 1 152.229 410.9 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G BT -/F54 5.9776 Tf 110.755 161.427 Td [(1)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(In)-250(our)-250(pr)18(ototype)-250(implementation)-250(we)-250(pr)18(ovide)-250(sample)-250(scatter/gather)-250(r)18(outines.)]TJ/F54 5.9776 Tf -3.487 -6.922 Td [(2)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(This)-401(is)-402(the)-401(normal)-402(situation)-401(when)-402(the)-401(pattern)-402(of)-401(the)-402(sparse)-401(matrix)-402(is)-401(symmetric,)-440(which)-401(is)]TJ -14.347 -9.464 Td 
[(equivalent)-358(to)-358(say)-358(that)-358(the)-357(interaction)-358(between)-358(two)-358(variables)-358(is)-358(r)18(ecipr)18(ocal.)-634(If)-357(the)-358(matrix)-358(pattern)]TJ 0 -9.465 Td [(is)-241(non-symmetric)-241(we)-242(may)-241(have)-241(one-way)-241(interactions,)-243(and)-241(these)-241(could)-241(cause)-242(a)-241(situation)-241(in)-241(which)-241(a)]TJ 0 -9.464 Td [(boundary)-250(point)-250(is)-250(not)-250(a)-250(halo)-250(point)-250(for)-250(its)-250(neighbour)74(.)]TJ +/F84 9.9626 Tf 160.199 410.701 Td [(Do)-250(not)-250(fetch)-250(halo)-250(elements;)]TJ 0 g 0 G +/F75 9.9626 Tf -60.304 -20.54 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 390.361 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 390.161 Td [(halo)]TJ +ET +q +1 0 0 1 140.603 390.361 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G -/F54 9.9626 Tf 169.365 -29.888 Td [(3)]TJ +BT +/F84 9.9626 Tf 148.573 390.161 Td [(Fetch)-250(halo)-250(elements)-250(fr)18(om)-250(neighbouring)-250(pr)18(ocesses;)]TJ 0 g 0 G +/F75 9.9626 Tf -48.678 -20.539 Td [(psb)]TJ ET - -endstream -endobj -823 0 obj -<< -/Length 4830 ->> -stream +q +1 0 0 1 117.091 369.821 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 369.622 Td [(sum)]TJ +ET +q +1 0 0 1 140.045 369.821 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F84 9.9626 Tf 148.015 369.622 Td [(Sum)-250(overlapped)-250(elements)]TJ 0 g 0 G +/F75 9.9626 Tf -48.12 -20.54 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 349.282 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 349.082 Td [(avg)]TJ +ET +q +1 0 0 1 136.737 349.282 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G +BT +/F84 9.9626 Tf 144.319 349.082 Td [(A)92(verage)-250(overlapped)-250(elements)]TJ 0 g 0 G +/F75 9.9626 Tf -44.424 -20.539 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 328.742 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 328.543 Td [(comm)]TJ +ET +q +1 0 0 1 148.353 328.742 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 
l S +Q BT -/F51 9.9626 Tf 150.705 706.129 Td [(Overlap.)]TJ +/F75 9.9626 Tf 151.342 328.543 Td [(halo)]TJ +ET +q +1 0 0 1 171.865 328.742 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G -/F54 9.9626 Tf 44.553 0 Td [(An)-245(overlap)-244(point)-245(is)-245(a)-245(boundary)-244(point)-245(assigned)-245(to)-244(multiple)-245(domains.)]TJ -19.647 -11.955 Td [(Any)-204(operation)-204(that)-204(involves)-204(an)-204(overlap)-204(point)-204(has)-204(to)-204(be)-204(r)18(eplicated)-204(for)-204(each)]TJ 0 -11.955 Td [(assignment.)]TJ -24.906 -18.943 Td [(Overlap)-358(points)-359(do)-358(not)-358(usually)-359(exist)-358(in)-359(the)-358(basic)-358(data)-359(distributions;)-412(however)]TJ 0 -11.955 Td [(they)-325(ar)18(e)-326(a)-325(featur)18(e)-326(of)-325(Domain)-326(Decomposition)-325(Schwarz)-326(pr)18(econditioners)-325(which)]TJ 0 -11.956 Td [(ar)18(e)-250(the)-250(subject)-250(of)-250(r)18(elated)-250(r)18(esear)18(ch)-250(work)-250([)]TJ -1 0 0 rg 1 0 0 RG - [(4)]TJ +BT +/F84 9.9626 Tf 179.835 328.543 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F145 9.9626 Tf 124.92 0 Td [(halo_index)]TJ/F84 9.9626 Tf 54.795 0 Td [(list;)]TJ 0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-250(3)]TJ +/F75 9.9626 Tf -259.655 -20.54 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 308.203 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 308.003 Td [(comm)]TJ +ET +q +1 0 0 1 148.353 308.203 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 151.342 308.003 Td [(ext)]TJ +ET +q +1 0 0 1 165.22 308.203 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G - [(].)]TJ 14.944 -11.955 Td [(W)92(e)-225(denote)-225(the)-225(sets)-225(of)-225(internal,)-230(boundary)-225(and)-225(halo)-225(points)-225(for)-225(a)-225(given)-225(subdo-)]TJ -14.944 -11.955 Td [(main)-251(by)]TJ/F83 10.3811 Tf 38.66 0 Td [(I)]TJ/F54 9.9626 Tf 6.53 0 Td [(,)]TJ/F83 10.3811 Tf 5.125 0 Td [(B)]TJ/F54 9.9626 Tf 9.753 0 Td [(and)]TJ/F83 10.3811 Tf 19.497 0 Td [(H)]TJ/F54 9.9626 Tf 8.972 0 Td 
[(.)-314(Each)-252(subdomain)-251(is)-252(assigned)-251(to)-252(one)-251(pr)18(ocess;)-253(each)-251(pr)18(ocess)]TJ -88.537 -11.955 Td [(usually)-346(owns)-346(one)-346(su)1(bdomain,)-370(although)-346(the)-346(user)-346(may)-346(choose)-345(to)-346(assign)-346(mor)18(e)]TJ 0 -11.955 Td [(than)-302(one)-301(subdomain)-302(to)-301(a)-302(pr)18(ocess.)-465(If)-302(each)-301(pr)18(ocess)]TJ/F52 9.9626 Tf 222.767 0 Td [(i)]TJ/F54 9.9626 Tf 5.968 0 Td [(owns)-302(one)-301(subdomain,)-315(the)]TJ -228.735 -11.956 Td [(number)-221(of)-221(r)18(ows)-221(in)-221(the)-221(local)-221(sparse)-221(matrix)-221(is)]TJ/F83 10.3811 Tf 192.655 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 4.799 0 Td [(+)]TJ/F83 10.3811 Tf 9.989 0 Td [(j)-24(B)]TJ/F52 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.003 0 Td [(,)-227(and)-221(the)-221(number)-221(of)-221(local)]TJ -235.248 -11.955 Td [(columns)-207(\050i.e.)-296(those)-207(for)-207(which)-207(ther)18(e)-208(exists)-207(at)-207(least)-207(one)-207(non-zer)18(o)-208(entry)-207(in)-207(the)-207(local)]TJ 0 -11.955 Td [(r)18(ows\051)-250(is)]TJ/F83 10.3811 Tf 37.275 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.066 0 Td [(+)]TJ/F83 10.3811 Tf 10.255 0 Td [(j)-24(B)]TJ/F52 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.066 0 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(j)-24(H)]TJ/F52 7.5716 Tf 12.051 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.004 0 Td [(.)]TJ +BT +/F84 9.9626 Tf 173.19 308.003 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F145 9.9626 Tf 124.92 0 Td [(ext_index)]TJ/F84 9.9626 Tf 49.564 0 Td [(list;)]TJ 0 g 0 G +/F75 9.9626 Tf -247.779 -20.539 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 287.663 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 287.464 Td [(comm)]TJ +ET +q +1 0 
0 1 148.353 287.663 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 151.342 287.464 Td [(ovr)]TJ +ET +q +1 0 0 1 166.893 287.663 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G +BT +/F84 9.9626 Tf 174.864 287.464 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F145 9.9626 Tf 124.92 0 Td [(ovrlap_index)]TJ/F84 9.9626 Tf 65.255 0 Td [(list;)]TJ 0 g 0 G +/F75 9.9626 Tf -265.144 -20.54 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 267.124 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 266.924 Td [(comm)]TJ +ET +q +1 0 0 1 148.353 267.124 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 151.342 266.924 Td [(mov)]TJ ET -1 0 0 1 222.462 541.675 cm q -0 -1 1 0 0 0 cm -q -.65 0 0 .65 0 0 cm -q -1 0 0 1 0 0 cm -/Im3 Do +1 0 0 1 171.875 267.124 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G +BT +/F84 9.9626 Tf 179.845 266.924 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F145 9.9626 Tf 124.92 0 Td [(ovr_mst_idx)]TJ/F84 9.9626 Tf 60.025 0 Td [(list;)]TJ/F75 11.9552 Tf -264.895 -30.006 Td [(3.2)-1000(Sparse)-250(Matrix)-250(class)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.587 217.656 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 119.097 217.656 Tm [(psb)]TJ +ET +q +1 0 0 1 135.416 217.855 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q +BT +/F145 9.9626 Tf 138.554 217.656 Td [(Tspmat)]TJ +ET +q +1 0 0 1 170.564 217.855 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G -1 0 0 1 -222.462 -541.675 cm BT -/F54 9.9626 Tf 260.803 335.398 Td [(Figur)18(e)-250(2:)-310(Point)-250(class\002cation.)]TJ -0 g 0 G -0 g 0 G - -95.154 -23.688 Td [(This)-190(classi\002cation)-190(of)-190(mesh)-190(points)-190(guides)-190(the)-190(naming)-190(scheme)-190(that)-190(we)-190(adopted)]TJ -14.944 -11.956 Td [(in)-190(the)-190(library)-190(internals)-190(and)-190(in)-190(the)-190(data)-190(str)8(uctur)18(es.)-290(W)92(e)-190(explicitly)-190(note)-190(that)-190(\223Halo\224)]TJ 0 -11.955 Td 
[(points)-250(ar)18(e)-250(also)-250(often)-250(called)-250(\223ghost\224)-250(points)-250(in)-250(the)-250(literatur)18(e.)]TJ/F51 11.9552 Tf 0 -28.902 Td [(2.2)-1000(Library)-250(contents)]TJ/F54 9.9626 Tf 0 -18.964 Td [(The)-250(PSBLAS)-250(library)-250(consists)-250(of)-250(various)-250(classes)-250(of)-250(subr)18(outines:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -18.943 Td [(Computational)-250(routines)]TJ -0 g 0 G -/F54 9.9626 Tf 113.723 0 Td [(comprising:)]TJ -0 g 0 G - -77.917 -19.434 Td [(\225)]TJ -0 g 0 G - [-500(Sparse)-250(matrix)-250(by)-250(dense)-250(matrix)-250(pr)18(oduct;)]TJ -0 g 0 G - 0 -15.449 Td [(\225)]TJ +/F145 9.9626 Tf 173.702 217.656 Td [(type)]TJ/F84 9.9626 Tf 1.02 0 0 1 197.123 217.656 Tm [(class)-246(contains)-246(all)-246(information)-246(about)-246(the)-246(local)-246(portion)-246(of)]TJ 0.98 0 0 1 99.895 205.701 Tm [(the)-252(sparse)-253(matrix)-252(and)-252(its)-253(st)1(orage)-253(mode.)-315(Its)-252(design)-253(is)-252(based)-252(on)-253(the)-252(ST)76(A)75(TE)-252(design)]TJ 0.98 0 0 1 99.596 193.746 Tm [(pattern)-251([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 136.63 193.746 Tm [(13)]TJ 0 g 0 G - [-500(Sparse)-250(triangular)-250(systems)-250(solution)-250(for)-250(block)-250(diagonal)-250(matrices;)]TJ + 0.98 0 0 1 146.593 193.746 Tm [(])-251(as)-251(detailed)-251(in)-252([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 215.48 193.746 Tm [(11)]TJ 0 g 0 G - 0 -15.449 Td [(\225)]TJ + 0.98 0 0 1 225.443 193.746 Tm [(];)-252(the)-252(type)-251(declaration)-251(is)-251(shown)-251(in)-251(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-251(2)]TJ 0 g 0 G - [-500(V)111(ector)-250(and)-250(matrix)-250(norms;)]TJ + [-251(wher)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 438.638 193.746 Tm [(T)]TJ/F84 9.9626 Tf -338.743 -11.955 Td [(is)-250(a)-250(placeholder)-250(for)-250(the)-250(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ 0 g 0 G - 0 -15.449 Td [(\225)]TJ +/F75 9.9626 Tf 0 -20.386 Td [(S)]TJ 0 g 0 G - [-500(Dense)-250(matrix)-250(sums;)]TJ +/F84 9.9626 Tf 11.069 0 Td 
[(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ 0 g 0 G - 0 -15.449 Td [(\225)]TJ +/F75 9.9626 Tf -11.069 -20.54 Td [(D)]TJ 0 g 0 G - [-500(Dot)-250(pr)18(oducts.)]TJ +/F84 9.9626 Tf 13.281 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ 0 g 0 G -/F51 9.9626 Tf -35.806 -19.434 Td [(Communication)-250(routines)]TJ +/F75 9.9626 Tf -13.281 -20.539 Td [(C)]TJ 0 g 0 G -/F54 9.9626 Tf 118.704 0 Td [(handling)-250(halo)-250(and)-250(overlap)-250(communications;)]TJ +/F84 9.9626 Tf 12.175 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ 0 g 0 G - 50.661 -29.888 Td [(4)]TJ + 154.7 -29.888 Td [(17)]TJ 0 g 0 G ET endstream endobj -820 0 obj -<< -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/points.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 826 0 R -/BBox [0 0 274 308] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 827 0 R ->>/Font << /R8 828 0 R>> ->> -/Length 1397 -/Filter /FlateDecode ->> -stream -xœÝYËŽ5Ý÷Wô’ q±Ën»ï6 -– 󣄠¹3ÊBü=®§«æ±bA‡HŒ}Ï©c·í²»?­é”ׄÿäïÍeùö]_?ü¾¤Ó©d êwßGüðaù´d"®òçæ²¾¾ä}ÍíëÕûe4­ß ,äýÔ×sÿ»º,_ýx÷Ç/w×·¯®~[¾»ZÞ.ø›Œ1¸ð™âuóâ¯ïÿ¼ûùúáoO*žþx/þÃõí½Î22Tø<ᜇd†&Âoî/×ïV˜âÿõèCê1V^õd¨æõãR ¬Û9ŸÎç¶^–ºµÓ¾ÍšÚýÝz¦zõ¯7‹!€S®ûjì§”êJÚR¿–ðWZSöN•m˜´ ide«3çûfyÿõROÛú×|J_F¿~]~z2ò–}×òVÐÕämë¦Î€sQ<I<³¦uiüd¸r͵9.Ö¤¢ÆR’ÉÑãY~ОÐCÑÝ¥Ÿ}öçÙ^â<3LA ‰c‹YÒ¶®ôçY¯qž&mCÙØâÌû懣ç—Ñ#|H–_rƧšÇÒ³,wš0s>}yüÇ5ÒNóË p%U¤ –ðW@E’§$§•|¡pxõE`&ÆøåU ™¤ó«›%AÝIUÍ0Gš]ý‘&ûÖM’ î Jšx÷¬…T.ù)~¼C²8˜}~‚­ÛÍWÛ¢íÁvKÑö¶K,8ÛÍ—&†`[C*—ü¨ONÔÇs­ƒ ½m‚ê ò9؆Áu¶!×`{P9¦m‚êKI7oÛB*—ü¨O샹~ñ̳·Ç'­¡Á^ÝIaÏvRy!œzw'ó¤`Íx"0.Ѥb'…iÄù|ùÌs¼žP:-%X/[´^º“#Àa°há…dÞPÓY/)Z‡Ýqˆ&-VŠÖ½ON¬Çtnƒ®G±À¹ÍY–& é›Ë’וB¿Ìœ¤¡¹M…ÁnngäŽ%¤Ò#ØœÃÉÙÇ‚"d;’Àô)ùÃ(˜\X‹³Ž¥²£0}Z¡pø#`Ó†Sò‹%Hvt§Ð̧f£`ú`-Î+”ÐŽQ4ó9ƒ…Ç,x›O/,îf,z»âißn«ªÝìv«$½úæ-ÜŒå`?›“禩™|,ˆ7cïó™;Ìñº@!osõé]Ц?ݲta0€yýÒ¥¤Zdy›«OïRÜ<%9­äƒ€[}拇ú6m8uõIPžþhǃf>m))…YÞæê“ Ò<%9­äƒ€[}ækçÿÜæ“WO’rõ= A} £ Ñ0'Ë 9‘S,irêÕ÷+\_ã­uâÝ¿›ÑÆE?æóé{¦ƒÙÇá'È‹ÎB#4_²$&†`[–’qq‘‘&/> Mõ5^_'†`[Bý˜OõºÖÁ–%©¡ ª/]07o[šqq ’&/M Íõ5^_'nÞ¶†4.ú1Ÿ6ØsýÜ¥%]Š!ƒCÞgVe@Ù–‹’…$)š5-ƒÃØ5}‡ä²?ÖLg+‡ |>{é>hO‘jøX5~,ê>–0àxÕ},1’š¬ác ”ø±ŠûX€5‹ûXb$3òø³ 
Ú…t¡í¡=Å>tpº8Õ‡’Ô$iÎ>´-ö¡Ç%ÀšTÔXJR#ÞgL¼í“-J/0®jãȶw.Þâªick£Z,”Ô¤š^”Ñk·ì«éUÝ ‹¯WjÇ‚µÛçƒ.ÁºUE³zÉgýãPˆ,é"›Ñe±ûÌ‹:t˜!*%~ Ö *«QÊÒ@emPMÓ1:¾Þ’àX¼÷(˜®4æ ¤Nƒ¾]þÎJ¦' -endstream -endobj -748 0 obj +979 0 obj << /Type /ObjStm /N 100 -/First 919 -/Length 15283 ->> -stream -699 0 700 151 701 304 702 457 703 610 704 762 705 914 706 1067 707 1220 708 1373 -709 1526 710 1678 711 1831 712 1977 713 2129 743 2281 714 2432 715 2584 716 2736 717 2888 -718 3040 719 3190 720 3342 721 3494 722 3647 723 3800 724 3951 725 4103 726 4256 727 4409 -728 4562 729 4715 730 4861 731 5013 732 5165 733 5317 744 5469 734 5620 735 5766 745 5918 -736 6069 746 6221 737 6372 747 6524 742 6673 739 6729 761 6809 738 7055 764 7207 749 7359 -765 7511 750 7663 766 7815 751 7967 752 8112 753 8265 754 8418 755 8571 756 8724 757 8877 -758 9030 759 9177 763 9330 760 9387 778 9467 767 9673 768 9823 769 9974 770 10125 771 10279 -772 10429 773 10579 774 10729 775 10877 776 11025 7 11173 777 11227 797 11320 800 11470 801 11711 -802 11753 803 12139 791 12439 792 12585 793 12732 11 12879 799 12935 796 12992 809 13113 795 13263 -806 13411 807 13560 811 13709 15 13765 815 13820 816 13877 808 13934 822 14066 826 14208 827 14322 -% 699 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 666.041 386.806 678.1] -/A << /S /GoTo /D (subsection.6.16) >> ->> -% 700 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 653.946 368.116 666.006] -/A << /S /GoTo /D (subsection.6.17) >> ->> -% 701 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 641.852 370.219 653.912] -/A << /S /GoTo /D (subsection.6.18) >> ->> -% 702 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 629.758 214.116 641.818] -/A << /S /GoTo /D (subsection.6.19) >> ->> -% 703 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 617.664 231.69 629.724] -/A << /S /GoTo /D (subsection.6.20) >> ->> -% 704 0 
obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 605.57 204.353 617.629] -/A << /S /GoTo /D (subsection.6.21) >> ->> -% 705 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 593.476 221.927 605.535] -/A << /S /GoTo /D (subsection.6.22) >> ->> -% 706 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 581.381 379.076 593.441] -/A << /S /GoTo /D (subsection.6.23) >> ->> -% 707 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 569.287 359.768 581.347] -/A << /S /GoTo /D (subsection.6.24) >> ->> -% 708 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 557.193 373.158 569.253] -/A << /S /GoTo /D (subsection.6.25) >> +/First 926 +/Length 9242 >> -% 709 0 obj +stream +966 0 982 121 965 255 984 404 985 461 986 518 987 575 988 632 989 689 990 746 +991 803 992 860 993 917 994 974 995 1031 996 1088 997 1145 998 1202 999 1259 981 1316 +1003 1412 980 1558 1001 1710 1005 1858 31 1916 1006 1972 1007 2030 1008 2086 1009 2143 1010 2201 +1011 2259 35 2317 1002 2373 1014 2495 1012 2633 1016 2781 39 2840 43 2897 1017 2954 1013 3013 +1022 3108 1018 3264 1019 3411 1020 3564 1024 3717 1025 3775 1026 3833 1027 3891 1028 3949 1029 4007 +1021 4065 1033 4146 1030 4293 1031 4446 1035 4599 1036 4658 1037 4717 1038 4776 1039 4835 1040 4894 +1041 4953 1042 5012 1043 5071 1045 5130 1032 5189 1049 5299 1046 5447 1047 5595 1051 5742 47 5800 +51 5856 55 5912 59 5968 1048 6024 1054 6160 1056 6278 63 6337 67 6394 71 6451 1053 6508 +1058 6603 1060 6721 75 6779 79 6835 1061 6891 83 6949 87 7003 1057 7059 1063 7154 1065 7272 +91 7331 95 7388 99 7445 1062 7502 1070 7597 1066 7754 1067 7911 1068 8062 1072 8201 103 8259 +% 966 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 545.099 287.68 557.158] -/A << /S /GoTo /D (subsection.6.26) >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F145 940 0 R 
/F192 942 0 R >> +/ProcSet [ /PDF /Text ] >> -% 710 0 obj +% 982 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 533.005 221.369 545.064] -/A << /S /GoTo /D (subsection.6.27) >> +/Type /Page +/Contents 983 0 R +/Resources 981 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 978 0 R +/Annots [ 965 0 R ] >> -% 711 0 obj +% 965 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 513.485 251.974 522.815] -/A << /S /GoTo /D (section.7) >> +/Rect [284.193 690.964 290.469 703.958] +/A << /S /GoTo /D (Hfootnote.3) >> >> -% 712 0 obj +% 984 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 498.761 364.739 510.821] -/A << /S /GoTo /D (subsection.7.1) >> +/D [982 0 R /XYZ 149.705 753.953 null] >> -% 713 0 obj +% 985 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 486.667 444.603 498.726] -/A << /S /GoTo /D (subsection.7.2) >> +/D [982 0 R /XYZ 150.705 716.092 null] >> -% 743 0 obj +% 986 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 477.362 161.365 486.467] -/A << /S /GoTo /D (subsection.7.2) >> +/D [982 0 R /XYZ 150.705 688.869 null] >> -% 714 0 obj +% 987 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 462.617 364.172 474.677] -/A << /S /GoTo /D (subsection.7.3) >> +/D [982 0 R /XYZ 150.705 619.713 null] >> -% 715 0 obj +% 988 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 450.523 358.772 462.583] -/A << /S /GoTo /D (subsection.7.4) >> +/D [982 0 R /XYZ 150.705 601.577 null] >> -% 716 0 obj +% 989 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 438.429 307.635 450.489] -/A << /S /GoTo /D (subsection.7.5) >> +/D [982 0 R /XYZ 150.705 583.441 null] >> -% 717 0 obj +% 990 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 426.335 277.737 438.395] -/A << /S /GoTo /D 
(subsection.7.6) >> +/D [982 0 R /XYZ 150.705 553.945 null] >> -% 718 0 obj +% 991 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 414.241 393.262 426.3] -/A << /S /GoTo /D (subsection.7.7) >> +/D [982 0 R /XYZ 150.705 535.213 null] >> -% 719 0 obj +% 992 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 402.147 288.227 414.206] -/A << /S /GoTo /D (subsection.7.8) >> +/D [982 0 R /XYZ 150.705 505.717 null] >> -% 720 0 obj +% 993 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 390.052 260.403 402.112] -/A << /S /GoTo /D (subsection.7.9) >> +/D [982 0 R /XYZ 150.705 484.993 null] >> -% 721 0 obj +% 994 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 377.958 242.878 390.018] -/A << /S /GoTo /D (subsection.7.10) >> +/D [982 0 R /XYZ 150.705 460.161 null] >> -% 722 0 obj +% 995 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 365.864 269.159 377.924] -/A << /S /GoTo /D (subsection.7.11) >> +/D [982 0 R /XYZ 150.705 433.375 null] >> -% 723 0 obj +% 996 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 353.77 266.31 365.829] -/A << /S /GoTo /D (subsection.7.12) >> +/D [982 0 R /XYZ 150.705 419.224 null] >> -% 724 0 obj +% 997 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 341.676 335.51 353.735] -/A << /S /GoTo /D (subsection.7.13) >> +/D [982 0 R /XYZ 150.705 405.145 null] >> -% 725 0 obj +% 998 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 329.581 334.742 341.641] -/A << /S /GoTo /D (subsection.7.14) >> +/D [982 0 R /XYZ 150.705 378.966 null] >> -% 726 0 obj +% 999 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 317.487 305.523 329.547] -/A << /S /GoTo /D (subsection.7.15) >> +/D [982 0 R /XYZ 165.051 139.255 null] >> -% 727 0 obj +% 981 0 obj << -/Type 
/Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 305.393 232.786 317.453] -/A << /S /GoTo /D (subsection.7.16) >> +/Font << /F84 687 0 R /F145 940 0 R /F215 1000 0 R >> +/ProcSet [ /PDF /Text ] >> -% 728 0 obj +% 1003 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 293.299 242.519 305.359] -/A << /S /GoTo /D (subsection.7.17) >> +/Type /Page +/Contents 1004 0 R +/Resources 1002 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 978 0 R +/Annots [ 980 0 R 1001 0 R ] >> -% 729 0 obj +% 980 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 271.309 183.083 283.109] -/A << /S /GoTo /D (section.8) >> +/Rect [218.838 674.054 233.284 683.464] +/A << /S /GoTo /D (subsection.2.3) >> >> -% 730 0 obj +% 1001 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 259.055 387.474 271.115] -/A << /S /GoTo /D (subsection.8.1) >> +/Rect [118.961 266.803 126.034 278.862] +/A << /S /GoTo /D (section.3) >> >> -% 731 0 obj +% 1005 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 246.961 415.897 259.021] -/A << /S /GoTo /D (subsection.8.2) >> +/D [1003 0 R /XYZ 98.895 753.953 null] >> -% 732 0 obj +% 31 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 234.867 399.738 246.926] -/A << /S /GoTo /D (subsection.8.3) >> +/D [1003 0 R /XYZ 99.895 716.092 null] >> -% 733 0 obj +% 1006 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 222.773 444.603 234.832] -/A << /S /GoTo /D (subsection.8.4) >> +/D [1003 0 R /XYZ 99.895 671.065 null] >> -% 744 0 obj +% 1007 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 213.468 180.781 222.877] -/A << /S /GoTo /D (subsection.8.4) >> +/D [1003 0 R /XYZ 99.895 648.1 null] >> -% 734 0 obj +% 1008 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 191.298 152.896 200.628] -/A << /S /GoTo /D 
(section.9) >> +/D [1003 0 R /XYZ 99.895 573.59 null] >> -% 735 0 obj +% 1009 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 176.574 444.603 188.633] -/A << /S /GoTo /D (subsection.9.1) >> +/D [1003 0 R /XYZ 99.895 527.104 null] >> -% 745 0 obj +% 1010 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 164.619 201.494 176.678] -/A << /S /GoTo /D (subsection.9.1) >> +/D [1003 0 R /XYZ 99.895 495.819 null] >> -% 736 0 obj +% 1011 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 152.524 444.603 164.584] -/A << /S /GoTo /D (subsection.9.2) >> +/D [1003 0 R /XYZ 99.895 475.298 null] >> -% 746 0 obj +% 35 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 143.219 168.468 152.629] -/A << /S /GoTo /D (subsection.9.2) >> +/D [1003 0 R /XYZ 99.895 420.262 null] >> -% 737 0 obj +% 1002 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 128.475 444.603 140.535] -/A << /S /GoTo /D (subsection.9.3) >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 747 0 obj +% 1014 0 obj +<< +/Type /Page +/Contents 1015 0 R +/Resources 1013 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 978 0 R +/Annots [ 1012 0 R ] +>> +% 1012 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 119.17 202.859 128.58] -/A << /S /GoTo /D (subsection.9.3) >> +/Rect [319.267 173.115 326.341 185.175] +/A << /S /GoTo /D (section.6) >> >> -% 742 0 obj +% 1016 0 obj << -/D [740 0 R /XYZ 98.895 753.953 null] +/D [1014 0 R /XYZ 149.705 753.953 null] >> -% 739 0 obj +% 39 0 obj +<< +/D [1014 0 R /XYZ 150.705 716.092 null] +>> +% 43 0 obj +<< +/D [1014 0 R /XYZ 150.705 279.545 null] +>> +% 1017 0 obj +<< +/D [1014 0 R /XYZ 397.584 236.897 null] +>> +% 1013 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ 
/PDF /Text ] >> -% 761 0 obj +% 1022 0 obj << /Type /Page -/Contents 762 0 R -/Resources 760 0 R +/Contents 1023 0 R +/Resources 1021 0 R /MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 738 0 R 764 0 R 749 0 R 765 0 R 750 0 R 766 0 R 751 0 R 752 0 R 753 0 R 754 0 R 755 0 R 756 0 R 757 0 R 758 0 R 759 0 R ] +/Parent 978 0 R +/Annots [ 1018 0 R 1019 0 R 1020 0 R ] >> -% 738 0 obj +% 1018 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 702.323 495.412 714.383] -/A << /S /GoTo /D (subsection.9.4) >> +/Rect [399.906 399.338 406.88 411.398] +/A << /S /GoTo /D (section.6) >> >> -% 764 0 obj +% 1019 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 693.018 253.668 702.428] -/A << /S /GoTo /D (subsection.9.4) >> +/Rect [304.177 193.202 318.623 205.262] +/A << /S /GoTo /D (subsection.3.3) >> >> -% 749 0 obj +% 1020 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 678.413 495.412 690.472] -/A << /S /GoTo /D (subsection.9.5) >> +/Rect [304.177 148.883 318.623 160.943] +/A << /S /GoTo /D (subsection.3.3) >> >> -% 765 0 obj +% 1024 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 669.108 253.668 678.517] -/A << /S /GoTo /D (subsection.9.5) >> +/D [1022 0 R /XYZ 98.895 753.953 null] >> -% 750 0 obj +% 1025 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 654.503 495.412 666.562] -/A << /S /GoTo /D (subsection.9.6) >> +/D [1022 0 R /XYZ 99.895 293.402 null] >> -% 766 0 obj +% 1026 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 645.197 253.668 654.607] -/A << /S /GoTo /D (subsection.9.6) >> +/D [1022 0 R /XYZ 99.895 278.496 null] >> -% 751 0 obj +% 1027 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 623.26 274.28 632.59] -/A << /S /GoTo /D (section.10) >> +/D [1022 0 R /XYZ 99.895 261.039 null] >> -% 752 0 obj +% 1028 0 obj << -/Type 
/Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 608.674 333.298 620.734] -/A << /S /GoTo /D (subsection.10.1) >> +/D [1022 0 R /XYZ 99.895 244.857 null] >> -% 753 0 obj +% 1029 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 596.719 331.326 608.779] -/A << /S /GoTo /D (subsection.10.2) >> +/D [1022 0 R /XYZ 99.895 228.675 null] >> -% 754 0 obj +% 1021 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 584.764 381.626 596.824] -/A << /S /GoTo /D (subsection.10.3) >> +/Font << /F84 687 0 R /F75 685 0 R >> +/ProcSet [ /PDF /Text ] >> -% 755 0 obj +% 1033 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 572.809 427.165 584.869] -/A << /S /GoTo /D (subsection.10.4) >> +/Type /Page +/Contents 1034 0 R +/Resources 1032 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 978 0 R +/Annots [ 1030 0 R 1031 0 R ] >> -% 756 0 obj +% 1030 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 560.854 353.343 572.913] -/A << /S /GoTo /D (subsection.10.5) >> +/Rect [354.987 614.119 369.432 626.179] +/A << /S /GoTo /D (subsection.3.3) >> >> -% 757 0 obj +% 1031 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 548.899 315.177 560.958] -/A << /S /GoTo /D (subsection.10.6) >> +/Rect [354.987 568.778 369.432 580.837] +/A << /S /GoTo /D (subsection.3.3) >> >> -% 758 0 obj +% 1035 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 529.611 247.72 538.941] -/A << /S /GoTo /D (section.11) >> +/D [1033 0 R /XYZ 149.705 753.953 null] >> -% 759 0 obj +% 1036 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 515.026 393.631 527.085] -/A << /S /GoTo /D (subsection.11.1) >> +/D [1033 0 R /XYZ 150.705 716.092 null] >> -% 763 0 obj +% 1037 0 obj << -/D [761 0 R /XYZ 149.705 753.953 null] +/D [1033 0 R /XYZ 150.705 702.226 null] >> -% 760 0 obj +% 1038 0 obj +<< 
+/D [1033 0 R /XYZ 150.705 684.257 null] +>> +% 1039 0 obj +<< +/D [1033 0 R /XYZ 150.705 667.564 null] +>> +% 1040 0 obj +<< +/D [1033 0 R /XYZ 150.705 650.871 null] +>> +% 1041 0 obj +<< +/D [1033 0 R /XYZ 150.705 541.236 null] +>> +% 1042 0 obj +<< +/D [1033 0 R /XYZ 150.705 524.542 null] +>> +% 1043 0 obj +<< +/D [1033 0 R /XYZ 150.705 507.849 null] +>> +% 1045 0 obj +<< +/D [1033 0 R /XYZ 150.705 288.977 null] +>> +% 1032 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F233 1044 0 R >> /ProcSet [ /PDF /Text ] >> -% 778 0 obj +% 1049 0 obj << /Type /Page -/Contents 779 0 R -/Resources 777 0 R +/Contents 1050 0 R +/Resources 1048 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 767 0 R 768 0 R 769 0 R 770 0 R 771 0 R 772 0 R 773 0 R 774 0 R 775 0 R 776 0 R ] +/Parent 1052 0 R +/Annots [ 1046 0 R 1047 0 R ] >> -% 767 0 obj +% 1046 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [408.168 586.065 420.123 595.071] -/A << /S /GoTo /D (cite.metcalf) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [275.775 524.509 282.649 537.101] +/A << /S /GoTo /D (section.1) >> >> -% 768 0 obj +% 1047 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [300.381 514.225 312.336 523.231] -/A << /S /GoTo /D (cite.Sparse03) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [426.76 326.492 433.634 338.552] +/A << /S /GoTo /D (section.1) >> >> -% 769 0 obj +% 1051 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [302.511 502.27 309.484 511.276] -/A << /S /GoTo /D (cite.DesPat:11) >> +/D [1049 0 R /XYZ 98.895 753.953 null] >> -% 770 0 obj +% 47 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [312.107 502.171 324.063 511.276] -/A << /S /GoTo /D (cite.RouXiaXu:11) >> +/D [1049 0 R /XYZ 99.895 716.092 null] >> -% 771 0 obj +% 51 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [234.17 442.286 246.125 451.392] -/A << /S /GoTo 
/D (cite.machiels) >> +/D [1049 0 R /XYZ 99.895 696.532 null] >> -% 772 0 obj +% 55 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [241.917 370.446 248.891 379.402] -/A << /S /GoTo /D (cite.sblas97) >> +/D [1049 0 R /XYZ 99.895 498.276 null] >> -% 773 0 obj +% 59 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [251.448 370.446 258.422 379.402] -/A << /S /GoTo /D (cite.sblas02) >> +/D [1049 0 R /XYZ 99.895 288.305 null] >> -% 774 0 obj +% 1048 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F190 941 0 R /F78 686 0 R /F192 942 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1054 0 obj +<< +/Type /Page +/Contents 1055 0 R +/Resources 1053 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1052 0 R +>> +% 1056 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [226.689 358.491 238.644 367.597] -/A << /S /GoTo /D (cite.BLAS1) >> +/D [1054 0 R /XYZ 149.705 753.953 null] >> -% 775 0 obj +% 63 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [241.633 358.491 248.606 367.447] -/A << /S /GoTo /D (cite.BLAS2) >> +/D [1054 0 R /XYZ 150.705 716.092 null] >> -% 776 0 obj +% 67 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [251.595 358.491 258.569 367.447] -/A << /S /GoTo /D (cite.BLAS3) >> +/D [1054 0 R /XYZ 150.705 529.559 null] >> -% 7 0 obj +% 71 0 obj << -/D [778 0 R /XYZ 99.895 716.092 null] +/D [1054 0 R /XYZ 150.705 276.666 null] >> -% 777 0 obj +% 1053 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 797 0 obj +% 1058 0 obj << /Type /Page -/Contents 798 0 R -/Resources 796 0 R +/Contents 1059 0 R +/Resources 1057 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 791 0 R 792 0 R 793 0 R ] +/Parent 1052 0 R >> -% 800 0 obj +% 1060 0 obj << -/Producer (GPL Ghostscript 9.04) -/CreationDate (D:20111215145523+01'00') -/ModDate 
(D:20111215145523+01'00') -/Title (psblas.fig) -/Creator (fig2dev Version 3.2 Patchlevel 5d) -/Author (sfilippo@donald \(Salvatore Filippone\)) +/D [1058 0 R /XYZ 98.895 753.953 null] >> -% 801 0 obj +% 75 0 obj << -/Type /ExtGState -/OPM 1 +/D [1058 0 R /XYZ 99.895 716.092 null] >> -% 802 0 obj +% 79 0 obj << -/BaseFont /JEJNJE+Times-Roman -/FontDescriptor 803 0 R -/Type /Font -/FirstChar 32 -/LastChar 116 -/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 500 0 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 722 667 0 0 0 556 0 0 333 0 0 611 889 0 0 556 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 444 0 444 0 444 333 500 0 278 0 0 278 0 500 500 500 0 333 389 278] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/D [1058 0 R /XYZ 99.895 519.544 null] >> -% 803 0 obj +% 1061 0 obj << -/Type /FontDescriptor -/FontName /JEJNJE+Times-Roman -/FontBBox [ 0 -218 863 683] -/Flags 32 -/Ascent 683 -/CapHeight 676 -/Descent -218 -/ItalicAngle 0 -/StemV 129 -/MissingWidth 500 -/XHeight 460 -/CharSet (/A/B/F/I/L/M/P/S/a/c/e/f/g/i/l/n/o/p/r/s/space/t/three/two/zero) -/FontFile3 804 0 R +/D [1058 0 R /XYZ 344.16 356.277 null] >> -% 791 0 obj +% 83 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [268.275 538.551 275.249 550.611] -/A << /S /GoTo /D (figure.1) >> +/D [1058 0 R /XYZ 99.895 305.6 null] >> -% 792 0 obj +% 87 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [443.339 469.57 450.312 478.427] -/A << /S /GoTo /D (cite.BLACS) >> +/D [1058 0 R /XYZ 99.895 194.578 null] >> -% 793 0 obj +% 1057 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [247.969 430.955 254.943 443.014] -/A << /S /GoTo /D (section.7) >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] >> -% 11 0 obj +% 1063 0 obj << -/D [797 0 R /XYZ 150.705 675.823 null] +/Type /Page +/Contents 1064 0 R +/Resources 1062 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1052 0 R >> -% 799 0 obj +% 1065 0 obj +<< +/D [1063 0 R /XYZ 149.705 753.953 null] 
+>> +% 91 0 obj +<< +/D [1063 0 R /XYZ 150.705 543.315 null] +>> +% 95 0 obj +<< +/D [1063 0 R /XYZ 150.705 416.307 null] +>> +% 99 0 obj << -/D [797 0 R /XYZ 150.705 272.018 null] +/D [1063 0 R /XYZ 150.705 194.109 null] >> -% 796 0 obj +% 1062 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F52 585 0 R >> -/XObject << /Im2 794 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 809 0 obj +% 1070 0 obj << /Type /Page -/Contents 810 0 R -/Resources 808 0 R +/Contents 1071 0 R +/Resources 1069 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 795 0 R 806 0 R 807 0 R ] +/Parent 1052 0 R +/Annots [ 1066 0 R 1067 0 R 1068 0 R ] >> -% 795 0 obj +% 1066 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [275.119 609.432 287.074 618.438] -/A << /S /GoTo /D (cite.METIS) >> +/Rect [135.634 192.59 147.589 201.696] +/A << /S /GoTo /D (cite.DesignPatterns) >> >> -% 806 0 obj +% 1067 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [232.706 534.258 238.983 547.962] -/A << /S /GoTo /D (Hfootnote.1) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [214.484 192.69 226.439 201.696] +/A << /S /GoTo /D (cite.Sparse03) >> >> -% 807 0 obj +% 1068 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [157.808 185.645 164.084 199.235] -/A << /S /GoTo /D (Hfootnote.2) >> ->> -% 811 0 obj -<< -/D [809 0 R /XYZ 98.895 753.953 null] +/Rect [401 189.94 407.874 202] +/A << /S /GoTo /D (listing.2) >> >> -% 15 0 obj +% 1072 0 obj << -/D [809 0 R /XYZ 99.895 504.866 null] +/D [1070 0 R /XYZ 98.895 753.953 null] >> -% 815 0 obj +% 103 0 obj << -/D [809 0 R /XYZ 114.242 167.999 null] +/D [1070 0 R /XYZ 99.895 480.341 null] >> -% 816 0 obj + +endstream +endobj +1079 0 obj << -/D [809 0 R /XYZ 114.242 158.184 null] +/Length 8179 >> -% 808 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 150.705 706.129 Td [(Z)]TJ +0 g 0 G +/F84 9.9626 Tf 11.626 0 Td 
[(Double)-250(pr)18(ecision)-250(complex;)]TJ +0 g 0 G +/F75 9.9626 Tf -11.626 -18.188 Td [(LS,LD,LC,LZ)]TJ +0 g 0 G +/F84 9.9626 Tf 0.994 0 0 1 215.731 687.941 Tm [(Same)-250(numeric)-251(type)-250(as)-251(above,)-251(but)-250(with)]TJ/F145 9.9626 Tf 1 0 0 1 385.185 687.941 Tm [(psb_lpk_)]TJ/F84 9.9626 Tf 0.994 0 0 1 429.509 687.941 Tm [(integer)-250(indices.)]TJ 0.98 0 0 1 150.396 671.491 Tm [(The)-194(actual)-194(data)-194(is)-193(contained)-194(in)-194(the)-194(polymorphic)-194(component)]TJ/F145 9.9626 Tf 1 0 0 1 405.031 671.491 Tm [(a%a)]TJ/F84 9.9626 Tf 0.98 0 0 1 422.615 671.491 Tm [(of)-194(type)]TJ/F145 9.9626 Tf 1 0 0 1 454.138 671.491 Tm [(psb)]TJ +ET +q +1 0 0 1 470.457 671.69 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 473.595 671.491 Td [(T)]TJ +ET +q +1 0 0 1 479.453 671.69 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 482.591 671.491 Td [(base)]TJ +ET +q +1 0 0 1 504.14 671.69 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 507.278 671.491 Td [(sparse)]TJ +ET +q +1 0 0 1 539.288 671.69 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 542.426 671.491 Td [(mat)]TJ/F84 9.9626 Tf 0.98 0 0 1 558.117 671.491 Tm [(;)]TJ 1.02 0 0 1 150.705 659.535 Tm [(its)-247(speci\002c)-247(layout)-247(can)-247(be)-247(chosen)-247(dynamically)-247(among)-247(the)-247(pr)18(ede\002ned)-247(types,)-247(or)]TJ 1.02 0 0 1 150.705 647.58 Tm [(an)-252(entir)18(ely)-252(new)-251(storage)-252(layout)-252(can)-252(be)-251(implemented)-252(and)-252(passed)-251(to)-252(the)-252(library)]TJ 0.993 0 0 1 150.705 635.625 Tm [(at)-251(r)8(untime)-252(via)-251(the)]TJ/F145 9.9626 Tf 1 0 0 1 231.087 635.625 Tm [(psb_spasb)]TJ/F84 9.9626 Tf 0.993 0 0 1 280.647 635.625 Tm [(r)18(outine.)-312(The)-251(following)-252(very)-251(common)-251(formats)-252(ar)19(e)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 150.705 577.766 cm +0 0 343.711 38.854 re f +Q +0.95 0.95 0.95 
rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 163.108 605.96 Td [(type)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_Tspmat_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.958 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_sparse_mat\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-1050(psb_Tspmat_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 4.296 -41.43 Td [(Listing)-250(2:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(sparse)-250(matrix.)]TJ -16.998 -22.173 Td [(pr)18(ecompiled)-250(in)-250(PSBLAS)-250(and)-250(thus)-250(ar)18(e)-250(always)-250(available:)]TJ +0 g 0 G +/F75 9.9626 Tf 0.299 -16.45 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 504.189 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 170.889 503.99 Td [(T)]TJ +ET +q +1 0 0 1 178.132 504.189 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 181.121 503.99 Td [(coo)]TJ +ET +q +1 0 0 1 197.22 504.189 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 200.209 503.99 Td [(sparse)]TJ +ET +q +1 0 0 1 229.578 504.189 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 232.567 503.99 Td [(mat)]TJ +0 g 0 G +/F84 9.9626 Tf 22.137 0 Td [(Coor)18(dinate)-250(storage;)]TJ +0 g 0 G +/F75 9.9626 Tf -103.999 -18.188 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 486.001 
cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 170.889 485.802 Td [(T)]TJ +ET +q +1 0 0 1 178.132 486.001 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 181.121 485.802 Td [(csr)]TJ +ET +q +1 0 0 1 194.44 486.001 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 197.429 485.802 Td [(sparse)]TJ +ET +q +1 0 0 1 226.799 486.001 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 229.788 485.802 Td [(mat)]TJ +0 g 0 G +/F84 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(r)18(ows;)]TJ +0 g 0 G +/F75 9.9626 Tf -101.22 -18.188 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 467.813 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 170.889 467.614 Td [(T)]TJ +ET +q +1 0 0 1 178.132 467.813 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 181.121 467.614 Td [(csc)]TJ +ET +q +1 0 0 1 194.988 467.813 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 197.977 467.614 Td [(sparse)]TJ +ET +q +1 0 0 1 227.347 467.813 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 230.336 467.614 Td [(mat)]TJ +0 g 0 G +/F84 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(columns;)]TJ 1.019 0 0 1 150.396 451.164 Tm [(The)-245(inner)-244(sparse)-245(matrix)-244(has)-245(an)-245(associated)-244(state,)-245(which)-245(c)1(an)-245(take)-245(the)-244(following)]TJ 1 0 0 1 150.426 439.209 Tm [(values:)]TJ +0 g 0 G +/F75 9.9626 Tf 0.279 -16.451 Td [(Build:)]TJ +0 g 0 G +/F84 9.9626 Tf 1.014 0 0 1 183.631 422.758 Tm [(State)-246(enter)18(ed)-247(after)-246(the)-246(\002rst)-246(allocation,)-246(and)-246(befor)17(e)-246(the)-246(\002rst)-246(assembly;)-246(in)]TJ 1 0 0 1 175.611 410.803 Tm [(this)-250(state)-250(it)-250(is)-250(possible)-250(to)-250(add)-250(nonzer)18(o)-250(entries.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -18.188 Td [(Assembled:)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 209.086 392.615 Tm 
[(State)-258(enter)18(ed)-258(after)-259(the)-258(assembly;)-264(computations)-258(using)-258(the)-259(sparse)]TJ 1 0 0 1 175.611 380.66 Tm [(matrix,)-250(such)-250(as)-250(matrix-vector)-250(pr)18(oducts,)-250(ar)18(e)-250(only)-250(possible)-250(in)-250(this)-250(state;)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -18.188 Td [(Update:)]TJ +0 g 0 G +/F84 9.9626 Tf 0.989 0 0 1 191.382 362.472 Tm [(State)-254(enter)18(ed)-254(after)-254(a)-254(r)18(einitalization;)-255(this)-254(is)-254(used)-254(to)-254(handle)-255(app)1(lications)]TJ 1.015 0 0 1 175.611 350.517 Tm [(in)-245(which)-245(the)-245(same)-245(sparsity)-246(pat)1(tern)-246(is)-245(used)-245(multiple)-245(times)-245(with)-245(dif)18(fer)18(ent)]TJ 0.987 0 0 1 175.611 338.562 Tm [(coef)18(\002cients.)-316(In)-253(this)-254(state)-254(it)-254(is)-254(only)-253(possible)-254(to)-254(enter)-254(coef)18(\002cients)-254(for)-253(alr)18(eady)]TJ 1 0 0 1 175.611 326.607 Tm [(existing)-250(nonzer)18(o)-250(entries.)]TJ 1.018 0 0 1 150.396 310.156 Tm [(The)-244(only)-245(storage)-244(variant)-244(supporting)-244(the)-245(build)-244(state)-244(is)-245(COO;)-244(all)-244(other)-245(variants)]TJ 1 0 0 1 150.705 298.201 Tm [(ar)18(e)-250(obtained)-250(by)-250(conversion)-250(to/fr)18(om)-250(it.)]TJ/F75 9.9626 Tf 0 -26.326 Td [(3.2.1)-1000(Sparse)-250(Matrix)-250(Methods)]TJ 0 -18.963 Td [(3.2.2)-1000(get)]TJ +ET +q +1 0 0 1 195.029 253.111 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 198.017 252.912 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -47.312 -18.964 Td [(nr)-525(=)-525(a%get_nrows\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -17.574 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -18.188 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -18.188 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td 
[(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.338 -29.53 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -18.187 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 72.468 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(sparse)-250(matrix)]TJ/F145 9.9626 Tf 165.298 0 Td [(a)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G + -76.122 -29.888 Td [(18)]TJ +0 g 0 G +ET + +endstream +endobj +1084 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F59 812 0 R /F51 584 0 R /F83 813 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/Length 4012 >> -% 822 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 99.895 706.129 Td [(3.2.3)-1000(get)]TJ +ET +q +1 0 0 1 144.219 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 147.208 706.129 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -47.313 -19.023 Td [(nc)-525(=)-525(a%get_ncols\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -20.049 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.048 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -33.965 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.048 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 72.468 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(sparse)-250(matrix)]TJ/F145 9.9626 Tf 181.159 0 Td [(a)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ/F75 9.9626 Tf -258.857 -27.315 Td [(3.2.4)-1000(get)]TJ +ET +q +1 0 0 1 144.219 531.915 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 147.208 531.716 Td [(nnzeros)-250(\227)-250(Get)-250(number)-250(of)-250(nonzero)-250(elements)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -47.313 -19.024 Td 
[(nz)-525(=)-525(a%get_nnzeros\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -20.048 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.048 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -33.965 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.048 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 72.468 0 Td [(The)-250(number)-250(of)-250(nonzer)18(o)-250(elements)-250(stor)18(ed)-250(in)-250(sparse)-250(matrix)]TJ/F145 9.9626 Tf 251.285 0 Td [(a)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ/F75 9.9626 Tf -328.983 -22.041 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.454 -20.017 Td [(1.)]TJ +0 g 0 G + 1.02 0 0 1 124.493 342.56 Tm [(The)-348(function)-348(value)-348(is)-348(speci\002c)-348(to)-347(the)-348(storage)-348(format)-348(of)-348(matrix)]TJ/F145 9.9626 Tf 1 0 0 1 408.08 342.56 Tm [(a)]TJ/F84 9.9626 Tf 1.02 0 0 1 413.311 342.56 Tm [(;)-399(some)]TJ 1.02 0 0 1 124.802 330.604 Tm [(storage)-281(formats)-282(employ)-281(padding,)-290(thus)-282(the)-281(r)18(eturned)-282(value)-281(for)-281(the)-282(same)]TJ 1 0 0 1 124.802 318.649 Tm [(matrix)-250(may)-250(be)-250(dif)18(fer)18(ent)-250(for)-250(dif)18(fer)18(ent)-250(storage)-250(choices.)]TJ/F75 9.9626 Tf -24.907 -27.315 Td [(3.2.5)]TJ 1.02 0 0 1 129.783 291.334 Tm [(get)]TJ +ET +q +1 0 0 1 144.496 291.533 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 1.02 0 0 1 147.485 291.334 Tm [(size)-337(\227)-336(Get)-337(maximum)-336(number)-337(of)-336(nonzero)-337(elements)-336(in)-337(a)-336(sparse)]TJ 1 0 0 1 129.783 279.379 Tm [(matrix)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -29.888 -19.024 Td [(maxnz)-525(=)-525(a%get_size\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 
-20.048 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.048 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -33.965 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.048 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 0.99 0 0 1 172.363 132.281 Tm [(The)-254(maximum)-254(number)-254(of)-253(nonzer)18(o)-254(elements)-254(that)-254(can)-254(be)-253(stor)18(ed)]TJ 1 0 0 1 124.802 120.326 Tm [(in)-250(sparse)-250(matrix)]TJ/F145 9.9626 Tf 73.294 0 Td [(a)]TJ/F84 9.9626 Tf 7.721 0 Td [(using)-250(its)-250(curr)18(ent)-250(memory)-250(allocation.)]TJ +0 g 0 G + 60.953 -29.888 Td [(19)]TJ +0 g 0 G +ET + +endstream +endobj +1090 0 obj << -/Type /Page -/Contents 823 0 R -/Resources 821 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 818 0 R 819 0 R ] +/Length 4548 >> -% 826 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 150.705 706.129 Td [(3.2.6)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf 0 -20.135 Td [(memory_size)-525(=)-525(a%sizeof\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -23.732 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -22.343 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.338 -35.687 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 72.468 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F75 9.9626 Tf -72.468 -30.58 Td [(3.2.7)-1000(get)]TJ +ET +q +1 0 0 1 195.029 517.21 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 198.017 517.011 Td 
[(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -47.574 -20.135 Td [(write)]TJ +0 g 0 G + [(\050)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(,)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(a%get_fmt\050\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0.262 -24.336 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -22.343 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.338 -35.686 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 1.006 0 0 1 223.093 357.869 Tm [(A)-249(short)-249(string)-249(describing)-249(the)-249(dynamic)-248(type)-249(of)-249(the)-249(matrix.)-310(Pr)18(e-)]TJ 1 0 0 1 175.611 345.914 Tm [(de\002ned)-250(values)-250(include)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 102.415 0 Td [(NULL)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(,)]TJ/F145 9.9626 Tf 4.982 0 Td [(COO)]TJ/F84 9.9626 Tf 15.691 0 Td [(,)]TJ/F145 9.9626 Tf 4.981 0 Td [(CSR)]TJ/F84 9.9626 Tf 18.182 0 Td [(and)]TJ/F145 9.9626 Tf 19.357 0 Td [(CSC)]TJ/F84 9.9626 Tf 15.691 0 Td [(.)]TJ/F75 9.9626 Tf -227.126 -30.581 Td [(3.2.8)-1000(is)]TJ +ET +q +1 0 0 1 188.931 315.533 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 191.92 315.333 Td [(bld,)-250(is)]TJ +ET +q +1 0 0 1 220.732 315.533 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 223.721 315.333 Td [(upd,)-250(is)]TJ +ET +q +1 0 0 1 255.302 315.533 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 258.291 315.333 Td [(asb)-250(\227)-250(Status)-250(check)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 
-107.586 -20.135 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_bld\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_upd\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_asb\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0 g 0 G +/F75 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -22.343 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ +0 g 0 G + -56.338 -35.686 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 0.995 0 0 1 223.093 132.281 Tm [(A)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F145 9.9626 Tf 1 0 0 1 233.29 132.281 Tm [(logical)]TJ +0 g 0 G +/F84 9.9626 Tf 0.995 0 0 1 272.387 132.281 Tm [(value)-251(indicating)-250(whether)-251(the)-250(matrix)-251(is)-251(in)-250(the)-251(Build,)]TJ 1 0 0 1 175.611 120.326 Tm [(Update)-250(or)-250(Assembled)-250(state,)-250(r)18(espectively)111(.)]TJ +0 g 0 G + 141.968 -29.888 Td [(20)]TJ +0 g 0 G +ET + +endstream +endobj +1094 0 obj << -/Producer (ESP Ghostscript 815.03) -/CreationDate (D:20070123225315) -/ModDate (D:20070123225315) +/Length 6058 >> -% 827 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 99.895 706.129 Td [(3.2.9)-1000(is)]TJ +ET +q +1 0 0 1 138.122 706.328 cm +[]0 d 0 J 0.398 w 0 0 
m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 141.111 706.129 Td [(lower)55(,)-250(is)]TJ +ET +q +1 0 0 1 179.895 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 182.884 706.129 Td [(upper)55(,)-250(is)]TJ +ET +q +1 0 0 1 222.774 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 225.763 706.129 Td [(triangle,)-250(is)]TJ +ET +q +1 0 0 1 274.5 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 277.488 706.129 Td [(unit)-250(\227)-250(Format)-250(check)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -177.593 -19.573 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_triangle\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_upper\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_lower\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.956 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_unit\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0 g 0 G +/F75 9.9626 Tf 0 -22.86 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -21.183 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -34.816 Td 
[(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 0.982 0 0 1 172.283 517.51 Tm [(A)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F145 9.9626 Tf 1 0 0 1 182.393 517.51 Tm [(logical)]TJ +0 g 0 G +/F84 9.9626 Tf 0.982 0 0 1 221.503 517.51 Tm [(value)-255(indicating)-256(whether)-255(the)-255(matrix)-255(is)-256(triangular;)-255(if)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 505.555 Tm [(is_triangle\050\051)]TJ/F84 9.9626 Tf 70.478 0 Td [(r)18(eturns)]TJ/F145 9.9626 Tf 34.224 0 Td [(.true.)]TJ/F84 9.9626 Tf 33.865 0 Td [(check)-249(also)-250(if)-249(it)-249(is)-249(lower)74(,)-250(upper)-249(and)-249(with)-249(a)]TJ -138.567 -11.955 Td [(unit)-250(\050i.e.)-310(assumed\051)-250(diagonal.)]TJ/F75 9.9626 Tf -24.907 -28.929 Td [(3.2.10)-1000(cscnv)-250(\227)-250(Convert)-250(to)-250(a)-250(dif)18(ferent)-250(storage)-250(format)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 0 -19.573 Td [(call)]TJ +0 g 0 G + [-1050(a%cscnv\050b,info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mold,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(dupl]\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-1050(a%cscnv\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mold,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(dupl]\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.861 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -21.183 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td 
[(the)-250(sparse)-250(matrix.)]TJ 14.555 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ -159.689 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.918 -33.138 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 24.907 0 Td [(a)-250(string)-250(r)18(equesting)-250(a)-250(new)-250(format.)]TJ -0.309 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.598 -21.183 Td [(mold)]TJ +0 g 0 G +/F84 9.9626 Tf 0.994 0 0 1 128.677 277.73 Tm [(a)-251(variable)-251(of)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 1 0 0 1 185.176 277.73 Tm [(class)]TJ +0 g 0 G + [(\050psb_T_base_sparse_mat\051)]TJ/F84 9.9626 Tf 0.994 0 0 1 334.109 277.73 Tm [(r)18(equesting)-251(a)-250(new)-251(format.)]TJ 1 0 0 1 124.493 265.775 Tm [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.598 -21.182 Td [(dupl)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 126.456 244.593 Tm [(an)-199(integer)-198(value)-199(speci\002ng)-199(how)-198(to)-199(handle)-199(duplicates)-198(\050see)-199(Named)-199(Constants)]TJ 1 0 0 1 124.802 232.637 Tm [(below\051)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -22.86 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(b,a)]TJ +0 g 0 G +/F84 9.9626 Tf 18.152 0 Td [(A)-250(copy)-250(of)]TJ/F145 9.9626 Tf 45.37 0 Td [(a)]TJ/F84 9.9626 Tf 7.721 0 Td [(with)-250(a)-250(new)-250(storage)-250(format.)]TJ -46.725 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -184.596 -21.183 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ 0.98 0 0 1 99.587 132.281 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 118.361 132.281 Tm [(mold)]TJ/F84 9.9626 Tf 0.98 0 0 1 141.713 132.281 Tm 
[(ar)18(guments)-249(may)-248(be)-249(employed)-249(to)-249(interface)-249(with)-249(special)-249(devices,)-250(such)-249(as)]TJ 1 0 0 1 99.895 120.326 Tm [(GPUs)-250(and)-250(other)-250(accelerators.)]TJ +0 g 0 G + 166.875 -29.888 Td [(21)]TJ +0 g 0 G +ET + +endstream +endobj +1098 0 obj << -/Type /ExtGState -/OPM 1 +/Length 5075 >> +stream +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 150.705 706.129 Td [(3.2.11)-1000(csclip)-250(\227)-250(Reduce)-250(to)-250(a)-250(submatrix)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 20.921 -18.964 Td [(call)]TJ +0 g 0 G + [-525(a%csclip\050b,info[,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 15.691 -11.955 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(imin,imax,jmin,jmax,rscale,cscale]\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 165.649 654.656 Tm [(Returns)-212(the)-213(submatrix)]TJ/F145 9.9626 Tf 1 0 0 1 263.207 654.656 Tm [(A\050imin:imax,jmin:jmax\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 378.274 654.656 Tm [(,)-221(optionally)-212(r)18(escaling)-212(r)18(ow/-)]TJ 1 0 0 1 150.705 642.7 Tm [(col)-250(indices)-250(to)-250(the)-250(range)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F145 9.9626 Tf 103.849 0 Td [(1)]TJ +0 g 0 G + [(:imax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(imin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(,)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:jmax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(jmin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F84 9.9626 Tf 141.219 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -245.068 -18.834 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.38 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.38 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td 
[(the)-250(sparse)-250(matrix.)]TJ 14.556 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ -159.689 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.917 -31.335 Td [(imin,imax,jmin,jmax)]TJ +0 g 0 G +/F84 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -75.287 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.598 -19.38 Td [(rscale,cscale)]TJ +0 g 0 G +/F84 9.9626 Tf 59.526 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F75 9.9626 Tf -59.526 -20.554 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.38 Td [(b)]TJ +0 g 0 G +/F84 9.9626 Tf 10.68 0 Td [(A)-250(copy)-250(of)-250(a)-250(submatrix)-250(of)]TJ/F145 9.9626 Tf 111.321 0 Td [(a)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ -102.713 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -184.596 -19.38 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F75 9.9626 Tf -23.8 -26.885 Td [(3.2.12)-1000(clean)]TJ +ET +q +1 0 0 1 209.962 400.571 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 212.951 400.372 Td [(zeros)-250(\227)-250(Eliminate)-250(zero)-250(coef)18(\002cients)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -62.246 -18.964 Td [(call)]TJ +0 g 0 G + [-525(a%clean_zeros\050info\051)]TJ/F84 9.9626 Tf 14.944 -11.955 Td [(Eliminates)-250(zer)18(o)-250(coef)18(\002cients)-250(explicitly)-250(stor)18(ed)-250(in)-250(the)-250(input)-250(matrix.)]TJ +0 g 0 G +/F75 9.9626 Tf -14.944 -20.554 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.38 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.38 Td 
[(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.555 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ -159.689 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.917 -32.51 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.379 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(matrix)]TJ/F145 9.9626 Tf 50.659 0 Td [(a)]TJ/F84 9.9626 Tf 7.721 0 Td [(without)-250(zer)18(o)-250(coef)18(\002cients.)]TJ -43.516 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -184.596 -19.38 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F75 9.9626 Tf -23.8 -20.554 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -18.834 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 175.611 163.616 Tm [(Depending)-242(on)-243(the)-242(internal)-243(storage)-242(format,)-245(ther)18(e)-242(may)-243(still)-242(be)-243(some)-242(amount)]TJ 1 0 0 1 175.611 151.661 Tm [(of)-250(zer)18(o)-250(padding)-250(in)-250(the)-250(output.)]TJ +0 g 0 G + -12.453 -19.38 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 175.223 132.281 Tm [(Any)-388(explicit)-389(zer)18(os)-389(on)-388(the)-388(main)-389(diagonal)-388(ar)17(e)-388(always)-389(kept)-388(in)-388(the)-389(data)]TJ 1 0 0 1 175.611 120.326 Tm [(str)8(uctur)18(e.)]TJ +0 g 0 G + 141.968 -29.888 Td [(22)]TJ +0 g 0 G +ET endstream endobj -836 0 obj +1104 0 obj << -/Length 4927 +/Length 4701 >> stream 0 g 0 G 0 g 0 G +BT +/F75 9.9626 Tf 99.895 706.129 Td [(3.2.13)-1000(get)]TJ +ET +q +1 0 0 1 149.2 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 152.189 706.129 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -52.294 -19.329 Td [(call)]TJ +0 g 0 G + 
[-525(a%get_diag\050d,info\051)]TJ/F84 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ +0 g 0 G +/F75 9.9626 Tf -14.944 -20.49 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -20.679 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.555 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ -159.689 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.918 -34.627 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(d)]TJ +0 g 0 G +/F84 9.9626 Tf 10.68 0 Td [(A)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ 13.838 -11.955 Td [(A)-250(one-dimensional)-250(array)-250(of)-250(the)-250(appr)18(opriate)-250(type.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.518 -20.679 Td [(info)]TJ 0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F75 9.9626 Tf -23.801 -28.213 Td [(3.2.14)-1000(clip)]TJ +ET +q +1 0 0 1 152.508 472.944 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(Data)-250(management)-250(and)-250(auxiliary)-250(routines)]TJ +/F75 9.9626 Tf 155.497 472.745 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -55.602 -19.329 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 190.375 0 Td [(including:)]TJ + [-525(a%clip_diag\050b,info\051)]TJ/F84 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)]TJ/F145 9.9626 Tf 79.73 0 Td [(a)]TJ/F84 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ 0 g 0 G - -154.569 -24.208 Td [(\225)]TJ +/F75 9.9626 Tf -102.395 -20.49 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Parallel)-250(envir)18(onment)-250(management)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 
0 g 0 G - 0 -18.081 Td [(\225)]TJ +/F75 9.9626 Tf -29.44 -20.679 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(Communication)-250(descriptors)-250(allocation;)]TJ 0 g 0 G - 0 -18.082 Td [(\225)]TJ + 0 -20.679 Td [(a)]TJ 0 g 0 G - [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(allocation;)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.555 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ -159.689 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G - 0 -18.081 Td [(\225)]TJ +/F75 9.9626 Tf -77.918 -34.627 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(build)-250(and)-250(update;)]TJ 0 g 0 G - 0 -18.082 Td [(\225)]TJ + 0 -20.679 Td [(b)]TJ 0 g 0 G - [-500(Sparse)-250(matrix)-250(and)-250(data)-250(distribution)-250(pr)18(epr)18(ocessing.)]TJ +/F84 9.9626 Tf 10.68 0 Td [(A)-250(copy)-250(of)]TJ/F145 9.9626 Tf 45.37 0 Td [(a)]TJ/F84 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ -39.253 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -35.806 -24.207 Td [(Preconditioner)-250(routines)]TJ +/F75 9.9626 Tf -184.596 -20.679 Td [(info)]TJ 0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F75 9.9626 Tf -23.801 -28.213 Td [(3.2.15)-1000(tril)-250(\227)-250(Return)-250(the)-250(lower)-250(triangle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -24.208 Td [(Iterative)-250(methods)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 20.922 -19.329 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 84.951 0 Td [(a)-250(subset)-250(of)-250(Krylov)-250(subspace)-250(iterative)-250(methods)]TJ -84.951 -23.137 Td [(The)-262(following)-263(naming)-262(scheme)-262(has)-262(been)-263(adopted)-262(for)-262(all)-263(the)-262(symbols)-262(internally)]TJ 0 -11.955 Td 
[(de\002ned)-250(in)-250(the)-250(PSBLAS)-250(softwar)18(e)-250(package:)]TJ + [-525(a%tril\050l,info[,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 13.888 -23.137 Td [(\225)]TJ + 15.691 -11.955 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(diag,imin,imax,jmin,jmax,rscale,cscale,u]\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 114.839 185.405 Tm [(Returns)-266(the)-266(lower)-266(triangular)-266(part)-266(of)-266(submatrix)]TJ/F145 9.9626 Tf 1 0 0 1 327.244 185.405 Tm [(A\050imin:imax,jmin:jmax\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 442.311 185.405 Tm [(,)]TJ 0.98 0 0 1 99.895 173.45 Tm [(optionally)-194(r)19(escaling)-194(r)18(ow/col)-194(indices)-194(to)-194(the)-193(range)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F145 9.9626 Tf 1 0 0 1 308.511 173.45 Tm [(1)]TJ +0 g 0 G + [(:imax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(imin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(,)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:jmax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ 0 g 0 G - [-500(all)-250(symbols)-250(\050i.e.)-310(subr)18(outine)-250(names,)-250(data)-250(types...\051)-310(ar)18(e)-250(pr)18(e\002xed)-250(by)]TJ/F59 9.9626 Tf 294.184 0 Td [(psb_)]TJ + [(jmin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ 0 g 0 G -/F54 9.9626 Tf -294.184 -24.208 Td [(\225)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G - [-500(all)-250(data)-250(type)-250(names)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F59 9.9626 Tf 166.604 0 Td [(_type)]TJ +/F84 9.9626 Tf -208.616 -11.955 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(upper)-250(triangle.)]TJ 0 g 0 G -/F54 9.9626 Tf -166.604 -24.208 Td [(\225)]TJ +/F75 9.9626 Tf 0 -20.49 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(all)-250(constants)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F59 9.9626 Tf 135.59 0 Td [(_)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 
9.9626 Tf -135.59 -24.208 Td [(\225)]TJ +/F75 9.9626 Tf -29.44 -20.679 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(all)-279(top-level)-279(subr)18(outine)-279(names)-279(follow)-279(the)-279(r)8(ule)]TJ/F59 9.9626 Tf 216.11 0 Td [(psb_xxname)]TJ/F54 9.9626 Tf 55.083 0 Td [(wher)18(e)]TJ/F59 9.9626 Tf 30.187 0 Td [(xx)]TJ/F54 9.9626 Tf 13.241 0 Td [(can)]TJ -303.602 -11.955 Td [(be)-250(either:)]TJ 0 g 0 G -/F51 9.9626 Tf 11.955 -24.208 Td [(\226)]TJ +/F84 9.9626 Tf 166.875 -29.888 Td [(23)]TJ 0 g 0 G -/F59 9.9626 Tf 9.963 0 Td [(ge)]TJ/F54 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(dense)-250(data,)]TJ +ET + +endstream +endobj +1108 0 obj +<< +/Length 6535 +>> +stream 0 g 0 G -/F51 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ 0 g 0 G -/F59 9.9626 Tf 9.963 0 Td [(sp)]TJ/F54 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(sparse)-250(data,)]TJ 0 g 0 G -/F51 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(a)]TJ 0 g 0 G -/F59 9.9626 Tf 9.963 0 Td [(cd)]TJ/F54 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(communication)-250(descriptor)-250(\050see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3)]TJ +/F84 9.9626 Tf 9.962 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.556 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ -159.689 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G - [(\051.)]TJ -32.378 -24.208 Td [(For)-215(example)-215(the)]TJ/F59 9.9626 Tf 72.515 0 Td [(psb_geins)]TJ/F54 9.9626 Tf 47.073 0 Td [(,)]TJ/F59 9.9626 Tf 4.704 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.218 0 Td [(and)]TJ/F59 9.9626 Tf 19.011 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf 49.218 0 Td [(perform)-215(the)-215(same)]TJ -241.739 -11.955 Td [(action)-247(\050see)]TJ -0 0 1 rg 0 0 1 RG - [-246(6)]TJ +/F75 9.9626 Tf -77.917 -30.706 Td 
[(diag)]TJ 0 g 0 G - [(\051)-247(on)-246(dense)-247(matrices,)-247(sparse)-247(matric)1(es)-247(and)-247(communication)-246(de-)]TJ 0 -11.956 Td [(scriptors)-222(r)18(espectively)111(.)-301(Interface)-222(overloading)-223(allows)-222(the)-222(usage)-222(of)-223(the)-222(same)]TJ 0 -11.955 Td [(subr)18(outine)-250(names)-250(for)-250(both)-250(r)18(eal)-250(and)-250(complex)-250(data.)]TJ -24.907 -23.137 Td [(In)-288(the)-288(description)-288(of)-289(the)-288(subr)18(outines,)-297(ar)18(guments)-289(or)-288(ar)18(gument)-288(entries)-288(ar)18(e)-288(clas-)]TJ 0 -11.955 Td [(si\002ed)-250(as:)]TJ +/F84 9.9626 Tf 1.02 0 0 1 175.611 651.513 Tm [(Include)-258(diagonals)-258(up)-258(to)-258(this)-258(one;)]TJ/F145 9.9626 Tf 1 0 0 1 325.586 651.513 Tm [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -23.137 Td [(global)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(For)-270(input)-270(ar)18(guments,)-275(the)-270(value)-271(must)-270(be)-270(the)-270(same)-270(on)-270(all)-270(pr)18(ocesses)-270(par)18(-)]TJ -8.856 -11.955 Td [(ticipating)-276(in)-277(the)-276(subr)18(outine)-277(call;)-289(for)-277(output)-276(ar)18(guments)-277(the)-276(value)-277(is)-276(guar)18(-)]TJ 0 -11.955 Td [(anteed)-250(to)-250(be)-250(the)-250(same.)]TJ +/F84 9.9626 Tf 1.02 0 0 1 359.59 651.513 Tm [(means)-258(the)-258(\002rst)-258(super)18(diagonal,)]TJ/F145 9.9626 Tf 1 0 0 1 175.611 639.558 Tm [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=-)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.208 Td [(local)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(Each)-250(pr)18(ocess)-250(has)-250(its)-250(own)-250(value\050s\051)-250(independently)111(.)]TJ -26.56 -23.137 Td [(T)92(o)-250(\002nish)-250(our)-250(general)-250(description,)-250(we)-250(de\002ne)-250(a)-250(version)-250(string)-250(with)-250(the)-250(constant)]TJ/F59 9.9626 Tf 122.168 -24.059 Td [(psb_version_string_)]TJ/F54 
9.9626 Tf -122.168 -24.059 Td [(whose)-250(curr)18(ent)-250(value)-250(is)]TJ/F59 9.9626 Tf 101.857 0 Td [(3.8.0)]TJ +/F84 9.9626 Tf 39.104 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ 0 g 0 G -/F54 9.9626 Tf 67.508 -29.888 Td [(5)]TJ +/F75 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ 0 g 0 G -ET - -endstream -endobj -841 0 obj -<< -/Length 8378 ->> -stream +/F84 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -75.287 -11.955 Td [(T)90(ype:)-310(optional.)]TJ 0 g 0 G +/F75 9.9626 Tf -24.598 -18.75 Td [(rscale,cscale)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(2.3)-1000(Application)-250(structure)]TJ/F54 9.9626 Tf 0 -19.381 Td [(The)-244(main)-244(underlyi)1(ng)-244(principle)-244(of)-244(the)-244(PSBLAS)-243(library)-244(is)-244(that)-244(the)-243(library)-244(objects)]TJ 0 -11.956 Td [(ar)18(e)-236(cr)18(eated)-235(and)-236(exist)-235(with)-236(r)18(efer)18(ence)-235(to)-236(a)-235(discr)18(etized)-236(space)-236(t)1(o)-236(which)-236(ther)18(e)-235(corr)18(e-)]TJ 0 -11.955 Td [(sponds)-258(an)-257(index)-258(space)-257(and)-258(a)-258(matrix)-257(sparsity)-258(pattern.)-332(As)-258(an)-258(example,)-259(consider)]TJ 0 -11.955 Td [(a)-310(cell-center)18(ed)-309(\002nite-volume)-310(discr)18(etization)-310(of)-309(the)-310(Navier)18(-Stokes)-310(e)1(quations)-310(on)]TJ 0 -11.955 Td [(a)-234(simulation)-235(domain;)-239(the)-234(index)-235(space)-234(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 185.595 0 Td [(n)]TJ/F54 9.9626 Tf 7.998 0 Td [(is)-234(isomorphic)-235(to)-234(the)-234(set)-235(of)-234(cell)-234(cen-)]TJ -193.593 -11.955 Td [(ters,)-210(wher)18(eas)-200(the)-201(pattern)-200(of)-200(the)-201(associated)-200(linear)-200(system)-200(matrix)-201(is)-200(isomorphic)-200(to)]TJ 0 -11.956 Td [(the)-294(adjacency)-294(graph)-294(imposed)-294(on)-294(the)-294(discr)18(et)1(ization)-294(mesh)-294(by)-294(the)-294(discr)18(etization)]TJ 0 -11.955 Td 
[(stencil.)]TJ 14.944 -12.17 Td [(Thus)-343(the)-343(\002rst)-343(or)18(der)-344(of)-343(business)-343(is)-343(to)-343(establish)-343(an)-343(index)-343(space,)-367(and)-343(this)-343(is)]TJ -14.944 -11.955 Td [(done)-287(with)-287(a)-287(call)-287(to)]TJ/F59 9.9626 Tf 85.52 0 Td [(psb_cdall)]TJ/F54 9.9626 Tf 49.932 0 Td [(in)-287(which)-287(we)-287(specify)-287(the)-287(size)-287(of)-287(the)-287(index)-287(space)]TJ/F52 9.9626 Tf -135.328 -11.956 Td [(n)]TJ/F54 9.9626 Tf 8.041 0 Td [(and)-238(the)-239(allocation)-238(of)-239(the)-239(ele)1(ments)-239(of)-239(the)-238(index)-239(space)-238(to)-239(the)-238(various)-239(pr)18(ocesses)]TJ -8.165 -11.955 Td [(making)-250(up)-250(the)-250(MPI)-250(\050virtual\051)-250(parallel)-250(machine.)]TJ 14.944 -12.17 Td [(The)-366(index)-367(space)-366(is)-366(partitioned)-367(among)-366(pr)18(ocesses,)-396(and)-366(this)-366(cr)18(eates)-367(a)-366(map-)]TJ -14.944 -11.956 Td [(ping)-301(fr)18(om)-300(the)-301(\223global\224)-301(numbering)-300(1)-180(.)-191(.)-192(.)]TJ/F52 9.9626 Tf 176.584 0 Td [(n)]TJ/F54 9.9626 Tf 8.659 0 Td [(to)-301(a)-300(numbering)-301(\223local\224)-301(to)-301(each)-300(pr)18(o-)]TJ -185.243 -11.955 Td [(cess;)-230(each)-221(pr)18(ocess)]TJ/F52 9.9626 Tf 79.682 0 Td [(i)]TJ/F54 9.9626 Tf 5.162 0 Td [(will)-221(own)-220(a)-221(certain)-220(subset)-221(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 130.532 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.537 -1.649 Td [(i)]TJ/F54 9.9626 Tf 2.775 3.143 Td [(,)-226(each)-221(element)-221(of)-220(which)]TJ -241.352 -11.955 Td [(corr)18(esponds)-258(to)-259(a)-258(certain)-258(element)-258(of)-259(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 177.035 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 0 Td [(.)-335(The)-258(user)-259(doe)1(s)-259(not)-258(set)-258(explicitly)-259(this)]TJ -182.699 -11.955 Td [(mapping;)-225(when)-212(the)-212(application)-212(needs)-213(to)-212(indicate)-212(to)-212(which)-213(el)1(ement)-213(of)-212(the)-212(index)]TJ 0 
-11.955 Td [(space)-305(a)-306(certain)-305(item)-306(is)-305(r)18(elated,)-320(such)-305(as)-306(the)-305(r)18(ow)-306(and)-305(column)-306(index)-305(of)-306(a)-305(matrix)]TJ 0 -11.956 Td [(coef)18(\002cient,)-283(it)-276(does)-277(so)-276(in)-277(the)-276(\223global\224)-277(numb)1(ering,)-284(and)-276(the)-276(library)-277(will)-276(translate)]TJ 0 -11.955 Td [(into)-250(the)-250(appr)18(opriate)-250(\223local\224)-250(numbering.)]TJ 14.944 -12.17 Td [(For)-324(a)-325(given)-324(index)-324(space)-325(1)-179(.)-191(.)-192(.)]TJ/F52 9.9626 Tf 129.74 0 Td [(n)]TJ/F54 9.9626 Tf 8.895 0 Td [(ther)18(e)-324(ar)18(e)-325(many)-324(possible)-324(associated)-325(topolo-)]TJ -153.579 -11.956 Td [(gies,)-213(i.e.)-295(many)-204(dif)18(fer)18(ent)-204(discr)18(etization)-204(stencils;)-220(thus)-204(the)-204(description)-204(of)-204(the)-204(index)]TJ 0 -11.955 Td [(space)-277(is)-278(not)-277(completed)-278(until)-277(the)-277(user)-278(has)-277(de\002ned)-278(a)-277(sparsity)-278(p)1(attern,)-285(either)-277(ex-)]TJ 0 -11.955 Td [(plicitly)-263(thr)18(ough)]TJ/F59 9.9626 Tf 71.63 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf 49.698 0 Td [(or)-264(im)1(plicitly)-264(thr)18(ough)]TJ/F59 9.9626 Tf 95.326 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)-351(T)1(he)-264(descriptor)-263(is)]TJ -263.727 -11.955 Td [(\002nalized)-225(with)-225(a)-225(call)-226(to)]TJ/F59 9.9626 Tf 98.787 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 49.316 0 Td [(and)-225(a)-225(sparse)-225(matrix)-226(with)-225(a)-225(call)-225(to)]TJ/F59 9.9626 Tf 146.044 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)]TJ -341.22 -11.955 Td [(After)]TJ/F59 9.9626 Tf 26.16 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 50.21 0 Td [(each)-315(pr)18(ocess)]TJ/F52 9.9626 Tf 59.13 0 Td [(i)]TJ/F54 9.9626 Tf 6.101 0 Td [(will)-315(have)-315(de\002ned)-315(a)-314(set)-315(of)-315(\223halo\224)-315(\050or)-315(\223ghost\224\051)]TJ -141.601 -11.955 Td [(indices)]TJ/F52 9.9626 Tf 34.731 0 Td [(n)]TJ/F54 9.9626 Tf 5.663 -1.495 Td 
[(r)18(ow)]TJ/F52 5.9776 Tf 17.538 -1.648 Td [(i)]TJ/F85 10.3811 Tf 5.211 3.143 Td [(+)]TJ/F54 9.9626 Tf 10.506 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -3.831 Td [(col)]TJ/F52 5.9776 Tf 12.794 -1.648 Td [(i)]TJ/F54 9.9626 Tf 2.775 5.479 Td [(,)-377(denoting)-352(elements)-351(of)-352(the)-352(index)-351(space)-352(that)-351(ar)18(e)]TJ/F52 9.9626 Tf 215.582 0 Td [(not)]TJ/F54 9.9626 Tf -330.431 -13.79 Td [(assigned)-289(to)-290(pr)18(ocess)]TJ/F52 9.9626 Tf 88.744 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(;)-309(however)-290(t)1(he)-290(variables)-289(associated)-290(with)-289(them)-290(ar)18(e)-289(needed)]TJ -91.708 -11.955 Td [(to)-289(complete)-289(computations)-289(associated)-289(with)-290(the)-289(sparse)-289(matrix)]TJ/F52 9.9626 Tf 269.662 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(,)-299(and)-289(thus)-289(they)]TJ -276.98 -11.955 Td [(have)-266(to)-266(be)-266(fetched)-265(fr)18(om)-266(\050neighbouring\051)-266(pr)18(ocesses.)-358(The)-266(descriptor)-265(of)-266(the)-266(index)]TJ 0 -11.956 Td [(space)-294(is)-293(built)-294(exactly)-294(for)-293(the)-294(purpose)-294(of)-293(pr)18(operly)-294(sequencing)-294(the)-293(communica-)]TJ 0 -11.955 Td [(tion)-250(steps)-250(r)18(equir)18(ed)-250(to)-250(achieve)-250(this)-250(objective.)]TJ 14.944 -12.17 Td [(A)-197(simple)-197(application)-197(str)8(uctur)18(e)-197(will)-197(walk)-197(thr)18(ough)-197(the)-197(index)-197(space)-197(allocation,)]TJ -14.944 -11.956 Td [(matrix/vector)-250(cr)18(eation)-250(and)-250(linear)-250(system)-250(solution)-250(as)-250(follows:)]TJ +/F84 9.9626 Tf 59.526 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ 0 g 0 G - 12.453 -20.571 Td [(1.)]TJ +/F75 9.9626 Tf -59.526 -18.979 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F59 9.9626 Tf 171.465 0 Td [(psb_init)]TJ 0 g 0 G -/F54 9.9626 Tf -171.465 -20.787 Td [(2.)]TJ + 0 
-18.75 Td [(l)]TJ 0 g 0 G - [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F59 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ +/F84 9.9626 Tf 7.91 0 Td [(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F145 9.9626 Tf 137.333 0 Td [(a)]TJ/F84 9.9626 Tf 5.231 0 Td [(.)]TJ -125.956 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf -130.489 -20.788 Td [(3.)]TJ +/F75 9.9626 Tf -184.596 -18.75 Td [(u)]TJ 0 g 0 G - [-500(Allocate)-221(sparse)-221(matrix)-221(and)-221(dense)-221(vectors)-220(with)]TJ/F59 9.9626 Tf 215.843 0 Td [(psb_spall)]TJ/F54 9.9626 Tf 49.274 0 Td [(and)]TJ/F59 9.9626 Tf 19.068 0 Td [(psb_geall)]TJ +/F84 9.9626 Tf 10.74 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F145 9.9626 Tf 184.485 0 Td [(a)]TJ/F84 9.9626 Tf 5.231 0 Td [(.)]TJ -175.938 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf -284.185 -20.787 Td [(4.)]TJ +/F75 9.9626 Tf -184.596 -18.749 Td [(info)]TJ 0 g 0 G - [-500(Loop)-320(over)-320(all)-320(local)-320(r)18(ows,)-338(generate)-320(matrix)-320(and)-320(vector)-320(entries,)-337(and)-320(insert)]TJ 12.454 -11.955 Td [(them)-250(with)]TJ/F59 9.9626 Tf 47.849 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.564 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F75 9.9626 Tf -23.8 -26.59 Td [(3.2.16)-1000(triu)-250(\227)-250(Return)-250(the)-250(upper)-250(triangle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf -129.225 -20.787 Td [(5.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 20.921 -18.964 Td [(call)]TJ 0 g 0 G - [-500(Assemble)-250(the)-250(various)-250(entities:)]TJ + [-525(a%triu\050u,info[,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 17.774 -20.787 Td [(\050a\051)]TJ + 
15.691 -11.955 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf 16.597 0 Td [(psb_cdasb)]TJ + [-525(diag,imin,imax,jmin,jmax,rscale,cscale,l]\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 165.649 414.476 Tm [(Returns)-247(the)-247(upper)-247(triangular)-246(part)-247(of)-247(submatrix)]TJ/F145 9.9626 Tf 1 0 0 1 378.053 414.476 Tm [(A\050imin:imax,jmin:jmax\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 493.121 414.476 Tm [(,)]TJ 0.98 0 0 1 150.705 402.521 Tm [(optionally)-194(r)19(escaling)-194(r)18(ow/col)-194(indices)-194(to)-193(the)-194(range)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F145 9.9626 Tf 1 0 0 1 359.321 402.521 Tm [(1)]TJ 0 g 0 G -/F54 9.9626 Tf -17.125 -16.371 Td [(\050b\051)]TJ + [(:imax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ 0 g 0 G -/F59 9.9626 Tf 17.125 0 Td [(psb_spasb)]TJ + [(imin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ 0 g 0 G -/F54 9.9626 Tf -16.039 -16.371 Td [(\050c\051)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F59 9.9626 Tf 16.039 0 Td [(psb_geasb)]TJ + [(,)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 122.541 -29.888 Td [(6)]TJ + [(:jmax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ 0 g 0 G -ET - -endstream -endobj -854 0 obj -<< -/Length 7484 ->> -stream + [(jmin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 500.54 402.521 Tm [(,)]TJ 1 0 0 1 150.705 390.565 Tm [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(lower)-250(triangle.)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 112.349 706.129 Td [(6.)]TJ +/F75 9.9626 Tf 0 -17.574 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Choose)-301(the)-300(pr)18(econditioner)-301(to)-300(be)-301(used)-300(with)]TJ/F59 9.9626 Tf 205.537 0 Td [(prec%init)]TJ/F54 9.9626 Tf 50.068 0 Td [(and)-301(build)-300(it)-301(with)]TJ/F59 9.9626 Tf -243.152 -11.955 Td [(prec%build)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 52.303 3.616 Td [(3)]TJ +/F84 9.9626 Tf 29.439 0 
Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 4.284 -3.616 Td [(.)]TJ +/F75 9.9626 Tf -29.439 -18.75 Td [(On)-250(Entry)]TJ 0 g 0 G - -69.04 -22.307 Td [(7.)]TJ 0 g 0 G - [-500(Call)-190(the)-190(iterative)-190(driver)]TJ/F59 9.9626 Tf 115.326 0 Td [(psb_krylov)]TJ/F54 9.9626 Tf 54.196 0 Td [(with)-190(the)-190(method)-190(of)-190(choice,)-202(e.g.)]TJ/F59 9.9626 Tf 134.982 0 Td [(bicgstab)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -358.801 -21.712 Td [(This)-250(is)-250(the)-250(str)8(uctur)18(e)-250(of)-250(the)-250(sample)-250(pr)18(ograms)-250(in)-250(the)-250(dir)18(ectory)]TJ/F59 9.9626 Tf 266.418 0 Td [(test/pargen/)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)]TJ -314.238 -12.551 Td [(For)-257(a)-258(simulation)-257(in)-257(which)-257(the)-258(same)-257(discr)18(etization)-257(mesh)-257(is)-258(used)-257(over)-257(multi-)]TJ -14.944 -11.955 Td [(ple)-250(time)-250(steps,)-250(the)-250(following)-250(str)8(uctur)18(e)-250(may)-250(be)-250(mor)18(e)-250(appr)18(opriate:)]TJ + 0 -18.75 Td [(a)]TJ 0 g 0 G - 12.454 -21.712 Td [(1.)]TJ +/F84 9.9626 Tf 9.962 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.556 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ -159.689 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G - [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F59 9.9626 Tf 171.464 0 Td [(psb_init)]TJ +/F75 9.9626 Tf -77.917 -30.706 Td [(diag)]TJ 0 g 0 G -/F54 9.9626 Tf -171.464 -22.307 Td [(2.)]TJ +/F84 9.9626 Tf 1.02 0 0 1 175.611 280.876 Tm [(Include)-258(diagonals)-258(up)-258(to)-258(this)-258(one;)]TJ/F145 9.9626 Tf 1 0 0 1 325.586 280.876 Tm [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F59 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F54 9.9626 Tf -130.489 -22.307 Td [(3.)]TJ +/F84 9.9626 
Tf 1.02 0 0 1 359.59 280.876 Tm [(means)-258(the)-258(\002rst)-258(super)18(diagonal,)]TJ/F145 9.9626 Tf 1 0 0 1 175.611 268.92 Tm [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=-)]TJ 0 g 0 G - [-500(Loop)-248(over)-248(the)-248(topology)-248(of)-248(the)-248(discr)18(eti)1(zation)-248(mesh)-248(and)-248(build)-248(the)-248(descrip-)]TJ 12.453 -11.955 Td [(tor)-250(with)]TJ/F59 9.9626 Tf 37.857 0 Td [(psb_cdins)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F54 9.9626 Tf -50.31 -22.307 Td [(4.)]TJ +/F84 9.9626 Tf 39.104 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ 0 g 0 G - [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F59 9.9626 Tf 144.386 0 Td [(psb_cdasb)]TJ +/F75 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ 0 g 0 G -/F54 9.9626 Tf -144.386 -22.307 Td [(5.)]TJ +/F84 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -75.287 -11.955 Td [(T)90(ype:)-310(optional.)]TJ 0 g 0 G - [-500(Allocate)-190(the)-190(sparse)-190(matrices)-190(and)-190(dense)-190(vectors)-190(with)]TJ/F59 9.9626 Tf 237.676 0 Td [(psb_spall)]TJ/F54 9.9626 Tf 48.966 0 Td [(and)]TJ/F59 9.9626 Tf 18.759 0 Td [(psb_geall)]TJ +/F75 9.9626 Tf -24.598 -18.75 Td [(rscale,cscale)]TJ 0 g 0 G -/F54 9.9626 Tf -305.401 -22.308 Td [(6.)]TJ +/F84 9.9626 Tf 59.526 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ 0 g 0 G - [-500(Loop)-250(over)-250(the)-250(time)-250(steps:)]TJ +/F75 9.9626 Tf -59.526 -18.979 Td [(On)-250(Return)]TJ 0 g 0 G - 17.773 -22.307 Td [(\050a\051)]TJ 0 g 0 G - [-500(If)-297(after)-298(\002rst)-297(time)-298(step,)-309(r)18(einitialize)-297(the)-298(sparse)-297(matrix)-298(with)]TJ/F59 9.9626 Tf 269.151 0 Td [(psb_sprn)]TJ/F54 9.9626 Tf 41.843 0 Td [(;)]TJ -294.396 -11.955 Td [(also)-250(zer)18(o)-250(out)-250(the)-250(dense)-250(vectors;)]TJ + 0 -18.75 Td [(u)]TJ 0 g 0 G - -17.126 -17.131 Td 
[(\050b\051)]TJ +/F84 9.9626 Tf 10.68 0 Td [(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F145 9.9626 Tf 138.668 0 Td [(a)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ -130.06 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ 0 g 0 G - [-500(Loop)-428(over)-429(the)-428(mesh,)-473(generate)-429(the)-428(coef)18(\002cients)-429(and)-428(insert/update)]TJ 17.126 -11.955 Td [(them)-250(with)]TJ/F59 9.9626 Tf 47.85 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.563 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ +/F75 9.9626 Tf -184.596 -18.75 Td [(l)]TJ 0 g 0 G -/F54 9.9626 Tf -132.811 -17.131 Td [(\050c\051)]TJ +/F84 9.9626 Tf 7.97 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F145 9.9626 Tf 183.151 0 Td [(a)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ -171.833 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F145 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F84 9.9626 Tf 78.455 0 Td [(.)]TJ 0 g 0 G - [-500(Assemble)-250(with)]TJ/F59 9.9626 Tf 84.223 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 49.564 0 Td [(and)]TJ/F59 9.9626 Tf 19.357 0 Td [(psb_geasb)]TJ +/F75 9.9626 Tf -184.596 -18.75 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf -154.808 -17.132 Td [(\050d\051)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ 0 g 0 G - [-500(Choose)-250(and)-250(build)-250(pr)18(econditioner)-250(with)]TJ/F59 9.9626 Tf 188.671 0 Td [(prec%init)]TJ/F54 9.9626 Tf 49.563 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(prec%build)]TJ + 143.074 -29.888 Td [(24)]TJ 0 g 0 G -/F54 9.9626 Tf -256.277 -17.131 Td [(\050e\051)]TJ +ET + +endstream +endobj +1114 0 obj +<< +/Length 7921 +>> +stream 0 g 0 G - [-500(Call)-250(the)-250(iterative)-250(method)-250(of)-250(choice,)-250(e.g.)]TJ/F59 9.9626 Tf 190.902 0 Td [(psb_bicgstab)]TJ/F54 9.9626 Tf -221.338 -22.307 Td 
[(The)-276(insertion)-275(r)18(outines)-276(will)-275(be)-276(called)-275(as)-276(many)-276(times)-275(as)-276(needed;)-288(they)-276(only)-275(need)]TJ 0 -11.955 Td [(to)-214(be)-213(called)-214(on)-213(the)-214(data)-214(t)1(hat)-214(is)-214(actually)-213(allocated)-214(to)-213(the)-214(curr)18(ent)-213(pr)18(ocess,)-221(i.e.)-298(each)]TJ 0 -11.955 Td [(pr)18(ocess)-250(generates)-250(its)-250(own)-250(data.)]TJ 14.944 -12.551 Td [(In)-219(principle)-218(ther)18(e)-219(is)-219(no)-218(speci\002c)-219(or)18(der)-219(in)-218(the)-219(calls)-219(to)]TJ/F59 9.9626 Tf 220.804 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 47.073 0 Td [(,)-225(nor)-219(is)-218(ther)18(e)-219(a)]TJ -282.821 -11.955 Td [(r)18(equir)18(ement)-243(to)-243(build)-243(a)-242(matrix)-243(r)18(ow)-243(in)-243(its)-243(entir)18(ety)-243(befor)18(e)-242(calling)-243(the)-243(r)18(outine;)-245(this)]TJ 0 -11.955 Td [(allows)-364(t)1(he)-364(application)-363(pr)18(ogrammer)-364(to)-363(walk)-364(thr)18(ough)-363(the)-364(discr)18(etization)-363(mesh)]TJ 0 -11.955 Td [(element)-316(by)-317(element,)-333(generating)-316(the)-316(main)-317(part)-316(of)-316(a)-317(given)-316(matrix)-316(r)18(ow)-317(but)-316(also)]TJ 0 -11.956 Td [(contributions)-250(to)-250(the)-250(r)18(ows)-250(corr)18(esponding)-250(to)-250(neighbouring)-250(elements.)]TJ 14.944 -12.55 Td [(Fr)18(om)-328(a)-329(func)1(tional)-329(point)-328(of)-328(view)-328(it)-329(is)-328(even)-328(possible)-328(to)-329(exe)1(cute)-329(one)-328(call)-328(for)]TJ -14.944 -11.955 Td [(each)-204(nonzer)18(o)-204(coef)18(\002cient;)-219(however)-203(this)-204(would)-204(have)-204(a)-204(subst)1(antial)-204(computational)]TJ 0 -11.955 Td [(over)18(head.)-457(It)-299(is)-299(ther)18(efor)18(e)-299(advisable)-299(to)-299(pack)-299(a)-299(certain)-299(amount)-299(of)-299(data)-299(into)-299(each)]TJ 0 -11.956 Td 
[(call)-303(to)-303(the)-302(insertion)-303(r)18(outine,)-316(say)-303(touching)-303(on)-302(a)-303(few)-303(tens)-303(of)-302(r)18(ows;)-330(the)-302(best)-303(per)18(-)]TJ 0 -11.955 Td [(formng)-342(value)-343(would)-342(depend)-342(on)-342(both)-343(the)-342(ar)18(chitectur)18(e)-342(of)-343(the)-342(computer)-342(being)]TJ 0 -11.955 Td [(used)-223(and)-223(on)-222(the)-223(pr)18(oblem)-223(str)8(uctur)18(e.)-301(At)-222(the)-223(opposite)-223(extr)18(eme,)-228(it)-223(would)-222(be)-223(possi-)]TJ 0 -11.955 Td [(ble)-267(to)-267(generate)-267(the)-267(entir)18(e)-267(part)-267(of)-267(a)-267(coef)18(\002cient)-267(matrix)-267(r)18(esiding)-267(on)-267(a)-267(pr)18(ocess)-267(and)]TJ 0 -11.955 Td [(pass)-275(it)-274(in)-275(a)-275(single)-274(call)-275(to)]TJ/F59 9.9626 Tf 108.421 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 47.073 0 Td [(;)-287(this,)-281(however)74(,)-281(would)-274(entail)-275(a)-275(doubling)-274(of)]TJ -155.494 -11.956 Td [(memory)-250(occupation,)-250(and)-250(thus)-250(would)-250(be)-250(almost)-250(always)-250(far)-250(fr)18(om)-250(optimal.)]TJ 0 g 0 G +BT +/F75 9.9626 Tf 99.895 706.129 Td [(3.2.17)-1000(psb)]TJ ET q -1 0 0 1 99.895 139.555 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +1 0 0 1 151.96 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 5.9776 Tf 110.755 132.683 Td [(3)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(The)-260(subr)18(outine)-260(style)]TJ/F89 7.9701 Tf 74.235 0 Td [(psb)]TJ +/F75 9.9626 Tf 154.949 706.129 Td [(set)]TJ ET q -1 0 0 1 201.687 129.989 cm -[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +1 0 0 1 168.269 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F89 7.9701 Tf 204.228 129.79 Td [(precinit)]TJ/F54 7.9701 Tf 35.946 0 Td [(and)]TJ/F89 7.9701 Tf 15.567 0 Td [(psb)]TJ +/F75 9.9626 Tf 171.258 706.129 Td [(mat)]TJ ET q -1 0 0 1 268.951 129.989 cm -[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +1 0 0 1 189.011 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F89 7.9701 Tf 271.492 129.79 Td [(precbl)]TJ/F54 7.9701 Tf 27.478 0 
Td [(ar)18(e)-260(still)-260(supported)-260(for)-260(backwar)18(d)-260(compat-)]TJ -199.075 -9.464 Td [(ibility)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 169.365 -29.888 Td [(7)]TJ -0 g 0 G -ET - -endstream -endobj -875 0 obj -<< -/Length 7173 ->> -stream +/F75 9.9626 Tf 192 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -92.105 -18.964 Td [(call)]TJ 0 g 0 G + [-1050(psb_set_mat_default\050a\051)]TJ 0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(2.3.1)-1000(User)18(-de\002ned)-250(index)-250(mappings)]TJ/F54 9.9626 Tf 0 -18.964 Td [(PSBLAS)-316(supports)-315(user)18(-de\002ned)-316(global)-316(to)-315(local)-316(index)-316(mappings,)-332(subject)-315(to)-316(the)]TJ 0 -11.955 Td [(constraints)-250(outlined)-250(in)-250(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-250(2.3)]TJ +/F75 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ 0 g 0 G - [(:)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 12.453 -19.925 Td [(1.)]TJ +/F75 9.9626 Tf -29.44 -19.231 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(The)-250(set)-250(of)-250(indices)-250(owned)-250(locally)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)-250(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 294.494 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -1.495 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F54 9.9626 Tf 2.775 3.143 Td [(;)]TJ 0 g 0 G - -320.47 -19.926 Td [(2.)]TJ + 0 -19.231 Td [(a)]TJ 0 g 0 G - [-500(The)-250(set)-250(of)-250(halo)-250(points)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)]TJ/F52 9.9626 Tf 228.286 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F85 10.3811 Tf 4.836 3.142 Td [(+)]TJ/F54 9.9626 Tf 10.132 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.966 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -3.83 Td [(col)]TJ/F52 5.9776 Tf 12.795 -1.649 Td [(i)]TJ/F54 9.9626 Tf 2.774 5.479 Td [(;)]TJ -320.107 -21.759 Td 
[(but)-289(otherwise)-289(the)-288(mapping)-289(is)-289(arbitrary)111(.)-426(The)-289(user)-289(application)-289(is)-288(r)18(esponsible)-289(to)]TJ 0 -11.956 Td [(ensur)18(e)-262(consistency)-261(of)-262(this)-262(mapping;)-267(some)-262(err)18(ors)-262(may)-261(be)-262(caught)-262(by)-261(the)-262(library)111(,)]TJ 0 -11.955 Td [(but)-236(this)-236(is)-236(not)-236(guaranteed.)-305(The)-236(application)-236(str)8(uctur)18(e)-236(to)-236(support)-236(this)-236(usage)-236(is)-236(as)]TJ 0 -11.955 Td [(follows:)]TJ +/F84 9.9626 Tf 0.988 0 0 1 109.858 628.52 Tm [(a)-253(variable)-253(of)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 1 0 0 1 166.078 628.52 Tm [(class)]TJ 0 g 0 G - 12.453 -19.925 Td [(1.)]TJ + [(\050psb_T_base_sparse_mat\051)]TJ/F84 9.9626 Tf 0.988 0 0 1 315.017 628.52 Tm [(r)18(equesting)-253(a)-252(new)-253(default)-253(stor)18(-)]TJ 1 0 0 1 124.802 616.564 Tm [(age)-250(format.)]TJ -0.309 -11.955 Td [(T)90(ype:)-310(r)18(equir)18(ed.)]TJ/F75 9.9626 Tf -24.598 -26.815 Td [(3.2.18)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 0 -18.964 Td [(call)]TJ 0 g 0 G - [-500(Initialize)-190(index)-190(space)-190(with)]TJ/F59 9.9626 Tf 128.098 0 Td [(psb_cdall\050ictx,desc,info,vl=vl,lidx=lidx\051)]TJ/F54 9.9626 Tf -115.645 -11.955 Td [(passing)-292(the)-293(vectors)]TJ/F59 9.9626 Tf 87.882 0 Td [(vl\050:\051)]TJ/F54 9.9626 Tf 29.064 0 Td [(containing)-292(the)-293(set)-292(of)-293(global)-292(indices)-292(owned)-293(by)]TJ -116.946 -11.956 Td [(the)-327(curr)18(ent)-328(pr)18(ocess)-327(and)]TJ/F59 9.9626 Tf 108.493 0 Td [(lidx\050:\051)]TJ/F54 9.9626 Tf 39.873 0 Td [(containing)-327(the)-327(corr)18(esponding)-328(local)-327(in-)]TJ -148.366 -11.955 Td [(dices;)]TJ + [-1050(a%clone\050b,info\051)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ +/F75 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Add)-412(the)-412(halo)-412(points)]TJ/F59 9.9626 Tf 109.326 0 Td 
[(ja\050:\051)]TJ/F54 9.9626 Tf 30.256 0 Td [(and)-412(their)-412(associated)-412(local)-412(indices)]TJ/F59 9.9626 Tf 155.064 0 Td [(lidx\050:\051)]TJ/F54 9.9626 Tf -282.193 -11.955 Td [(with)-250(a\050some\051)-250(call\050s\051)-250(to)]TJ/F59 9.9626 Tf 99.815 0 Td [(psb_cdins\050nz,ja,desc,info,lidx=lidx\051)]TJ/F54 9.9626 Tf 188.292 0 Td [(;)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - -300.56 -19.926 Td [(3.)]TJ +/F75 9.9626 Tf -29.44 -19.231 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F59 9.9626 Tf 144.387 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(;)]TJ 0 g 0 G - -191.46 -19.925 Td [(4.)]TJ + 0 -19.231 Td [(a)]TJ 0 g 0 G - [-500(Build)-190(the)-190(sparse)-190(matrices)-190(and)-190(vectors,)-202(optionally)-190(making)-190(use)-190(in)]TJ/F59 9.9626 Tf 288.117 0 Td [(psb_spins)]TJ/F54 9.9626 Tf -275.664 -11.955 Td [(and)]TJ/F59 9.9626 Tf 19.958 0 Td [(psb_geins)]TJ/F54 9.9626 Tf 50.163 0 Td [(of)-310(the)]TJ/F59 9.9626 Tf 28.756 0 Td [(local)]TJ/F54 9.9626 Tf 29.243 0 Td [(ar)18(gument)-310(specifying)-310(that)-310(the)-311(indices)-310(in)]TJ/F59 9.9626 Tf 177.734 0 Td [(ia)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf -316.314 -11.955 Td [(ja)]TJ/F54 9.9626 Tf 12.952 0 Td [(and)]TJ/F59 9.9626 Tf 19.357 0 Td [(irw)]TJ/F54 9.9626 Tf 15.691 0 Td [(,)-250(r)18(espectively)111(,)-250(ar)18(e)-250(alr)18(eady)-250(local)-250(indices.)]TJ/F51 11.9552 Tf -72.906 -29.133 Td [(2.4)-1000(Programming)-250(model)]TJ/F54 9.9626 Tf 0 -18.964 Td [(The)-316(PSBLAS)-315(librarary)-316(is)-315(based)-316(on)-315(the)-316(Single)-315(Pr)18(ogram)-316(Multiple)-316(Data)-315(\050SPMD\051)]TJ 0 -11.955 Td [(pr)18(ogramming)-277(model:)-364(each)-277(pr)18(ocess)-277(participatin)1(g)-277(in)-277(the)-277(computation)-277(performs)]TJ 0 -11.955 Td [(the)-250(same)-250(actions)-250(on)-250(a)-250(chunk)-250(of)-250(data.)-310(Parallelism)-250(is)-250(thus)-250(data-driven.)]TJ 14.944 -11.956 Td 
[(Because)-313(of)-313(this)-312(str)8(uctur)18(e,)-329(many)-313(subr)18(outines)-313(coor)18(dinate)-312(their)-313(action)-313(acr)18(oss)]TJ -14.944 -11.955 Td [(the)-336(various)-336(pr)18(ocesses,)-358(thus)-336(pr)18(oviding)-336(an)-336(implicit)-336(synchr)18(onization)-336(point,)-358(and)]TJ 0 -11.955 Td [(ther)18(efor)18(e)]TJ/F52 9.9626 Tf 43.283 0 Td [(must)]TJ/F54 9.9626 Tf 24.136 0 Td [(be)-367(called)-366(simultaneously)-367(by)-366(all)-367(pr)18(ocesses)-367(participating)-366(in)-367(the)]TJ -67.419 -11.955 Td [(computation.)-525(This)-321(is)-322(certainly)-322(tr)8(ue)-321(for)-322(the)-322(data)-321(allocation)-322(and)-322(assembl)1(y)-322(r)18(ou-)]TJ 0 -11.955 Td [(tines,)-250(for)-250(all)-250(the)-250(computational)-250(r)18(outines)-250(and)-250(for)-250(some)-250(of)-250(the)-250(tools)-250(r)18(outines.)]TJ 14.944 -11.955 Td [(However)-333(ther)18(e)-332(ar)18(e)-333(many)-333(cases)-332(wher)18(e)-333(no)-333(synchr)18(onizati)1(on,)-354(and)-332(indeed)-333(no)]TJ -14.944 -11.956 Td [(communication)-344(among)-343(pr)18(ocesses,)-367(is)-344(implied;)-390(for)-344(instance,)-367(all)-344(the)-343(r)18(outines)-344(in)]TJ 0 -11.955 Td [(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-246(3)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G - [-247(ar)18(e)-246(only)-246(acting)-246(on)-247(the)-246(local)-246(data)-247(str)8(uctur)18(es,)-247(and)-246(thus)-246(may)-247(be)-246(called)-246(inde-)]TJ 0 -11.955 Td [(pendently)111(.)-306(The)-238(most)-238(important)-237(case)-238(is)-238(that)-238(of)-238(the)-238(coef)18(\002cient)-237(insertion)-238(r)18(outines:)]TJ 0 -11.955 Td [(since)-231(the)-231(number)-231(of)-230(coef)18(\002cients)-231(in)-231(the)-231(sparse)-231(and)-231(dense)-230(matrices)-231(varies)-231(among)]TJ 0 -11.955 Td 
[(the)-248(pr)18(ocessors,)-249(and)-249(since)-248(the)-249(user)-248(is)-249(fr)18(ee)-248(to)-249(choose)-248(an)-249(arbitrary)-248(or)18(der)-249(in)-248(builid-)]TJ 0 -11.955 Td [(ing)-250(the)-250(matrix)-250(entries,)-250(these)-250(r)18(outines)-250(cannot)-250(imply)-250(a)-250(synchr)18(onization.)]TJ 14.944 -11.956 Td [(Thr)18(oughout)-250(this)-250(user)-74('s)-250(guide)-250(each)-250(subr)18(outine)-250(will)-250(be)-250(clearly)-250(indicated)-250(as:)]TJ +/F75 9.9626 Tf -77.918 -32.138 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(Synchronous:)]TJ 0 g 0 G -/F54 9.9626 Tf 67.247 0 Td [(must)-307(be)-307(called)-308(simultaneously)-307(by)-307(all)-307(the)-308(pr)18(ocesses)-307(in)-307(the)-307(r)18(ele-)]TJ -42.341 -11.955 Td [(vant)-250(communication)-250(context;)]TJ + 0 -19.232 Td [(b)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(Asynchronous:)]TJ +/F84 9.9626 Tf 10.68 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ 0 g 0 G -/F54 9.9626 Tf 73.334 0 Td [(may)-250(be)-250(called)-250(in)-250(a)-250(totally)-250(independent)-250(manner)74(.)]TJ +/F75 9.9626 Tf -10.68 -19.231 Td [(info)]TJ 0 g 0 G - 96.031 -56.634 Td [(8)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F75 9.9626 Tf -23.801 -26.815 Td [(3.2.19)-1000(Named)-250(Constants)]TJ 0 g 0 G + 0 -18.964 Td [(psb)]TJ ET - -endstream -endobj -886 0 obj -<< -/Length 8187 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 117.091 372.049 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 14.3462 Tf 99.895 705.784 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ/F54 9.9626 Tf 0 -23.091 Td [(In)-289(this)-288(chapter)-289(we)-289(illustrate)-288(the)-289(data)-289(str)8(uctur)18(es)-288(used)-289(for)-289(de\002nition)-289(of)-288(r)18(outines)]TJ 0 -11.956 Td [(interfaces.)-622(They)-354(include)-354(data)-354(str)8(uctur)18(es)-354(for)-354(sparse)-354(matrices,)-380(communication)]TJ 0 -11.955 Td 
[(descriptors)-250(and)-250(pr)18(econditioners.)]TJ 14.944 -12.156 Td [(All)-248(the)-248(data)-249(types)-248(and)-248(the)-248(basic)-248(subr)18(outine)-249(interfaces)-248(r)18(elated)-248(to)-248(descriptors)]TJ -14.944 -11.955 Td [(and)-345(sparse)-345(matrices)-344(ar)18(e)-345(de\002ned)-345(in)-345(the)-345(module)]TJ/F59 9.9626 Tf 213.323 0 Td [(psb_base_mod)]TJ/F54 9.9626 Tf 62.764 0 Td [(;)-392(this)-345(will)-345(have)]TJ -276.087 -11.955 Td [(to)-381(be)-381(included)-381(by)-381(every)-381(user)-381(subr)18(outine)-381(that)-381(makes)-381(u)1(se)-381(of)-381(the)-381(library)111(.)-703(The)]TJ 0 -11.956 Td [(pr)18(econditioners)-250(ar)18(e)-250(de\002ned)-250(in)-250(the)-250(module)]TJ/F59 9.9626 Tf 187.993 0 Td [(psb_prec_mod)]TJ/F54 9.9626 Tf -173.049 -12.156 Td [(Integer)74(,)-433(r)18(eal)-396(and)-397(complex)-396(data)-396(types)-397(ar)18(e)-396(parametrized)-396(with)-397(a)-396(kind)-396(type)]TJ -14.944 -11.955 Td [(de\002ned)-250(in)-250(the)-250(library)-250(as)-250(follows:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.528 Td [(psb)]TJ +/F75 9.9626 Tf 120.08 371.85 Td [(dupl)]TJ ET q -1 0 0 1 117.091 566.32 cm +1 0 0 1 142.256 372.049 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 566.121 Td [(spk)]TJ +/F75 9.9626 Tf 145.245 371.85 Td [(ovwrt)]TJ ET q -1 0 0 1 137.275 566.32 cm +1 0 0 1 172.413 372.049 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 145.245 566.121 Td [(Kind)-407(parameter)-406(for)-407(short)-406(pr)18(ecision)-407(r)18(eal)-406(and)-407(complex)-406(data;)-485(corr)18(e-)]TJ -20.443 -11.955 Td [(sponds)-250(to)-250(a)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 52.901 0 Td [(REAL)]TJ +/F84 9.9626 Tf 1.01 0 0 1 180.383 371.85 Tm [(Duplicate)-247(coef)17(\002)1(cients)-248(should)-247(be)-248(overwritten)-247(\050i.e.)-307(ignor)18(e)-248(du-)]TJ 1 0 0 1 124.503 359.895 Tm [(plications\051)]TJ 0 g 0 G -/F54 9.9626 Tf 23.412 0 Td 
[(declaration)-250(and)-250(is)-250(normally)-250(4)-250(bytes;)]TJ -0 g 0 G -/F51 9.9626 Tf -101.22 -20.73 Td [(psb)]TJ +/F75 9.9626 Tf -24.608 -19.231 Td [(psb)]TJ ET q -1 0 0 1 117.091 533.635 cm +1 0 0 1 117.091 340.863 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 533.436 Td [(dpk)]TJ +/F75 9.9626 Tf 120.08 340.664 Td [(dupl)]TJ ET q -1 0 0 1 138.939 533.635 cm +1 0 0 1 142.256 340.863 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 146.909 533.436 Td [(Kind)-420(parameter)-421(for)-420(long)-420(pr)18(ecision)-421(r)18(eal)-420(and)-420(complex)-421(data;)-505(corr)18(e-)]TJ -22.107 -11.955 Td [(sponds)-250(to)-250(a)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 52.901 0 Td [(DOUBLE)-525(PRECISION)]TJ +/F75 9.9626 Tf 145.245 340.664 Td [(add)]TJ +ET +q +1 0 0 1 162.999 340.863 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G -/F54 9.9626 Tf 86.176 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(8)-250(bytes;)]TJ +BT +/F84 9.9626 Tf 170.969 340.664 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(be)-250(added;)]TJ 0 g 0 G -/F51 9.9626 Tf -163.984 -20.73 Td [(psb)]TJ +/F75 9.9626 Tf -71.074 -19.232 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 321.632 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 321.432 Td [(dupl)]TJ ET q -1 0 0 1 117.091 500.951 cm +1 0 0 1 142.256 321.632 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 500.751 Td [(mpk)]TJ +/F75 9.9626 Tf 145.245 321.432 Td [(err)]TJ ET q -1 0 0 1 141.708 500.951 cm +1 0 0 1 158.575 321.632 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 149.678 500.751 Td [(Kind)-250(parameter)-250(for)-250(4-bytes)-250(integer)-250(data,)-250(as)-250(is)-250(always)-250(used)-250(by)-250(MPI;)]TJ +/F84 9.9626 Tf 166.545 321.432 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(trigger)-250(an)-250(err)18(or)-250(conditino)]TJ 0 g 0 G -/F51 9.9626 Tf -49.783 -20.729 Td [(psb)]TJ +/F75 9.9626 Tf 
-66.65 -19.231 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 302.4 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 302.201 Td [(upd)]TJ ET q -1 0 0 1 117.091 480.221 cm +1 0 0 1 138.939 302.4 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 480.022 Td [(epk)]TJ +/F75 9.9626 Tf 141.928 302.201 Td [(d\003t)]TJ ET q -1 0 0 1 137.833 480.221 cm +1 0 0 1 158.017 302.4 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 145.803 480.022 Td [(Kind)-364(parameter)-363(for)-364(8-bytes)-363(integer)-364(data,)-391(a)-1(s)-363(is)-364(always)-363(used)-364(by)-363(the)]TJ/F59 9.9626 Tf -21.001 -11.955 Td [(sizeof)]TJ/F54 9.9626 Tf 33.873 0 Td [(methods;)]TJ +/F84 9.9626 Tf 165.987 302.201 Td [(Default)-250(update)-250(strategy)-250(for)-250(matrix)-250(coef)18(\002cients;)]TJ 0 g 0 G -/F51 9.9626 Tf -58.78 -20.73 Td [(psb)]TJ +/F75 9.9626 Tf -66.092 -19.231 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 283.169 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 282.97 Td [(upd)]TJ ET q -1 0 0 1 117.091 447.537 cm +1 0 0 1 138.939 283.169 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 447.337 Td [(ipk)]TJ +/F75 9.9626 Tf 141.928 282.97 Td [(srch)]TJ ET q -1 0 0 1 136.169 447.537 cm +1 0 0 1 161.335 283.169 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 144.139 447.337 Td [(Kind)-398(parameter)-399(for)-398(\223local\224)-398(integer)-399(indices)-398(and)-398(data;)-473(with)-398(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(a)-250(4)-250(bytes)-250(integer;)]TJ +/F84 9.9626 Tf 169.305 282.97 Td [(Update)-250(strategy)-250(based)-250(on)-250(sear)18(ch)-250(into)-250(the)-250(data)-250(str)8(uctur)18(e;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.729 Td [(psb)]TJ +/F75 9.9626 Tf -69.41 -19.232 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 263.938 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 120.08 263.738 Td [(upd)]TJ ET q -1 0 0 1 117.091 414.852 
cm +1 0 0 1 138.939 263.938 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 414.653 Td [(lpk)]TJ +/F75 9.9626 Tf 141.928 263.738 Td [(perm)]TJ ET q -1 0 0 1 136.169 414.852 cm +1 0 0 1 166.326 263.938 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G -BT -/F54 9.9626 Tf 144.139 414.653 Td [(Kind)-328(parameter)-329(for)-328(\223global\224)-328(integer)-329(indices)-328(and)-328(data;)-368(with)-328(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(an)-250(8)-250(bytes)-250(integer;)]TJ -24.907 -20.529 Td [(The)-205(integer)-205(kinds)-205(for)-206(local)-205(and)-205(global)-205(indices)-205(can)-205(be)-206(chosen)-205(at)-205(con\002gur)18(e)-205(time)-205(to)]TJ 0 -11.955 Td [(hold)-266(4)-267(or)-266(8)-267(bytes,)-270(with)-266(the)-267(global)-266(indices)-266(at)-267(least)-266(as)-267(lar)18(ge)-266(as)-266(the)-267(local)-266(ones.)-359(T)92(o-)]TJ 0 -11.955 Td [(gether)-219(with)-220(the)-219(classes)-219(attributes)-219(we)-219(also)-220(discuss)-219(their)-219(methods.)-300(Most)-219(methods)]TJ 0 -11.955 Td [(detailed)-272(her)18(e)-272(only)-273(act)-272(on)-272(the)-272(local)-272(variable,)-278(i.e.)-376(their)-273(action)-272(is)-272(pur)18(ely)-272(local)-272(and)]TJ 0 -11.956 Td [(asynchr)18(onous)-359(unless)-360(otherwise)-359(stated.)-638(The)-359(list)-360(of)-359(methods)-359(her)18(e)-360(is)-359(not)-359(com-)]TJ 0 -11.955 Td [(pletely)-336(exhaustive;)-380(many)-336(methods,)-358(especially)-336(those)-336(that)-336(alter)-337(the)-336(contents)-336(of)]TJ 0 -11.955 Td [(the)-299(various)-298(objects,)-311(ar)18(e)-299(usually)-299(not)-299(needed)-298(by)-299(the)-299(end-user)74(,)-311(and)-298(ther)18(efor)18(e)-299(ar)18(e)]TJ 0 -11.955 Td [(described)-250(in)-250(the)-250(developer)-74('s)-250(documentation.)]TJ/F51 11.9552 Tf 0 -30.277 Td [(3.1)-1000(Descriptor)-250(data)-250(structure)]TJ/F54 9.9626 Tf 0 -19.353 Td 
[(All)-241(the)-241(gener)1(a)-1(l)-240(matrix)-241(informations)-241(and)-240(elements)-241(to)-241(be)-241(exchanged)-240(among)-241(pr)18(o-)]TJ 0 -11.956 Td [(cesses)-402(ar)18(e)-401(stor)18(ed)-402(within)-401(a)-402(data)-401(str)8(uctur)18(e)-402(of)-401(the)-402(type)]TJ/F59 9.9626 Tf 242.575 0 Td [(psb)]TJ -ET -q -1 0 0 1 358.788 237.097 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 361.927 236.897 Td [(desc)]TJ +BT +/F84 9.9626 Tf 1.02 0 0 1 174.296 263.738 Tm [(Update)-317(strategy)-316(based)-317(on)-317(additional)-317(permutation)-316(data)-317(\050see)]TJ 1 0 0 1 124.802 251.783 Tm [(tools)-250(r)18(outine)-250(description\051.)]TJ/F75 11.9552 Tf -24.907 -28.807 Td [(3.3)-1000(Dense)-250(V)111(ector)-250(Data)-250(Structure)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.587 204.012 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 119.612 204.012 Tm [(psb)]TJ +ET +q +1 0 0 1 135.931 204.211 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 139.069 204.012 Td [(T)]TJ +ET +q +1 0 0 1 144.927 204.211 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 148.065 204.012 Td [(vect)]TJ +ET +q +1 0 0 1 169.614 204.211 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 172.752 204.012 Td [(type)]TJ/F84 9.9626 Tf 1.02 0 0 1 196.689 204.012 Tm [(data)-297(str)8(uctur)18(e)-297(encapsulates)-296(the)-297(dense)-297(vectors)-296(in)-297(a)-297(way)]TJ 0.98 0 0 1 99.895 192.057 Tm [(similar)-231(t)1(o)-231(sparse)-230(matrices,)-236(i.e.)-308(including)-230(a)-231(base)-230(type)]TJ/F145 9.9626 Tf 1 0 0 1 323.065 192.057 Tm [(psb)]TJ +ET +q +1 0 0 1 339.383 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 342.522 192.057 Td [(T)]TJ +ET +q +1 0 0 1 348.38 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 351.518 192.057 Td [(base)]TJ +ET +q +1 0 0 1 373.067 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 376.205 192.057 Td [(vect)]TJ +ET +q +1 0 0 1 397.754 192.256 cm 
+[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 400.892 192.057 Td [(type)]TJ/F84 9.9626 Tf 0.98 0 0 1 421.814 192.057 Tm [(.)-308(The)]TJ 1.016 0 0 1 99.895 180.101 Tm [(user)-245(will)-245(not,)-245(in)-245(general,)-245(access)-245(the)-244(vector)-245(components)-245(dir)18(ectly)109(,)-245(but)-245(rather)-245(via)]TJ 0.994 0 0 1 99.895 168.146 Tm [(the)-252(r)18(outines)-252(of)-253(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-252(6)]TJ +0 g 0 G + [(.)-314(Among)-252(other)-253(simple)-252(things,)-252(we)-253(de\002ne)-252(her)18(e)-252(an)-252(extraction)]TJ 0.98 0 0 1 99.895 156.191 Tm [(method)-255(that)-254(can)-255(be)-255(used)-254(to)-255(get)-255(a)-254(full)-255(copy)-254(of)-255(the)-255(part)-254(of)-255(the)-255(vector)-254(stor)18(ed)-255(on)-254(the)]TJ 1 0 0 1 99.895 144.236 Tm [(local)-250(pr)18(ocess.)]TJ 1.02 0 0 1 114.839 132.281 Tm [(The)-265(type)-266(declaration)-265(is)-265(shown)-266(in)-265(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-266(3)]TJ +0 g 0 G + [-265(wher)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 332.473 132.281 Tm [(T)]TJ/F84 9.9626 Tf 1.02 0 0 1 340.4 132.281 Tm [(is)-265(a)-266(placeholder)-265(for)-265(the)]TJ 1 0 0 1 99.895 120.326 Tm [(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ +0 g 0 G + 166.875 -29.888 Td [(25)]TJ +0 g 0 G +ET + +endstream +endobj +1121 0 obj +<< +/Length 5172 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 150.705 706.129 Td [(I)]TJ +0 g 0 G +/F84 9.9626 Tf 8.857 0 Td [(Integer;)]TJ +0 g 0 G +/F75 9.9626 Tf -8.857 -20.359 Td [(S)]TJ +0 g 0 G +/F84 9.9626 Tf 11.068 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ +0 g 0 G +/F75 9.9626 Tf -11.068 -20.358 Td [(D)]TJ +0 g 0 G +/F84 9.9626 Tf 13.28 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ +0 g 0 G +/F75 9.9626 Tf -13.28 -20.359 Td [(C)]TJ +0 g 0 G +/F84 9.9626 Tf 12.174 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ +0 g 0 G +/F75 9.9626 Tf -12.174 -20.358 Td [(Z)]TJ +0 g 0 G +/F84 9.9626 Tf 11.626 0 Td 
[(Double)-250(pr)18(ecision)-250(complex.)]TJ 0.987 0 0 1 150.396 604.444 Tm [(The)-252(actual)-252(data)-252(is)-252(contained)-252(in)-252(the)-253(polymorphic)-252(component)]TJ/F145 9.9626 Tf 1 0 0 1 412.002 604.444 Tm [(v%v)]TJ/F84 9.9626 Tf 0.987 0 0 1 427.693 604.444 Tm [(;)-253(t)1(he)-253(separation)]TJ 1.02 0 0 1 150.705 592.489 Tm [(between)-301(the)-301(application)-301(and)-300(the)-301(actual)-301(data)-301(is)-301(essential)-301(for)-301(cases)-301(wher)18(e)-301(it)-301(is)]TJ 1.02 0 0 1 150.705 580.534 Tm [(necessary)-259(to)-259(link)-260(to)-259(data)-259(storage)-259(made)-259(available)-259(elsewher)17(e)-259(outside)-259(the)-259(dir)17(ect)]TJ 0.995 0 0 1 150.705 568.579 Tm [(contr)18(ol)-250(of)-250(the)-250(compiler/appl)1(ication,)-251(e.g.)-311(data)-250(stor)18(ed)-249(in)-250(a)-250(graphics)-250(accelerator)-74('s)]TJ 1 0 0 1 150.406 556.624 Tm [(private)-250(memory)111(.)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 150.705 452.975 cm +0 0 343.711 82.69 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 163.108 525.005 Td [(type)]TJ +0 g 0 G + [-525(psb_T_base_vect_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.959 Td [(TYPE)]TJ +0 g 0 G + [(\050KIND_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_T_base_vect_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(type)]TJ +0 
g 0 G + [-525(psb_T_vect_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.959 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_vect_type\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-1050(psb_T_vect_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 6.677 -41.429 Td [(Listing)-250(3:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(dense)-250(vector)74(.)]TJ/F75 9.9626 Tf -19.08 -39.929 Td [(3.3.1)-1000(V)111(ector)-250(Methods)]TJ 0 -19.174 Td [(3.3.2)-1000(get)]TJ ET q -1 0 0 1 383.476 237.097 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 195.029 358.919 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 386.614 236.897 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)-765(Every)]TJ -307.64 -11.955 Td [(str)8(uctur)18(e)-371(of)-370(this)-371(type)-370(is)-371(associated)-370(with)-371(a)-371(discr)18(etization)-370(pattern)-371(and)-370(enables)]TJ 0 -11.955 Td [(data)-301(communications)-302(and)-301(other)-301(operations)-302(that)-301(ar)18(e)-302(nece)1(ssa)-1(r)1(y)-302(for)-301(implement-)]TJ 0 -11.955 Td [(ing)-250(the)-250(various)-250(algorithms)-250(of)-250(inter)18(est)-250(to)-250(us.)]TJ 14.944 -12.156 Td [(The)-265(data)-266(str)8(uctur)18(e)-265(itself)]TJ/F59 9.9626 Tf 107.448 0 Td [(psb_desc_type)]TJ/F54 9.9626 Tf 70.638 0 Td [(can)-265(be)-266(tr)18(eated)-265(as)-265(an)-265(opaque)-266(object)]TJ -193.03 -11.955 Td [(handled)-321(via)-321(the)-321(tools)-321(r)18(outines)-321(of)-321(Sec.)]TJ -0 0 1 rg 0 0 1 RG - [-321(6)]TJ +/F75 9.9626 Tf 
198.017 358.719 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(dense)-250(vector)]TJ/F145 9.9626 Tf -47.312 -19.173 Td [(nr)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-321(or)-321(the)-321(query)-321(r)18(outines)-321(detailed)-321(below;)]TJ 0 -11.956 Td [(nevertheless)-250(we)-250(include)-250(her)18(e)-250(a)-250(description)-250(for)-250(the)-250(curious)-250(r)18(eader)74(.)]TJ 14.944 -12.156 Td [(First)-229(we)-228(describe)-228(the)]TJ/F59 9.9626 Tf 92.473 0 Td [(psb_indx_map)]TJ/F54 9.9626 Tf 65.04 0 Td [(type.)-303(This)-228(is)-229(a)-228(data)-229(str)8(uctur)18(e)-228(that)-229(keeps)]TJ -172.457 -11.955 Td [(track)-250(of)-250(a)-250(certain)-250(number)-250(of)-250(basic)-250(issues)-250(such)-250(as:)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G - 13.888 -20.528 Td [(\225)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(The)-250(value)-250(of)-250(the)-250(communication)-250(context;)]TJ + [-525(v%get_nrows\050\051)]TJ 0 g 0 G - 155.477 -29.888 Td [(9)]TJ +/F75 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ 0 g 0 G -ET - -endstream -endobj -894 0 obj -<< -/Length 6070 ->> -stream +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.439 -20.359 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 164.593 706.129 Td [(\225)]TJ + 0 -20.358 Td [(v)]TJ 0 g 0 G - [-500(The)-236(number)-236(of)-235(indices)-236(in)-236(the)-236(index)-236(space,)-238(i.e.)-306(global)-236(number)-235(of)-236(r)18(ows)-236(and)]TJ 11.018 -11.955 Td [(columns)-250(of)-250(a)-250(sparse)-250(matrix;)]TJ +/F84 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G - -11.018 -20.409 Td [(\225)]TJ + -56.338 -34.198 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(The)-250(local)-250(set)-250(of)-250(indices,)-250(including:)]TJ 0 g 0 G -/F51 9.9626 Tf 22.974 -20.408 Td [(\226)]TJ + 0 -20.358 Td [(Function)-250(value)]TJ 0 g 
0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(local)-250(indices)-250(\050and)-250(local)-250(r)18(ows\051;)]TJ +/F84 9.9626 Tf 72.468 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(dense)-250(vector)]TJ/F145 9.9626 Tf 161.273 0 Td [(v)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ/F75 9.9626 Tf -238.971 -27.757 Td [(3.3.3)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(dense)-250(vector)]TJ/F145 9.9626 Tf 0 -19.174 Td [(memory_size)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -9.962 -16.182 Td [(\226)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(halo)-250(indices)-250(\050and)-250(ther)18(efor)18(e)-250(local)-250(columns\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -9.962 -16.181 Td [(\226)]TJ + [-525(v%sizeof\050\051)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(global)-250(indices)-250(corr)18(esponding)-250(to)-250(the)-250(local)-250(ones.)]TJ -46.824 -20.409 Td [(Ther)18(e)-301(ar)18(e)-301(many)-301(dif)18(fer)18(ent)-301(schemes)-301(for)-301(storing)-301(these)-301(data;)-326(ther)18(efor)18(e)-301(ther)18(e)-301(ar)18(e)-301(a)]TJ 0 -11.955 Td [(number)-299(of)-299(types)-300(extending)-299(the)-299(base)-299(one,)-312(and)-299(the)-299(descriptor)-300(str)8(uctur)18(e)-299(holds)-299(a)]TJ 0 -11.955 Td [(polymorphic)-212(object)-213(whose)-212(dynamic)-212(type)-213(can)-212(be)-212(any)-213(of)-212(the)-212(extended)-213(types.)-297(The)]TJ 0 -11.955 Td [(methods)-250(associated)-250(with)-250(this)-250(data)-250(type)-250(answer)-250(the)-250(following)-250(queries:)]TJ +/F75 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ 0 g 0 G - 13.888 -20.288 Td [(\225)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(For)-411(a)-412(given)-411(set)-412(of)-411(local)-411(indices,)-452(\002nd)-412(the)-411(corr)18(esponding)-411(indices)-412(in)-411(the)]TJ 
11.018 -11.955 Td [(global)-250(numbering;)]TJ +/F75 9.9626 Tf -29.439 -20.358 Td [(On)-250(Entry)]TJ 0 g 0 G - -11.018 -20.408 Td [(\225)]TJ 0 g 0 G - [-500(For)-357(a)-357(given)-357(set)-357(of)-358(global)-357(indices,)-384(\002nd)-357(the)-357(corr)18(esponding)-357(indices)-357(in)-357(the)]TJ 11.018 -11.955 Td [(local)-250(numbering,)-250(if)-250(any)111(,)-250(or)-250(r)18(eturn)-250(an)-250(invalid)]TJ +/F84 9.9626 Tf 166.874 -29.888 Td [(26)]TJ 0 g 0 G - -11.018 -20.409 Td [(\225)]TJ +ET + +endstream +endobj +1127 0 obj +<< +/Length 3997 +>> +stream 0 g 0 G - [-500(Add)-250(a)-250(global)-250(index)-250(to)-250(the)-250(set)-250(of)-250(halo)-250(indices;)]TJ 0 g 0 G - 0 -20.408 Td [(\225)]TJ 0 g 0 G - [-500(Find)-250(the)-250(pr)18(ocess)-250(owner)-250(of)-250(each)-250(member)-250(of)-250(a)-250(set)-250(of)-250(global)-250(indices.)]TJ -13.888 -20.288 Td [(All)-295(methods)-295(but)-294(the)-295(last)-295(ar)18(e)-295(pur)18(ely)-295(local;)-317(the)-295(last)-295(method)-294(potentially)-295(r)18(equir)18(es)]TJ 0 -11.955 Td [(communication)-418(among)-419(pr)18(ocesses,)-460(and)-419(thus)-418(is)-418(a)-419(synchr)18(onous)-418(method.)-815(The)]TJ 0 -11.955 Td [(choice)-244(of)-244(a)-244(speci\002c)-244(dynamic)-244(type)-244(for)-244(the)-244(index)-244(map)-244(is)-244(made)-244(at)-244(the)-244(time)-244(the)-244(de-)]TJ 0 -11.955 Td [(scriptor)-210(is)-211(init)1(ially)-211(allocated,)-218(accor)18(ding)-210(to)-211(t)1(he)-211(mode)-210(of)-210(initialization)-211(\050see)-210(also)]TJ -0 0 1 rg 0 0 1 RG - [-210(6)]TJ +BT +/F75 9.9626 Tf 99.895 706.129 Td [(v)]TJ 0 g 0 G - [(\051.)]TJ 14.944 -12.076 Td [(The)-250(descriptor)-250(contents)-250(ar)18(e)-250(as)-250(follows:)]TJ +/F84 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -20.288 Td [(indxmap)]TJ + -56.339 -36.868 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 45.38 0 Td 
[(A)-190(polymorphic)-190(variable)-190(of)-190(a)-190(type)-190(that)-190(is)-190(any)-190(extension)-190(of)-190(the)-190(indx)]TJ -ET -q -1 0 0 1 478.491 370.98 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 481.48 370.78 Td [(map)]TJ -305.869 -11.955 Td [(type)-250(described)-250(above.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -32.363 Td [(halo)]TJ -ET -q -1 0 0 1 171.228 326.661 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.217 326.462 Td [(index)]TJ + 0 -23.918 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 30.435 0 Td [(A)-331(list)-332(of)-331(the)-332(halo)-331(and)-332(boundary)-331(elements)-332(for)-331(the)-332(curr)18(ent)-331(pr)18(ocess)]TJ -29.041 -11.955 Td [(to)-247(be)-247(exchanged)-247(with)-246(other)-247(pr)18(ocesses;)-248(for)-247(each)-247(pr)18(ocesses)-247(with)-247(whic)1(h)-247(it)-247(is)]TJ 0 -11.956 Td [(necessary)-250(to)-250(communicate:)]TJ +/F84 9.9626 Tf 72.468 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F75 9.9626 Tf -72.468 -32.82 Td [(3.3.4)-1000(set)-250(\227)-250(Set)-250(contents)-250(of)-250(the)-250(vector)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 9.465 -20.408 Td [(1.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 5.231 -20.898 Td [(call)]TJ 0 g 0 G - [-500(Pr)18(ocess)-250(identi\002er;)]TJ + [-1050(v%set\050alpha[,first,last]\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -16.182 Td [(2.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ + [-1050(v%set\050vect[,first,last]\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -16.181 Td [(3.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ + [-1050(v%zero\050\051)]TJ 0 g 0 G - 0 -16.182 Td [(4.)]TJ +/F75 9.9626 Tf -5.231 -24.913 Td [(T)90(ype:)]TJ 0 g 0 G - 
[-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 0 -16.182 Td [(5.)]TJ +/F75 9.9626 Tf -29.44 -23.918 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -20.408 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 g 0 G + 0 -23.918 Td [(v)]TJ +0 g 0 G +/F84 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -35.873 Td [(alpha)]TJ +0 g 0 G +/F84 9.9626 Tf 30.048 0 Td [(A)-250(scalar)-250(value.)]TJ -5.141 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ + [-250(1)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.409 Td [(ext)]TJ -ET -q -1 0 0 1 164.583 176.799 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 167.572 176.599 Td [(index)]TJ +/F75 9.9626 Tf -24.907 -23.918 Td [(\002rst,last)]TJ 0 g 0 G -/F54 9.9626 Tf 30.435 0 Td [(A)-216(list)-217(of)-216(element)-217(indices)-216(to)-217(be)-216(exchanged)-217(to)-216(implement)-217(the)-216(mapping)]TJ -22.396 -11.955 Td [(between)-250(a)-250(base)-250(descriptor)-250(and)-250(a)-250(descriptor)-250(with)-250(overlap.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +/F84 9.9626 Tf 41.215 0 Td [(Boundaries)-250(for)-250(setting)-250(in)-250(the)-250(vector)74(.)]TJ -16.308 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 
0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integers.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -23.918 Td [(vect)]TJ +0 g 0 G +/F84 9.9626 Tf 22.854 0 Td [(An)-250(array)]TJ 2.053 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ + [-250(1)]TJ 0 g 0 G - [(.)]TJ + [(.)]TJ 1.018 0 0 1 99.895 217.975 Tm [(Note)-246(that)-246(a)-245(call)-246(to)]TJ/F145 9.9626 Tf 1 0 0 1 181.467 217.975 Tm [(v%zero\050\051)]TJ/F84 9.9626 Tf 1.018 0 0 1 225.803 217.975 Tm [(is)-246(pr)18(ovided)-246(as)-246(a)-246(shorth)1(a)-1(n)1(d,)-246(but)-246(is)-246(equivalent)-246(to)-246(a)]TJ 1.02 0 0 1 99.895 206.02 Tm [(call)-293(to)]TJ/F145 9.9626 Tf 1 0 0 1 130.212 206.02 Tm [(v%set\050zero\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 190.721 206.02 Tm [(with)-293(the)]TJ/F145 9.9626 Tf 1 0 0 1 231.423 206.02 Tm [(zero)]TJ/F84 9.9626 Tf 1.02 0 0 1 255.319 206.02 Tm [(constant)-293(having)-292(the)-293(appr)18(opriate)-293(type)-293(and)]TJ 1 0 0 1 99.895 194.064 Tm [(kind.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.408 Td [(ovrlap)]TJ -ET -q -1 0 0 1 180.642 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 183.631 132.281 Td [(index)]TJ +/F75 9.9626 Tf 0 -25.91 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -23.918 Td [(v)]TJ 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(A)-259(list)-259(of)-258(the)-259(overlap)-259(elements)-259(for)-259(the)-258(curr)18(ent)-259(pr)18(ocess,)-261(or)18(ganized)]TJ -38.456 -11.955 Td 
[(in)-250(gr)18(oups)-250(like)-250(the)-250(pr)18(evious)-250(vector:)]TJ +/F84 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)74(,)-250(with)-250(updated)-250(entries)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G - 141.968 -29.888 Td [(10)]TJ +/F84 9.9626 Tf 110.536 -41.843 Td [(27)]TJ 0 g 0 G ET endstream endobj -905 0 obj +1134 0 obj << -/Length 5988 +/Length 4797 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F54 9.9626 Tf 134.267 706.129 Td [(1.)]TJ +/F75 9.9626 Tf 150.705 706.129 Td [(3.3.5)-1000(get)]TJ +ET +q +1 0 0 1 195.029 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 198.017 706.129 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ 0 g 0 G - [-500(Pr)18(ocess)-250(identi\002er;)]TJ 0 g 0 G - 0 -16.693 Td [(2.)]TJ +/F145 9.9626 Tf -47.312 -19.66 Td [(extv)-525(=)-525(v%get_vect\050[n]\051)]TJ 0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +/F75 9.9626 Tf 0 -22.994 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -16.694 Td [(3.)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +/F75 9.9626 Tf -29.439 -21.362 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -16.693 Td [(4.)]TJ 0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ + 0 -21.361 Td [(v)]TJ 0 g 0 G - 0 -16.693 Td [(5.)]TJ +/F84 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ + -56.338 -33.316 Td [(n)]TJ 0 g 0 G - [(.)]TJ +/F84 9.9626 Tf 11.068 0 Td [(Size)-250(to)-250(be)-250(r)18(eturned)]TJ 13.838 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ 
-53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(;)-250(default:)-310(entir)18(e)-250(vector)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(ovr)]TJ -ET -q -1 0 0 1 115.447 596.693 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 118.436 596.494 Td [(mst)]TJ -ET -q -1 0 0 1 135.631 596.693 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 138.62 596.494 Td [(idx)]TJ +/F75 9.9626 Tf -89.872 -34.95 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(A)-331(list)-332(to)-331(r)18(etrieve)-331(the)-332(value)-331(of)-331(each)-332(overlap)-331(element)-331(fr)18(om)-332(the)-331(r)18(e-)]TJ -33.185 -11.956 Td [(spective)-250(master)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ 0 g 0 G - [(.)]TJ + 0 -21.361 Td [(Function)-250(value)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(ovrlap)]TJ -ET -q -1 0 0 1 129.833 551.351 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 132.822 551.152 Td [(elem)]TJ +/F84 9.9626 Tf 1.02 0 0 1 223.093 495.259 Tm [(An)-283(allocatable)-283(array)-283(holding)-283(a)-282(copy)-283(of)-283(the)-283(dense)-283(vector)-283(con-)]TJ 1.002 0 0 1 175.611 483.304 Tm [(tents.)-310(If)-249(the)-250(ar)18(gument)]TJ/F78 9.9626 Tf 1 0 0 1 273.652 483.304 Tm [(n)]TJ/F84 9.9626 Tf 1.002 0 0 1 281.807 483.304 Tm [(is)-250(speci\002ed,)-249(the)-250(size)-250(of)-249(the)-250(r)18(eturned)-249(array)-250(equals)]TJ 1.02 0 0 1 175.611 471.349 Tm [(the)-299(minimum)-299(between)]TJ/F78 9.9626 Tf 1 0 0 1 281.747 471.349 Tm [(n)]TJ/F84 9.9626 Tf 1.02 0 0 1 290.45 471.349 Tm [(and)-299(the)-299(internal)-299(size)-299(of)-300(the)-299(vector)73(,)-313(or)-299(0)-299(if)]TJ/F78 9.9626 Tf 1 0 0 1 478.447 471.349 Tm [(n)]TJ/F84 9.9626 Tf 1.02 0 0 1 487.15 471.349 Tm [(is)]TJ 0.981 0 0 1 175.611 459.394 Tm 
[(negative;)-256(otherwise,)-255(the)-255(size)-256(of)-255(the)-255(array)-256(is)-255(the)-255(same)-256(as)-255(the)-255(internal)-256(size)-255(of)]TJ 1 0 0 1 175.611 447.438 Tm [(the)-250(vector)74(.)]TJ/F75 9.9626 Tf -24.906 -29.183 Td [(3.3.6)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ 0 g 0 G -/F54 9.9626 Tf 27.118 0 Td [(For)-250(all)-250(overlap)-250(points)-250(belonging)-250(to)-250(th)-250(ecurr)18(ent)-250(pr)18(ocess:)]TJ 0 g 0 G - -25.673 -21.431 Td [(1.)]TJ +/F145 9.9626 Tf 0 -19.659 Td [(call)-1050(x%clone\050y,info\051)]TJ 0 g 0 G - [-500(Overlap)-250(point)-250(index;)]TJ +/F75 9.9626 Tf 0 -22.995 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -16.693 Td [(2.)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(Number)-250(of)-250(pr)18(ocesses)-250(sharing)-250(that)-250(overlap)-250(points;)]TJ +/F75 9.9626 Tf -29.439 -21.361 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -16.694 Td [(3.)]TJ 0 g 0 G - [-500(Index)-250(of)-250(a)-250(\223master)-74(\224)-250(pr)18(ocess:)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(an)-250(allocatable)-250(integer)-250(array)-250(of)-250(rank)-250(two.)]TJ + 0 -21.362 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(dense)-250(vector)74(.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.917 -34.95 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.361 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(bnd)]TJ +/F84 9.9626 Tf 10.132 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +0 g 0 G +/F75 9.9626 Tf -10.132 -21.361 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F75 11.9552 Tf -23.8 -31.176 Td [(3.4)-1000(Preconditioner)-250(data)-250(structure)]TJ/F84 9.9626 Tf 1.02 0 0 1 150.705 192.416 Tm [(Our)-329(base)-328(library)-329(of)18(fers)-328(support)-329(for)-328(simple)-329(well)-328(known)-329(pr)18(econditioners)-329(like)]TJ 1 0 0 1 150.705 180.46 Tm 
[(Diagonal)-250(Scaling)-250(or)-250(Block)-250(Jacobi)-250(with)-250(incomplete)-250(factorization)-250(ILU\0500\051.)]TJ 1.02 0 0 1 165.649 168.146 Tm [(A)-258(pr)18(econditioner)-257(is)-258(held)-257(in)-258(the)]TJ/F145 9.9626 Tf 1 0 0 1 305.999 168.146 Tm [(psb)]TJ ET q -1 0 0 1 118.755 453.671 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 322.318 168.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 325.456 168.146 Td [(Tprec)]TJ +ET +q +1 0 0 1 352.235 168.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 121.743 453.472 Td [(elem)]TJ +/F145 9.9626 Tf 355.374 168.146 Td [(type)]TJ/F84 9.9626 Tf 1.02 0 0 1 378.912 168.146 Tm [(data)-258(str)8(uctur)18(e)-258(r)18(eported)-257(in)]TJ 0.994 0 0 1 150.705 156.191 Tm [(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-252(4)]TJ +0 g 0 G + [(.)-315(The)]TJ/F145 9.9626 Tf 1 0 0 1 208.773 156.191 Tm [(psb_Tprec_type)]TJ/F84 9.9626 Tf 0.994 0 0 1 284.498 156.191 Tm [(data)-252(type)-253(may)-252(contain)-253(a)-252(simple)-253(pr)18(econditioning)]TJ 1.02 0 0 1 150.705 144.236 Tm [(matrix)-255(with)-254(the)-255(associated)-255(communication)-254(descriptor)72(.)-333(The)-254(internal)-255(pr)18(econdi-)]TJ 1.02 0 0 1 150.705 132.281 Tm [(tioner)-249(is)-250(allocated)-249(appr)17(opriately)-249(with)-250(the)-249(dynamic)-250(type)-249(corr)17(esponding)-249(to)-250(the)]TJ 1 0 0 1 150.705 120.326 Tm [(desir)18(ed)-250(pr)18(econditioner)74(.)]TJ +0 g 0 G + 166.874 -29.888 Td [(28)]TJ +0 g 0 G +ET + +endstream +endobj +1140 0 obj +<< +/Length 4357 +>> +stream +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 27.119 0 Td [(A)-235(list)-235(of)-235(all)-235(boundary)-235(points,)-238(i.e.)-305(points)-235(that)-235(have)-235(a)-235(connection)-235(with)]TJ -24.06 -11.955 Td [(other)-250(pr)18(ocesses.)]TJ -24.907 -21.055 Td [(The)-393(Fortran)-394(2003)-393(declaration)-394(for)]TJ/F59 9.9626 Tf 151.232 0 Td [(psb_desc_type)]TJ/F54 9.9626 Tf 71.913 0 Td 
[(str)8(uctur)18(es)-393(is)-394(as)-393(follows:)-597(A)]TJ 0 g 0 G 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET q -1 0 0 1 99.895 294.955 cm -0 0 343.711 104.608 re f +1 0 0 1 99.895 671.26 cm +0 0 343.711 38.854 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 102.884 388.902 Td [(type)]TJ +/F233 8.9664 Tf 112.299 699.454 Td [(type)]TJ +0 g 0 G + [-525(psb_Tprec_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_desc_type)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 18.829 -10.959 Td [(class)]TJ + 9.414 -10.959 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_prec_type\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\050psb_indx_map\051,)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG [-525(allocatable)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG [-525(::)]TJ 0 g 0 G - [-525(indxmap)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ + [-525(prec)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(v_halo_index)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.958 Td [(type)]TJ + -9.414 -10.959 Td [(end)-525(type)]TJ 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ + [-525(psb_Tprec_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-525(v_ext_index)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(type)]TJ 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F84 9.9626 Tf 1.845 -41.429 Td [(Listing)-250(4:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(pr)18(econditioner)74(.)]TJ/F75 11.9552 Tf -14.249 -32.698 Td [(3.5)-1000(Heap)-250(data)-250(structure)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.507 
584.445 Tm [(Among)-267(the)-266(tools)-267(r)17(outine)1(s)-267(of)-267(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-267(6)]TJ 0 g 0 G - [-525(v_ovrlap_index)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(type)]TJ + [(,)-272(we)-267(have)-266(a)-267(number)-267(of)-267(sorting)-266(utilities;)-278(the)]TJ 1 0 0 1 99.895 572.49 Tm [(heap)-250(sort)-250(is)-250(implemented)-250(in)-250(terms)-250(of)-250(heaps)-250(having)-250(the)-250(following)-250(signatur)18(es:)]TJ 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F145 9.9626 Tf 0 -19.925 Td [(psb)]TJ +ET +q +1 0 0 1 116.214 552.764 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 119.352 552.565 Td [(T)]TJ +ET +q +1 0 0 1 125.21 552.764 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 128.348 552.565 Td [(heap)]TJ 0 g 0 G - [-525(v_ovr_mst_idx)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG - 0 -10.959 Td [(integer)]TJ +/F84 9.9626 Tf 1.007 0 0 1 154.251 552.565 Tm [(:)-308(a)-249(heap)-249(containing)-248(elements)-249(of)-248(type)-249(T)74(,)-249(wher)18(e)-248(T)-249(can)-249(be)]TJ/F145 9.9626 Tf 1 0 0 1 396.533 552.565 Tm [(i,s,c,d,z)]TJ/F84 9.9626 Tf -271.731 -11.955 Td [(for)-250(integer)74(,)-250(r)18(eal)-250(and)-250(complex)-250(data;)]TJ 0 g 0 G - [(,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F145 9.9626 Tf -24.907 -19.925 Td [(psb)]TJ +ET +q +1 0 0 1 116.214 520.884 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 119.352 520.685 Td [(T)]TJ +ET +q +1 0 0 1 125.21 520.884 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 128.348 520.685 Td [(idx)]TJ +ET +q +1 0 0 1 144.667 520.884 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 147.805 520.685 Td [(heap)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-1050(::)]TJ +/F84 9.9626 Tf 0.982 0 0 1 173.708 520.685 Tm 
[(:)-316(a)-255(heap)-255(containing)-255(elements)-255(of)-255(type)-255(T)76(,)-255(as)-255(above,)-255(together)-255(with)]TJ 1 0 0 1 124.802 508.729 Tm [(an)-250(integer)-250(index.)]TJ -24.907 -19.925 Td [(Given)-250(a)-250(heap)-250(object,)-250(the)-250(following)-250(methods)-250(ar)18(e)-250(de\002ned)-250(on)-250(it:)]TJ 0 g 0 G - [-525(ovrlap_elem\050:,:\051)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG - 0 -10.959 Td [(integer)]TJ +/F75 9.9626 Tf 0 -19.925 Td [(init)]TJ 0 g 0 G - [(,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F84 9.9626 Tf 21.021 0 Td [(Initialize)-250(memory;)-250(also)-250(choose)-250(ascending)-250(or)-250(descending)-250(or)18(der;)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-1050(::)]TJ +/F75 9.9626 Tf -21.021 -19.925 Td [(howmany)]TJ 0 g 0 G - [-525(bnd_elem\050:\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -18.829 -10.959 Td [(end)-525(type)]TJ +/F84 9.9626 Tf 50.371 0 Td [(Curr)18(ent)-250(heap)-250(occupancy;)]TJ 0 g 0 G - [-525(psb_desc_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F75 9.9626 Tf -50.371 -19.926 Td [(insert)]TJ +0 g 0 G +/F84 9.9626 Tf 30.595 0 Td [(Add)-250(an)-250(item)-250(\050or)-250(an)-250(item)-250(and)-250(its)-250(index\051;)]TJ 0 g 0 G +/F75 9.9626 Tf -30.595 -19.925 Td [(get)]TJ +ET +q +1 0 0 1 114.331 409.302 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 117.32 409.103 Td [(\002rst)]TJ 0 g 0 G -/F54 9.9626 Tf -2.989 -41.43 Td [(Listing)-259(1:)-327(The)-259(PSBLAS)-259(de\002ned)-259(data)-258(type)-259(that)-259(contains)-259(the)-258(communication)-259(de-)]TJ 0 -11.955 Td [(scriptor)74(.)]TJ 0 -25.259 Td [(communication)-319(descriptor)-320(associated)-319(with)-319(a)-320(sparse)-319(matrix)-320(has)-319(a)-319(state,)-337(which)]TJ 0 -11.955 Td [(can)-250(take)-250(the)-250(following)-250(values:)]TJ +/F84 9.9626 Tf 22.685 0 Td [(Remove)-250(and)-250(r)18(eturn)-250(the)-250(\002rst)-250(element;)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.054 Td 
[(Build:)]TJ +/F75 9.9626 Tf -40.11 -19.925 Td [(dump)]TJ 0 g 0 G -/F54 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.956 Td [(this)-220(state)-220(it)-220(is)-220(possible)-220(to)-220(add)-220(communication)-220(r)18(equir)18(ements)-220(among)-220(dif)18(fer)18(-)]TJ 0 -11.955 Td [(ent)-250(pr)18(ocesses.)]TJ +/F84 9.9626 Tf 32.1 0 Td [(Print)-250(on)-250(\002le;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(Assembled:)]TJ +/F75 9.9626 Tf -32.1 -19.926 Td [(free)]TJ 0 g 0 G -/F54 9.9626 Tf 58.381 0 Td [(State)-308(enter)18(ed)-308(after)-308(the)-309(assembly;)-337(computations)-308(using)-308(the)-308(associ-)]TJ -33.474 -11.955 Td [(ated)-310(sparse)-310(matrix,)-325(such)-310(as)-310(matrix-vector)-309(pr)18(oducts,)-325(ar)18(e)-310(only)-310(possible)-310(in)]TJ 0 -11.955 Td [(this)-250(state.)]TJ +/F84 9.9626 Tf 22.695 0 Td [(Release)-250(memory)111(.)]TJ 0.98 0 0 1 99.587 349.327 Tm [(These)-206(objects)-206(ar)19(e)-206(used)-206(to)-206(implement)-206(the)-206(factorization)-205(and)-206(appr)18(oximate)-206(inversion)]TJ 1 0 0 1 99.895 337.372 Tm [(algorithms.)]TJ 0 g 0 G - 141.968 -29.888 Td [(11)]TJ + 166.875 -246.934 Td [(29)]TJ 0 g 0 G ET endstream endobj -921 0 obj +1144 0 obj << -/Length 4957 +/Length 159 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.1.1)-1000(Descriptor)-250(Methods)]TJ 0 -19 Td [(3.1.2)-1000(get)]TJ +/F75 14.3462 Tf 150.705 705.784 Td [(4)-1000(Computational)-250(routines)]TJ +0 g 0 G +/F84 9.9626 Tf 166.874 -615.346 Td [(30)]TJ +0 g 0 G ET -q -1 0 0 1 195.029 687.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1155 0 obj +<< +/Length 7647 +>> +stream +0 g 0 G +0 g 0 G BT -/F51 9.9626 Tf 198.017 687.129 Td [(local)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(4.1)-1000(psb)]TJ ET q -1 0 0 1 220.194 687.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 
147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 223.183 687.129 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(rows)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -72.478 -19 Td [(nr)-525(=)-525(desc%get_local_rows\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.587 686.94 Tm [(This)-302(subr)18(outine)-302(is)-302(an)-301(interface)-302(to)-302(the)-302(computatio)1(nal)-302(kernel)-302(for)-302(dense)-302(matrix)]TJ 1 0 0 1 99.895 674.985 Tm [(sum:)]TJ/F78 9.9626 Tf 143.149 -12.304 Td [(y)]TJ/F190 10.3811 Tf 7.998 0 Td [(\040)]TJ/F147 9.9626 Tf 13.398 0 Td [(a)]TJ/F78 9.9626 Tf 7.615 0 Td [(x)]TJ/F192 10.3811 Tf 7.267 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -175.406 -18.398 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.001 Td [(On)-250(Entry)]TJ + [-525(psb_geaxpby\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20 Td [(desc)]TJ + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info\051)]TJ 0 g 0 G - 0 -20 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-399(number)-398(of)-399(local)-398(r)18(ows,)-436(i.e.)-756(the)-398(number)-399(of)-399(r)18(ows)-398(owned)]TJ -47.87 -11.956 Td 
[(by)-350(the)-349(curr)18(ent)-350(pr)18(ocess;)-399(as)-350(explained)-350(in)]TJ -0 0 1 rg 0 0 1 RG - [-349(1)]TJ 0 g 0 G - [(,)-375(it)-350(is)-349(equal)-350(to)]TJ/F83 10.3811 Tf 249.705 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.433 0 Td [(+)]TJ/F83 10.3811 Tf 10.624 0 Td [(j)-23(B)]TJ/F52 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.003 0 Td [(.)-609(The)]TJ -293.569 -11.955 Td [(r)18(eturned)-250(value)-250(is)-250(speci\002c)-250(to)-250(the)-250(calling)-250(pr)18(ocess.)]TJ/F51 9.9626 Tf -24.906 -27.247 Td [(3.1.3)-1000(get)]TJ ET q -1 0 0 1 195.029 489.311 cm +1 0 0 1 176.928 629.682 cm +[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S +Q +BT +/F78 9.9626 Tf 183.199 621.114 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(,)]TJ/F78 9.9626 Tf 5.106 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(,)]TJ/F147 9.9626 Tf 5.106 0 Td [(a)]TJ/F84 9.9626 Tf 5.385 0 Td [(,)]TJ/F147 9.9626 Tf 5.355 0 Td [(b)]TJ/F75 9.9626 Tf 89.358 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 176.928 617.328 cm +[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S +Q +BT +/F84 9.9626 Tf 182.905 608.761 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 320.139 608.96 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 323.128 608.761 Td [(geaxpby)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 320.139 597.005 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 323.128 596.805 Td [(geaxpby)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 320.139 585.05 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 198.017 489.112 Td [(local)]TJ +/F84 9.9626 Tf 323.128 584.85 Td [(geaxpby)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 220.194 489.311 cm +1 0 0 1 320.139 573.094 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 223.183 489.112 Td 
[(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ +/F84 9.9626 Tf 323.128 572.895 Td [(geaxpby)]TJ +ET +q +1 0 0 1 176.928 569.109 cm +[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S +Q 0 g 0 G +BT +/F84 9.9626 Tf 229.958 540.731 Td [(T)92(able)-250(1:)-310(Data)-250(types)]TJ 0 g 0 G -/F59 9.9626 Tf -72.478 -19 Td [(nc)-525(=)-525(desc%get_local_cols\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F75 9.9626 Tf -130.063 -35.05 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.828 -20.39 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -20.001 Td [(desc)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + 0 -20.391 Td [(alpha)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F147 9.9626 Tf 44.368 0 Td [(a)]TJ/F84 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -20 Td [(Function)-250(value)]TJ +/F75 9.9626 Tf -24.907 -20.391 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-320(number)-320(of)-321(local)-320(cols,)-338(i.e.)-521(the)-320(number)-320(of)-321(indices)-320(used)-320(by)]TJ -47.87 -11.955 Td 
[(the)-322(curr)18(ent)-322(pr)18(ocess,)-340(including)-322(both)-322(local)-322(and)-322(halo)-322(indices;)-358(as)-322(explained)]TJ 0 -11.956 Td [(in)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 348.869 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ 0 0 1 rg 0 0 1 RG - [-284(1)]TJ -0 g 0 G - [(,)-294(i)1(t)-285(is)-284(equal)-285(to)]TJ/F83 10.3811 Tf 79.58 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.193 0 Td [(+)]TJ/F83 10.3811 Tf 10.383 0 Td [(j)-24(B)]TJ/F52 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.192 0 Td [(+)]TJ/F83 10.3811 Tf 10.383 0 Td [(j)-24(H)]TJ/F52 7.5716 Tf 12.052 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.004 0 Td [(.)-413(The)-285(r)18(eturned)-284(value)-285(is)-284(speci\002c)-285(to)-284(the)]TJ -153.464 -11.955 Td [(calling)-250(pr)18(ocess.)]TJ/F51 9.9626 Tf -24.907 -27.247 Td [(3.1.4)-1000(get)]TJ +/F145 9.9626 Tf 1 0 0 1 369.545 348.869 Tm [(psb)]TJ ET q -1 0 0 1 195.029 279.339 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 385.864 349.068 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 198.017 279.14 Td [(global)]TJ +/F145 9.9626 Tf 389.002 348.869 Td [(T)]TJ ET q -1 0 0 1 227.397 279.339 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 394.86 349.068 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 230.386 279.14 Td 
[(rows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(rows)]TJ -0 g 0 G +/F145 9.9626 Tf 397.998 348.869 Td [(vect)]TJ +ET +q +1 0 0 1 419.547 349.068 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 422.685 348.869 Td [(type)]TJ 0 g 0 G -/F59 9.9626 Tf -79.681 -19 Td [(nr)-525(=)-525(desc%get_global_rows\050\051)]TJ +/F84 9.9626 Tf 1.02 0 0 1 124.802 336.914 Tm [(containing)-270(numbers)-269(of)-270(type)-270(speci\002ed)-270(in)-269(T)90(able)]TJ +0 0 1 rg 0 0 1 RG + [-270(1)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ + [(.)-378(The)-270(rank)-269(of)]TJ/F78 9.9626 Tf 1 0 0 1 399.71 336.914 Tm [(x)]TJ/F84 9.9626 Tf 1.02 0 0 1 407.657 336.914 Tm [(must)-270(be)]TJ 1 0 0 1 124.802 324.958 Tm [(the)-250(same)-250(of)]TJ/F78 9.9626 Tf 52.946 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F75 9.9626 Tf -82.959 -20.39 Td [(beta)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F147 9.9626 Tf 44.617 0 Td [(b)]TJ/F84 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -20.001 Td [(desc)]TJ +/F75 9.9626 Tf -24.907 -20.391 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F84 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 191.753 0 Td [(y)]TJ/F84 
9.9626 Tf 5.106 0 Td [(.)]TJ -182.473 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 188.537 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 369.545 188.537 Tm [(psb)]TJ +ET +q +1 0 0 1 385.864 188.736 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 389.002 188.537 Td [(T)]TJ +ET +q +1 0 0 1 394.86 188.736 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 397.998 188.537 Td [(vect)]TJ +ET +q +1 0 0 1 419.547 188.736 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 422.685 188.537 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf 1.014 0 0 1 124.802 176.581 Tm [(containing)-247(numbers)-247(of)-246(the)-247(type)-247(indicated)-247(in)-247(T)91(able)]TJ +0 0 1 rg 0 0 1 RG + [-247(1)]TJ 0 g 0 G + [(.)-306(The)-247(rank)-247(of)]TJ/F78 9.9626 Tf 1 0 0 1 413.419 176.581 Tm [(y)]TJ/F84 9.9626 Tf 1.014 0 0 1 421.018 176.581 Tm [(must)]TJ 1 0 0 1 124.802 164.626 Tm [(be)-250(the)-250(same)-250(of)]TJ/F78 9.9626 Tf 65.887 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ 0 g 0 G - 0 -20 Td [(Function)-250(value)]TJ +/F75 9.9626 Tf -96 -20.39 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.397 144.236 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-351(number)-351(of)-350(global)-351(r)18(ows,)-376(i.e.)-613(the)-351(size)-351(of)-350(the)-351(global)-351(index)]TJ -47.87 -11.955 Td [(space.)]TJ +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td 
[(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ 0 g 0 G - 141.967 -29.888 Td [(12)]TJ +/F84 9.9626 Tf 115.189 -29.888 Td [(31)]TJ 0 g 0 G ET endstream endobj -925 0 obj +1163 0 obj << -/Length 4367 +/Length 2434 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.1.5)-1000(get)]TJ +/F84 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 144.219 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 324.173 694.373 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 147.208 706.129 Td [(global)]TJ +/F145 9.9626 Tf 327.311 694.174 Td [(desc)]TJ ET q -1 0 0 1 176.587 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 348.86 694.373 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 179.576 706.129 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -79.681 -18.974 Td [(nr)-525(=)-525(desc%get_global_cols\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.935 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ -0 g 0 G +/F145 9.9626 Tf 351.998 694.174 Td [(type)]TJ 0 g 0 G - 0 -19.947 Td [(desc)]TJ -0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -222.214 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.947 Td [(Function)-250(value)]TJ + 0 -19.925 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td 
[(The)-242(number)-242(of)-241(global)-242(cols;)-245(usually)-241(this)-242(is)-242(equal)-242(to)-241(the)-242(number)]TJ -47.87 -11.955 Td [(of)-250(global)-250(r)18(ows.)]TJ/F51 9.9626 Tf -24.907 -27.172 Td [(3.1.6)-1000(get)]TJ +/F84 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F78 9.9626 Tf 160.68 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 0.98 0 0 1 175.611 604.51 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-248(an)-247(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 420.354 604.51 Tm [(psb)]TJ ET q -1 0 0 1 144.219 520.607 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 436.673 604.709 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 147.208 520.408 Td [(global)]TJ +/F145 9.9626 Tf 439.811 604.51 Td [(T)]TJ ET q -1 0 0 1 176.587 520.607 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 445.669 604.709 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 179.576 520.408 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -79.681 -18.974 Td [(myidx)-525(=)-525(desc%get_global_indices\050[owned]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.947 Td [(desc)]TJ -0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 
9.9626 Tf 39.292 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -91.287 -31.902 Td [(owned)]TJ -0 g 0 G -/F54 9.9626 Tf 35.975 0 Td [(Choose)-330(if)-329(you)-330(only)-329(want)-330(owned)-330(indices)-329(\050)]TJ/F59 9.9626 Tf 185.766 0 Td [(owned)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.true.)]TJ/F54 9.9626 Tf 62.764 0 Td [(\051)-330(or)-329(also)-330(halo)]TJ -259.598 -11.955 Td [(indices)-250(\050)]TJ/F59 9.9626 Tf 36.911 0 Td [(owned)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 67.995 0 Td [(\051.)-310(Scope:)]TJ/F51 9.9626 Tf 40.328 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -166.813 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(;)-250(default:)]TJ/F59 9.9626 Tf 41.872 0 Td [(.true.)]TJ/F54 9.9626 Tf 31.382 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -163.436 -33.89 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.947 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-277(global)-277(indices,)-284(r)18(eturned)-277(as)-277(an)-277(allocatable)-277(integer)-277(array)-277(of)]TJ -47.87 -11.955 Td [(kind)]TJ/F59 9.9626 Tf 22.814 0 Td [(psb_lpk_)]TJ/F54 9.9626 Tf 44.334 0 Td [(and)-250(rank)-250(1.)]TJ/F51 9.9626 Tf -92.055 -27.171 Td [(3.1.7)-1000(get)]TJ +/F145 9.9626 Tf 448.807 604.51 Td [(vect)]TJ ET q -1 0 0 1 144.219 267.119 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 470.356 604.709 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 147.208 266.92 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -47.313 -18.975 Td [(ctxt)-525(=)-525(desc%get_context\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.947 Td [(desc)]TJ -0 g 0 G -/F54 9.9626 Tf 24.897 0 Td 
[(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F145 9.9626 Tf 473.495 604.51 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -19.947 Td [(Function)-250(value)]TJ +/F75 9.9626 Tf -24.906 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(communication)-250(context.)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 94.098 -29.888 Td [(13)]TJ + 142.356 -434.371 Td [(32)]TJ 0 g 0 G ET endstream endobj -829 0 obj +1076 0 obj << /Type /ObjStm /N 100 -/First 875 -/Length 9086 ->> -stream -828 0 818 69 819 217 824 364 825 421 19 478 821 534 835 681 832 823 833 970 -837 1117 834 1173 840 1266 842 1380 23 1437 843 1493 844 1550 845 1607 846 1664 847 1721 -848 1778 849 1835 850 1892 839 1949 853 2068 838 2202 855 2351 856 2407 857 2463 858 2519 -859 2575 860 2631 861 2687 862 2743 863 2799 864 2855 865 2911 866 2967 867 3023 868 3079 -869 3135 852 3192 874 3285 851 3427 872 3579 876 3726 27 3783 877 3839 878 3896 879 3951 -880 4007 881 4064 882 4121 31 4178 873 4234 885 4353 883 4487 887 4634 35 4690 39 4745 -888 4800 884 4856 893 4949 889 5099 890 5245 891 5397 895 5549 896 5606 897 5663 898 5720 -899 5777 900 5834 892 5891 904 5971 901 6113 902 6265 906 6417 907 6473 908 6529 909 6585 -910 6641 911 6697 912 6753 913 6809 
914 6865 916 6921 903 6977 920 7083 917 7225 918 7372 -922 7518 43 7575 47 7631 51 7687 55 7743 919 7799 924 7931 926 8045 59 8101 63 8156 -% 828 0 obj -<< -/BaseFont /Times-Roman -/Type /Font -/Subtype /Type1 ->> -% 818 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [327.281 638.309 334.255 647.315] -/A << /S /GoTo /D (cite.2007c) >> +/First 952 +/Length 9737 >> -% 819 0 obj +stream +107 0 1073 57 1069 116 1078 211 1080 329 1081 388 1075 447 111 506 115 564 1077 622 +1083 732 1085 850 119 908 123 965 1086 1022 127 1080 1082 1136 1089 1231 1091 1349 131 1408 +135 1466 139 1524 1088 1582 1093 1677 1095 1795 143 1853 147 1910 1092 1967 1097 2062 1099 2180 +151 2239 155 2297 1100 2355 1101 2414 1096 2473 1103 2568 1105 2686 159 2744 163 2801 167 2858 +1102 2915 1107 3010 1109 3128 171 3187 1106 3244 1113 3339 1110 3487 1111 3633 1115 3781 175 3839 +179 3896 183 3952 187 4008 1116 4065 1112 4124 1120 4219 1122 4337 1118 4396 191 4455 195 4513 +199 4571 1119 4629 1126 4739 1123 4887 1124 5033 1128 5178 204 5236 1125 5293 1133 5388 1130 5527 +1135 5675 208 5734 212 5792 216 5849 1136 5907 1132 5966 1139 6074 1131 6213 1141 6359 1137 6417 +220 6475 1138 6532 1143 6642 1145 6760 224 6819 1142 6877 1154 6958 1146 7142 1147 7288 1148 7432 +1149 7578 1150 7724 1151 7868 1156 8013 228 8071 1129 8128 1153 8186 1162 8337 1152 8494 1159 8641 +% 107 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [337.243 638.21 344.217 647.166] -/A << /S /GoTo /D (cite.2007d) >> +/D [1070 0 R /XYZ 99.895 248.209 null] >> -% 824 0 obj +% 1073 0 obj << -/D [822 0 R /XYZ 149.705 753.953 null] +/D [1070 0 R /XYZ 119.097 217.656 null] >> -% 825 0 obj +% 1069 0 obj << -/D [822 0 R /XYZ 150.705 353.614 null] +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 19 0 obj +% 1078 0 obj << -/D [822 0 R /XYZ 150.705 270.035 null] +/Type /Page +/Contents 1079 0 R +/Resources 1077 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 
1052 0 R >> -% 821 0 obj +% 1080 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R >> -/XObject << /Im3 820 0 R >> -/ProcSet [ /PDF /Text ] +/D [1078 0 R /XYZ 149.705 753.953 null] >> -% 835 0 obj +% 1081 0 obj << -/Type /Page -/Contents 836 0 R -/Resources 834 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 832 0 R 833 0 R ] +/D [1078 0 R /XYZ 454.138 671.491 null] >> -% 832 0 obj +% 1075 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [404.739 354.196 411.713 366.255] -/A << /S /GoTo /D (section.3) >> +/D [1078 0 R /XYZ 150.705 571.789 null] >> -% 833 0 obj +% 111 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [172.593 318.033 179.567 330.093] -/A << /S /GoTo /D (section.6) >> +/D [1078 0 R /XYZ 150.705 281.021 null] >> -% 837 0 obj +% 115 0 obj << -/D [835 0 R /XYZ 98.895 753.953 null] +/D [1078 0 R /XYZ 150.705 262.296 null] >> -% 834 0 obj +% 1077 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F233 1044 0 R >> /ProcSet [ /PDF /Text ] >> -% 840 0 obj +% 1083 0 obj << /Type /Page -/Contents 841 0 R -/Resources 839 0 R +/Contents 1084 0 R +/Resources 1082 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R +/Parent 1087 0 R >> -% 842 0 obj +% 1085 0 obj << -/D [840 0 R /XYZ 149.705 753.953 null] +/D [1083 0 R /XYZ 98.895 753.953 null] >> -% 23 0 obj +% 119 0 obj << -/D [840 0 R /XYZ 150.705 716.092 null] +/D [1083 0 R /XYZ 99.895 716.092 null] >> -% 843 0 obj +% 123 0 obj << -/D [840 0 R /XYZ 150.705 282.521 null] +/D [1083 0 R /XYZ 99.895 540.892 null] >> -% 844 0 obj +% 1086 0 obj << -/D [840 0 R /XYZ 150.705 261.733 null] +/D [1083 0 R /XYZ 99.895 358.382 null] >> -% 845 0 obj +% 127 0 obj << -/D [840 0 R /XYZ 150.705 240.946 null] +/D [1083 0 R /XYZ 99.895 300.51 null] >> -% 846 0 obj +% 1082 0 obj << -/D [840 0 R /XYZ 150.705 220.159 null] +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> 
+/ProcSet [ /PDF /Text ] >> -% 847 0 obj +% 1089 0 obj << -/D [840 0 R /XYZ 150.705 188.012 null] +/Type /Page +/Contents 1090 0 R +/Resources 1088 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1087 0 R >> -% 848 0 obj +% 1091 0 obj << -/D [840 0 R /XYZ 150.705 167.072 null] +/D [1089 0 R /XYZ 149.705 753.953 null] >> -% 849 0 obj +% 131 0 obj << -/D [840 0 R /XYZ 150.705 148.646 null] +/D [1089 0 R /XYZ 150.705 716.092 null] >> -% 850 0 obj +% 135 0 obj << -/D [840 0 R /XYZ 150.705 132.275 null] +/D [1089 0 R /XYZ 150.705 526.761 null] >> -% 839 0 obj +% 139 0 obj +<< +/D [1089 0 R /XYZ 150.705 326.359 null] +>> +% 1088 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R /F85 814 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 853 0 obj +% 1093 0 obj << /Type /Page -/Contents 854 0 R -/Resources 852 0 R +/Contents 1094 0 R +/Resources 1092 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 838 0 R ] +/Parent 1087 0 R >> -% 838 0 obj +% 1095 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [176.109 690.964 182.386 703.958] -/A << /S /GoTo /D (Hfootnote.3) >> +/D [1093 0 R /XYZ 98.895 753.953 null] +>> +% 143 0 obj +<< +/D [1093 0 R /XYZ 99.895 716.092 null] +>> +% 147 0 obj +<< +/D [1093 0 R /XYZ 99.895 474.131 null] +>> +% 1092 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1097 0 obj +<< +/Type /Page +/Contents 1098 0 R +/Resources 1096 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1087 0 R >> -% 855 0 obj +% 1099 0 obj << -/D [853 0 R /XYZ 98.895 753.953 null] +/D [1097 0 R /XYZ 149.705 753.953 null] >> -% 856 0 obj +% 151 0 obj << -/D [853 0 R /XYZ 99.895 716.092 null] +/D [1097 0 R /XYZ 150.705 716.092 null] >> -% 857 0 obj +% 155 0 obj << -/D [853 0 R /XYZ 99.895 686.784 null] +/D [1097 0 R /XYZ 150.705 412.148 null] >> -% 858 0 obj +% 1100 0 obj << -/D [853 0 R /XYZ 99.895 618.259 null] +/D [1097 0 R /XYZ 150.705 
179.104 null] >> -% 859 0 obj +% 1101 0 obj << -/D [853 0 R /XYZ 99.895 595.952 null] +/D [1097 0 R /XYZ 150.705 145.139 null] >> -% 860 0 obj +% 1096 0 obj << -/D [853 0 R /XYZ 99.895 573.645 null] +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] >> -% 861 0 obj +% 1103 0 obj << -/D [853 0 R /XYZ 99.895 539.978 null] +/Type /Page +/Contents 1104 0 R +/Resources 1102 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1087 0 R >> -% 862 0 obj +% 1105 0 obj << -/D [853 0 R /XYZ 99.895 517.075 null] +/D [1103 0 R /XYZ 98.895 753.953 null] >> -% 863 0 obj +% 159 0 obj << -/D [853 0 R /XYZ 99.895 494.768 null] +/D [1103 0 R /XYZ 99.895 716.092 null] >> -% 864 0 obj +% 163 0 obj << -/D [853 0 R /XYZ 99.895 469.873 null] +/D [1103 0 R /XYZ 99.895 484.709 null] >> -% 865 0 obj +% 167 0 obj << -/D [853 0 R /XYZ 99.895 442.062 null] +/D [1103 0 R /XYZ 99.895 251.325 null] >> -% 866 0 obj +% 1102 0 obj << -/D [853 0 R /XYZ 99.895 412.296 null] +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] >> -% 867 0 obj +% 1107 0 obj << -/D [853 0 R /XYZ 99.895 395.165 null] +/Type /Page +/Contents 1108 0 R +/Resources 1106 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1087 0 R >> -% 868 0 obj +% 1109 0 obj << -/D [853 0 R /XYZ 99.895 377.438 null] +/D [1107 0 R /XYZ 149.705 753.953 null] >> -% 869 0 obj +% 171 0 obj << -/D [853 0 R /XYZ 114.242 139.255 null] +/D [1107 0 R /XYZ 150.705 476.15 null] >> -% 852 0 obj +% 1106 0 obj << -/Font << /F54 586 0 R /F59 812 0 R /F89 870 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 874 0 obj +% 1113 0 obj << /Type /Page -/Contents 875 0 R -/Resources 873 0 R +/Contents 1114 0 R +/Resources 1112 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 851 0 R 872 0 R ] +/Parent 1117 0 R +/Annots [ 1110 0 R 1111 0 R ] >> -% 851 0 obj +% 1110 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [269.647 674.054 284.093 683.464] -/A << 
/S /GoTo /D (subsection.2.3) >> +/Rect [183.073 164.341 190.017 176.4] +/A << /S /GoTo /D (section.6) >> >> -% 872 0 obj +% 1111 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [168.073 254.848 175.046 266.907] -/A << /S /GoTo /D (section.3) >> ->> -% 876 0 obj -<< -/D [874 0 R /XYZ 149.705 753.953 null] ->> -% 27 0 obj -<< -/D [874 0 R /XYZ 150.705 716.092 null] ->> -% 877 0 obj -<< -/D [874 0 R /XYZ 150.705 671.065 null] +/Rect [293.044 128.475 300.117 140.535] +/A << /S /GoTo /D (listing.3) >> >> -% 878 0 obj +% 1115 0 obj << -/D [874 0 R /XYZ 150.705 648.1 null] +/D [1113 0 R /XYZ 98.895 753.953 null] >> -% 879 0 obj +% 175 0 obj << -/D [874 0 R /XYZ 150.705 573.59 null] +/D [1113 0 R /XYZ 99.895 716.092 null] >> -% 880 0 obj +% 179 0 obj << -/D [874 0 R /XYZ 150.705 516.424 null] +/D [1113 0 R /XYZ 99.895 586.94 null] >> -% 881 0 obj +% 183 0 obj << -/D [874 0 R /XYZ 150.705 483.864 null] +/D [1113 0 R /XYZ 99.895 402.59 null] >> -% 882 0 obj +% 187 0 obj << -/D [874 0 R /XYZ 150.705 463.343 null] +/D [1113 0 R /XYZ 99.895 234.114 null] >> -% 31 0 obj +% 1116 0 obj << -/D [874 0 R /XYZ 150.705 408.307 null] +/D [1113 0 R /XYZ 119.612 204.012 null] >> -% 873 0 obj +% 1112 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 885 0 obj +% 1120 0 obj << /Type /Page -/Contents 886 0 R -/Resources 884 0 R +/Contents 1121 0 R +/Resources 1119 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 883 0 R ] +/Parent 1117 0 R >> -% 883 0 obj +% 1122 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [268.105 173.115 275.079 185.175] -/A << /S /GoTo /D (section.6) >> +/D [1120 0 R /XYZ 149.705 753.953 null] >> -% 887 0 obj +% 1118 0 obj << -/D [885 0 R /XYZ 98.895 753.953 null] +/D [1120 0 R /XYZ 150.705 446.997 null] >> -% 35 0 obj +% 191 0 obj << -/D [885 0 R /XYZ 99.895 716.092 null] +/D [1120 0 R 
/XYZ 150.705 387.147 null] >> -% 39 0 obj +% 195 0 obj << -/D [885 0 R /XYZ 99.895 279.545 null] +/D [1120 0 R /XYZ 150.705 370.604 null] >> -% 888 0 obj +% 199 0 obj << -/D [885 0 R /XYZ 342.47 236.897 null] +/D [1120 0 R /XYZ 150.705 194.093 null] >> -% 884 0 obj +% 1119 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F233 1044 0 R >> /ProcSet [ /PDF /Text ] >> -% 893 0 obj +% 1126 0 obj << /Type /Page -/Contents 894 0 R -/Resources 892 0 R +/Contents 1127 0 R +/Resources 1125 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 889 0 R 890 0 R 891 0 R ] +/Parent 1117 0 R +/Annots [ 1123 0 R 1124 0 R ] >> -% 889 0 obj +% 1123 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [482.63 399.338 489.604 411.398] -/A << /S /GoTo /D (section.6) >> +/Rect [378.159 383.557 385.133 395.616] +/A << /S /GoTo /D (table.1) >> >> -% 890 0 obj +% 1124 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [354.987 193.202 369.432 205.262] -/A << /S /GoTo /D (subsection.3.3) >> +/Rect [378.159 240.08 385.133 252.139] +/A << /S /GoTo /D (table.1) >> >> -% 891 0 obj +% 1128 0 obj +<< +/D [1126 0 R /XYZ 98.895 753.953 null] +>> +% 204 0 obj +<< +/D [1126 0 R /XYZ 99.895 610.712 null] +>> +% 1125 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1133 0 obj +<< +/Type /Page +/Contents 1134 0 R +/Resources 1132 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1117 0 R +/Annots [ 1130 0 R ] +>> +% 1130 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [354.987 148.883 369.432 160.943] -/A << /S /GoTo /D (subsection.3.3) >> ->> -% 895 0 obj -<< -/D [893 0 R /XYZ 149.705 753.953 null] +/Rect [178.153 152.385 185.097 164.445] +/A << /S /GoTo /D (listing.4) >> >> -% 896 0 obj +% 1135 0 obj << -/D [893 0 R /XYZ 150.705 293.402 null] +/D [1133 0 R /XYZ 149.705 753.953 null] >> -% 897 0 obj +% 208 0 obj << -/D [893 0 R 
/XYZ 150.705 278.496 null] +/D [1133 0 R /XYZ 150.705 716.092 null] >> -% 898 0 obj +% 212 0 obj << -/D [893 0 R /XYZ 150.705 261.039 null] +/D [1133 0 R /XYZ 150.705 430.41 null] >> -% 899 0 obj +% 216 0 obj << -/D [893 0 R /XYZ 150.705 244.857 null] +/D [1133 0 R /XYZ 150.705 226.203 null] >> -% 900 0 obj +% 1136 0 obj << -/D [893 0 R /XYZ 150.705 228.675 null] +/D [1133 0 R /XYZ 305.999 168.146 null] >> -% 892 0 obj +% 1132 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 904 0 obj +% 1139 0 obj << /Type /Page -/Contents 905 0 R -/Resources 903 0 R +/Contents 1140 0 R +/Resources 1138 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 901 0 R 902 0 R ] ->> -% 901 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [304.177 614.119 318.623 626.179] -/A << /S /GoTo /D (subsection.3.3) >> +/Parent 1117 0 R +/Annots [ 1131 0 R ] >> -% 902 0 obj +% 1131 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [304.177 568.778 318.623 580.837] -/A << /S /GoTo /D (subsection.3.3) >> ->> -% 906 0 obj -<< -/D [904 0 R /XYZ 98.895 753.953 null] ->> -% 907 0 obj -<< -/D [904 0 R /XYZ 99.895 716.092 null] ->> -% 908 0 obj -<< -/D [904 0 R /XYZ 99.895 702.226 null] +/Rect [246.566 580.64 253.64 592.699] +/A << /S /GoTo /D (section.6) >> >> -% 909 0 obj +% 1141 0 obj << -/D [904 0 R /XYZ 99.895 684.257 null] +/D [1139 0 R /XYZ 98.895 753.953 null] >> -% 910 0 obj +% 1137 0 obj << -/D [904 0 R /XYZ 99.895 667.564 null] +/D [1139 0 R /XYZ 99.895 665.282 null] >> -% 911 0 obj +% 220 0 obj << -/D [904 0 R /XYZ 99.895 650.871 null] +/D [1139 0 R /XYZ 99.895 613.372 null] >> -% 912 0 obj +% 1138 0 obj << -/D [904 0 R /XYZ 99.895 541.236 null] +/Font << /F233 1044 0 R /F84 687 0 R /F75 685 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 913 0 obj +% 1143 0 obj << -/D [904 0 R /XYZ 99.895 524.542 null] +/Type /Page +/Contents 1144 0 R 
+/Resources 1142 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1117 0 R >> -% 914 0 obj +% 1145 0 obj << -/D [904 0 R /XYZ 99.895 507.849 null] +/D [1143 0 R /XYZ 149.705 753.953 null] >> -% 916 0 obj +% 224 0 obj << -/D [904 0 R /XYZ 99.895 288.977 null] +/D [1143 0 R /XYZ 150.705 716.092 null] >> -% 903 0 obj +% 1142 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F94 915 0 R >> +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 920 0 obj +% 1154 0 obj << /Type /Page -/Contents 921 0 R -/Resources 919 0 R +/Contents 1155 0 R +/Resources 1153 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 917 0 R 918 0 R ] +/Parent 1158 0 R +/Annots [ 1146 0 R 1147 0 R 1148 0 R 1149 0 R 1150 0 R 1151 0 R ] >> -% 917 0 obj +% 1146 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [352.861 524.509 359.835 537.101] -/A << /S /GoTo /D (section.1) >> +/Rect [378.159 413.274 385.133 425.334] +/A << /S /GoTo /D (table.1) >> >> -% 918 0 obj +% 1147 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [186.147 314.537 193.12 327.129] -/A << /S /GoTo /D (section.1) >> +/Rect [368.549 345.063 444.603 357.123] +/A << /S /GoTo /D (vdata) >> >> -% 922 0 obj +% 1148 0 obj << -/D [920 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [332.133 333.108 339.206 345.168] +/A << /S /GoTo /D (table.1) >> >> -% 43 0 obj +% 1149 0 obj << -/D [920 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [378.159 252.942 385.133 265.002] +/A << /S /GoTo /D (table.1) >> >> -% 47 0 obj +% 1150 0 obj << -/D [920 0 R /XYZ 150.705 696.532 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 184.731 444.603 196.791] +/A << /S /GoTo /D (vdata) >> >> -% 51 0 obj +% 1151 0 obj << -/D [920 0 R /XYZ 150.705 498.276 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [347.816 172.776 354.86 184.835] 
+/A << /S /GoTo /D (table.1) >> >> -% 55 0 obj +% 1156 0 obj << -/D [920 0 R /XYZ 150.705 288.305 null] +/D [1154 0 R /XYZ 98.895 753.953 null] >> -% 919 0 obj +% 228 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/D [1154 0 R /XYZ 99.895 716.092 null] >> -% 924 0 obj +% 1129 0 obj << -/Type /Page -/Contents 925 0 R -/Resources 923 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/D [1154 0 R /XYZ 99.895 558.947 null] >> -% 926 0 obj +% 1153 0 obj << -/D [924 0 R /XYZ 98.895 753.953 null] +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F147 1157 0 R /F192 942 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 59 0 obj +% 1162 0 obj << -/D [924 0 R /XYZ 99.895 716.092 null] +/Type /Page +/Contents 1163 0 R +/Resources 1161 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1158 0 R +/Annots [ 1152 0 R 1159 0 R 1160 0 R ] >> -% 63 0 obj +% 1152 0 obj << -/D [924 0 R /XYZ 99.895 529.559 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 690.368 373.916 702.428] +/A << /S /GoTo /D (descdata) >> >> - -endstream -endobj -931 0 obj +% 1159 0 obj << -/Length 4754 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.1.8)-1000(Clone)-250(\227)-250(clone)-250(current)-250(object)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%clone\050descout,info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.598 Td [(desc)]TJ -0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -77.917 -34.377 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.597 Td [(descout)]TJ -0 g 0 G -/F54 9.9626 Tf 39.84 0 Td 
[(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ -0 g 0 G -/F51 9.9626 Tf -39.84 -20.597 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -28.097 Td [(3.1.9)-1000(CNV)-250(\227)-250(convert)-250(internal)-250(storage)-250(format)]TJ -0 g 0 G +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 600.704 495.412 612.764] +/A << /S /GoTo /D (vdata) >> +>> + +endstream +endobj +1173 0 obj +<< +/Length 7835 +>> +stream 0 g 0 G -/F59 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%cnv\050mold\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +BT +/F75 11.9552 Tf 99.895 706.129 Td [(4.2)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 151.016 706.129 Td [(gedot)-250(\227)-250(Dot)-250(Product)]TJ/F84 9.9626 Tf -51.429 -18.976 Td [(This)-250(function)-250(computes)-250(dot)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F78 9.9626 Tf 254.646 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(and)]TJ/F78 9.9626 Tf 19.482 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -286.622 -11.955 Td [(If)]TJ/F78 9.9626 Tf 9.46 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(and)]TJ/F78 9.9626 Tf 19.482 0 Td [(y)]TJ/F84 9.9626 Tf 7.596 0 Td [(ar)18(e)-250(r)18(eal)-250(vectors)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F78 9.9626 Tf 104.718 -23.132 Td [(d)-25(o)-35(t)]TJ/F190 10.3811 Tf 16.336 0 Td [(\040)]TJ/F78 9.9626 Tf 13.567 0 Td [(x)]TJ/F78 7.5716 Tf 5.398 4.115 Td [(T)]TJ/F78 9.9626 Tf 5.526 -4.115 Td [(y)]TJ/F84 9.9626 Tf -189.779 -21.93 Td [(Else)-250(if)]TJ/F78 9.9626 Tf 29.474 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(and)]TJ/F78 9.9626 Tf 19.482 0 Td [(y)]TJ/F84 9.9626 Tf 7.597 0 Td [(ar)18(e)-250(complex)-250(vectors)-250(then)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F78 9.9626 Tf 83.965 -23.132 Td [(d)-25(o)-35(t)]TJ/F190 10.3811 Tf 16.336 0 Td [(\040)]TJ/F78 9.9626 Tf 13.567 0 Td [(x)]TJ/F78 7.5716 Tf 5.587 4.115 Td 
[(H)]TJ/F78 9.9626 Tf 6.813 -4.115 Td [(y)]TJ/F145 9.9626 Tf -175.573 -21.937 Td [(psb_gedot\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.597 Td [(desc)]TJ + [-525([,global]\051)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -32.553 Td [(mold)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(the)-250(desir)18(ed)-250(integer)-250(storage)-250(format.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F59 9.9626 Tf 221.926 0 Td [(psb)]TJ -ET -q -1 0 0 1 413.855 356.476 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 416.994 356.277 Td [(T)]TJ ET q -1 0 0 1 422.851 356.476 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 183.119 570.686 cm +[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S Q BT -/F59 9.9626 Tf 425.99 356.277 Td [(base)]TJ +/F78 9.9626 Tf 189.221 562.118 Td [(d)-25(o)-35(t)]TJ/F84 9.9626 Tf 13.445 0 Td [(,)]TJ/F78 9.9626 Tf 5.275 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(,)]TJ/F78 9.9626 Tf 5.106 0 Td [(y)]TJ/F75 9.9626 Tf 91.76 0 Td [(Function)]TJ ET q -1 0 0 1 447.539 356.476 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 183.119 558.332 cm +[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S Q BT -/F59 9.9626 Tf 450.677 356.277 Td [(vect)]TJ +/F84 9.9626 Tf 189.097 549.765 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 472.226 356.476 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 
326.331 549.964 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 475.364 356.277 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -345.58 -22.59 Td [(The)]TJ/F59 9.9626 Tf 19.583 0 Td [(mold)]TJ/F54 9.9626 Tf 23.828 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ/F51 9.9626 Tf 0 -28.096 Td [(3.1.10)-1000(psb)]TJ +/F84 9.9626 Tf 329.319 549.765 Td [(gedot)]TJ -140.222 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 202.769 293.835 cm +1 0 0 1 326.331 538.009 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 205.758 293.636 Td [(cd)]TJ +/F84 9.9626 Tf 329.319 537.809 Td [(gedot)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 216.867 293.835 cm +1 0 0 1 326.331 526.053 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 219.855 293.636 Td [(get)]TJ +/F84 9.9626 Tf 329.319 525.854 Td [(gedot)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 234.291 293.835 cm +1 0 0 1 326.331 514.098 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 237.28 293.636 Td [(large)]TJ +/F84 9.9626 Tf 329.319 513.899 Td [(gedot)]TJ ET q -1 0 0 1 260.572 293.835 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 183.119 510.113 cm +[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S Q +0 g 0 G BT -/F51 9.9626 Tf 263.561 293.636 Td [(threshold)-190(\227)-190(Get)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ +/F84 9.9626 Tf 229.958 481.735 Td [(T)92(able)-250(2:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -112.856 -19.29 Td [(ith)-525(=)-525(psb_cd_get_large_threshold\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.421 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf -130.063 -34.507 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F84 9.9626 Tf 29.828 0 Td 
[(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -29.828 -19.951 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -20.598 Td [(Function)-250(value)]TJ + 0 -19.951 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(curr)18(ent)-250(value)-250(for)-250(the)-250(size)-250(thr)18(eshold.)]TJ/F51 9.9626 Tf -72.777 -28.096 Td [(3.1.11)-1000(psb)]TJ -ET -q -1 0 0 1 202.769 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 205.758 182.634 Td [(cd)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 359.506 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 369.545 359.506 Tm [(psb)]TJ ET q -1 0 0 1 216.867 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 385.864 359.705 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 219.855 182.634 Td [(set)]TJ +/F145 9.9626 Tf 389.002 359.506 Td [(T)]TJ ET q -1 0 0 1 233.175 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 394.86 359.705 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 236.164 182.634 Td [(large)]TJ +/F145 9.9626 Tf 397.998 359.506 Td [(vect)]TJ ET q -1 0 0 1 259.457 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.547 359.705 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 262.445 182.634 Td [(threshold)-190(\227)-190(Set)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -111.74 -19.289 Td 
[(call)-525(psb_cd_set_large_threshold\050ith\051)]TJ +/F145 9.9626 Tf 422.685 359.506 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 1.02 0 0 1 124.802 347.55 Tm [(containing)-270(numbers)-269(of)-270(type)-270(speci\002ed)-270(in)-269(T)90(able)]TJ +0 0 1 rg 0 0 1 RG + [-270(2)]TJ 0 g 0 G + [(.)-378(The)-270(rank)-269(of)]TJ/F78 9.9626 Tf 1 0 0 1 399.71 347.55 Tm [(x)]TJ/F84 9.9626 Tf 1.02 0 0 1 407.657 347.55 Tm [(must)-270(be)]TJ 1 0 0 1 124.802 335.595 Tm [(the)-250(same)-250(of)]TJ/F78 9.9626 Tf 52.946 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 166.874 -29.888 Td [(14)]TJ +/F75 9.9626 Tf -82.959 -19.951 Td [(y)]TJ 0 g 0 G +/F84 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.445 0 Td [(y)]TJ/F84 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 267.824 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 369.545 267.824 Tm [(psb)]TJ ET - -endstream -endobj -936 0 obj -<< -/Length 5640 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 385.864 268.023 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(ith)]TJ -0 g 0 G -/F54 9.9626 Tf 17.704 0 Td [(the)-250(new)-250(thr)18(eshold)-250(for)-250(communication)-250(descriptors.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td 
[(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(gr)18(eater)-250(than)-250(zer)18(o.)]TJ -24.907 -20.813 Td [(Note:)-665(the)-427(thr)18(eshold)-428(value)-427(is)-428(only)-427(queried)-428(by)-427(the)-428(library)-427(at)-428(the)-427(time)-428(a)-427(call)]TJ 0 -11.955 Td [(to)]TJ/F59 9.9626 Tf 12.451 0 Td [(psb_cdall)]TJ/F54 9.9626 Tf 50.837 0 Td [(is)-378(executed,)-409(ther)18(efor)18(e)-378(changing)-378(the)-378(thr)18(eshold)-377(has)-378(no)-378(ef)18(fect)-378(on)]TJ -63.288 -11.955 Td [(communication)-339(descriptors)-340(that)-339(have)-339(alr)18(eady)-340(been)-339(initialized.)-578(Mor)18(eover)-339(the)]TJ 0 -11.955 Td [(thr)18(eshold)-250(must)-250(have)-250(the)-250(same)-250(value)-250(on)-250(all)-250(pr)18(ocesses.)]TJ/F51 9.9626 Tf 0 -26.933 Td [(3.1.12)-1000(get)]TJ +/F145 9.9626 Tf 389.002 267.824 Td [(T)]TJ ET q -1 0 0 1 149.2 574.896 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 394.86 268.023 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 152.189 574.697 Td [(p)]TJ +/F145 9.9626 Tf 397.998 267.824 Td [(vect)]TJ ET q -1 0 0 1 158.874 574.896 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.547 268.023 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 161.863 574.697 Td [(adjcncy)-250(\227)-250(Get)-250(process)-250(adjacency)-250(list)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -61.968 -18.964 Td [(list)-525(=)-1050(desc%get_p_adjcncy\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.483 Td [(On)-250(Return)]TJ +/F145 9.9626 Tf 422.685 267.824 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 124.802 255.869 Tm [(containing)-246(numbers)-247(of)-246(type)-247(speci\002ed)-246(in)-247(T)94(able)]TJ +0 0 1 rg 0 0 1 RG + 
[-246(2)]TJ 0 g 0 G - 0 -19.484 Td [(Function)-250(value)]TJ + [(.)-314(The)-246(rank)-247(of)]TJ/F78 9.9626 Tf 1 0 0 1 385.833 255.869 Tm [(y)]TJ/F84 9.9626 Tf 0.98 0 0 1 393.346 255.869 Tm [(must)-246(be)-247(the)]TJ 1 0 0 1 124.802 243.913 Tm [(same)-250(of)]TJ/F78 9.9626 Tf 36.807 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-190(curr)18(ent)-190(list)-190(of)-190(adjacent)-190(pr)18(ocesses,)-202(i.e.)-290(pr)18(ocesses)-190(with)-190(which)]TJ -47.87 -11.955 Td [(the)-250(curr)18(ent)-250(one)-250(has)-250(to)-250(exchange)-250(halo)-250(data.)]TJ/F51 9.9626 Tf -24.907 -26.933 Td [(3.1.13)-1000(set)]TJ +/F75 9.9626 Tf -66.919 -19.95 Td [(desc)]TJ ET q -1 0 0 1 148.085 457.264 cm +1 0 0 1 120.408 224.162 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 151.073 457.065 Td [(p)]TJ +/F75 9.9626 Tf 123.397 223.963 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 157.758 457.264 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 273.363 176.341 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 160.747 457.065 Td [(adjcncy)-250(\227)-250(Set)-250(process)-250(adjacency)-250(list)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -60.852 -18.964 Td [(call)-525(desc%set_p_adjcncy\050list\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.483 Td [(list)]TJ -0 g 0 G 
-/F54 9.9626 Tf 19.358 0 Td [(the)-250(list)-250(of)-250(adjacent)-250(pr)18(ocesses.)]TJ 5.549 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F59 9.9626 Tf 250.209 0 Td [(psb_ipk_)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -316.959 -20.813 Td [(Note:)-596(this)-392(method)-393(can)-393(be)-393(called)-393(after)-392(a)-393(call)-393(to)]TJ/F59 9.9626 Tf 216.367 0 Td [(psb_cdall)]TJ/F54 9.9626 Tf 50.987 0 Td [(and)-393(befor)18(e)-393(a)-392(call)]TJ -267.354 -11.955 Td [(to)]TJ/F59 9.9626 Tf 11.711 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)-470(The)-304(user)-303(is)-304(specifying)-303(her)18(e)-304(some)-303(knowledge)-304(about)-303(which)-304(pr)18(o-)]TJ -58.784 -11.955 Td [(cesses)-208(ar)18(e)-208(topol)1(ogical)-208(neighbours)-208(of)-208(the)-207(curr)18(ent)-208(pr)18(ocess.)-296(The)-208(availability)-207(of)-208(this)]TJ 0 -11.955 Td [(information)-250(may)-250(speed)-250(up)-250(the)-250(execution)-250(of)-250(the)-250(assembly)-250(call)]TJ/F59 9.9626 Tf 269.656 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)]TJ/F51 9.9626 Tf -316.729 -26.934 Td [(3.1.14)-1000(fnd)]TJ +/F145 9.9626 Tf 276.501 176.142 Td [(desc)]TJ ET q -1 0 0 1 151.412 247.089 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 298.05 176.341 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 154.401 246.89 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(process)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -54.506 -18.964 Td [(call)-525(desc%fnd_owner\050idx,iprc,info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ +/F145 9.9626 Tf 
301.189 176.142 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -222.215 -19.951 Td [(global)]TJ 0 g 0 G +/F84 9.9626 Tf 0.994 0 0 1 133.659 156.191 Tm [(Speci\002es)-250(whether)-249(the)-250(computation)-250(should)-250(include)-249(the)-250(global)-250(r)18(eduction)]TJ 1 0 0 1 124.802 144.236 Tm [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ 0 g 0 G - 0 -19.484 Td [(idx)]TJ -0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(the)-250(list)-250(of)-250(global)-250(indices)-250(for)-250(which)-250(we)-250(need)-250(the)-250(owning)-250(pr)18(ocesses.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F59 9.9626 Tf 250.209 0 Td [(psb_lpk_)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -0 g 0 G - -150.084 -29.888 Td [(15)]TJ + 77.002 -29.888 Td [(33)]TJ 0 g 0 G ET endstream endobj -943 0 obj +1179 0 obj << -/Length 7676 +/Length 4448 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -21.934 Td [(iprc)]TJ -0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(the)-250(list)-250(of)-250(pr)18(ocesses)-250(owning)-250(the)-250(indices)-250(in)]TJ/F59 9.9626 Tf 184.993 0 Td [(idx)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ -198.463 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td 
[(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-408(as:)-627(an)-408(allocatable)-408(one-dimensional)-408(array)-409(of)-408(integers)-408(of)-408(kind)]TJ/F59 9.9626 Tf 0 -11.955 Td [(psb_ipk_)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -66.749 -23.926 Td [(Note:)-349(this)-269(method)-269(may)-270(or)-269(may)-269(not)-269(actually)-270(r)18(equir)18(e)-269(communications,)-274(depend-)]TJ 0 -11.956 Td [(ing)-283(on)-283(the)-283(ex)1(a)-1(c)1(t)-283(internal)-283(data)-283(storage;)-299(given)-283(that)-283(the)-283(choice)-283(of)-282(storage)-283(may)-283(be)]TJ 0 -11.955 Td [(alter)18(ed)-376(by)-375(r)8(untime)-376(parameters,)-407(it)-376(is)-375(necessary)-376(for)-376(safety)-375(that)-376(this)-375(method)-376(is)]TJ 0 -11.955 Td [(called)-250(by)-250(all)-250(pr)18(ocesses.)]TJ/F51 9.9626 Tf 0 -29.998 Td [(3.1.15)-1000(Named)-250(Constants)]TJ +/F84 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F145 9.9626 Tf 165.319 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 0 -19.937 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 514.891 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 514.692 Td [(none)]TJ -ET -q -1 0 0 1 194.182 514.891 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + [(.true.)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 202.152 514.692 Td [(Generic)-250(no-op;)]TJ +/F75 9.9626 Tf -190.225 -31.881 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -51.447 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 492.957 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 492.758 Td [(root)]TJ -ET -q -1 0 0 1 189.758 492.957 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q 0 g 0 G -BT -/F54 9.9626 Tf 
197.728 492.758 Td [(Default)-250(r)18(oot)-250(pr)18(ocess)-250(for)-250(br)18(oadcast)-250(and)-250(scatter)-250(operations;)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F51 9.9626 Tf -47.023 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 471.023 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 470.824 Td [(nohalo)]TJ -ET -q -1 0 0 1 203.038 471.023 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +/F84 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F78 9.9626 Tf 126.329 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(and)]TJ/F78 9.9626 Tf 19.482 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ 0.98 0 0 1 175.611 630.413 Tm [(Scope:)]TJ/F75 9.9626 Tf 0.98 0 0 1 206.422 630.413 Tm [(global)]TJ/F84 9.9626 Tf 0.98 0 0 1 237.009 630.413 Tm [(unless)-244(the)-244(optional)-244(variable)]TJ/F145 9.9626 Tf 1 0 0 1 358.293 630.413 Tm [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 211.008 470.824 Td [(Do)-250(not)-250(fetch)-250(halo)-250(elements;)]TJ + [(.false.)]TJ/F84 9.9626 Tf 0.98 0 0 1 433.899 630.413 Tm [(has)-244(been)-244(spec-)]TJ 1 0 0 1 175.611 618.458 Tm [(i\002ed)]TJ 0 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ 0 g 0 G -/F51 9.9626 Tf -60.303 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 449.089 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 448.89 Td [(halo)]TJ -ET -q -1 0 0 1 191.412 449.089 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + [(.)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 199.382 448.89 Td [(Fetch)-250(halo)-250(elements)-250(fr)18(om)-250(neighbouring)-250(pr)18(ocesses;)]TJ +/F75 9.9626 Tf -24.906 -19.925 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -48.677 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 427.155 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 426.956 Td [(sum)]TJ -ET -q -1 0 0 1 190.854 427.155 
cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.917 Td [(Notes)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 198.824 426.956 Td [(Sum)-250(overlapped)-250(elements)]TJ +/F84 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -48.119 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 405.221 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 405.022 Td [(avg)]TJ -ET -q -1 0 0 1 187.546 405.221 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + 0.98 0 0 1 175.303 496.913 Tm [(The)-201(computation)-200(of)-201(a)-200(global)-201(r)18(esult)-200(r)18(equir)19(es)-201(a)-201(global)-200(communication,)-212(which)]TJ 1.02 0 0 1 175.611 484.958 Tm [(entails)-265(a)-265(signi\002cant)-264(over)17(head.)-363(It)-265(may)-265(be)-264(necessary)-265(and/or)-265(advisable)-265(to)]TJ 0.98 0 0 1 175.611 473.003 Tm [(compute)-256(multiple)-256(dot)-256(pr)18(oducts)-256(at)-256(the)-256(same)-256(time;)-257(in)-256(this)-256(case,)-256(it)-256(is)-256(possible)]TJ 1 0 0 1 175.611 461.048 Tm [(to)-250(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 195.517 405.022 Td [(A)92(verage)-250(overlapped)-250(elements)]TJ +/F145 9.9626 Tf 52.304 -19.925 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F51 9.9626 Tf -44.812 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 383.288 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 383.088 Td 
[(comm)]TJ -ET -q -1 0 0 1 199.163 383.288 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 202.152 383.088 Td [(halo)]TJ -ET -q -1 0 0 1 222.674 383.288 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 230.645 383.088 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.92 0 Td [(halo_index)]TJ/F54 9.9626 Tf 54.794 0 Td [(list;)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -259.654 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 361.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 361.154 Td [(comm)]TJ -ET -q -1 0 0 1 199.163 361.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 202.152 361.154 Td [(ext)]TJ -ET -q -1 0 0 1 216.029 361.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 224 361.154 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.92 0 Td [(ext_index)]TJ/F54 9.9626 Tf 49.564 0 Td [(list;)]TJ + [-525(psb_gedot\050x1,y1,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -247.779 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 339.42 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 339.22 Td [(comm)]TJ -ET -q -1 0 0 1 199.163 339.42 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 202.152 339.22 Td [(ovr)]TJ -ET -q -1 0 0 1 217.703 339.42 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 225.673 339.22 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.92 0 Td [(ovrlap_index)]TJ/F54 9.9626 Tf 65.255 0 Td [(list;)]TJ + 0 -11.956 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(2)]TJ 0 g 0 G -/F51 9.9626 Tf -265.143 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 317.486 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT 
-/F51 9.9626 Tf 170.889 317.286 Td [(comm)]TJ -ET -q -1 0 0 1 199.163 317.486 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 202.152 317.286 Td [(mov)]TJ -ET -q -1 0 0 1 222.684 317.486 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 230.654 317.286 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.921 0 Td [(ovr_mst_idx)]TJ/F54 9.9626 Tf 60.024 0 Td [(list;)]TJ/F51 11.9552 Tf -264.894 -31.99 Td [(3.2)-1000(Sparse)-250(Matrix)-250(class)]TJ/F54 9.9626 Tf 0 -19.937 Td [(The)]TJ/F59 9.9626 Tf 19.623 0 Td [(psb)]TJ -ET -q -1 0 0 1 186.647 265.558 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 189.785 265.359 Td [(Tspmat)]TJ -ET -q -1 0 0 1 221.795 265.558 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 224.933 265.359 Td [(type)]TJ/F54 9.9626 Tf 23.868 0 Td [(class)-296(contains)-295(all)-296(information)-296(about)-296(the)-295(local)-296(portion)-296(of)]TJ -98.096 -11.955 Td [(the)-200(sparse)-199(matrix)-200(and)-199(its)-200(storage)-200(mode.)-293(Its)-199(design)-200(is)-200(based)-199(on)-200(the)-200(ST)74(A)74(TE)-199(design)]TJ 0 -11.955 Td [(pattern)-256([)]TJ -1 0 0 rg 1 0 0 RG - [(13)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G - [(])-255(as)-256(detailed)-256(in)-256([)]TJ -1 0 0 rg 1 0 0 RG - [(11)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(];)-258(the)-256(type)-256(declaration)-255(is)-256(shown)-256(in)-256(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-255(2)]TJ + [-525(psb_gedot\050x2,y2,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-256(wher)18(e)]TJ/F59 9.9626 Tf 0 -11.956 Td [(T)]TJ/F54 9.9626 Tf 7.721 0 Td [(is)-250(a)-250(placeholder)-250(for)-250(the)-250(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -7.721 -21.431 Td [(S)]TJ + 0 -11.955 Td 
[(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -11.068 -21.934 Td [(D)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -13.28 -21.934 Td [(C)]TJ + [-525(psb_gedot\050x3,y3,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 12.174 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -12.174 -21.934 Td [(Z)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 11.626 0 Td [(Double)-250(pr)18(ecision)-250(complex;)]TJ + [-525(psb_sum\050ctxt,vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F51 9.9626 Tf -11.626 -21.934 Td [(LS,LD,LC,LZ)]TJ + [(:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F54 9.9626 Tf 65.026 0 Td [(Same)-214(numeric)-214(type)-215(as)-214(above,)-221(but)-214(with)]TJ/F59 9.9626 Tf 168.016 0 Td [(psb_lpk_)]TJ/F54 9.9626 Tf 43.978 0 Td [(integer)-214(indices.)]TJ + [(\051\051)]TJ/F84 9.9626 Tf 1.007 0 0 1 175.611 385.332 Tm [(In)-248(this)-248(way)-248(the)-248(global)-248(communication,)-248(which)-248(for)-248(small)-248(sizes)-248(is)-248(a)-248(latency-)]TJ 1 0 0 1 175.611 373.377 Tm [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ 0 g 0 G - -110.146 -29.888 Td [(16)]TJ + 141.968 -282.939 Td [(34)]TJ 0 g 0 G ET endstream endobj -950 0 obj +1190 0 obj << -/Length 7032 +/Length 8900 >> stream 0 g 0 G 0 g 0 G -0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +BT +/F75 11.9552 Tf 99.895 706.129 Td [(4.3)-1000(psb)]TJ +ET q -1 0 0 1 99.895 671.26 cm -0 0 343.711 38.854 re f +1 0 0 1 147.429 
706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 112.299 699.454 Td [(type)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Product)]TJ/F84 9.9626 Tf 1.014 0 0 1 99.587 687.165 Tm [(This)-246(subr)17(ou)1(tine)-247(computes)-246(a)-246(series)-247(of)-246(dot)-246(pr)17(oducts)-246(among)-246(the)-247(columns)-246(of)-246(two)]TJ 1 0 0 1 99.895 675.21 Tm [(dense)-250(matrices)]TJ/F78 9.9626 Tf 68.209 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(and)]TJ/F78 9.9626 Tf 19.482 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(:)]TJ/F78 9.9626 Tf 24.806 -13.101 Td [(r)-17(e)-25(s)]TJ/F192 10.3811 Tf 12.294 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 7.042 0 Td [(\040)]TJ/F78 9.9626 Tf 13.567 0 Td [(x)]TJ/F192 10.3811 Tf 5.329 0 Td [(\050)]TJ/F84 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F78 9.9626 Tf 6.821 0 Td [(i)]TJ/F192 10.3811 Tf 3.089 0 Td [(\051)]TJ/F78 7.5716 Tf 4.342 4.114 Td [(T)]TJ/F78 9.9626 Tf 5.525 -4.114 Td [(y)]TJ/F192 10.3811 Tf 5.231 0 Td [(\050)]TJ/F84 9.9626 Tf 4.274 0 Td [(:)-12(,)]TJ/F78 9.9626 Tf 6.821 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 99.895 645.233 Tm [(If)-240(the)-240(matrices)-240(ar)19(e)-240(complex,)-243(then)-240(the)-240(usual)-240(c)1(onvention)-240(applies,)-243(i.e.)-311(the)-240(conjugate)]TJ 1.007 0 0 1 99.895 633.278 Tm [(transpose)-249(of)]TJ/F78 9.9626 Tf 1 0 0 1 156.904 633.278 Tm [(x)]TJ/F84 9.9626 Tf 1.007 0 0 1 164.604 633.278 Tm [(is)-249(used.)-309(If)]TJ/F78 9.9626 Tf 1 0 0 1 210.627 633.278 Tm [(x)]TJ/F84 9.9626 Tf 1.007 0 0 1 218.327 633.278 Tm [(and)]TJ/F78 9.9626 Tf 1 0 0 1 237.93 633.278 Tm [(y)]TJ/F84 9.9626 Tf 1.007 0 0 1 245.531 633.278 Tm [(ar)18(e)-249(of)-248(rank)-249(one,)-249(then)]TJ/F78 9.9626 Tf 1 0 0 1 339.231 633.278 Tm [(r)-17(e)-25(s)]TJ/F84 9.9626 Tf 1.007 0 0 1 353.894 633.278 Tm 
[(is)-249(a)-248(scalar)73(,)-248(else)-249(it)-249(is)-248(a)]TJ 1 0 0 1 99.895 621.323 Tm [(rank)-250(one)-250(array)111(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F145 9.9626 Tf 20.175 -11.955 Td [(call)]TJ 0 g 0 G - [-525(psb_Tspmat_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.414 -10.959 Td [(class)]TJ + [-525(psb_gedots\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\050psb_T_base_sparse_mat\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-1050(::)]TJ + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(a)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.414 -10.959 Td [(end)-525(type)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ 0 g 0 G - [-1050(psb_Tspmat_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G 0 g 0 G -/F54 9.9626 Tf 4.295 -41.429 Td [(Listing)-250(2:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(sparse)-250(matrix.)]TJ -16.699 -32.661 Td [(The)-190(actual)-190(data)-190(is)-190(contained)-190(in)-190(the)-190(polymorphic)-190(component)]TJ/F59 9.9626 Tf 259.484 0 Td [(a%a)]TJ/F54 9.9626 Tf 17.584 0 Td [(of)-190(type)]TJ/F59 9.9626 Tf 32.089 0 Td [(psb)]TJ -ET -q -1 0 0 1 425.371 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 428.509 603.446 Td [(T)]TJ ET q -1 0 0 1 434.367 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 179.582 595.704 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F59 9.9626 Tf 437.505 603.446 Td [(base)]TJ +/F78 9.9626 Tf 185.585 587.136 Td [(r)-17(e)-25(s)]TJ/F84 9.9626 Tf 12.169 0 Td [(,)]TJ/F78 9.9626 Tf 5.275 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(,)]TJ/F78 9.9626 Tf 5.106 0 Td [(y)]TJ/F75 9.9626 Tf 93.134 0 Td [(Subroutine)]TJ ET q -1 0 0 1 459.054 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 
3.138 0 l S +1 0 0 1 179.582 583.351 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F59 9.9626 Tf 462.193 603.446 Td [(sparse)]TJ +/F84 9.9626 Tf 185.56 574.783 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 494.202 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 322.794 574.982 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 497.341 603.446 Td [(mat)]TJ/F54 9.9626 Tf 15.691 0 Td [(;)]TJ -413.137 -11.955 Td [(its)-306(speci)1(\002c)-306(layout)-305(can)-306(be)-305(chosen)-306(dynamically)-305(among)-306(the)-305(pr)18(ede\002ned)-306(types,)-319(or)]TJ 0 -11.955 Td [(an)-305(entir)18(ely)-305(new)-305(storage)-305(layout)-305(can)-305(be)-305(implemented)-304(and)-305(passed)-305(to)-305(the)-305(library)]TJ 0 -11.955 Td [(at)-231(r)8(untime)-231(via)-231(the)]TJ/F59 9.9626 Tf 80.145 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 49.377 0 Td [(r)18(outine.)-304(The)-231(following)-231(very)-231(common)-231(formats)-231(ar)18(e)]TJ -129.522 -11.955 Td [(pr)18(ecompiled)-250(in)-250(PSBLAS)-250(and)-250(thus)-250(ar)18(e)-250(always)-250(available:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.889 Td [(psb)]TJ +/F84 9.9626 Tf 325.783 574.783 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 117.091 535.936 cm +1 0 0 1 322.794 563.027 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 535.737 Td [(T)]TJ +/F84 9.9626 Tf 325.783 562.828 Td [(gedots)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 127.322 535.936 cm +1 0 0 1 322.794 551.072 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 130.311 535.737 Td [(coo)]TJ +/F84 9.9626 Tf 325.783 550.872 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 146.411 535.936 cm +1 0 0 1 322.794 539.116 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 149.399 535.737 Td [(sparse)]TJ +/F84 9.9626 Tf 325.783 538.917 Td [(gedots)]TJ 
ET q -1 0 0 1 178.769 535.936 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 179.582 535.131 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q +0 g 0 G BT -/F51 9.9626 Tf 181.758 535.737 Td [(mat)]TJ +/F84 9.9626 Tf 229.958 506.753 Td [(T)92(able)-250(3:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -130.063 -32.002 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Coor)18(dinate)-250(storage;)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -104 -19.907 Td [(psb)]TJ +/F75 9.9626 Tf -29.828 -19.22 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.22 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 388.49 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 369.545 388.49 Tm [(psb)]TJ ET q -1 0 0 1 117.091 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 385.864 388.689 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 120.08 515.83 Td [(T)]TJ +/F145 9.9626 Tf 389.002 388.49 Td [(T)]TJ ET q -1 0 0 1 127.322 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 394.86 388.689 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 130.311 515.83 Td [(csr)]TJ +/F145 9.9626 Tf 397.998 388.49 Td [(vect)]TJ ET q -1 0 0 1 143.631 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.547 388.689 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 146.62 515.83 Td [(sparse)]TJ +/F145 9.9626 Tf 422.685 388.49 Td [(type)]TJ +0 g 0 G 
+/F84 9.9626 Tf 1.02 0 0 1 124.802 376.535 Tm [(containing)-270(numbers)-269(of)-270(type)-270(speci\002ed)-270(in)-269(T)90(able)]TJ +0 0 1 rg 0 0 1 RG + [-270(3)]TJ +0 g 0 G + [(.)-378(The)-270(rank)-269(of)]TJ/F78 9.9626 Tf 1 0 0 1 399.71 376.535 Tm [(x)]TJ/F84 9.9626 Tf 1.02 0 0 1 407.657 376.535 Tm [(must)-270(be)]TJ 1 0 0 1 124.802 364.58 Tm [(the)-250(same)-250(of)]TJ/F78 9.9626 Tf 52.946 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -82.959 -19.221 Td [(y)]TJ +0 g 0 G +/F84 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.445 0 Td [(y)]TJ/F84 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 297.539 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 369.545 297.539 Tm [(psb)]TJ ET q -1 0 0 1 175.989 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 385.864 297.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 178.978 515.83 Td [(mat)]TJ -0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(r)18(ows;)]TJ -0 g 0 G -/F51 9.9626 Tf -101.22 -19.906 Td [(psb)]TJ +/F145 9.9626 Tf 389.002 297.539 Td [(T)]TJ ET q -1 0 0 1 117.091 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 394.86 297.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 120.08 495.924 Td [(T)]TJ +/F145 9.9626 Tf 397.998 297.539 Td [(vect)]TJ ET q -1 0 0 1 127.322 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.547 297.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 130.311 495.924 Td [(csc)]TJ +/F145 9.9626 Tf 422.685 
297.539 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 124.802 285.583 Tm [(containing)-246(numbers)-247(of)-246(type)-247(speci\002ed)-246(in)-247(T)94(able)]TJ +0 0 1 rg 0 0 1 RG + [-246(3)]TJ +0 g 0 G + [(.)-314(The)-246(rank)-247(of)]TJ/F78 9.9626 Tf 1 0 0 1 385.833 285.583 Tm [(y)]TJ/F84 9.9626 Tf 0.98 0 0 1 393.346 285.583 Tm [(must)-246(be)-247(the)]TJ 1 0 0 1 124.802 273.628 Tm [(same)-250(of)]TJ/F78 9.9626 Tf 36.807 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -66.919 -19.22 Td [(desc)]TJ ET q -1 0 0 1 144.179 496.123 cm +1 0 0 1 120.408 254.607 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 147.168 495.924 Td [(sparse)]TJ +/F75 9.9626 Tf 123.397 254.408 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 176.537 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 273.363 206.786 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 179.526 495.924 Td [(mat)]TJ -0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(columns;)]TJ -101.768 -19.889 Td [(The)-295(inner)-295(sparse)-294(matrix)-295(has)-295(an)-295(associated)-294(state,)-306(which)-295(can)-295(take)-294(the)-295(following)]TJ 0 -11.955 Td [(values:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.888 Td [(Build:)]TJ -0 g 0 G -/F54 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.955 Td 
[(this)-250(state)-250(it)-250(is)-250(possible)-250(to)-250(add)-250(nonzer)18(o)-250(entries.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.907 Td [(Assembled:)]TJ -0 g 0 G -/F54 9.9626 Tf 58.381 0 Td [(State)-324(enter)18(ed)-325(after)-324(the)-325(assembly;)-362(computations)-324(using)-325(the)-324(sparse)]TJ -33.474 -11.955 Td [(matrix,)-250(such)-250(as)-250(matrix-vector)-250(pr)18(oducts,)-250(ar)18(e)-250(only)-250(possible)-250(in)-250(this)-250(state;)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.907 Td [(Update:)]TJ -0 g 0 G -/F54 9.9626 Tf 40.678 0 Td [(State)-219(enter)18(ed)-220(after)-219(a)-219(r)18(einitalization;)-230(this)-219(is)-219(used)-220(to)-219(handle)-219(applications)]TJ -15.771 -11.955 Td [(in)-288(which)-288(the)-288(same)-288(sparsity)-289(pattern)-288(is)-288(used)-288(multiple)-288(times)-288(with)-288(dif)18(fer)18(ent)]TJ 0 -11.955 Td [(coef)18(\002cients.)-298(In)-213(this)-214(state)-213(it)-214(is)-213(only)-214(possible)-213(to)-214(enter)-213(coef)18(\002cients)-214(for)-213(alr)18(eady)]TJ 0 -11.956 Td [(existing)-250(nonzer)18(o)-250(entries.)]TJ -24.907 -19.888 Td [(The)-293(only)-292(storage)-293(variant)-292(supporting)-293(the)-293(build)-292(state)-293(is)-292(COO;)-293(all)-293(other)-292(variants)]TJ 0 -11.955 Td [(ar)18(e)-250(obtained)-250(by)-250(conversion)-250(to/fr)18(om)-250(it.)]TJ/F51 9.9626 Tf 0 -27.132 Td [(3.2.1)-1000(Sparse)-250(Matrix)-250(Methods)]TJ 0 -18.964 Td [(3.2.2)-1000(get)]TJ +/F145 9.9626 Tf 276.501 206.587 Td [(desc)]TJ ET q -1 0 0 1 144.219 266.863 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 298.05 206.786 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 147.208 266.663 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +/F145 9.9626 Tf 301.189 206.587 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F59 9.9626 Tf -47.313 -18.963 Td [(nr)-525(=)-525(a%get_nrows\050\051)]TJ +/F75 9.9626 Tf -222.215 -19.22 Td 
[(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.872 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -19.221 Td [(res)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.907 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 18.262 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F78 9.9626 Tf 126.329 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(and)]TJ/F78 9.9626 Tf 19.482 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -151.968 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 1.02 0 0 1 124.802 132.281 Tm [(Speci\002ed)-244(as:)-304(a)-244(number)-245(or)-244(a)-244(rank-one)-245(array)-244(of)-244(the)-245(data)-244(type)-244(indicated)-245(in)]TJ 1 0 0 1 124.493 120.326 Tm [(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -19.907 Td [(a)]TJ + 142.277 -29.888 Td [(35)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ +ET + +endstream +endobj +1195 0 obj +<< +/Length 581 +>> +stream 0 g 0 G - -56.339 -33.827 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.906 Td [(Function)-250(value)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 165.298 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - -76.431 -29.888 Td 
[(17)]TJ + 142.356 -567.87 Td [(36)]TJ 0 g 0 G ET endstream endobj -955 0 obj +1202 0 obj << -/Length 3860 +/Length 7933 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.3)-1000(get)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(4.4)-1000(psb)]TJ ET q -1 0 0 1 195.029 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 198.017 706.129 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ/F84 9.9626 Tf -51.429 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(vector)]TJ/F78 9.9626 Tf 233.575 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ -238.473 -11.955 Td [(If)]TJ/F78 9.9626 Tf 9.46 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(in\002nity)-250(norm)-250(as:)]TJ/F78 9.9626 Tf 117.806 -18.736 Td [(a)-25(m)-40(a)-42(x)]TJ/F190 10.3811 Tf 25.761 0 Td [(\040)]TJ/F84 9.9626 Tf 13.273 0 Td [(max)]TJ/F78 7.5716 Tf 8.354 -7.21 Td [(i)]TJ/F190 10.3811 Tf 12.35 7.21 Td [(j)]TJ/F78 9.9626 Tf 3.297 0 Td [(x)]TJ/F78 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F84 9.9626 Tf -206.02 -23.313 Td [(else)-250(if)]TJ/F78 9.9626 Tf 28.159 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(the)-250(in\002nity-norm)-250(as:)]TJ/F78 9.9626 Tf 63.42 -18.737 Td [(a)-25(m)-40(a)-42(x)]TJ/F190 10.3811 Tf 25.761 0 Td [(\040)]TJ/F84 9.9626 Tf 13.273 0 Td [(max)]TJ/F78 7.5716 Tf 8.354 -7.21 Td [(i)]TJ/F192 10.3811 Tf 12.35 7.21 Td [(\050)]TJ/F190 10.3811 Tf 4.274 0 Td [(j)]TJ/F78 9.9626 Tf 3.029 0 Td [(r)-17(e)]TJ/F192 10.3811 Tf 8.169 0 Td [(\050)]TJ/F78 9.9626 Tf 4.443 0 Td [(x)]TJ/F78 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F192 10.3811 Tf 2.875 1.96 Td 
[(\051)]TJ/F190 10.3811 Tf 4.274 0 Td [(j)]TJ/F192 10.3811 Tf 5.065 0 Td [(+)]TJ/F190 10.3811 Tf 10.256 0 Td [(j)]TJ/F78 9.9626 Tf 3.059 0 Td [(i)-32(m)]TJ/F192 10.3811 Tf 11.088 0 Td [(\050)]TJ/F78 9.9626 Tf 4.443 0 Td [(x)]TJ/F78 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F192 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F190 10.3811 Tf 4.274 0 Td [(j)]TJ/F192 10.3811 Tf 3.128 0 Td [(\051)]TJ/F145 9.9626 Tf -225.617 -22.974 Td [(psb_geamax\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -47.312 -19.023 Td [(nc)-525(=)-525(a%get_ncols\050\051)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ + [-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_normi\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.049 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(a)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -56.338 -33.965 Td [(On)-250(Return)]TJ + [-525([,global]\051)]TJ 0 g 0 G 0 g 0 G - 0 -20.048 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 181.158 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ/F51 9.9626 Tf -259.165 -27.315 Td [(3.2.4)-1000(get)]TJ ET q -1 0 0 1 195.029 531.915 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 128.495 566.399 cm +[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S Q BT -/F51 9.9626 Tf 198.017 531.716 Td [(nnzeros)-250(\227)-250(Get)-250(number)-250(of)-250(nonzero)-250(elements)-250(in)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -47.312 -19.024 Td 
[(nz)-525(=)-525(a%get_nnzeros\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ -0 g 0 G - -56.338 -33.965 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(nonzer)18(o)-250(elements)-250(stor)18(ed)-250(in)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 251.284 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ/F51 9.9626 Tf -329.292 -22.041 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -20.017 Td [(1.)]TJ -0 g 0 G - [-500(The)-395(function)-395(value)-395(is)-395(speci\002c)-395(to)-395(the)-395(storage)-395(format)-395(of)-396(matri)1(x)]TJ/F59 9.9626 Tf 295.646 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(;)-468(some)]TJ -288.422 -11.956 Td [(storage)-343(formats)-342(employ)-343(padding,)-366(thus)-343(the)-342(r)18(eturned)-343(value)-343(for)-342(the)-343(same)]TJ 0 -11.955 Td [(matrix)-250(may)-250(be)-250(dif)18(fer)18(ent)-250(for)-250(dif)18(fer)18(ent)-250(storage)-250(choices.)]TJ/F51 9.9626 Tf -24.907 -27.315 Td [(3.2.5)-1000(get)]TJ +/F78 9.9626 Tf 134.746 557.832 Td [(a)-25(m)-40(a)-42(x)-7779(x)]TJ/F75 9.9626 Tf 220.765 0 Td [(Function)]TJ ET q -1 0 0 1 195.029 291.533 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 128.495 554.046 cm +[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S Q BT -/F51 9.9626 Tf 198.017 291.334 Td [(size)-398(\227)-397(Get)-398(maximum)-397(number)-398(of)-398(nonzero)-397(elements)-398(in)-398(a)-397(sparse)]TJ -17.424 -11.955 Td [(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -29.888 -19.024 Td [(maxnz)-525(=)-525(a%get_size\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td 
[(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ -0 g 0 G - -56.338 -33.965 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-220(maximum)-220(number)-219(of)-220(nonzer)18(o)-220(elements)-220(that)-220(can)-219(be)-220(stor)18(ed)]TJ -47.87 -11.955 Td [(in)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 73.294 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(using)-250(its)-250(curr)18(ent)-250(memory)-250(allocation.)]TJ -0 g 0 G - 60.952 -29.888 Td [(18)]TJ -0 g 0 G -ET - -endstream -endobj -960 0 obj -<< -/Length 4133 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.2.6)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -20.135 Td [(memory_size)-525(=)-525(a%sizeof\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -23.732 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -35.687 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F51 9.9626 Tf -72.777 -30.58 Td [(3.2.7)-1000(get)]TJ +/F84 9.9626 Tf 134.472 545.478 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 144.219 517.21 cm +1 0 0 1 371.829 545.677 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 147.208 517.011 Td 
[(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -47.313 -20.135 Td [(write)]TJ -0 g 0 G - [(\050)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(*)]TJ -0 g 0 G - [(,)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(*)]TJ -0 g 0 G - [(\051)-525(a%get_fmt\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -35.686 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)-244(short)-245(string)-244(describing)-245(the)-244(dynamic)-245(type)-244(of)-245(the)-244(matrix.)-308(Pr)18(e-)]TJ -47.87 -11.955 Td [(de\002ned)-250(values)-250(include)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 102.415 0 Td [(NULL)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)]TJ/F59 9.9626 Tf 4.981 0 Td [(COO)]TJ/F54 9.9626 Tf 15.691 0 Td [(,)]TJ/F59 9.9626 Tf 4.982 0 Td [(CSR)]TJ/F54 9.9626 Tf 18.181 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(CSC)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ/F51 9.9626 Tf -227.127 -30.581 Td [(3.2.8)-1000(is)]TJ +/F84 9.9626 Tf 374.818 545.478 Td [(geamax)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 138.122 315.533 cm +1 0 0 1 371.829 533.722 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 141.111 315.333 Td [(bld,)-250(is)]TJ +/F84 9.9626 Tf 374.818 533.523 Td [(geamax)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 169.922 315.533 cm +1 0 0 1 371.829 521.767 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l 
S Q BT -/F51 9.9626 Tf 172.911 315.333 Td [(upd,)-250(is)]TJ +/F84 9.9626 Tf 374.818 521.568 Td [(geamax)]TJ -240.346 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 204.493 315.533 cm +1 0 0 1 371.829 509.812 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 207.482 315.333 Td [(asb)-250(\227)-250(Status)-250(check)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -107.587 -20.135 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_bld\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_upd\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_asb\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0 g 0 G -/F51 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ -0 g 0 G +/F84 9.9626 Tf 374.818 509.612 Td [(geamax)]TJ +ET +q +1 0 0 1 128.495 505.827 cm +[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S +Q 0 g 0 G - 0 -22.343 Td [(a)]TJ +BT +/F84 9.9626 Tf 229.958 477.448 Td [(T)92(able)-250(4:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G - -56.339 -35.686 Td [(On)-250(Return)]TJ 0 g 0 G +/F75 9.9626 Tf -130.063 -30.014 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -22.343 Td [(Function)-250(value)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 9.966 0 Td [(logical)]TJ +/F75 9.9626 Tf -29.828 -18.652 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 38.827 0 Td 
[(value)-222(indicating)-223(whether)-222(the)-222(matrix)-223(is)-222(in)-222(the)-223(Build,)]TJ -96.663 -11.955 Td [(Update)-250(or)-250(Assembled)-250(state,)-250(r)18(espectively)111(.)]TJ 0 g 0 G - 141.968 -29.888 Td [(19)]TJ + 0 -18.653 Td [(x)]TJ 0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 362.308 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 369.545 362.308 Tm [(psb)]TJ ET - -endstream -endobj -965 0 obj -<< -/Length 5143 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 385.864 362.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.9)-1000(is)]TJ +/F145 9.9626 Tf 389.002 362.308 Td [(T)]TJ ET q -1 0 0 1 188.931 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 394.86 362.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 191.92 706.129 Td [(lower)55(,)-250(is)]TJ +/F145 9.9626 Tf 397.998 362.308 Td [(vect)]TJ ET q -1 0 0 1 230.704 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.547 362.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 233.693 706.129 Td [(upper)55(,)-250(is)]TJ +/F145 9.9626 Tf 422.685 362.308 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(4)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -18.652 Td [(desc)]TJ ET q -1 0 0 1 273.583 706.328 cm +1 0 0 1 120.408 331.9 cm []0 d 0 J 
0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 276.572 706.129 Td [(triangle,)-250(is)]TJ +/F75 9.9626 Tf 123.397 331.701 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 325.309 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 273.363 284.079 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 328.298 706.129 Td [(unit)-250(\227)-250(Format)-250(check)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -177.593 -19.573 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_triangle\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_upper\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +/F145 9.9626 Tf 276.501 283.88 Td [(desc)]TJ +ET +q +1 0 0 1 298.05 284.079 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 301.189 283.88 Td [(type)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [-525(\050a%is_lower\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +/F75 9.9626 Tf -222.215 -18.653 Td [(global)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.956 Td [(if)]TJ +/F84 9.9626 Tf 0.994 0 0 1 133.659 265.227 Tm [(Speci\002es)-250(whether)-249(the)-250(computation)-250(should)-250(include)-249(the)-250(global)-250(r)18(eduction)]TJ 1 0 0 1 124.802 253.272 Tm [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td 
[(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F145 9.9626 Tf 165.318 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-525(\050a%is_unit\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ + [(.true.)]TJ 0 g 0 G +/F75 9.9626 Tf -190.225 -30.607 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.86 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -18.653 Td [(Function)-250(value)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(vector)]TJ/F78 9.9626 Tf 128.561 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ 0.98 0 0 1 124.802 144.236 Tm [(Scope:)]TJ/F75 9.9626 Tf 0.98 0 0 1 155.612 144.236 Tm [(global)]TJ/F84 9.9626 Tf 0.98 0 0 1 186.2 144.236 Tm [(unless)-244(the)-244(optional)-244(variab)1(le)]TJ/F145 9.9626 Tf 1 0 0 1 307.484 144.236 Tm [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.)]TJ/F84 9.9626 Tf 0.98 0 0 1 383.09 144.236 Tm [(has)-244(been)-244(spec-)]TJ 1 0 0 1 124.802 132.281 Tm [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G - 0 -21.183 Td [(a)]TJ + 141.968 -29.888 Td [(37)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ +ET + +endstream +endobj +1208 0 obj +<< +/Length 3132 +>> +stream 0 g 0 G - -56.338 -34.816 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -21.183 Td [(Function)-250(value)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(info)]TJ 0 g 0 G -/F54 
9.9626 Tf 72.777 0 Td [(A)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 10.803 0 Td [(logical)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 39.665 0 Td [(value)-306(indicating)-307(whether)-306(the)-307(matrix)-306(is)-306(triangular;)]TJ -98.338 -11.955 Td [(if)]TJ/F59 9.9626 Tf 8.595 0 Td [(is_triangle\050\051)]TJ/F54 9.9626 Tf 70.373 0 Td [(r)18(eturns)]TJ/F59 9.9626 Tf 34.119 0 Td [(.true.)]TJ/F54 9.9626 Tf 33.761 0 Td [(check)-239(also)-238(if)-239(it)-239(is)-239(lower)74(,)-241(upper)-238(and)-239(with)]TJ -146.848 -11.955 Td [(a)-250(unit)-250(\050i.e.)-310(assumed\051)-250(diagonal.)]TJ/F51 9.9626 Tf -24.907 -28.929 Td [(3.2.10)-1000(cscnv)-250(\227)-250(Convert)-250(to)-250(a)-250(dif)18(ferent)-250(storage)-250(format)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -19.573 Td [(call)]TJ +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [-1050(a%cscnv\050b,info)-525([,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(type)]TJ + 0.98 0 0 1 175.303 616.465 Tm [(The)-201(computation)-200(of)-201(a)-200(global)-201(r)18(esult)-200(r)18(equir)19(es)-201(a)-201(global)-200(communication,)-212(which)]TJ 1.02 0 0 1 175.611 604.51 Tm [(entails)-265(a)-265(signi\002cant)-264(over)17(head.)-363(It)-265(may)-265(be)-264(necessary)-265(and/or)-265(advisable)-265(to)]TJ 1.02 0 0 1 175.611 592.555 Tm [(compute)-287(multiple)-287(norms)-287(at)-287(the)-287(same)-287(time;)-308(in)-287(thi)1(s)-287(case,)-298(it)-287(is)-287(possible)-287(to)]TJ 1 0 0 1 175.611 580.6 Tm 
[(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(,)-525(mold,)-525(dupl]\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +/F145 9.9626 Tf 52.304 -19.926 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G - [-1050(a%cscnv\050info)-525([,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(type)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(,)-525(mold,)-525(dupl]\051)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.861 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(psb_geamax\050x1,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(2)]TJ 0 g 0 G - 0 -21.183 Td [(a)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.138 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.906 0 Td [(a)-250(string)-250(r)18(equesting)-250(a)-250(new)-250(format.)]TJ 0.001 -11.955 Td [(T)90(ype:)-310(optional.)]TJ + [-525(psb_geamax\050x2,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.183 Td [(mold)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td 
[(a)-236(variable)-236(of)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 56.403 0 Td [(class)]TJ + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G - [(\050psb_T_base_sparse_mat\051)]TJ/F54 9.9626 Tf 148.803 0 Td [(r)18(equesting)-236(a)-236(new)-237(format)1(.)]TJ -209.081 -11.955 Td [(T)90(ype:)-310(optional.)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -21.182 Td [(dupl)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(an)-359(integer)-358(value)-359(speci\002ng)-358(how)-359(to)-359(handle)-358(duplicates)-359(\050see)-359(Named)-358(Con-)]TJ -1.653 -11.956 Td [(stants)-250(below\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -22.861 Td [(On)-250(Return)]TJ + [-525(psb_geamax\050x3,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -21.182 Td [(b,a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 18.54 0 Td [(A)-250(copy)-250(of)]TJ/F59 9.9626 Tf 45.37 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(with)-250(a)-250(new)-250(storage)-250(format.)]TJ -46.724 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.456 0 Td [(.)]TJ + [-525(psb_amx\050ctxt,vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -21.183 Td [(info)]TJ + [(:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ -23.801 -23.175 Td [(The)]TJ/F59 9.9626 Tf 19.584 0 Td [(mold)]TJ/F54 9.9626 Tf 23.827 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ + [(\051\051)]TJ/F84 9.9626 Tf 1.007 0 0 1 175.611 
504.884 Tm [(In)-248(this)-248(way)-248(the)-248(global)-248(communication,)-248(which)-248(for)-248(small)-248(sizes)-248(is)-248(a)-248(latency-)]TJ 1 0 0 1 175.611 492.928 Tm [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ 0 g 0 G - 166.874 -29.888 Td [(20)]TJ + 141.968 -402.49 Td [(38)]TJ 0 g 0 G ET endstream endobj -969 0 obj +1216 0 obj << -/Length 4477 +/Length 6488 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.2.11)-1000(csclip)-250(\227)-250(Reduce)-250(to)-250(a)-250(submatrix)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.922 -20.279 Td [(call)]TJ -0 g 0 G - [-525(a%csclip\050b,info[,&)]TJ 15.691 -11.955 Td [(&)-525(imin,imax,jmin,jmax,rscale,cscale]\051)]TJ/F54 9.9626 Tf -21.669 -24.631 Td [(Returns)-190(the)-190(submatrix)]TJ/F59 9.9626 Tf 98.878 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F54 9.9626 Tf 115.068 0 Td [(,)-202(optionally)-190(r)18(escaling)-190(r)18(ow/-)]TJ -228.89 -11.955 Td [(col)-250(indices)-250(to)-250(the)-250(range)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf 103.85 0 Td [(1)]TJ -0 g 0 G - [(:imax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ -0 g 0 G - [(imin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(,)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(4.5)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 151.016 706.129 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ/F84 9.9626 Tf 1.003 0 0 1 99.587 687.165 Tm [(This)-250(subr)18(outine)-250(computes)-250(a)-251(seri)1(es)-251(of)-250(in\002nity)-250(norms)-250(on)-250(the)-250(columns)-250(of)-251(a)-250(dense)]TJ 1 0 0 1 99.895 675.21 Tm [(matrix)]TJ/F78 9.9626 Tf 31.786 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(:)]TJ/F78 9.9626 Tf 88.54 -11.955 Td [(r)-17(e)-25(s)]TJ/F192 10.3811 Tf 12.293 0 Td [(\050)]TJ/F78 9.9626 
Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 7.041 0 Td [(\040)]TJ/F84 9.9626 Tf 13.273 0 Td [(max)]TJ/F78 7.5716 Tf 7.76 -7.336 Td [(k)]TJ/F190 10.3811 Tf 12.944 7.336 Td [(j)]TJ/F78 9.9626 Tf 3.298 0 Td [(x)]TJ/F192 10.3811 Tf 5.329 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(k)]TJ/F84 9.9626 Tf 4.598 0 Td [(,)]TJ/F78 9.9626 Tf 4.206 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 4.274 0 Td [(j)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(:jmax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -195.027 -22.296 Td [(call)]TJ 0 g 0 G - [(jmin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + [-525(psb_geamaxs\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 141.219 0 Td [(.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -245.069 -21.961 Td [(T)90(ype:)]TJ + [-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ 0 g 0 G +ET +q +1 0 0 1 126.383 626.591 cm +[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +Q +BT +/F78 9.9626 Tf 132.385 618.023 Td [(r)-17(e)-25(s)-8868(x)]TJ/F75 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 126.383 614.237 cm +[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +Q +BT +/F84 9.9626 Tf 132.36 605.669 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 369.717 605.868 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 372.706 605.669 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 369.717 593.913 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 372.706 593.714 Td [(geamaxs)]TJ -240.346 -11.955 Td 
[(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 369.717 581.958 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 372.706 581.759 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 369.717 570.003 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 372.706 569.804 Td [(geamaxs)]TJ +ET +q +1 0 0 1 126.383 566.018 cm +[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +Q 0 g 0 G - 0 -22.639 Td [(a)]TJ +BT +/F84 9.9626 Tf 229.958 537.639 Td [(T)92(able)-250(5:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -34.594 Td [(imin,imax,jmin,jmax)]TJ 0 g 0 G -/F54 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +/F75 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.638 Td [(rscale,cscale)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -60.025 -24.632 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -19.926 Td [(x)]TJ 0 g 0 G - 0 -22.639 Td [(b)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 
Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 415.5 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 369.545 415.5 Tm [(psb)]TJ +ET +q +1 0 0 1 385.864 415.699 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 389.002 415.5 Td [(T)]TJ +ET +q +1 0 0 1 394.86 415.699 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 397.998 415.5 Td [(vect)]TJ +ET +q +1 0 0 1 419.547 415.699 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 422.685 415.5 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(a)-250(submatrix)-250(of)]TJ/F59 9.9626 Tf 111.321 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ -102.713 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ +/F84 9.9626 Tf -297.883 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -22.639 Td [(info)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.801 -31 Td [(3.2.12)-1000(clean)]TJ +/F75 9.9626 Tf -24.907 -19.925 Td [(desc)]TJ ET q -1 0 0 1 159.153 364.307 cm +1 0 0 1 120.408 383.818 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 162.142 364.108 Td [(zeros)-250(\227)-250(Eliminate)-250(zero)-250(coef)18(\002cients)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -62.247 -20.278 Td [(call)]TJ -0 g 0 G - [-525(a%clean_zeros\050info\051)]TJ/F54 9.9626 Tf 14.944 -12.634 Td [(Eliminates)-214(zer)18(o)-214(coef)18(\002cients)-214(in)-214(the)-214(input)-214(matrix.)-298(Note)-214(that)-214(depending)-214(on)-214(the)]TJ -14.944 -11.955 Td 
[(internal)-246(storage)-245(format,)-247(ther)18(e)-245(may)-246(still)-245(be)-246(some)-245(amount)-246(of)-246(ze)1(r)18(o)-246(padding)-246(in)-245(the)]TJ 0 -11.955 Td [(output.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -24.632 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F75 9.9626 Tf 123.397 383.619 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.639 Td [(a)]TJ +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 273.363 335.998 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 276.501 335.798 Td [(desc)]TJ +ET +q +1 0 0 1 298.05 335.998 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 301.189 335.798 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -35.908 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -222.215 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -22.638 Td [(a)]TJ + 0 -19.925 Td [(res)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(matrix)]TJ/F59 9.9626 Tf 50.659 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(without)-250(zer)18(o)-250(coef)18(\002cients.)]TJ -43.436 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 
81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ +/F84 9.9626 Tf 18.262 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(the)-250(columns)-250(of)]TJ/F78 9.9626 Tf 166.259 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ -164.82 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 0.981 0 0 1 124.802 260.082 Tm [(Speci\002ed)-255(as:)-316(a)-255(number)-254(or)-255(a)-255(rank-one)-255(array)-255(of)-255(long)-254(pr)18(ecision)-255(r)18(eal)-254(numbers.)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -22.638 Td [(info)]TJ +/F75 9.9626 Tf 1 0 0 1 99.895 240.157 Tm [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 143.074 -29.888 Td [(21)]TJ + 142.357 -101.898 Td [(39)]TJ 0 g 0 G ET endstream endobj -973 0 obj +1224 0 obj << -/Length 4488 +/Length 7573 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.13)-1000(get)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(4.6)-1000(psb)]TJ ET q -1 0 0 1 200.01 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 202.999 706.129 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -52.294 -19.329 Td [(call)]TJ +/F75 11.9552 Tf 201.825 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F84 9.9626 Tf -51.429 -18.964 Td 
[(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F78 9.9626 Tf 206.349 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -211.245 -11.955 Td [(If)]TJ/F78 9.9626 Tf 9.459 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F78 9.9626 Tf 125.989 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F190 10.3811 Tf 25.353 0 Td [(\040)-291(k)]TJ/F78 9.9626 Tf 19.006 0 Td [(x)]TJ/F78 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.876 1.96 Td [(k)]TJ/F84 9.9626 Tf -195.526 -21.269 Td [(else)-250(if)]TJ/F78 9.9626 Tf 28.159 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F78 9.9626 Tf 71.974 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F190 10.3811 Tf 25.353 0 Td [(\040)-291(k)]TJ/F78 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F192 10.3811 Tf 8.169 0 Td [(\050)]TJ/F78 9.9626 Tf 4.443 0 Td [(x)]TJ/F192 10.3811 Tf 5.33 0 Td [(\051)]TJ/F190 10.3811 Tf 4.274 0 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F192 10.3811 Tf 6.346 1.858 Td [(+)]TJ/F190 10.3811 Tf 10.256 0 Td [(k)]TJ/F78 9.9626 Tf 5.369 0 Td [(i)-32(m)]TJ/F192 10.3811 Tf 11.089 0 Td [(\050)]TJ/F78 9.9626 Tf 4.443 0 Td [(x)]TJ/F192 10.3811 Tf 5.33 0 Td [(\051)]TJ/F190 10.3811 Tf 4.274 0 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F145 9.9626 Tf -216.928 -19.411 Td [(psb_geasum\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(a%get_diag\050d,info\051)]TJ/F54 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ + [-525([,global]\051)-190(psb_norm1\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,global]\051)]TJ +0 g 0 G 
+0 g 0 G +0 g 0 G +ET +q +1 0 0 1 179.249 576.025 cm +[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +Q +BT +/F78 9.9626 Tf 185.501 567.457 Td [(a)-25(s)-25(u)-25(m)-7810(x)]TJ/F75 9.9626 Tf 220.765 0 Td [(Function)]TJ +ET +q +1 0 0 1 179.249 563.671 cm +[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +Q +BT +/F84 9.9626 Tf 185.227 555.103 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 422.584 555.303 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 425.573 555.103 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 422.584 543.347 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 425.573 543.148 Td [(geasum)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 422.584 531.392 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 425.573 531.193 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 422.584 519.437 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 425.573 519.238 Td [(geasum)]TJ +ET +q +1 0 0 1 179.249 515.452 cm +[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +Q 0 g 0 G -/F51 9.9626 Tf -14.944 -20.49 Td [(T)90(ype:)]TJ +BT +/F84 9.9626 Tf 280.768 487.074 Td [(T)92(able)-250(6:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ 0 g 0 G +/F75 9.9626 Tf -130.063 -33.561 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -20.679 Td [(a)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F51 
9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F75 9.9626 Tf -29.828 -19.665 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -19.666 Td [(x)]TJ 0 g 0 G - 0 -20.679 Td [(d)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 175.611 366.361 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-248(an)-247(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 420.354 366.361 Tm [(psb)]TJ +ET +q +1 0 0 1 436.673 366.56 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 439.811 366.361 Td [(T)]TJ +ET +q +1 0 0 1 445.669 366.56 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 448.807 366.361 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 366.56 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 473.495 366.361 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ 13.839 -11.955 Td [(A)-250(one-dimensional)-250(array)-250(of)-250(the)-250(appr)18(opriate)-250(type.)]TJ +/F84 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(6)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.679 Td [(info)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -28.213 Td [(3.2.14)-1000(clip)]TJ +/F75 9.9626 Tf -24.906 -19.666 Td [(desc)]TJ ET q -1 0 0 1 203.317 472.944 cm +1 0 0 1 171.218 334.939 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 
206.306 472.745 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -55.601 -19.329 Td [(call)]TJ -0 g 0 G - [-525(a%clip_diag\050b,info\051)]TJ/F54 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)]TJ/F59 9.9626 Tf 79.73 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ -0 g 0 G -/F51 9.9626 Tf -102.395 -20.49 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.679 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F75 9.9626 Tf 174.207 334.74 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.679 Td [(b)]TJ -0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)]TJ/F59 9.9626 Tf 45.37 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ -39.252 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -184.984 -20.679 Td [(info)]TJ +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 287.119 cm 
+[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 327.311 286.919 Td [(desc)]TJ +ET +q +1 0 0 1 348.86 287.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 351.998 286.919 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -28.213 Td [(3.2.15)-1000(tril)-250(\227)-250(Return)-250(the)-250(lower)-250(triangle)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.921 -19.329 Td [(call)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [-525(a%tril\050l,info[,&)]TJ 15.691 -11.955 Td [(&)-525(diag,imin,imax,jmin,jmax,rscale,cscale,u]\051)]TJ/F54 9.9626 Tf -21.668 -22.671 Td [(Returns)-309(the)-308(lower)-309(triangular)-308(part)-309(of)-309(submatrix)]TJ/F59 9.9626 Tf 211.209 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F54 9.9626 Tf 115.067 0 Td [(,)]TJ -341.22 -11.956 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf 212.61 0 Td [(1)]TJ +/F75 9.9626 Tf -222.214 -19.665 Td [(global)]TJ 0 g 0 G - [(:imax)]TJ +/F84 9.9626 Tf 0.994 0 0 1 184.468 267.254 Tm [(Speci\002es)-250(whether)-250(t)1(he)-250(computation)-250(should)-250(include)-250(the)-249(global)-250(r)18(eduction)]TJ 1 0 0 1 175.611 255.299 Tm [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F145 9.9626 Tf 165.319 0 Td [(global)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + [(=)]TJ 0 g 0 G - [(imin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + [(.true.)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F75 9.9626 Tf -190.225 -31.621 Td 
[(On)-250(Return)]TJ 0 g 0 G - [(,)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ 0 g 0 G - [(:jmax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + 0 -19.666 Td [(Function)-250(value)]TJ 0 g 0 G - [(jmin)]TJ +/F84 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(vector)]TJ/F78 9.9626 Tf 102.161 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ 0.98 0 0 1 175.611 144.236 Tm [(Scope:)]TJ/F75 9.9626 Tf 0.98 0 0 1 206.422 144.236 Tm [(global)]TJ/F84 9.9626 Tf 0.98 0 0 1 237.009 144.236 Tm [(unless)-244(the)-244(optional)-244(variable)]TJ/F145 9.9626 Tf 1 0 0 1 358.293 144.236 Tm [(global)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G -/F54 9.9626 Tf -212.61 -11.955 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(upper)-250(triangle.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.49 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ + [(=)]TJ 0 g 0 G + [(.false.)]TJ/F84 9.9626 Tf 0.98 0 0 1 433.899 144.236 Tm [(has)-244(been)-244(spec-)]TJ 1 0 0 1 175.611 132.281 Tm [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 166.874 -29.888 Td [(22)]TJ + 141.968 -29.888 Td [(40)]TJ 0 g 0 G ET endstream endobj -977 0 obj +1229 0 obj << -/Length 6185 +/Length 3138 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F75 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ -0 g 0 G -/F54 9.9626 Tf 24.907 0 Td 
[(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F59 9.9626 Tf 149.76 0 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F84 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F59 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=-)]TJ + 0.98 0 0 1 124.493 616.465 Tm [(The)-201(computation)-200(of)-201(a)-200(global)-201(r)18(esult)-200(r)18(equir)18(es)-200(a)-201(global)-200(communication,)-212(which)]TJ 1.02 0 0 1 124.802 604.51 Tm [(entails)-265(a)-265(signi\002cant)-264(over)17(head.)-363(It)-265(may)-264(be)-265(necessary)-265(and/or)-265(advisable)-265(to)]TJ 1.02 0 0 1 124.802 592.555 Tm [(compute)-287(multiple)-287(norms)-287(at)-287(the)-287(same)-287(time;)-308(in)-286(this)-287(case,)-298(it)-287(is)-287(possible)-287(to)]TJ 1 0 0 1 124.802 580.6 Tm [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +/F145 9.9626 Tf 20.921 -19.926 Td [(vres\050)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ -0 g 0 G -/F51 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ -0 g 0 G -/F54 9.9626 Tf 99.885 0 Td 
[(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ -0 g 0 G -/F54 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ -0 g 0 G -/F51 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -18.75 Td [(l)]TJ -0 g 0 G -/F54 9.9626 Tf 8.299 0 Td [(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 137.333 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ -125.956 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -184.985 -18.75 Td [(u)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 184.485 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ -175.878 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -184.985 -18.749 Td [(info)]TJ + [-525(psb_geasum\050x1,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.801 -26.59 Td [(3.2.16)-1000(triu)-250(\227)-250(Return)-250(the)-250(upper)-250(triangle)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.922 -18.964 Td [(call)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(a%triu\050u,info[,&)]TJ 15.691 -11.955 Td [(&)-525(diag,imin,imax,jmin,jmax,rscale,cscale,l]\051)]TJ/F54 9.9626 Tf -21.669 -18.979 Td [(Returns)-289(the)-290(upper)-289(triangular)-290(part)-289(of)-290(submatrix)]TJ/F59 9.9626 Tf 211.209 0 
Td [(A\050imin:imax,jmin:jmax\051)]TJ/F54 9.9626 Tf 115.068 0 Td [(,)]TJ -341.221 -11.955 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ + 31.382 -11.955 Td [(vres\050)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf 212.611 0 Td [(1)]TJ + [(2)]TJ 0 g 0 G - [(:imax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(imin)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [-525(=)]TJ 0 g 0 G - [(,)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(:jmax)]TJ + [-525(psb_geasum\050x2,desc_a,info,global)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + [(=)]TJ 0 g 0 G - [(jmin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 0 -11.955 Td [(vres\050)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G -/F54 9.9626 Tf 141.219 0 Td [(,)]TJ -353.83 -11.956 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(lower)-250(triangle.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -17.574 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -18.75 Td [(On)-250(Entry)]TJ -0 g 0 G + [(3)]TJ 0 g 0 G - 0 -18.75 Td [(a)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.907 0 Td [(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F59 9.9626 Tf 
149.76 0 Td [(diag)]TJ + [-525(psb_geasum\050x3,desc_a,info,global)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG [(=)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F59 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=-)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G + [-525(psb_sum\050ctxt,vres\050)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ -0 g 0 G -/F51 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ + [(:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F54 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ + [(\051\051)]TJ/F84 9.9626 Tf 1.007 0 0 1 124.802 504.884 Tm [(In)-248(this)-248(way)-248(the)-248(global)-248(communication,)-248(which)-248(for)-248(small)-248(sizes)-248(is)-248(a)-248(laten)1(cy-)]TJ 1 0 0 1 124.802 492.928 Tm [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ + 141.968 -402.49 Td [(41)]TJ 0 g 0 G -/F54 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +ET + +endstream +endobj +1237 0 obj +<< +/Length 7611 +>> +stream 0 g 0 G -/F51 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ 0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(4.7)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 201.825 706.129 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F84 9.9626 Tf 0.988 0 0 1 150.396 685.937 Tm 
[(This)-253(subr)18(outine)-253(computes)-253(a)-253(series)-253(of)-253(1-norms)-253(on)-253(the)-254(co)1(lumns)-254(of)-253(a)-253(dense)-253(matrix)]TJ/F78 9.9626 Tf 1 0 0 1 150.999 673.982 Tm [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(:)]TJ/F78 9.9626 Tf 120.031 -13.856 Td [(r)-17(e)-25(s)]TJ/F192 10.3811 Tf 12.294 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 7.042 0 Td [(\040)]TJ/F84 9.9626 Tf 13.273 0 Td [(max)]TJ/F78 7.5716 Tf 7.759 -7.335 Td [(k)]TJ/F190 10.3811 Tf 12.944 7.335 Td [(j)]TJ/F78 9.9626 Tf 3.298 0 Td [(x)]TJ/F192 10.3811 Tf 5.33 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(k)]TJ/F84 9.9626 Tf 4.598 0 Td [(,)]TJ/F78 9.9626 Tf 4.206 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 4.274 0 Td [(j)]TJ/F84 9.9626 Tf -215.511 -24.535 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F78 9.9626 Tf 206.349 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -211.245 -11.955 Td [(If)]TJ/F78 9.9626 Tf 9.459 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F78 9.9626 Tf 125.227 -23.185 Td [(r)-17(e)-25(s)]TJ/F192 10.3811 Tf 12.294 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 7.042 0 Td [(\040)-291(k)]TJ/F78 9.9626 Tf 19.006 0 Td [(x)]TJ/F78 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.876 1.96 Td [(k)]TJ/F84 9.9626 Tf -196.039 -23.185 Td [(else)-250(if)]TJ/F78 9.9626 Tf 28.159 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F78 9.9626 Tf 71.212 -23.185 Td [(r)-17(e)-25(s)]TJ/F192 10.3811 Tf 12.294 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F78 9.9626 Tf 18.738 0 Td [(r)-17(e)]TJ/F192 10.3811 Tf 8.169 0 Td [(\050)]TJ/F78 9.9626 Tf 4.443 0 Td 
[(x)]TJ/F192 10.3811 Tf 5.33 0 Td [(\051)]TJ/F190 10.3811 Tf 4.274 0 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F192 10.3811 Tf 6.346 1.858 Td [(+)]TJ/F190 10.3811 Tf 10.256 0 Td [(k)]TJ/F78 9.9626 Tf 5.369 0 Td [(i)-32(m)]TJ/F192 10.3811 Tf 11.089 0 Td [(\050)]TJ/F78 9.9626 Tf 4.443 0 Td [(x)]TJ/F192 10.3811 Tf 5.33 0 Td [(\051)]TJ/F190 10.3811 Tf 4.274 0 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.858 Td [(1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -18.75 Td [(u)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -212.211 -21.96 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 138.668 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ -130.06 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ + [-525(psb_geasums\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -184.985 -18.75 Td [(l)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 8.299 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 183.151 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ -171.773 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -184.985 -18.75 Td [(info)]TJ + [-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ 0 g 0 G - 143.074 -29.888 Td [(23)]TJ 0 g 0 G ET - -endstream -endobj -983 0 obj -<< -/Length 7619 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 177.137 514.627 cm +[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +Q BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.17)-1000(psb)]TJ +/F78 9.9626 Tf 183.14 506.059 Td [(r)-17(e)-25(s)-8868(x)]TJ/F75 9.9626 Tf 221.013 0 Td [(Subroutine)]TJ ET q -1 0 0 1 202.769 706.328 cm +1 0 0 1 177.137 
502.274 cm +[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +Q +BT +/F84 9.9626 Tf 183.115 493.706 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 420.472 493.905 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 205.758 706.129 Td [(set)]TJ +/F84 9.9626 Tf 423.461 493.706 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 219.078 706.328 cm +1 0 0 1 420.472 481.95 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 222.067 706.129 Td [(mat)]TJ +/F84 9.9626 Tf 423.461 481.751 Td [(geasums)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 239.82 706.328 cm +1 0 0 1 420.472 469.995 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 242.809 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -92.104 -18.964 Td [(call)]TJ -0 g 0 G - [-1050(psb_set_mat_default\050a\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.231 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(a)-203(variable)-203(of)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 55.42 0 Td [(class)]TJ -0 g 0 G - [(\050psb_T_base_sparse_mat\051)]TJ/F54 9.9626 Tf 148.475 0 Td [(r)18(equesting)-203(a)-204(new)-203(default)-203(stor)18(-)]TJ -188.951 -11.956 Td [(age)-250(format.)]TJ 0 -11.955 Td [(T)90(ype:)-310(r)18(equir)18(ed.)]TJ/F51 9.9626 Tf -24.907 -26.815 Td [(3.2.18)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -18.964 Td [(call)]TJ -0 g 0 G - [-1050(a%clone\050b,info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ -0 g 0 G 
-/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ -0 g 0 G +/F84 9.9626 Tf 423.461 469.795 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 420.472 458.04 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 423.461 457.84 Td [(geasums)]TJ +ET +q +1 0 0 1 177.137 454.054 cm +[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +Q 0 g 0 G - 0 -19.231 Td [(a)]TJ +BT +/F84 9.9626 Tf 280.768 425.676 Td [(T)92(able)-250(7:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -32.138 Td [(On)-250(Return)]TJ 0 g 0 G +/F75 9.9626 Tf -130.063 -37.636 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -19.232 Td [(b)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +/F75 9.9626 Tf -29.828 -22.46 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -11.068 -19.231 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -26.815 Td [(3.2.19)-1000(Named)-250(Constants)]TJ + 0 -22.459 Td [(x)]TJ 0 g 0 G - 0 -18.964 Td [(psb)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 175.611 295.3 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-248(an)-247(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 
9.9626 Tf 1 0 0 1 420.354 295.3 Tm [(psb)]TJ ET q -1 0 0 1 167.9 372.049 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 436.673 295.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 170.889 371.85 Td [(dupl)]TJ +/F145 9.9626 Tf 439.811 295.3 Td [(T)]TJ ET q -1 0 0 1 193.066 372.049 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 445.669 295.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 196.055 371.85 Td [(ovwrt)]TJ +/F145 9.9626 Tf 448.807 295.3 Td [(vect)]TJ ET q -1 0 0 1 223.222 372.049 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 470.356 295.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 231.193 371.85 Td [(Duplicate)-259(coef)18(\002cients)-259(shou)1(ld)-259(be)-259(overwritten)-259(\050i.e.)-336(ignor)18(e)-259(du-)]TJ -55.582 -11.955 Td [(plications\051)]TJ +/F145 9.9626 Tf 473.495 295.3 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(7)]TJ +0 g 0 G + [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.231 Td [(psb)]TJ +/F75 9.9626 Tf -24.906 -22.459 Td [(desc)]TJ ET q -1 0 0 1 167.9 340.863 cm +1 0 0 1 171.218 261.085 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 340.664 Td [(dupl)]TJ +/F75 9.9626 Tf 174.207 260.886 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 193.066 340.863 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 324.173 213.264 cm +[]0 d 0 J 0.398 
w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 196.055 340.664 Td [(add)]TJ +/F145 9.9626 Tf 327.311 213.065 Td [(desc)]TJ ET q -1 0 0 1 213.808 340.863 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 348.86 213.264 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 221.778 340.664 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(be)-250(added;)]TJ +/F145 9.9626 Tf 351.998 213.065 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -222.214 -22.459 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.46 Td [(res)]TJ +0 g 0 G +/F84 9.9626 Tf 18.261 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F78 9.9626 Tf 176.183 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -174.743 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 1.003 0 0 1 175.611 132.281 Tm [(Short)-248(as:)-309(a)-249(long)-248(pr)18(ecision)-248(r)17(eal)-248(number)74(.)-309(Speci\002ed)-248(as:)-309(a)-248(long)-249(pr)18(ecision)-248(r)18(eal)]TJ 1 0 0 1 175.611 120.326 Tm [(number)74(.)]TJ +0 g 0 G + 141.968 -29.888 Td [(42)]TJ 0 g 0 G -/F51 9.9626 Tf -71.073 -19.232 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 321.632 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 321.432 Td [(dupl)]TJ ET -q -1 0 0 1 193.066 321.632 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1242 0 obj +<< +/Length 582 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G BT -/F51 9.9626 Tf 196.055 321.432 Td [(err)]TJ +/F75 9.9626 Tf 99.895 706.129 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 
-47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 142.357 -567.87 Td [(43)]TJ +0 g 0 G ET -q -1 0 0 1 209.384 321.632 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1250 0 obj +<< +/Length 6755 +>> +stream 0 g 0 G -BT -/F54 9.9626 Tf 217.355 321.432 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(trigger)-250(an)-250(err)18(or)-250(conditino)]TJ 0 g 0 G -/F51 9.9626 Tf -66.65 -19.231 Td [(psb)]TJ +BT +/F75 11.9552 Tf 150.705 706.129 Td [(4.8)-1000(psb)]TJ ET q -1 0 0 1 167.9 302.4 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 170.889 302.201 Td [(upd)]TJ +/F75 11.9552 Tf 201.825 706.129 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F84 9.9626 Tf -51.429 -20.076 Td [(This)-250(function)-250(computes)-250(the)-250(2-norm)-250(of)-250(a)-250(vector)]TJ/F78 9.9626 Tf 206.349 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -211.245 -11.955 Td [(If)]TJ/F78 9.9626 Tf 9.459 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F78 9.9626 Tf 122.551 -25.46 Td [(n)-15(r)-35(m)]TJ/F84 9.9626 Tf 17.788 0 Td [(2)]TJ/F190 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.398 9.727 Td [(p)]TJ ET q -1 0 0 1 189.748 302.4 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 338.242 658.569 cm +[]0 d 0 J 0.408 w 0 0 m 16.592 0 l S Q BT -/F51 9.9626 Tf 192.737 302.201 Td [(d\003t)]TJ +/F78 9.9626 Tf 338.536 648.638 Td [(x)]TJ/F78 7.5716 Tf 5.398 2.88 Td [(T)]TJ/F78 9.9626 Tf 5.695 -2.88 Td [(x)]TJ/F84 9.9626 Tf -198.924 -23.065 Td [(else)-250(if)]TJ/F78 9.9626 Tf 28.159 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F78 9.9626 Tf 103.113 -25.46 Td [(n)-15(r)-35(m)]TJ/F84 9.9626 Tf 17.788 0 Td [(2)]TJ/F190 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.398 
9.727 Td [(p)]TJ ET q -1 0 0 1 208.827 302.4 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 337.504 610.044 cm +[]0 d 0 J 0.408 w 0 0 m 18.069 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 216.797 302.201 Td [(Default)-250(update)-250(strategy)-250(for)-250(matrix)-250(coef)18(\002cients;)]TJ +/F78 9.9626 Tf 337.798 600.113 Td [(x)]TJ/F78 7.5716 Tf 5.587 2.88 Td [(H)]TJ/F78 9.9626 Tf 6.982 -2.88 Td [(x)]TJ +0 g 0 G +0 g 0 G 0 g 0 G -/F51 9.9626 Tf -66.092 -19.231 Td [(psb)]TJ ET q -1 0 0 1 167.9 283.169 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 179.498 575.464 cm +[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S Q BT -/F51 9.9626 Tf 170.889 282.97 Td [(upd)]TJ +/F78 9.9626 Tf 185.6 566.896 Td [(n)-15(r)-35(m)]TJ/F84 9.9626 Tf 17.789 0 Td [(2)]TJ/F78 9.9626 Tf 82.504 0 Td [(x)]TJ/F75 9.9626 Tf 120.622 0 Td [(Function)]TJ ET q -1 0 0 1 189.748 283.169 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 179.498 563.111 cm +[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S Q BT -/F51 9.9626 Tf 192.737 282.97 Td [(srch)]TJ +/F84 9.9626 Tf 185.476 554.543 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 212.144 283.169 cm +1 0 0 1 422.833 554.742 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 220.114 282.97 Td [(Update)-250(strategy)-250(based)-250(on)-250(sear)18(ch)-250(into)-250(the)-250(data)-250(str)8(uctur)18(e;)]TJ -0 g 0 G -/F51 9.9626 Tf -69.409 -19.232 Td [(psb)]TJ +/F84 9.9626 Tf 425.822 554.543 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 167.9 263.938 cm +1 0 0 1 422.833 542.787 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 263.738 Td [(upd)]TJ +/F84 9.9626 Tf 425.822 542.588 Td [(genrm2)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 189.748 263.938 cm +1 0 0 1 422.833 530.832 cm []0 d 0 J 0.398 w 0 0 m 
2.989 0 l S Q BT -/F51 9.9626 Tf 192.737 263.738 Td [(perm)]TJ +/F84 9.9626 Tf 425.822 530.632 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 217.135 263.938 cm +1 0 0 1 422.833 518.876 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 225.106 263.738 Td [(Update)-392(strategy)-393(based)-392(on)-393(additional)-392(permutation)-393(data)-392(\050see)]TJ -49.495 -11.955 Td [(tools)-250(r)18(outine)-250(description\051.)]TJ/F51 11.9552 Tf -24.906 -28.807 Td [(3.3)-1000(Dense)-250(V)111(ector)-250(Data)-250(Structure)]TJ/F54 9.9626 Tf 0 -18.964 Td [(The)]TJ/F59 9.9626 Tf 20.094 0 Td [(psb)]TJ +/F84 9.9626 Tf 425.822 518.677 Td [(genrm2)]TJ ET q -1 0 0 1 187.117 204.211 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 179.498 514.891 cm +[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S Q +0 g 0 G BT -/F59 9.9626 Tf 190.255 204.012 Td [(T)]TJ +/F84 9.9626 Tf 280.768 486.513 Td [(T)92(able)-250(8:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -115.119 -27.631 Td [(psb_genrm2\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_norm2\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,global]\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -36.169 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -22.221 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.221 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 
Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 175.611 318.495 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-248(an)-247(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 420.354 318.495 Tm [(psb)]TJ ET q -1 0 0 1 196.113 204.211 cm +1 0 0 1 436.673 318.695 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 199.252 204.012 Td [(vect)]TJ +/F145 9.9626 Tf 439.811 318.495 Td [(T)]TJ ET q -1 0 0 1 220.801 204.211 cm +1 0 0 1 445.669 318.695 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 223.939 204.012 Td [(type)]TJ/F54 9.9626 Tf 24.338 0 Td [(data)-343(str)8(uctur)18(e)-343(encapsulates)-343(the)-343(dense)-343(vectors)-342(in)-343(a)-343(way)]TJ -97.572 -11.955 Td [(similar)-368(to)-368(sparse)-368(matrices,)-397(i.e.)-664(including)-368(a)-368(base)-368(type)]TJ/F59 9.9626 Tf 242.472 0 Td [(psb)]TJ +/F145 9.9626 Tf 448.807 318.495 Td [(vect)]TJ ET q -1 0 0 1 409.495 192.256 cm +1 0 0 1 470.356 318.695 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 412.633 192.057 Td [(T)]TJ +/F145 9.9626 Tf 473.495 318.495 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(8)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -22.221 Td [(desc)]TJ ET q -1 0 0 1 418.491 192.256 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.218 284.518 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 421.63 192.057 Td [(base)]TJ +/F75 9.9626 Tf 174.207 284.319 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 
-31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 443.178 192.256 cm +1 0 0 1 324.173 236.698 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 446.317 192.057 Td [(vect)]TJ +/F145 9.9626 Tf 327.311 236.499 Td [(desc)]TJ ET q -1 0 0 1 467.866 192.256 cm +1 0 0 1 348.86 236.698 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 471.004 192.057 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -341.22 -11.956 Td [(The)-263(user)-263(will)-263(not,)-266(in)-263(general,)-267(access)-263(the)-263(vector)-263(components)-263(dir)18(ectly)111(,)-266(but)-263(rather)]TJ 0 -11.955 Td [(via)-222(the)-222(r)18(out)1(ines)-222(of)-222(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-222(6)]TJ +/F145 9.9626 Tf 351.998 236.499 Td [(type)]TJ 0 g 0 G - [(.)-300(Among)-222(other)-222(simple)-222(things,)-227(we)-222(de\002ne)-222(her)18(e)-221(an)-222(extrac-)]TJ 0 -11.955 Td [(tion)-273(method)-274(that)-273(can)-274(be)-273(used)-274(to)-273(get)-274(a)-273(full)-274(copy)-273(of)-274(the)-273(part)-274(of)-273(the)-274(vector)-273(stor)18(ed)]TJ 0 -11.955 Td [(on)-250(the)-250(local)-250(pr)18(ocess.)]TJ 14.944 -11.955 Td [(The)-311(type)-311(declaration)-311(is)-310(shown)-311(in)-311(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-311(3)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -222.214 -22.221 Td [(global)]TJ +0 g 0 G +/F84 9.9626 Tf 0.994 0 0 1 184.468 214.278 Tm [(Speci\002es)-250(whether)-250(t)1(he)-250(computation)-250(should)-250(include)-250(the)-249(global)-250(r)18(eduction)]TJ 1 0 0 1 175.611 202.322 Tm [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td 
[(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F145 9.9626 Tf 165.319 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.true.)]TJ +0 g 0 G +/F75 9.9626 Tf -190.225 -34.176 Td [(On)-250(Return)]TJ 0 g 0 G - [-311(wher)18(e)]TJ/F59 9.9626 Tf 217.442 0 Td [(T)]TJ/F54 9.9626 Tf 8.327 0 Td [(is)-311(a)-311(placeholder)-311(for)-310(the)]TJ -240.713 -11.955 Td [(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ 0 g 0 G - 166.874 -29.888 Td [(24)]TJ +/F84 9.9626 Tf 166.874 -29.888 Td [(44)]TJ 0 g 0 G ET endstream endobj -989 0 obj +1255 0 obj << -/Length 4359 +/Length 4510 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(I)]TJ -0 g 0 G -/F54 9.9626 Tf 8.857 0 Td [(Integer;)]TJ -0 g 0 G -/F51 9.9626 Tf -8.857 -20.359 Td [(S)]TJ -0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ +/F75 9.9626 Tf 99.895 706.129 Td [(Function)-250(V)111(alue)]TJ 0 g 0 G -/F51 9.9626 Tf -11.069 -20.358 Td [(D)]TJ -0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ +/F84 9.9626 Tf 73.883 0 Td [(is)-250(the)-250(2-norm)-250(of)-250(vector)]TJ/F78 9.9626 Tf 102.161 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ 0.98 0 0 1 124.802 694.174 Tm [(Scope:)]TJ/F75 9.9626 Tf 0.98 0 0 1 155.612 694.174 Tm [(global)]TJ/F84 9.9626 Tf 0.98 0 0 1 186.2 694.174 Tm [(unless)-244(the)-244(optional)-244(variab)1(le)]TJ/F145 9.9626 Tf 1 0 0 1 307.484 694.174 Tm [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -13.281 -20.359 Td [(C)]TJ + [(.false.)]TJ/F84 9.9626 Tf 0.98 0 0 1 383.09 694.174 Tm [(has)-244(been)-244(spec-)]TJ 1 0 0 1 124.802 682.219 Tm [(i\002ed)]TJ -0.309 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 
0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.175 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ +/F75 9.9626 Tf -24.907 -19.925 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -12.175 -20.358 Td [(Z)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 11.627 0 Td [(Double)-250(pr)18(ecision)-250(complex.)]TJ -11.627 -20.251 Td [(The)-209(actual)-208(data)-209(is)-208(contained)-209(in)-209(the)-208(polymorphic)-209(component)]TJ/F59 9.9626 Tf 261.152 0 Td [(v%v)]TJ/F54 9.9626 Tf 15.691 0 Td [(;)-222(the)-209(separation)]TJ -276.843 -11.955 Td [(between)-353(the)-353(application)-353(and)-353(the)-353(actual)-353(data)-353(is)-353(esse)1(ntial)-353(for)-353(cases)-353(wher)18(e)-353(it)-353(is)]TJ 0 -11.955 Td [(necessary)-321(to)-321(link)-320(to)-321(data)-321(storage)-321(made)-320(available)-321(elsewher)18(e)-321(outside)-320(the)-321(dir)18(ect)]TJ 0 -11.955 Td [(contr)18(ol)-231(of)-231(the)-231(compiler/application,)-235(e.g.)-304(data)-231(stor)18(ed)-231(in)-231(a)-231(graphics)-231(accelerator)-74('s)]TJ 0 -11.955 Td [(private)-250(memory)111(.)]TJ +/F84 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + 0.98 0 0 1 124.493 548.719 Tm [(The)-201(computation)-200(of)-201(a)-200(global)-201(r)18(esult)-200(r)18(equir)18(es)-200(a)-201(global)-200(communication,)-212(which)]TJ 1.02 0 0 1 124.802 
536.764 Tm [(entails)-265(a)-265(signi\002cant)-264(over)17(head.)-363(It)-265(may)-264(be)-265(necessary)-265(and/or)-265(advisable)-265(to)]TJ 1.02 0 0 1 124.802 524.809 Tm [(compute)-287(multiple)-287(norms)-287(at)-287(the)-287(same)-287(time;)-308(in)-286(this)-287(case,)-298(it)-287(is)-287(possible)-287(to)]TJ 1 0 0 1 124.802 512.854 Tm [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ 24.981 -17.933 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(1)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ ET q -1 0 0 1 99.895 452.975 cm -0 0 343.711 82.69 re f +1 0 0 1 227.224 495.12 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 112.299 525.005 Td [(type)]TJ -0 g 0 G - [-525(psb_T_base_vect_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.414 -10.959 Td [(TYPE)]TJ -0 g 0 G - [(\050KIND_\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ -0 g 0 G - [-525(v\050:\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.414 -10.959 Td [(end)-525(type)]TJ -0 g 0 G - [-525(psb_T_base_vect_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -21.918 Td [(type)]TJ -0 g 0 G - [-525(psb_T_vect_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.414 -10.959 Td [(class)]TJ -0 g 0 G - [(\050psb_T_base_vect_type\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ -0 g 0 G - [-525(v)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.414 -10.959 Td [(end)-525(type)]TJ -0 g 0 G - [-1050(psb_T_vect_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 6.677 -41.429 Td [(Listing)-250(3:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(dense)-250(vector)74(.)]TJ/F51 9.9626 Tf -19.081 -39.929 Td 
[(3.3.1)-1000(V)111(ector)-250(Methods)]TJ 0 -19.174 Td [(3.3.2)-1000(get)]TJ +/F84 9.9626 Tf 230.817 494.921 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(1)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ ET q -1 0 0 1 144.219 358.919 cm +1 0 0 1 317.15 495.12 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 147.208 358.719 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(dense)-250(vector)]TJ/F59 9.9626 Tf -47.313 -19.173 Td [(nr)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(v%get_nrows\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.359 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.358 Td [(v)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -34.198 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.358 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(dense)-250(vector)]TJ/F59 9.9626 Tf 161.273 0 Td [(v)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ/F51 9.9626 Tf -239.281 -27.757 Td [(3.3.3)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(dense)-250(vector)]TJ/F59 9.9626 Tf 0 -19.174 Td [(memory_size)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(v%sizeof\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.358 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(25)]TJ -0 g 0 G +/F84 9.9626 Tf 321.113 494.921 Td [(a)-370(,)-284(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-162(e)-368(.)-206(\051)]TJ -171.33 -11.955 Td 
[(v)-107(r)-108(e)-107(s)-266(\050)-159(2)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ ET - -endstream -endobj -996 0 obj -<< -/Length 3735 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 227.224 483.165 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 9.9626 Tf 150.705 706.129 Td [(v)]TJ -0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.338 -36.868 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -23.918 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F51 9.9626 Tf -72.776 -32.82 Td [(3.3.4)-1000(set)-250(\227)-250(Set)-250(contents)-250(of)-250(the)-250(vector)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 5.23 -20.898 Td [(call)]TJ -0 g 0 G - [-1050(v%set\050alpha[,first,last]\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-1050(v%set\050vect[,first,last]\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-1050(v%zero\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf -5.23 -24.913 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -23.918 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -23.918 Td [(v)]TJ -0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.338 -35.873 Td [(alpha)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(A)-250(scalar)-250(value.)]TJ -5.53 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 
1 RG - [-250(1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -23.918 Td [(\002rst,last)]TJ -0 g 0 G -/F54 9.9626 Tf 41.215 0 Td [(Boundaries)-250(for)-250(setting)-250(in)-250(the)-250(vector)74(.)]TJ -16.309 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integers.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -23.918 Td [(vect)]TJ -0 g 0 G -/F54 9.9626 Tf 23.242 0 Td [(An)-250(array)]TJ 1.664 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G - [(.)]TJ -24.906 -25.91 Td [(Note)-336(that)-336(a)-335(call)-336(to)]TJ/F59 9.9626 Tf 84.614 0 Td [(v%zero\050\051)]TJ/F54 9.9626 Tf 45.189 0 Td [(is)-336(pr)18(ovided)-336(as)-335(a)-336(shorthand,)-358(but)-335(is)-336(equivalent)-336(to)]TJ -129.803 -11.956 Td [(a)-270(call)-270(to)]TJ/F59 9.9626 Tf 36.947 0 Td [(v%set\050zero\051)]TJ/F54 9.9626 Tf 60.225 0 Td [(with)-270(the)]TJ/F59 9.9626 Tf 39.456 0 Td [(zero)]TJ/F54 9.9626 Tf 23.613 0 Td [(constant)-270(having)-270(the)-271(appr)18(opriat)1(e)-271(type)-270(and)]TJ -160.241 -11.955 Td [(kind.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -25.91 Td [(On)-250(Return)]TJ -0 g 0 G +/F84 9.9626 Tf 230.817 482.966 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(2)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ +ET +q +1 0 0 1 317.15 483.165 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 321.113 482.966 Td 
[(a)-370(,)-284(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-162(e)-368(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(3)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ +ET +q +1 0 0 1 227.224 471.21 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 230.817 471.011 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(3)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ +ET +q +1 0 0 1 317.15 471.21 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 321.113 471.011 Td [(a)-370(,)-284(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-162(e)-368(.)-206(\051)]TJ -170.659 -11.956 Td [(c)-175(a)-175(l)-174(l)-831(p)-56(s)-56(b)]TJ +ET +q +1 0 0 1 197.143 459.255 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 200.687 459.055 Td [(n)-56(r)-56(m)-55(2)-190(\050)-265(c)-131(t)-131(x)-132(t)-437(,)-283(v)-107(r)-107(e)-108(s)-300(\050)-193(1)-193(:)-193(3)-193(\051)-193(\051)]TJ 0 g 0 G - 0 -23.918 Td [(v)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)74(,)-250(with)-250(updated)-250(entries)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ + 1.007 0 0 1 124.802 437.138 Tm [(In)-248(this)-248(way)-248(the)-248(global)-248(communication,)-248(which)-248(for)-248(small)-248(sizes)-248(is)-248(a)-248(laten)1(cy-)]TJ 1 0 0 1 124.802 425.182 Tm [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ 0 g 0 G -/F54 9.9626 Tf 110.536 -41.843 Td [(26)]TJ + 141.968 -334.744 Td [(45)]TJ 0 g 0 G ET endstream endobj -1003 0 obj +1268 0 obj << -/Length 4464 +/Length 6311 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.3.5)-1000(get)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(4.9)-1000(psb)]TJ ET q -1 0 0 1 144.219 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT 
-/F51 9.9626 Tf 147.208 706.129 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -47.313 -19.66 Td [(extv)-525(=)-525(v%get_vect\050[n]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.994 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -21.362 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -21.361 Td [(v)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -33.316 Td [(n)]TJ -0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(Size)-250(to)-250(be)-250(r)18(eturned)]TJ 13.838 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(;)-250(default:)-310(entir)18(e)-250(vector)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -90.182 -34.95 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -21.361 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(An)-316(allocatable)-316(array)-316(holding)-316(a)-317(copy)-316(of)-316(the)-316(dense)-316(vector)-316(con-)]TJ -47.87 -11.955 Td [(tents.)-321(If)-254(the)-254(ar)18(gument)]TJ/F52 9.9626 Tf 98.086 0 Td [(n)]TJ/F54 9.9626 Tf 8.192 0 Td [(is)-254(speci\002ed,)-255(the)-253(size)-254(of)-254(the)-254(r)18(eturned)-254(array)-253(equals)]TJ -106.278 -11.955 Td [(the)-339(minimum)-339(between)]TJ/F52 9.9626 Tf 105.247 0 Td [(n)]TJ/F54 9.9626 Tf 9.041 0 Td [(and)-339(the)-339(internal)-339(size)-339(of)-339(the)-339(vector)74(,)-361(or)-339(0)-339(if)]TJ/F52 9.9626 Tf 188.353 0 Td [(n)]TJ/F54 9.9626 Tf 9.04 0 Td [(is)]TJ -311.681 -11.956 Td [(negative;)-314(otherwise,)-303(the)-292(size)-293(of)-292(the)-293(array)-292(is)-293(the)-292(same)-293(as)-292(the)-293(internal)-292(size)]TJ 0 -11.955 Td [(of)-250(the)-250(vector)74(.)]TJ/F51 9.9626 Tf -24.907 -29.183 Td 
[(3.3.6)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -19.659 Td [(call)-1050(x%clone\050y,info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.995 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -21.361 Td [(On)-250(Entry)]TJ -0 g 0 G +/F75 11.9552 Tf 201.825 706.129 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F84 9.9626 Tf 0.988 0 0 1 150.396 687.165 Tm [(This)-253(subr)18(outine)-253(computes)-253(a)-253(series)-253(of)-253(2-norms)-253(on)-253(the)-254(co)1(lumns)-254(of)-253(a)-253(dense)-253(matrix)]TJ/F78 9.9626 Tf 1 0 0 1 150.999 675.21 Tm [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(:)]TJ/F78 9.9626 Tf 126.859 -11.955 Td [(r)-17(e)-25(s)]TJ/F192 10.3811 Tf 12.294 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F78 9.9626 Tf 19.007 0 Td [(x)]TJ/F192 10.3811 Tf 5.33 0 Td [(\050)]TJ/F84 9.9626 Tf 4.274 0 Td [(:)-12(,)]TJ/F78 9.9626 Tf 6.821 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 4.274 0 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.744 Td [(2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -21.362 Td [(x)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -186.92 -16.189 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(dense)-250(vector)74(.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + [-525(psb_genrm2s\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -77.918 -34.95 Td [(On)-250(Return)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -21.361 Td [(y)]TJ + [-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ 0 g 0 G -/F51 9.9626 Tf -10.521 -21.361 Td [(info)]TJ 0 g 0 G -/F54 
9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F51 11.9552 Tf -23.801 -31.176 Td [(3.4)-1000(Preconditioner)-250(data)-250(structure)]TJ/F54 9.9626 Tf 0 -19.659 Td [(Our)-396(base)-397(l)1(ibrary)-397(of)18(fers)-396(support)-396(for)-396(simple)-397(well)-396(known)-396(pr)18(econditioners)-396(like)]TJ 0 -11.956 Td [(Diagonal)-250(Scaling)-250(or)-250(Block)-250(Jacobi)-250(with)-250(incomplete)-250(factorization)-250(ILU\0500\051.)]TJ 14.944 -12.314 Td [(A)-361(pr)18(econditioner)-361(is)-361(held)-361(in)-361(the)]TJ/F59 9.9626 Tf 143.781 0 Td [(psb)]TJ ET q -1 0 0 1 274.939 168.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 177.386 630.954 cm +[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S +Q +BT +/F78 9.9626 Tf 183.389 622.386 Td [(r)-17(e)-25(s)-8868(x)]TJ/F75 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 177.386 618.6 cm +[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S Q BT -/F59 9.9626 Tf 278.077 168.146 Td [(prec)]TJ +/F84 9.9626 Tf 183.364 610.032 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 299.626 168.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 420.721 610.231 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 302.764 168.146 Td [(type)]TJ/F54 9.9626 Tf 24.519 0 Td [(data)-361(str)8(uctur)18(e)-361(r)18(eported)-361(in)]TJ -227.388 -11.955 Td [(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-282(4)]TJ -0 g 0 G - [(.)-407(The)]TJ/F59 9.9626 Tf 59.933 0 Td [(psb_prec_type)]TJ/F54 9.9626 Tf 70.808 0 Td [(data)-282(type)-283(may)-282(contain)-282(a)-283(simple)-282(pr)18(econditioning)]TJ -130.741 -11.955 Td [(matrix)-376(with)-376(the)-376(associated)-376(communication)-375(descriptor)74(.The)-376(internal)-376(pr)18(econdi-)]TJ 0 -11.955 Td [(tioner)-317(is)-317(allocated)-318(appr)18(opriately)-317(with)-317(the)-317(dynamic)-318(type)-317(corr)18(esponding)-317(to)-317(the)]TJ 0 -11.955 Td [(desir)18(ed)-250(pr)18(econditioner)74(.)]TJ -0 g 0 G - 166.875 -29.888 Td [(27)]TJ -0 g 0 G 
+/F84 9.9626 Tf 423.71 610.032 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET - -endstream -endobj -1009 0 obj -<< -/Length 3969 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG q -1 0 0 1 150.705 671.26 cm -0 0 343.711 38.854 re f +1 0 0 1 420.721 598.276 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 423.71 598.077 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 420.721 586.321 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 423.71 586.122 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 420.721 574.366 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 163.108 699.454 Td [(type)]TJ +/F84 9.9626 Tf 423.71 574.167 Td [(genrm2s)]TJ +ET +q +1 0 0 1 177.386 570.381 cm +[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S +Q 0 g 0 G - [-525(psb_Tprec_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.415 -10.959 Td [(class)]TJ +BT +/F84 9.9626 Tf 280.768 542.002 Td [(T)92(able)-250(9:)-310(Data)-250(types)]TJ 0 g 0 G - [(\050psb_T_base_prec_type\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ 0 g 0 G - [-525(prec)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.415 -10.959 Td [(end)-525(type)]TJ +/F75 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ 0 g 0 G - [-525(psb_Tprec_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 1.845 -41.429 Td 
[(Listing)-250(4:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(pr)18(econditioner)74(.)]TJ/F51 11.9552 Tf -14.248 -32.698 Td [(3.5)-1000(Heap)-250(data)-250(structure)]TJ/F54 9.9626 Tf 0 -18.964 Td [(Among)-310(the)-311(tools)-310(r)18(outines)-310(of)-310(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-311(6)]TJ 0 g 0 G - [(,)-325(we)-310(have)-311(a)-310(number)-310(of)-311(so)1(rting)-311(utilities;)-340(the)]TJ 0 -11.955 Td [(heap)-250(sort)-250(is)-250(implemented)-250(in)-250(terms)-250(of)-250(heaps)-250(having)-250(the)-250(following)-250(signatur)18(es:)]TJ + 0 -19.926 Td [(x)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -19.925 Td [(psb)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 175.611 419.863 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-248(an)-247(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 420.354 419.863 Tm [(psb)]TJ ET q -1 0 0 1 167.023 552.764 cm +1 0 0 1 436.673 420.062 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 170.162 552.565 Td [(T)]TJ +/F145 9.9626 Tf 439.811 419.863 Td [(T)]TJ ET q -1 0 0 1 176.02 552.764 cm +1 0 0 1 445.669 420.062 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 179.158 552.565 Td [(heap)]TJ -0 g 0 G -/F54 9.9626 Tf 25.903 0 Td [(:)-333(a)-262(heap)-262(containing)-262(e)1(lements)-262(of)-262(type)-262(T)74(,)-261(wher)18(e)-262(T)-262(can)-261(be)]TJ/F59 9.9626 Tf 242.282 0 Td [(i,s,c,d,z)]TJ/F54 9.9626 Tf -271.731 -11.955 Td 
[(for)-250(integer)74(,)-250(r)18(eal)-250(and)-250(complex)-250(data;)]TJ -0 g 0 G -/F59 9.9626 Tf -24.907 -19.925 Td [(psb)]TJ +/F145 9.9626 Tf 448.807 419.863 Td [(vect)]TJ ET q -1 0 0 1 167.023 520.884 cm +1 0 0 1 470.356 420.062 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 170.162 520.685 Td [(T)]TJ +/F145 9.9626 Tf 473.495 419.863 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf -297.884 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(9)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.925 Td [(desc)]TJ ET q -1 0 0 1 176.02 520.884 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.218 388.181 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 179.158 520.685 Td [(idx)]TJ +/F75 9.9626 Tf 174.207 387.982 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 195.476 520.884 cm +1 0 0 1 324.173 340.361 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 198.615 520.685 Td [(heap)]TJ -0 g 0 G -/F54 9.9626 Tf 25.902 0 Td [(:)-289(a)-207(heap)-207(containing)-207(elements)-207(of)-207(type)-207(T)74(,)-207(as)-207(above,)-215(together)-207(with)]TJ -48.906 -11.956 Td [(an)-250(integer)-250(index.)]TJ -24.906 -19.925 Td [(Given)-250(a)-250(heap)-250(object,)-250(the)-250(following)-250(methods)-250(ar)18(e)-250(de\002ned)-250(on)-250(it:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(init)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td 
[(Initialize)-250(memory;)-250(also)-250(choose)-250(ascending)-250(or)-250(descending)-250(or)18(der;)]TJ -0 g 0 G -/F51 9.9626 Tf -21.021 -19.925 Td [(howmany)]TJ -0 g 0 G -/F54 9.9626 Tf 50.371 0 Td [(Curr)18(ent)-250(heap)-250(occupancy;)]TJ -0 g 0 G -/F51 9.9626 Tf -50.371 -19.926 Td [(insert)]TJ -0 g 0 G -/F54 9.9626 Tf 30.983 0 Td [(Add)-250(an)-250(item)-250(\050or)-250(an)-250(item)-250(and)-250(its)-250(index\051;)]TJ -0 g 0 G -/F51 9.9626 Tf -30.983 -19.925 Td [(get)]TJ +/F145 9.9626 Tf 327.311 340.161 Td [(desc)]TJ ET q -1 0 0 1 165.141 409.302 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 348.86 340.361 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 168.129 409.103 Td [(\002rst)]TJ +/F145 9.9626 Tf 351.998 340.161 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(Remove)-250(and)-250(r)18(eturn)-250(the)-250(\002rst)-250(element;)]TJ +/F75 9.9626 Tf -222.214 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -40.109 -19.925 Td [(dump)]TJ 0 g 0 G -/F54 9.9626 Tf 32.099 0 Td [(Print)-250(on)-250(\002le;)]TJ + 0 -19.925 Td [(res)]TJ +0 g 0 G +/F84 9.9626 Tf 18.261 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F78 9.9626 Tf 176.183 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -174.743 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -32.099 -19.926 Td [(free)]TJ +/F75 9.9626 Tf -24.906 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 22.695 0 Td [(Release)-250(memory)111(.)]TJ -22.695 -19.925 Td [(These)-305(objects)-305(ar)18(e)-305(used)-305(in)-305(AMG4PSBLAS)-305(to)-305(implement)-305(the)-305(factorization)-305(algo-)]TJ 0 -11.955 Td [(rithms.)]TJ +/F84 9.9626 Tf 23.8 0 Td 
[(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 166.874 -246.934 Td [(28)]TJ + 142.356 -106.261 Td [(46)]TJ 0 g 0 G ET endstream endobj -928 0 obj +1165 0 obj << /Type /ObjStm /N 100 -/First 881 -/Length 8939 ->> -stream -67 0 923 55 930 148 932 262 71 319 75 375 933 431 79 488 83 542 929 598 -935 691 937 805 87 861 91 916 95 971 934 1026 942 1119 938 1269 939 1426 940 1577 -944 1724 99 1781 103 1837 945 1894 941 1951 949 2044 951 2158 947 2214 952 2270 107 2327 -111 2383 948 2439 954 2545 956 2659 115 2716 119 2773 957 2830 123 2887 953 2943 959 3036 -961 3150 127 3206 131 3262 135 3318 958 3374 964 3467 966 3581 139 3638 143 3695 963 3752 -968 3845 970 3959 147 4015 151 4071 967 4127 972 4220 974 4334 155 4391 159 4448 163 4505 -971 4562 976 4655 978 4769 167 4825 975 4880 982 4973 979 5115 980 5260 984 5407 171 5464 -175 5521 179 5577 183 5633 985 5690 981 5747 988 5840 990 5954 986 6010 187 6066 191 6122 -195 6178 987 6234 995 6340 992 6482 993 6627 997 6771 199 6828 994 6885 1002 6978 999 7115 -1004 7262 204 7320 208 7377 212 7433 1005 7490 1001 7548 1008 7655 1000 7793 1010 7940 1006 7999 -% 67 0 obj +/First 987 +/Length 11584 +>> +stream +1160 0 1164 146 1161 205 1172 313 1166 488 1167 629 1168 775 1169 919 1170 1064 1174 1211 +232 1269 1175 1326 1171 1384 1178 1506 1176 1645 1180 1791 1181 1850 1177 1909 1189 2017 1182 2201 +1183 2345 1184 2491 1185 2635 1186 2780 1187 2927 1191 3070 236 3128 1192 3185 1188 3242 1194 3378 +1196 3496 1193 3555 1201 3636 1197 3793 1198 3937 1199 4083 1203 4230 240 4288 1204 4345 1200 4403 +1207 4539 1209 4657 1210 4716 1206 4774 1215 4869 1211 
5026 1212 5170 1213 5316 1217 5463 244 5521 +1218 5578 1214 5636 1223 5772 1219 5929 1220 6073 1221 6216 1225 6363 248 6422 1226 6480 1222 6538 +1228 6674 1230 6792 1231 6850 1227 6907 1236 7002 1232 7159 1233 7303 1234 7449 1238 7596 252 7655 +1239 7713 1235 7772 1241 7908 1243 8026 1240 8084 1249 8165 1245 8322 1246 8465 1247 8611 1251 8758 +256 8817 1252 8875 1248 8933 1254 9055 1256 9173 1257 9231 1258 9289 1259 9347 1260 9406 1261 9465 +1262 9524 1253 9583 1267 9691 1263 9848 1264 9992 1265 10138 1269 10285 260 10344 1270 10402 1266 10461 +% 1160 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [395.773 588.749 402.747 600.809] +/A << /S /GoTo /D (table.1) >> +>> +% 1164 0 obj << -/D [924 0 R /XYZ 99.895 276.666 null] +/D [1162 0 R /XYZ 149.705 753.953 null] >> -% 923 0 obj +% 1161 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 930 0 obj +% 1172 0 obj << /Type /Page -/Contents 931 0 R -/Resources 929 0 R +/Contents 1173 0 R +/Resources 1171 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/Parent 1158 0 R +/Annots [ 1166 0 R 1167 0 R 1168 0 R 1169 0 R 1170 0 R ] >> -% 932 0 obj +% 1166 0 obj << -/D [930 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 355.7 444.603 367.76] +/A << /S /GoTo /D (vdata) >> >> -% 71 0 obj +% 1167 0 obj << -/D [930 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [332.133 343.745 339.206 355.804] +/A << /S /GoTo /D (table.2) >> >> -% 75 0 obj +% 1168 0 obj << -/D [930 0 R /XYZ 150.705 519.544 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 264.018 444.603 276.078] +/A << /S /GoTo /D (vdata) >> >> -% 933 0 obj +% 1169 0 obj << -/D [930 0 R /XYZ 397.537 356.277 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [322.336 252.063 329.21 
264.123] +/A << /S /GoTo /D (table.2) >> >> -% 79 0 obj +% 1170 0 obj << -/D [930 0 R /XYZ 150.705 305.6 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 172.336 323.106 184.396] +/A << /S /GoTo /D (descdata) >> >> -% 83 0 obj +% 1174 0 obj << -/D [930 0 R /XYZ 150.705 194.578 null] +/D [1172 0 R /XYZ 98.895 753.953 null] >> -% 929 0 obj +% 232 0 obj +<< +/D [1172 0 R /XYZ 99.895 716.092 null] +>> +% 1175 0 obj +<< +/D [1172 0 R /XYZ 99.895 499.951 null] +>> +% 1171 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 935 0 obj +% 1178 0 obj << /Type /Page -/Contents 936 0 R -/Resources 934 0 R +/Contents 1179 0 R +/Resources 1177 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R ->> -% 937 0 obj -<< -/D [935 0 R /XYZ 98.895 753.953 null] +/Parent 1158 0 R +/Annots [ 1176 0 R ] >> -% 87 0 obj +% 1176 0 obj << -/D [935 0 R /XYZ 99.895 583.842 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 602.697 435.942 614.756] +/A << /S /GoTo /D (table.2) >> >> -% 91 0 obj +% 1180 0 obj << -/D [935 0 R /XYZ 99.895 466.211 null] +/D [1178 0 R /XYZ 149.705 753.953 null] >> -% 95 0 obj +% 1181 0 obj << -/D [935 0 R /XYZ 99.895 256.035 null] +/D [1178 0 R /XYZ 150.705 512.639 null] >> -% 934 0 obj +% 1177 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 942 0 obj +% 1189 0 obj << /Type /Page -/Contents 943 0 R -/Resources 941 0 R +/Contents 1190 0 R +/Resources 1188 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R -/Annots [ 938 0 R 939 0 R 940 0 R ] +/Parent 1158 0 R +/Annots [ 1182 0 R 1183 0 R 1184 0 R 1185 0 R 1186 0 R 1187 0 R ] >> -% 938 0 obj +% 1182 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [187.544 240.293 199.499 249.399] -/A << /S /GoTo /D 
(cite.DesignPatterns) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 384.684 444.603 396.744] +/A << /S /GoTo /D (vdata) >> >> -% 939 0 obj +% 1183 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [267.981 240.393 279.936 249.399] -/A << /S /GoTo /D (cite.Sparse03) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [332.133 372.729 339.206 384.789] +/A << /S /GoTo /D (table.3) >> >> -% 940 0 obj +% 1184 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [458.483 237.643 465.457 249.703] -/A << /S /GoTo /D (listing.2) >> ->> -% 944 0 obj -<< -/D [942 0 R /XYZ 149.705 753.953 null] ->> -% 99 0 obj -<< -/D [942 0 R /XYZ 150.705 544.277 null] +/Rect [368.549 293.733 444.603 305.793] +/A << /S /GoTo /D (vdata) >> >> -% 103 0 obj +% 1185 0 obj << -/D [942 0 R /XYZ 150.705 296.936 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [322.336 281.778 329.21 293.837] +/A << /S /GoTo /D (table.3) >> >> -% 945 0 obj +% 1186 0 obj << -/D [942 0 R /XYZ 170.328 265.359 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 202.781 323.106 214.841] +/A << /S /GoTo /D (descdata) >> >> -% 941 0 obj +% 1187 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [149.34 119.17 156.313 128.58] +/A << /S /GoTo /D (table.2) >> >> -% 949 0 obj +% 1191 0 obj << -/Type /Page -/Contents 950 0 R -/Resources 948 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/D [1189 0 R /XYZ 98.895 753.953 null] >> -% 951 0 obj +% 236 0 obj << -/D [949 0 R /XYZ 98.895 753.953 null] +/D [1189 0 R /XYZ 99.895 716.092 null] >> -% 947 0 obj +% 1192 0 obj << -/D [949 0 R /XYZ 99.895 665.282 null] +/D [1189 0 R /XYZ 99.895 524.97 null] >> -% 952 0 obj +% 1188 0 obj << -/D [949 0 R /XYZ 409.052 603.446 null] +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 107 
0 obj +% 1194 0 obj << -/D [949 0 R /XYZ 99.895 294.773 null] +/Type /Page +/Contents 1195 0 R +/Resources 1193 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1158 0 R >> -% 111 0 obj +% 1196 0 obj << -/D [949 0 R /XYZ 99.895 276.048 null] +/D [1194 0 R /XYZ 149.705 753.953 null] >> -% 948 0 obj +% 1193 0 obj << -/Font << /F94 915 0 R /F54 586 0 R /F59 812 0 R /F51 584 0 R >> +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 954 0 obj +% 1201 0 obj << /Type /Page -/Contents 955 0 R -/Resources 953 0 R +/Contents 1202 0 R +/Resources 1200 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/Parent 1205 0 R +/Annots [ 1197 0 R 1198 0 R 1199 0 R ] >> -% 956 0 obj +% 1197 0 obj << -/D [954 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 358.503 444.603 370.562] +/A << /S /GoTo /D (vdata) >> >> -% 115 0 obj +% 1198 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 346.547 333.626 358.607] +/A << /S /GoTo /D (table.4) >> +>> +% 1199 0 obj << -/D [954 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 280.074 323.106 292.134] +/A << /S /GoTo /D (descdata) >> >> -% 119 0 obj +% 1203 0 obj << -/D [954 0 R /XYZ 150.705 540.892 null] +/D [1201 0 R /XYZ 98.895 753.953 null] >> -% 957 0 obj +% 240 0 obj << -/D [954 0 R /XYZ 150.705 358.382 null] +/D [1201 0 R /XYZ 99.895 716.092 null] >> -% 123 0 obj +% 1204 0 obj << -/D [954 0 R /XYZ 150.705 300.51 null] +/D [1201 0 R /XYZ 99.895 495.665 null] >> -% 953 0 obj +% 1200 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F192 942 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 959 0 obj +% 1207 0 obj << /Type /Page -/Contents 960 0 R -/Resources 958 0 R +/Contents 1208 0 R +/Resources 1206 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R ->> -% 961 0 obj -<< -/D [959 0 R /XYZ 98.895 
753.953 null] ->> -% 127 0 obj -<< -/D [959 0 R /XYZ 99.895 716.092 null] +/Parent 1205 0 R >> -% 131 0 obj +% 1209 0 obj << -/D [959 0 R /XYZ 99.895 526.761 null] +/D [1207 0 R /XYZ 149.705 753.953 null] >> -% 135 0 obj +% 1210 0 obj << -/D [959 0 R /XYZ 99.895 326.359 null] +/D [1207 0 R /XYZ 150.705 632.19 null] >> -% 958 0 obj +% 1206 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 964 0 obj +% 1215 0 obj << /Type /Page -/Contents 965 0 R -/Resources 963 0 R +/Contents 1216 0 R +/Resources 1214 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/Parent 1205 0 R +/Annots [ 1211 0 R 1212 0 R 1213 0 R ] >> -% 966 0 obj +% 1211 0 obj << -/D [964 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 411.694 444.603 423.754] +/A << /S /GoTo /D (vdata) >> >> -% 139 0 obj +% 1212 0 obj << -/D [964 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 399.739 333.626 411.798] +/A << /S /GoTo /D (table.5) >> >> -% 143 0 obj +% 1213 0 obj << -/D [964 0 R /XYZ 150.705 474.131 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 331.993 323.106 344.052] +/A << /S /GoTo /D (descdata) >> >> -% 963 0 obj +% 1217 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1215 0 R /XYZ 98.895 753.953 null] >> -% 968 0 obj +% 244 0 obj << -/Type /Page -/Contents 969 0 R -/Resources 967 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/D [1215 0 R /XYZ 99.895 716.092 null] >> -% 970 0 obj +% 1218 0 obj << -/D [968 0 R /XYZ 98.895 753.953 null] +/D [1215 0 R /XYZ 99.895 555.856 null] >> -% 147 0 obj +% 1214 0 obj << -/D [968 0 R /XYZ 99.895 716.092 null] +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 151 0 obj +% 1223 0 obj << -/D [968 0 R 
/XYZ 99.895 376.562 null] +/Type /Page +/Contents 1224 0 R +/Resources 1222 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1205 0 R +/Annots [ 1219 0 R 1220 0 R 1221 0 R ] >> -% 967 0 obj +% 1219 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 362.555 495.412 374.615] +/A << /S /GoTo /D (vdata) >> >> -% 972 0 obj +% 1220 0 obj << -/Type /Page -/Contents 973 0 R -/Resources 971 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [377.462 350.6 384.436 362.66] +/A << /S /GoTo /D (table.6) >> >> -% 974 0 obj +% 1221 0 obj << -/D [972 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 283.114 373.916 295.173] +/A << /S /GoTo /D (descdata) >> >> -% 155 0 obj +% 1225 0 obj << -/D [972 0 R /XYZ 150.705 716.092 null] +/D [1223 0 R /XYZ 149.705 753.953 null] >> -% 159 0 obj +% 248 0 obj << -/D [972 0 R /XYZ 150.705 484.709 null] +/D [1223 0 R /XYZ 150.705 716.092 null] >> -% 163 0 obj +% 1226 0 obj << -/D [972 0 R /XYZ 150.705 251.325 null] +/D [1223 0 R /XYZ 150.705 505.29 null] >> -% 971 0 obj +% 1222 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F192 942 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 976 0 obj +% 1228 0 obj << /Type /Page -/Contents 977 0 R -/Resources 975 0 R +/Contents 1229 0 R +/Resources 1227 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/Parent 1205 0 R >> -% 978 0 obj +% 1230 0 obj << -/D [976 0 R /XYZ 98.895 753.953 null] +/D [1228 0 R /XYZ 98.895 753.953 null] >> -% 167 0 obj +% 1231 0 obj << -/D [976 0 R /XYZ 99.895 476.15 null] +/D [1228 0 R /XYZ 99.895 632.19 null] >> -% 975 0 obj +% 1227 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> 
-% 982 0 obj +% 1236 0 obj << /Type /Page -/Contents 983 0 R -/Resources 981 0 R +/Contents 1237 0 R +/Resources 1235 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R -/Annots [ 979 0 R 980 0 R ] +/Parent 1205 0 R +/Annots [ 1232 0 R 1233 0 R 1234 0 R ] >> -% 979 0 obj +% 1232 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [248.894 164.341 255.868 176.4] -/A << /S /GoTo /D (section.6) >> +/Rect [419.358 291.495 495.412 303.554] +/A << /S /GoTo /D (vdata) >> >> -% 980 0 obj +% 1233 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [343.512 128.475 350.485 140.535] -/A << /S /GoTo /D (listing.3) >> ->> -% 984 0 obj -<< -/D [982 0 R /XYZ 149.705 753.953 null] ->> -% 171 0 obj -<< -/D [982 0 R /XYZ 150.705 716.092 null] +/Rect [377.462 279.539 384.436 291.599] +/A << /S /GoTo /D (table.7) >> >> -% 175 0 obj +% 1234 0 obj << -/D [982 0 R /XYZ 150.705 586.94 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 209.259 373.916 221.319] +/A << /S /GoTo /D (descdata) >> >> -% 179 0 obj +% 1238 0 obj << -/D [982 0 R /XYZ 150.705 402.59 null] +/D [1236 0 R /XYZ 149.705 753.953 null] >> -% 183 0 obj +% 252 0 obj << -/D [982 0 R /XYZ 150.705 234.114 null] +/D [1236 0 R /XYZ 150.705 716.092 null] >> -% 985 0 obj +% 1239 0 obj << -/D [982 0 R /XYZ 170.799 204.012 null] +/D [1236 0 R /XYZ 150.705 443.893 null] >> -% 981 0 obj +% 1235 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 988 0 obj +% 1241 0 obj << /Type /Page -/Contents 989 0 R -/Resources 987 0 R +/Contents 1242 0 R +/Resources 1240 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R ->> -% 990 0 obj -<< -/D [988 0 R /XYZ 98.895 753.953 null] ->> -% 986 0 obj -<< -/D [988 0 R /XYZ 99.895 446.997 null] ->> -% 187 0 obj -<< -/D [988 0 R /XYZ 99.895 387.147 null] +/Parent 1244 0 R >> -% 191 0 obj -<< -/D [988 0 
R /XYZ 99.895 370.604 null] ->> -% 195 0 obj +% 1243 0 obj << -/D [988 0 R /XYZ 99.895 194.093 null] +/D [1241 0 R /XYZ 98.895 753.953 null] >> -% 987 0 obj +% 1240 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R >> +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 995 0 obj +% 1249 0 obj << /Type /Page -/Contents 996 0 R -/Resources 994 0 R +/Contents 1250 0 R +/Resources 1248 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 992 0 R 993 0 R ] +/Parent 1244 0 R +/Annots [ 1245 0 R 1246 0 R 1247 0 R ] >> -% 992 0 obj +% 1245 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 383.557 435.942 395.616] -/A << /S /GoTo /D (table.1) >> +/Rect [419.358 314.69 495.412 326.749] +/A << /S /GoTo /D (vdata) >> >> -% 993 0 obj +% 1246 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 240.08 435.942 252.139] -/A << /S /GoTo /D (table.1) >> +/Rect [377.462 302.734 384.436 314.794] +/A << /S /GoTo /D (table.8) >> >> -% 997 0 obj +% 1247 0 obj << -/D [995 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 232.693 373.916 244.753] +/A << /S /GoTo /D (descdata) >> >> -% 199 0 obj +% 1251 0 obj << -/D [995 0 R /XYZ 150.705 610.712 null] +/D [1249 0 R /XYZ 149.705 753.953 null] >> -% 994 0 obj +% 256 0 obj +<< +/D [1249 0 R /XYZ 150.705 716.092 null] +>> +% 1252 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/D [1249 0 R /XYZ 150.705 504.73 null] +>> +% 1248 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1002 0 obj +% 1254 0 obj << /Type /Page -/Contents 1003 0 R -/Resources 1001 0 R +/Contents 1255 0 R +/Resources 1253 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 999 0 R ] +/Parent 1244 0 R >> -% 999 0 obj +% 1256 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [127.814 152.385 134.788 
164.445] -/A << /S /GoTo /D (listing.4) >> +/D [1254 0 R /XYZ 98.895 753.953 null] >> -% 1004 0 obj +% 1257 0 obj << -/D [1002 0 R /XYZ 98.895 753.953 null] +/D [1254 0 R /XYZ 99.895 564.444 null] >> -% 204 0 obj +% 1258 0 obj << -/D [1002 0 R /XYZ 99.895 716.092 null] +/D [1254 0 R /XYZ 99.895 504.067 null] >> -% 208 0 obj +% 1259 0 obj << -/D [1002 0 R /XYZ 99.895 430.41 null] +/D [1254 0 R /XYZ 124.802 506.876 null] >> -% 212 0 obj +% 1260 0 obj << -/D [1002 0 R /XYZ 99.895 226.203 null] +/D [1254 0 R /XYZ 124.802 494.921 null] >> -% 1005 0 obj +% 1261 0 obj << -/D [1002 0 R /XYZ 258.62 168.146 null] +/D [1254 0 R /XYZ 124.802 482.966 null] >> -% 1001 0 obj +% 1262 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/D [1254 0 R /XYZ 124.802 471.011 null] +>> +% 1253 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1008 0 obj +% 1267 0 obj << /Type /Page -/Contents 1009 0 R -/Resources 1007 0 R +/Contents 1268 0 R +/Resources 1266 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 1000 0 R ] +/Parent 1244 0 R +/Annots [ 1263 0 R 1264 0 R 1265 0 R ] >> -% 1000 0 obj +% 1263 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [297.461 580.64 304.435 592.699] -/A << /S /GoTo /D (section.6) >> +/Rect [419.358 416.057 495.412 428.117] +/A << /S /GoTo /D (vdata) >> >> -% 1010 0 obj +% 1264 0 obj << -/D [1008 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [377.462 404.102 384.436 416.161] +/A << /S /GoTo /D (table.9) >> >> -% 1006 0 obj +% 1265 0 obj << -/D [1008 0 R /XYZ 150.705 665.282 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 336.356 373.916 348.415] +/A << /S /GoTo /D (descdata) >> >> - -endstream -endobj -1014 0 obj +% 1269 0 obj << -/Length 158 +/D [1267 0 R /XYZ 149.705 753.953 null] +>> +% 260 0 obj +<< +/D [1267 0 R /XYZ 150.705 716.092 null] +>> +% 1270 0 obj 
+<< +/D [1267 0 R /XYZ 150.705 560.219 null] +>> +% 1266 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 99.895 705.784 Td [(4)-1000(Computational)-250(routines)]TJ -0 g 0 G -/F54 9.9626 Tf 166.875 -615.346 Td [(29)]TJ -0 g 0 G -ET endstream endobj -1025 0 obj +1275 0 obj << -/Length 7171 +/Length 5396 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.1)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ/F54 9.9626 Tf -51.12 -19.189 Td [(This)-358(subr)18(outine)-358(is)-359(an)-358(interface)-358(to)-358(the)-358(computational)-359(kernel)-358(for)-358(dense)-358(matrix)]TJ 0 -11.955 Td [(sum:)]TJ/F52 9.9626 Tf 143.149 -12.304 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.397 0 Td [(a)]TJ/F52 9.9626 Tf 7.616 0 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -175.407 -18.398 Td [(call)]TJ -0 g 0 G - [-525(psb_geaxpby\050alpha,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 227.737 629.682 cm -[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S -Q -BT -/F52 9.9626 Tf 234.009 621.114 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(,)]TJ/F60 9.9626 Tf 5.105 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(,)]TJ/F60 9.9626 Tf 5.355 0 Td [(b)]TJ/F51 9.9626 Tf 89.359 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 227.737 617.328 cm -[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S -Q -BT -/F54 9.9626 Tf 233.715 608.761 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 370.948 608.96 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 608.761 
Td [(geaxpby)]TJ -140.222 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 370.948 597.005 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 596.805 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 370.948 585.05 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 584.85 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 370.948 573.094 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 572.895 Td [(geaxpby)]TJ -ET -q -1 0 0 1 227.737 569.109 cm -[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 280.768 540.731 Td [(T)92(able)-250(1:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -130.063 -35.05 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.39 Td [(On)-250(Entry)]TJ -0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(4.10)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F84 9.9626 Tf -57.406 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(matrix)]TJ/F78 9.9626 Tf 208.231 0 Td [(A)]TJ/F84 9.9626 Tf 7.318 0 Td [(:)]TJ/F78 9.9626 Tf -74.342 -33.873 Td [(n)-15(r)-35(m)]TJ/F84 9.9626 Tf 17.789 0 Td [(1)]TJ/F190 10.3811 Tf 7.873 0 Td [(\040)-291(k)]TJ/F78 9.9626 Tf 19.335 0 Td [(A)]TJ/F190 10.3811 Tf 7.442 0 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F84 9.9626 Tf -199.071 -20.06 Td [(wher)18(e:)]TJ 0 g 0 G - 0 -20.391 Td [(alpha)]TJ +/F78 9.9626 Tf 1.041 -19.925 Td [(A)]TJ 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.368 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 
-31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ +/F84 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F78 9.9626 Tf 125.981 0 Td [(A)]TJ 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.391 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(o)1(r)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 349.068 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 178.8 588.515 cm +[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S Q BT -/F59 9.9626 Tf 439.811 348.869 Td [(T)]TJ +/F78 9.9626 Tf 185.401 579.947 Td [(A)]TJ/F75 9.9626 Tf 120.292 0 Td [(Function)]TJ ET q -1 0 0 1 445.669 349.068 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 178.8 576.161 cm +[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S Q BT -/F59 9.9626 Tf 448.807 348.869 Td [(vect)]TJ +/F84 9.9626 Tf 184.778 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 470.356 349.068 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 322.012 567.793 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 473.495 348.869 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td 
[(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-312(1)]TJ -0 g 0 G - [(.)-494(The)-312(rank)-312(of)]TJ/F52 9.9626 Tf 274.834 0 Td [(x)]TJ/F54 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -82.958 -20.391 Td [(beta)]TJ -0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.618 0 Td [(b)]TJ/F54 9.9626 Tf 5.524 0 Td [(.)]TJ -49.584 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.391 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 191.754 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -182.473 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F84 9.9626 Tf 325.001 567.594 Td [(spnrm1)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 436.673 188.736 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 322.012 555.838 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 
9.9626 Tf 439.811 188.537 Td [(T)]TJ +/F84 9.9626 Tf 325.001 555.638 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 445.669 188.736 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 322.012 543.882 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 448.807 188.537 Td [(vect)]TJ +/F84 9.9626 Tf 325.001 543.683 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 470.356 188.736 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 322.012 531.927 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 473.495 188.537 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-276(numbers)-277(of)-276(the)-276(type)-276(indicated)-277(in)-276(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-276(1)]TJ -0 g 0 G - [(.)-389(The)-276(rank)-277(of)]TJ/F52 9.9626 Tf 288.67 0 Td [(y)]TJ/F54 9.9626 Tf 7.859 0 Td [(must)]TJ -296.529 -11.955 Td [(be)-250(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 65.888 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -95.999 -20.39 Td [(desc)]TJ +/F84 9.9626 Tf 325.001 531.728 Td [(spnrm1)]TJ ET q -1 0 0 1 171.218 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 178.8 527.942 cm +[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S Q +0 g 0 G BT -/F51 9.9626 Tf 174.207 144.236 Td [(a)]TJ +/F84 9.9626 Tf 227.467 499.564 Td [(T)92(able)-250(10:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ 0 g 0 G -/F54 9.9626 Tf 114.879 -29.888 Td [(30)]TJ 0 g 0 G -ET - -endstream -endobj -1032 0 obj -<< -/Length 2404 ->> -stream 0 g 0 G +/F145 9.9626 Tf -127.572 -33.873 Td [(psb_spnrm1\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_norm1\050A,)-525(desc_a,)-525(info\051)]TJ 0 g 
0 G -BT -/F54 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F78 9.9626 Tf 194.722 0 Td [(A)]TJ/F84 9.9626 Tf 7.318 0 Td [(.)]TJ -187.096 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 694.373 cm +1 0 0 1 273.363 344.346 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 694.174 Td [(desc)]TJ +/F145 9.9626 Tf 276.501 344.147 Td [(Tspmat)]TJ ET q -1 0 0 1 298.05 694.373 cm +1 0 0 1 308.511 344.346 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 694.174 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F145 9.9626 Tf 311.649 344.147 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -21.918 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -0 g 0 G - 0 -19.925 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.68 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 
-11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F75 9.9626 Tf -232.676 -19.926 Td [(desc)]TJ ET q -1 0 0 1 385.864 604.709 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.408 324.421 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 389.002 604.51 Td [(T)]TJ +/F75 9.9626 Tf 123.397 324.221 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 394.86 604.709 cm +1 0 0 1 273.363 276.6 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 604.51 Td [(vect)]TJ +/F145 9.9626 Tf 276.501 276.401 Td [(desc)]TJ ET q -1 0 0 1 419.547 604.709 cm +1 0 0 1 298.05 276.6 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 604.51 Td [(type)]TJ +/F145 9.9626 Tf 301.189 276.401 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(.)]TJ +/F75 9.9626 Tf -222.215 -19.926 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F78 9.9626 Tf 150.399 0 Td [(A)]TJ/F84 9.9626 Tf 7.318 0 Td [(.)]TJ 
-205.587 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +/F75 9.9626 Tf -24.907 -19.926 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -434.371 Td [(31)]TJ + 142.357 -54.456 Td [(47)]TJ 0 g 0 G ET endstream endobj -1042 0 obj +1283 0 obj << -/Length 7447 +/Length 5410 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.2)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(4.11)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(gedot)-250(\227)-250(Dot)-250(Product)]TJ/F54 9.9626 Tf -51.12 -18.976 Td [(This)-250(function)-250(computes)-250(dot)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F52 9.9626 Tf 254.647 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.481 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -286.93 -11.955 Td [(If)]TJ/F52 
9.9626 Tf 9.459 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 7.597 0 Td [(ar)18(e)-250(r)18(eal)-250(vectors)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F52 9.9626 Tf 104.717 -23.132 Td [(d)-25(o)-35(t)]TJ/F83 10.3811 Tf 16.337 0 Td [(\040)]TJ/F52 9.9626 Tf 13.566 0 Td [(x)]TJ/F52 7.5716 Tf 5.399 4.115 Td [(T)]TJ/F52 9.9626 Tf 5.525 -4.115 Td [(y)]TJ/F54 9.9626 Tf -189.778 -21.93 Td [(Else)-250(if)]TJ/F52 9.9626 Tf 29.474 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 7.596 0 Td [(ar)18(e)-250(complex)-250(vectors)-250(then)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F52 9.9626 Tf 83.965 -23.132 Td [(d)-25(o)-35(t)]TJ/F83 10.3811 Tf 16.336 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F52 7.5716 Tf 5.588 4.115 Td [(H)]TJ/F52 9.9626 Tf 6.812 -4.115 Td [(y)]TJ/F59 9.9626 Tf -175.572 -21.937 Td [(psb_gedot\050x,)-525(y,)-525(desc_a,)-525(info)-525([,global]\051)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F84 9.9626 Tf -57.407 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(matrix)]TJ/F78 9.9626 Tf 235.459 0 Td [(A)]TJ/F84 9.9626 Tf 7.318 0 Td [(:)]TJ/F78 9.9626 Tf -102.019 -33.873 Td [(n)-15(r)-35(m)-18(i)]TJ/F190 10.3811 Tf 23.699 0 Td [(\040)-291(k)]TJ/F78 9.9626 Tf 19.335 0 Td [(A)]TJ/F190 10.3811 Tf 7.442 0 Td [(k)]TJ/F243 7.5716 Tf 5.41 -1.494 Td [(\245)]TJ/F84 9.9626 Tf -196.754 -20.424 Td [(wher)18(e:)]TJ +0 g 0 G +/F78 9.9626 Tf 1.042 -19.925 Td [(A)]TJ +0 g 0 G +/F84 9.9626 Tf 12.298 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F78 9.9626 Tf 125.981 0 Td [(A)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 233.929 570.686 cm -[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +1 0 0 1 230.651 588.515 cm +[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S Q BT -/F52 9.9626 Tf 240.031 562.118 Td [(d)-25(o)-35(t)]TJ/F54 9.9626 Tf 
13.444 0 Td [(,)]TJ/F52 9.9626 Tf 5.276 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 91.76 0 Td [(Function)]TJ +/F78 9.9626 Tf 237.251 579.947 Td [(A)]TJ/F75 9.9626 Tf 120.293 0 Td [(Function)]TJ ET q -1 0 0 1 233.929 558.332 cm -[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +1 0 0 1 230.651 576.161 cm +[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S Q BT -/F54 9.9626 Tf 239.906 549.765 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F84 9.9626 Tf 236.629 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 377.14 549.964 cm +1 0 0 1 373.862 567.793 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 380.129 549.765 Td [(gedot)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F84 9.9626 Tf 376.851 567.594 Td [(spnrmi)]TJ -140.222 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 377.14 538.009 cm +1 0 0 1 373.862 555.838 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 380.129 537.809 Td [(gedot)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F84 9.9626 Tf 376.851 555.638 Td [(spnrmi)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 377.14 526.053 cm +1 0 0 1 373.862 543.882 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 380.129 525.854 Td [(gedot)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F84 9.9626 Tf 376.851 543.683 Td [(spnrmi)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 377.14 514.098 cm +1 0 0 1 373.862 531.927 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 380.129 513.899 Td [(gedot)]TJ +/F84 9.9626 Tf 376.851 531.728 Td [(spnrmi)]TJ ET q -1 0 0 1 233.929 510.113 cm -[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +1 0 0 1 230.651 527.942 cm +[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 280.768 481.735 Td 
[(T)92(able)-250(2:)-310(Data)-250(types)]TJ +/F84 9.9626 Tf 278.277 499.564 Td [(T)92(able)-250(11:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -34.507 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F145 9.9626 Tf -127.572 -33.873 Td [(psb_spnrmi\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_normi\050A,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.951 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - 0 -19.951 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 359.705 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 359.506 Td [(T)]TJ -ET -q -1 0 0 1 445.669 359.705 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 359.506 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 359.705 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 359.506 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-312(2)]TJ +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [(.)-494(The)-312(rank)-312(of)]TJ/F52 9.9626 Tf 274.834 0 Td [(x)]TJ/F54 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 
Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -82.958 -19.951 Td [(y)]TJ + 0 -19.926 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F78 9.9626 Tf 194.722 0 Td [(A)]TJ/F84 9.9626 Tf 7.317 0 Td [(.)]TJ -187.095 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 268.023 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 267.824 Td [(T)]TJ +/F145 9.9626 Tf 132.242 0 Td [(psb)]TJ ET q -1 0 0 1 445.669 268.023 cm +1 0 0 1 324.173 344.346 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 267.824 Td [(vect)]TJ +/F145 9.9626 Tf 327.311 344.147 Td [(Tspmat)]TJ ET q -1 0 0 1 470.356 268.023 cm +1 0 0 1 359.321 344.346 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 267.824 Td [(type)]TJ +/F145 9.9626 Tf 362.459 344.147 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td 
[(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-313(2)]TJ -0 g 0 G - [(.)-500(The)-314(rank)-313(of)]TJ/F52 9.9626 Tf 274.898 0 Td [(y)]TJ/F54 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.956 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.116 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -83.227 -19.95 Td [(desc)]TJ +/F75 9.9626 Tf -232.675 -19.926 Td [(desc)]TJ ET q -1 0 0 1 171.218 224.162 cm +1 0 0 1 171.218 324.421 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 223.963 Td [(a)]TJ +/F75 9.9626 Tf 174.207 324.221 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 176.341 cm +1 0 0 1 324.173 276.6 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 176.142 Td [(desc)]TJ +/F145 9.9626 Tf 327.311 276.401 Td [(desc)]TJ ET q -1 0 0 1 348.86 176.341 cm +1 0 0 1 348.86 276.6 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S 
Q BT -/F59 9.9626 Tf 351.998 176.142 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.214 -19.951 Td [(global)]TJ -0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -0 g 0 G - 76.693 -29.888 Td [(32)]TJ -0 g 0 G -ET - -endstream -endobj -1048 0 obj -<< -/Length 3827 ->> -stream -0 g 0 G -0 g 0 G -BT -/F54 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.318 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F145 9.9626 Tf 351.998 276.401 Td [(type)]TJ 0 g 0 G - [(.true.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -31.881 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -222.214 -19.926 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F52 9.9626 Tf 126.33 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -206.483 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td 
[(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ -0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-204(multiple)-204(dot)-204(pr)18(oducts)-204(at)-204(the)-204(same)-204(time;)-219(in)-204(this)-204(case,)-213(it)-204(is)-204(possible)]TJ 0 -11.955 Td [(to)-250(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ/F59 9.9626 Tf 52.303 -19.925 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_gedot\050x1,y1,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 0 -11.956 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(2)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_gedot\050x2,y2,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 0 
-11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_gedot\050x3,y3,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_sum\050ctxt,vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F84 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(in\002nity-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F78 9.9626 Tf 177.627 0 Td [(A)]TJ/F84 9.9626 Tf 7.318 0 Td [(.)]TJ -232.815 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G - [(:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ +/F75 9.9626 Tf -24.906 -19.926 Td [(info)]TJ 0 g 0 G - [(\051\051)]TJ/F54 9.9626 Tf -52.303 -19.925 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -282.939 Td [(33)]TJ + 142.356 -54.456 Td [(48)]TJ 0 g 0 G ET endstream endobj -1059 0 obj +1294 0 obj << -/Length 8275 +/Length 8068 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.3)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(4.12)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 
706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Product)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(This)-283(subr)18(outine)-284(computes)-283(a)-284(series)-284(of)-283(dot)-284(pr)18(oducts)-283(among)-284(the)-283(columns)-284(of)-283(two)]TJ 0 -11.955 Td [(dense)-250(matrices)]TJ/F52 9.9626 Tf 68.208 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(:)]TJ/F52 9.9626 Tf 24.807 -13.101 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.293 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)-12(,)]TJ/F52 9.9626 Tf 6.821 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F52 7.5716 Tf 4.343 4.114 Td [(T)]TJ/F52 9.9626 Tf 5.525 -4.114 Td [(y)]TJ/F85 10.3811 Tf 5.23 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F52 9.9626 Tf 6.821 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf -214.288 -16.876 Td [(If)-300(the)-299(matrices)-300(ar)18(e)-299(complex,)-312(then)-300(the)-300(usual)-299(convention)-300(applies,)-312(i.e.)-459(the)-299(conju-)]TJ 0 -11.955 Td [(gate)-239(transpose)-239(of)]TJ/F52 9.9626 Tf 77.351 0 Td [(x)]TJ/F54 9.9626 Tf 7.589 0 Td [(is)-239(used.)-307(If)]TJ/F52 9.9626 Tf 45.493 0 Td [(x)]TJ/F54 9.9626 Tf 7.589 0 Td [(and)]TJ/F52 9.9626 Tf 19.375 0 Td [(y)]TJ/F54 9.9626 Tf 7.489 0 Td [(ar)18(e)-239(of)-239(rank)-240(one,)-241(then)]TJ/F52 9.9626 Tf 92.601 0 Td [(r)-17(e)-25(s)]TJ/F54 9.9626 Tf 14.552 0 Td [(is)-239(a)-240(scalar)75(,)-242(else)-239(it)]TJ -272.039 -11.955 Td [(is)-250(a)-250(rank)-250(one)-250(array)111(.)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.174 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_gedots\050res,)-525(x,)-525(y,)-525(desc_a,)-525(info\051)]TJ +/F75 11.9552 Tf 156.993 706.129 Td 
[(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Product)]TJ/F84 9.9626 Tf -57.406 -19.303 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct:)]TJ/F78 9.9626 Tf 140.456 -24.611 Td [(y)]TJ/F190 10.3811 Tf 7.998 0 Td [(\040)]TJ/F147 9.9626 Tf 13.397 0 Td [(a)]TJ/F78 9.9626 Tf 6.008 0 Td [(A)-42(x)]TJ/F192 10.3811 Tf 14.878 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G +/F84 9.9626 Tf 134.508 0 Td [(\0501\051)]TJ 0 g 0 G +/F78 9.9626 Tf -195.74 -20.13 Td [(y)]TJ/F190 10.3811 Tf 7.998 0 Td [(\040)]TJ/F147 9.9626 Tf 13.397 0 Td [(a)]TJ/F78 9.9626 Tf 6.008 0 Td [(A)]TJ/F78 7.5716 Tf 7.51 4.115 Td [(T)]TJ/F78 9.9626 Tf 5.695 -4.115 Td [(x)]TJ/F192 10.3811 Tf 7.267 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G -ET -q -1 0 0 1 230.392 595.704 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.394 587.136 Td [(r)-17(e)-25(s)]TJ/F54 9.9626 Tf 12.17 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 93.135 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 583.351 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 236.369 574.783 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 373.603 574.982 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 574.783 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 373.603 563.027 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 562.828 Td [(gedots)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 373.603 551.072 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 550.872 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 373.603 
539.116 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 538.917 Td [(gedots)]TJ -ET -q -1 0 0 1 230.392 535.131 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 280.768 506.753 Td [(T)92(able)-250(3:)-310(Data)-250(types)]TJ +/F84 9.9626 Tf 131.711 0 Td [(\0502\051)]TJ 0 g 0 G +/F78 9.9626 Tf -196.478 -20.129 Td [(y)]TJ/F190 10.3811 Tf 7.997 0 Td [(\040)]TJ/F147 9.9626 Tf 13.398 0 Td [(a)]TJ/F78 9.9626 Tf 6.007 0 Td [(A)]TJ/F78 7.5716 Tf 7.7 4.114 Td [(H)]TJ/F78 9.9626 Tf 6.982 -4.114 Td [(x)]TJ/F192 10.3811 Tf 7.267 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G +/F84 9.9626 Tf 130.973 0 Td [(\0503\051)]TJ 0 g 0 G -/F51 9.9626 Tf -130.063 -32.002 Td [(T)90(ype:)]TJ + -318.147 -18.633 Td [(wher)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F78 9.9626 Tf -14.65 -20.451 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.22 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 115.61 0 Td [(x)]TJ/F84 7.5716 Tf 5.2 -1.494 Td [(:)-13(,)-12(:)]TJ 0 g 0 G +/F78 9.9626 Tf -131.166 -19.132 Td [(y)]TJ 0 g 0 G - 0 -19.22 Td [(x)]TJ +/F84 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 115.441 0 Td [(y)]TJ/F84 7.5716 Tf 5.2 -1.494 Td [(:)-13(,)-12(:)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG 
-/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 388.689 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 388.49 Td [(T)]TJ -ET -q -1 0 0 1 445.669 388.689 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 388.49 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 388.689 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 388.49 Td [(type)]TJ +/F78 9.9626 Tf -130.23 -19.131 Td [(A)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-312(3)]TJ +/F84 9.9626 Tf 12.299 0 Td [(is)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F78 9.9626 Tf 118.409 0 Td [(A)]TJ 0 g 0 G - [(.)-494(The)-312(rank)-312(of)]TJ/F52 9.9626 Tf 274.834 0 Td [(x)]TJ/F54 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -82.958 -19.221 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 297.738 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 179.582 517.986 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +BT +/F78 9.9626 Tf 186.183 509.418 Td [(A)]TJ/F84 9.9626 Tf 7.317 0 Td [(,)]TJ/F78 9.9626 Tf 5.275 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(,)]TJ/F78 9.9626 Tf 5.106 0 Td 
[(y)]TJ/F84 9.9626 Tf 5.105 0 Td [(,)]TJ/F147 9.9626 Tf 5.106 0 Td [(a)]TJ/F84 9.9626 Tf 5.385 0 Td [(,)]TJ/F147 9.9626 Tf 5.355 0 Td [(b)]TJ/F75 9.9626 Tf 76.437 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 179.582 505.633 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F59 9.9626 Tf 439.811 297.539 Td [(T)]TJ +/F84 9.9626 Tf 185.56 497.065 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 445.669 297.738 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 322.794 497.264 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 448.807 297.539 Td [(vect)]TJ +/F84 9.9626 Tf 325.783 497.065 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 470.356 297.738 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 322.794 485.309 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 473.495 297.539 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-313(3)]TJ -0 g 0 G - [(.)-500(The)-314(rank)-313(of)]TJ/F52 9.9626 Tf 274.898 0 Td [(y)]TJ/F54 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.116 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -83.227 -19.22 Td [(desc)]TJ +/F84 9.9626 Tf 325.783 485.11 Td [(spmm)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 171.218 254.607 cm +1 0 0 1 322.794 473.354 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 254.408 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 
-11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F84 9.9626 Tf 325.783 473.154 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 324.173 206.786 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 322.794 461.398 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 327.311 206.587 Td [(desc)]TJ +/F84 9.9626 Tf 325.783 461.199 Td [(spmm)]TJ ET q -1 0 0 1 348.86 206.786 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 179.582 457.413 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q +0 g 0 G BT -/F59 9.9626 Tf 351.998 206.587 Td [(type)]TJ +/F84 9.9626 Tf 227.467 429.035 Td [(T)92(able)-250(12:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.22 Td [(On)-250(Return)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -107.397 -24.261 Td [(call)]TJ 0 g 0 G - 0 -19.221 Td [(res)]TJ + [-525(psb_spmm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F52 9.9626 Tf 126.33 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -151.968 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-289(as:)-389(a)-290(number)-290(or)-289(a)-290(rank-one)-289(array)-290(of)-289(the)-290(data)-289(type)-290(indicated)-289(in)]TJ 0 -11.955 Td [(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ + [-525(a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.967 -29.888 Td [(34)]TJ + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj 
-1064 0 obj -<< -/Length 582 ->> -stream + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.944 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-525(psb_spmm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -567.87 Td [(35)]TJ + [-525(a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1071 0 obj -<< -/Length 7477 ->> -stream + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.4)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 233.576 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -238.781 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.459 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(in\002nity)-250(norm)-250(as:)]TJ/F52 9.9626 Tf 117.807 -18.736 Td [(a)-25(m)-40(a)-42(x)]TJ/F83 10.3811 Tf 25.761 0 Td [(\040)]TJ/F54 9.9626 Tf 13.272 0 Td [(max)]TJ/F52 7.5716 Tf 8.355 -7.21 Td [(i)]TJ/F83 10.3811 Tf 12.349 7.21 Td 
[(j)]TJ/F52 9.9626 Tf 3.298 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F54 9.9626 Tf -206.019 -23.313 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(the)-250(in\002nity-norm)-250(as:)]TJ/F52 9.9626 Tf 63.42 -18.737 Td [(a)-25(m)-40(a)-42(x)]TJ/F83 10.3811 Tf 25.761 0 Td [(\040)]TJ/F54 9.9626 Tf 13.273 0 Td [(max)]TJ/F52 7.5716 Tf 8.354 -7.21 Td [(i)]TJ/F85 10.3811 Tf 12.35 7.21 Td [(\050)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F52 9.9626 Tf 3.028 0 Td [(r)-17(e)]TJ/F85 10.3811 Tf 8.17 0 Td [(\050)]TJ/F52 9.9626 Tf 4.443 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F85 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F85 10.3811 Tf 5.066 0 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(j)]TJ/F52 9.9626 Tf 3.058 0 Td [(i)-32(m)]TJ/F85 10.3811 Tf 11.088 0 Td [(\050)]TJ/F52 9.9626 Tf 4.444 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F85 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F85 10.3811 Tf 3.128 0 Td [(\051)]TJ/F59 9.9626 Tf -225.616 -22.974 Td [(psb_geamax\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_normi\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ + [-525(y,desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(trans,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(work\051)]TJ +0 g 0 G +/F75 9.9626 Tf -5.231 -22.618 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -20.626 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.626 Td [(alpha)]TJ +0 g 0 G +/F84 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F147 9.9626 Tf 44.368 0 Td [(a)]TJ/F84 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td 
[(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(12)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.626 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F78 9.9626 Tf 164.964 0 Td [(A)]TJ/F84 9.9626 Tf 7.318 0 Td [(.)]TJ -157.338 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 179.304 566.399 cm -[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S -Q -BT -/F52 9.9626 Tf 185.556 557.832 Td [(a)-25(m)-40(a)-42(x)-7779(x)]TJ/F51 9.9626 Tf 220.764 0 Td [(Function)]TJ -ET -q -1 0 0 1 179.304 554.046 cm -[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S +1 0 0 1 273.363 212.882 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 185.282 545.478 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F145 9.9626 Tf 276.501 212.682 Td [(Tspmat)]TJ ET q -1 0 0 1 422.639 545.677 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 308.511 212.882 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 425.628 545.478 Td [(geamax)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F145 9.9626 Tf 311.649 212.682 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -232.676 -20.625 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td 
[(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.614 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 144.236 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 369.545 144.236 Tm [(psb)]TJ ET q -1 0 0 1 422.639 533.722 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 385.864 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 425.628 533.523 Td [(geamax)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F145 9.9626 Tf 389.002 144.236 Td [(T)]TJ ET q -1 0 0 1 422.639 521.767 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 394.86 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 425.628 521.568 Td [(geamax)]TJ -240.346 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F145 9.9626 Tf 397.998 144.236 Td [(vect)]TJ ET q -1 0 0 1 422.639 509.812 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.547 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 425.628 509.612 Td [(geamax)]TJ -ET -q -1 0 0 1 179.304 505.827 cm -[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S -Q +/F145 9.9626 Tf 422.685 144.236 Td [(type)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 280.768 477.448 Td [(T)92(able)-250(4:)-310(Data)-250(types)]TJ +/F84 9.9626 Tf 1.015 0 0 1 124.802 132.281 Tm [(containing)-247(numbers)-246(of)-247(type)-247(speci\002ed)-246(in)-247(T)91(able)]TJ +0 0 1 rg 0 0 1 RG + [-247(12)]TJ +0 g 0 G + [(.)-307(The)-247(rank)-246(of)]TJ/F78 9.9626 Tf 1 0 0 1 400.366 132.281 
Tm [(x)]TJ/F84 9.9626 Tf 1.015 0 0 1 408.066 132.281 Tm [(must)-247(be)]TJ 1 0 0 1 124.802 120.326 Tm [(the)-250(same)-250(of)]TJ/F78 9.9626 Tf 52.946 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ +0 g 0 G + 83.916 -29.888 Td [(49)]TJ 0 g 0 G +ET + +endstream +endobj +1310 0 obj +<< +/Length 6709 +>> +stream 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -30.014 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(beta)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -18.652 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 24.348 0 Td [(the)-250(scalar)]TJ/F147 9.9626 Tf 44.618 0 Td [(b)]TJ/F84 9.9626 Tf 5.524 0 Td [(.)]TJ -49.584 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(12)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -18.653 Td [(x)]TJ +/F75 9.9626 Tf -24.906 -18.597 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F84 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.445 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td 
[(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 0.98 0 0 1 175.611 591.891 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-248(an)-247(object)-247(of)-247(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F145 9.9626 Tf 1 0 0 1 420.354 591.891 Tm [(psb)]TJ ET q -1 0 0 1 436.673 362.508 cm +1 0 0 1 436.673 592.09 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 362.308 Td [(T)]TJ +/F145 9.9626 Tf 439.811 591.891 Td [(T)]TJ ET q -1 0 0 1 445.669 362.508 cm +1 0 0 1 445.669 592.09 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 362.308 Td [(vect)]TJ +/F145 9.9626 Tf 448.807 591.891 Td [(vect)]TJ ET q -1 0 0 1 470.356 362.508 cm +1 0 0 1 470.356 592.09 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 362.308 Td [(type)]TJ +/F145 9.9626 Tf 473.495 591.891 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F84 9.9626 Tf 1.016 0 0 1 175.611 579.935 Tm [(containing)-246(numbers)-247(of)-246(type)-247(speci\002ed)-246(in)-246(T)90(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(4)]TJ + [-246(12)]TJ 0 g 0 G - [(.)]TJ + [(.)-306(The)-247(rank)-246(of)]TJ/F78 9.9626 Tf 1 0 0 1 451.243 579.935 Tm [(y)]TJ/F84 9.9626 Tf 1.016 0 0 1 458.843 579.935 Tm [(must)-246(be)]TJ 1 0 0 1 175.611 567.98 Tm [(the)-250(same)-250(of)]TJ/F78 9.9626 Tf 53.116 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -18.652 Td [(desc)]TJ +/F75 9.9626 Tf -83.227 -18.597 Td [(desc)]TJ ET q -1 0 0 1 171.218 331.9 cm +1 0 0 1 171.218 549.583 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 331.701 Td [(a)]TJ +/F75 9.9626 Tf 174.207 549.383 Td [(a)]TJ 0 g 0 G 
-/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 284.079 cm +1 0 0 1 324.173 501.762 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 283.88 Td [(desc)]TJ +/F145 9.9626 Tf 327.311 501.563 Td [(desc)]TJ ET q -1 0 0 1 348.86 284.079 cm +1 0 0 1 348.86 501.762 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 283.88 Td [(type)]TJ +/F145 9.9626 Tf 351.998 501.563 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -18.653 Td [(global)]TJ +/F75 9.9626 Tf -222.214 -18.597 Td [(trans)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td 
[(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.319 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 27.666 0 Td [(indicates)-250(what)-250(kind)-250(of)-250(operation)-250(to)-250(perform.)]TJ 0 g 0 G - [(.true.)]TJ +/F75 9.9626 Tf -2.76 -18.597 Td [(trans)-250(=)-250(N)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -30.607 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf 46.984 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G 0 g 0 G - 0 -18.653 Td [(Function)-250(value)]TJ +/F75 9.9626 Tf -46.984 -14.612 Td [(trans)-250(=)-250(T)]TJ 0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(vector)]TJ/F52 9.9626 Tf 128.562 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -181.637 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 45.33 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -45.33 -14.612 Td [(trans)-250(=)-250(C)]TJ +0 g 0 G +/F84 9.9626 Tf 45.878 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ +0 0 1 rg 0 0 1 RG + [-250(3)]TJ +0 g 0 G + -45.878 -18.597 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F78 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F192 10.3811 Tf 25.193 0 Td [(=)]TJ/F78 9.9626 Tf 11.435 0 Td 
[(N)]TJ/F84 9.9626 Tf -75.268 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -18.596 Td [(work)]TJ +0 g 0 G +/F84 9.9626 Tf 28.363 0 Td [(work)-250(array)111(.)]TJ -3.457 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 0.98 0 0 1 175.611 302.31 Tm [(Speci\002ed)-208(as:)-293(a)-208(rank)-208(one)-208(array)-208(of)-208(the)-209(same)-208(type)-208(of)]TJ/F78 9.9626 Tf 1 0 0 1 385.445 302.31 Tm [(x)]TJ/F84 9.9626 Tf 0.98 0 0 1 392.682 302.31 Tm [(and)]TJ/F78 9.9626 Tf 1 0 0 1 411.368 302.31 Tm [(y)]TJ/F84 9.9626 Tf 0.98 0 0 1 418.506 302.31 Tm [(with)-208(the)-208(T)75(ARGET)]TJ 1 0 0 1 175.611 290.355 Tm [(attribute.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -18.597 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -18.597 Td [(y)]TJ 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +/F84 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(matrix)]TJ/F78 9.9626 Tf 144.94 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -135.66 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 1.02 0 0 1 175.611 205.34 Tm [(Speci\002ed)-330(as:)-475(an)-331(array)-330(of)-331(rank)-330(one)-330(or)-331(two)-330(containing)-331(numbers)-330(of)-331(type)]TJ 1 0 0 1 175.611 193.385 Tm [(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(12)]TJ +0 g 0 G + [(.)]TJ 0 g 0 G - 141.968 -29.888 Td 
[(36)]TJ +/F75 9.9626 Tf -24.906 -18.597 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 142.356 -36.529 Td [(50)]TJ 0 g 0 G ET endstream endobj -1076 0 obj +1316 0 obj << -/Length 2600 +/Length 8135 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(4.13)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(spsm)-250(\227)-250(T)111(riangular)-250(System)-250(Solve)]TJ/F84 9.9626 Tf -57.406 -19.83 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(T)90(riangular)-250(System)-250(Solve:)]TJ/F78 9.9626 Tf 123.033 -35.213 Td [(y)]TJ/F190 10.3811 Tf 15.193 0 Td [(\040)]TJ/F147 9.9626 Tf 20.592 0 Td [(a)]TJ/F78 9.9626 Tf 5.639 0 Td [(T)]TJ/F190 7.8896 Tf 6.546 4.115 Td [(\000)]TJ/F84 7.5716 Tf 6.227 0 Td [(1)]TJ/F78 9.9626 Tf 4.578 -4.115 Td [(x)]TJ/F192 10.3811 Tf 7.267 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ -82.196 -16.139 Td [(y)]TJ/F190 10.3811 Tf 15.193 0 Td [(\040)]TJ/F147 9.9626 Tf 20.592 0 Td [(a)]TJ/F78 9.9626 Tf 5.709 0 Td [(D)-48(T)]TJ/F190 7.8896 Tf 14.774 4.114 Td [(\000)]TJ/F84 7.5716 Tf 6.228 0 Td [(1)]TJ/F78 9.9626 Tf 4.578 -4.114 Td [(x)]TJ/F192 10.3811 Tf 7.267 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.139 Td [(y)]TJ/F190 10.3811 Tf 15.193 0 Td [(\040)]TJ/F147 9.9626 Tf 20.592 0 Td [(a)]TJ/F78 9.9626 Tf 5.639 0 Td [(T)]TJ/F190 7.8896 Tf 6.546 4.114 Td [(\000)]TJ/F84 7.5716 Tf 
6.227 0 Td [(1)]TJ/F78 9.9626 Tf 4.608 -4.114 Td [(D)-52(x)]TJ/F192 10.3811 Tf 15.536 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.09 Td [(y)]TJ/F190 10.3811 Tf 15.193 0 Td [(\040)]TJ/F147 9.9626 Tf 20.592 0 Td [(a)]TJ/F78 9.9626 Tf 5.639 0 Td [(T)]TJ/F190 7.8896 Tf 6.546 4.114 Td [(\000)]TJ/F78 7.5716 Tf 6.42 0 Td [(T)]TJ/F78 9.9626 Tf 5.695 -4.114 Td [(x)]TJ/F192 10.3811 Tf 7.267 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ -83.506 -16.09 Td [(y)]TJ/F190 10.3811 Tf 15.193 0 Td [(\040)]TJ/F147 9.9626 Tf 20.592 0 Td [(a)]TJ/F78 9.9626 Tf 5.709 0 Td [(D)-48(T)]TJ/F190 7.8896 Tf 14.774 4.114 Td [(\000)]TJ/F78 7.5716 Tf 6.421 0 Td [(T)]TJ/F78 9.9626 Tf 5.695 -4.114 Td [(x)]TJ/F192 10.3811 Tf 7.267 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.648 0 Td [(y)]TJ -91.804 -16.09 Td [(y)]TJ/F190 10.3811 Tf 15.193 0 Td [(\040)]TJ/F147 9.9626 Tf 20.592 0 Td [(a)]TJ/F78 9.9626 Tf 5.639 0 Td [(T)]TJ/F190 7.8896 Tf 6.546 4.114 Td [(\000)]TJ/F78 7.5716 Tf 6.42 0 Td [(T)]TJ/F78 9.9626 Tf 5.725 -4.114 Td [(D)-52(x)]TJ/F192 10.3811 Tf 15.536 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.648 0 Td [(y)]TJ -91.804 -16.091 Td [(y)]TJ/F190 10.3811 Tf 15.193 0 Td [(\040)]TJ/F147 9.9626 Tf 20.592 0 Td [(a)]TJ/F78 9.9626 Tf 5.639 0 Td [(T)]TJ/F190 7.8896 Tf 6.546 4.115 Td [(\000)]TJ/F78 7.5716 Tf 6.609 0 Td [(H)]TJ/F78 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F192 10.3811 Tf 7.267 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ -84.982 -16.09 Td [(y)]TJ/F190 10.3811 Tf 15.193 0 Td [(\040)]TJ/F147 9.9626 Tf 20.592 0 Td [(a)]TJ/F78 9.9626 Tf 5.709 0 Td [(D)-48(T)]TJ/F190 7.8896 Tf 14.774 4.115 Td [(\000)]TJ/F78 7.5716 Tf 6.61 0 Td [(H)]TJ/F78 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F192 10.3811 Tf 7.267 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ -93.281 -16.09 Td [(y)]TJ/F190 10.3811 Tf 15.193 0 Td [(\040)]TJ/F147 
9.9626 Tf 20.592 0 Td [(a)]TJ/F78 9.9626 Tf 5.639 0 Td [(T)]TJ/F190 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F78 7.5716 Tf 6.61 0 Td [(H)]TJ/F78 9.9626 Tf 7.012 -4.115 Td [(D)-52(x)]TJ/F192 10.3811 Tf 15.536 0 Td [(+)]TJ/F147 9.9626 Tf 10.505 0 Td [(b)]TJ/F78 9.9626 Tf 5.649 0 Td [(y)]TJ/F84 9.9626 Tf -201.062 -38.202 Td [(wher)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F78 9.9626 Tf -14.65 -21.265 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F84 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 115.61 0 Td [(x)]TJ/F84 7.5716 Tf 5.2 -1.495 Td [(:)-13(,)-12(:)]TJ 0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-332(at)-333(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ/F59 9.9626 Tf 52.303 -19.926 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F78 9.9626 Tf -131.166 -20.218 Td [(y)]TJ 0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ +/F84 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F78 
9.9626 Tf 115.441 0 Td [(y)]TJ/F84 7.5716 Tf 5.2 -1.494 Td [(:)-13(,)-12(:)]TJ 0 g 0 G - [-525(psb_geamax\050x1,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F78 9.9626 Tf -130.599 -20.218 Td [(T)]TJ 0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(2)]TJ +/F84 9.9626 Tf 11.432 0 Td [(is)-250(the)-250(global)-250(sparse)-250(block)-250(triangular)-250(submatrix)]TJ/F78 9.9626 Tf 206.797 0 Td [(T)]TJ 0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ + -218.159 -21.712 Td [(D)]TJ 0 g 0 G - [-525(psb_geamax\050x2,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 12.956 0 Td [(is)-250(the)-250(scaling)-250(diagonal)-250(matrix.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 6.895 -21.266 Td [(call)]TJ 0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ + [-525(psb_spsm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_geamax\050x3,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(t,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ + -14.944 -11.955 Td [(call)]TJ 0 g 0 G - [-525(psb_amx\050ctxt,vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [-525(psb_spsm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ + [-525(t,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 
RG 0 g 0 G - [(\051\051)]TJ/F54 9.9626 Tf -52.303 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -402.49 Td [(37)]TJ + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1085 0 obj -<< -/Length 6238 ->> -stream + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.5)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(This)-256(subr)18(outine)-255(computes)-256(a)-256(series)-255(of)-256(in\002nity)-256(norms)-256(on)-255(the)-256(columns)-256(of)-255(a)-256(dense)]TJ 0 -11.955 Td [(matrix)]TJ/F52 9.9626 Tf 31.785 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(:)]TJ/F52 9.9626 Tf 88.539 -11.955 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.294 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.042 0 Td [(\040)]TJ/F54 9.9626 Tf 13.273 0 Td [(max)]TJ/F52 7.5716 Tf 7.759 -7.336 Td [(k)]TJ/F83 10.3811 Tf 12.944 7.336 Td [(j)]TJ/F52 9.9626 Tf 3.298 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(k)]TJ/F54 9.9626 Tf 4.598 0 Td [(,)]TJ/F52 9.9626 Tf 4.206 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -195.028 -22.296 Td [(call)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(trans,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(unit,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 
[-525(psb_geamaxs\050res,)-525(x,)-525(desc_a,)-525(info\051)]TJ + [-525(choice,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(diag,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(work\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 177.192 626.591 cm -[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +1 0 0 1 179.582 339.439 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 183.195 618.023 Td [(r)-17(e)-25(s)-8868(x)]TJ/F51 9.9626 Tf 221.013 0 Td [(Subroutine)]TJ +/F78 9.9626 Tf 185.814 330.871 Td [(T)]TJ/F84 9.9626 Tf 6.451 0 Td [(,)]TJ/F78 9.9626 Tf 5.275 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(,)]TJ/F78 9.9626 Tf 5.106 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(,)]TJ/F78 9.9626 Tf 5.305 0 Td [(D)]TJ/F84 9.9626 Tf 7.975 0 Td [(,)]TJ/F147 9.9626 Tf 5.106 0 Td [(a)]TJ/F84 9.9626 Tf 5.385 0 Td [(,)]TJ/F147 9.9626 Tf 5.355 0 Td [(b)]TJ/F75 9.9626 Tf 64.392 0 Td [(Subroutine)]TJ ET q -1 0 0 1 177.192 614.237 cm -[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +1 0 0 1 179.582 327.085 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 183.17 605.669 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F84 9.9626 Tf 185.56 318.517 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 420.527 605.868 cm +1 0 0 1 322.794 318.716 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 605.669 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F84 9.9626 Tf 325.783 318.517 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 420.527 593.913 cm +1 0 0 1 322.794 306.761 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 593.714 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F84 9.9626 Tf 325.783 306.562 Td [(spsm)]TJ -140.223 -11.955 Td 
[(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 420.527 581.958 cm +1 0 0 1 322.794 294.806 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 581.759 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F84 9.9626 Tf 325.783 294.607 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 420.527 570.003 cm +1 0 0 1 322.794 282.851 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 569.804 Td [(geamaxs)]TJ +/F84 9.9626 Tf 325.783 282.652 Td [(spsm)]TJ +ET +q +1 0 0 1 179.582 278.866 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +0 g 0 G +BT +/F84 9.9626 Tf 227.467 250.487 Td [(T)92(able)-250(13:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -127.572 -38.916 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -21.712 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.713 Td [(alpha)]TJ +0 g 0 G +/F84 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F147 9.9626 Tf 44.368 0 Td [(a)]TJ/F84 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G + 141.968 -29.888 Td [(51)]TJ +0 g 0 G +ET + +endstream +endobj +1327 0 obj +<< +/Length 7465 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 150.705 706.129 Td [(t)]TJ +0 g 0 G +/F84 9.9626 Tf 8.299 0 Td [(the)-250(global)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F78 9.9626 Tf 171.22 0 Td [(T)]TJ/F84 9.9626 Tf 
6.451 0 Td [(.)]TJ -161.064 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(type)-250(speci\002ed)-250(in)-250(\247)]TJ +0 0 1 rg 0 0 1 RG + [-250(3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -20.65 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.615 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -165.876 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ 0.98 0 0 1 175.611 589.838 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-248(an)-247(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 420.354 589.838 Tm [(psb)]TJ +ET +q +1 0 0 1 436.673 590.037 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 439.811 589.838 Td [(T)]TJ +ET +q +1 0 0 1 445.669 590.037 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 448.807 589.838 Td [(vect)]TJ ET q -1 0 0 1 177.192 566.018 cm -[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +1 0 0 1 470.356 590.037 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 280.768 537.639 Td [(T)92(able)-250(5:)-310(Data)-250(types)]TJ -0 g 0 G +/F145 9.9626 Tf 473.495 589.838 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 1.015 0 0 1 175.611 577.883 Tm [(containing)-247(numbers)-246(of)-247(type)-247(speci\002ed)-246(in)-247(T)90(able)]TJ +0 0 1 rg 0 0 1 RG + [-246(13)]TJ 0 g 0 G -/F51 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ + 
[(.)-307(The)-247(rank)-246(of)]TJ/F78 9.9626 Tf 1 0 0 1 451.175 577.883 Tm [(x)]TJ/F84 9.9626 Tf 1.015 0 0 1 458.875 577.883 Tm [(must)-247(be)]TJ 1 0 0 1 175.611 565.927 Tm [(the)-250(same)-250(of)]TJ/F78 9.9626 Tf 52.946 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F75 9.9626 Tf -82.958 -20.649 Td [(beta)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 24.348 0 Td [(the)-250(scalar)]TJ/F147 9.9626 Tf 44.618 0 Td [(b)]TJ/F84 9.9626 Tf 5.524 0 Td [(.)]TJ -49.584 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -19.926 Td [(x)]TJ +/F75 9.9626 Tf -24.906 -20.65 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F84 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.445 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 
27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 0.98 0 0 1 175.611 428.986 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-248(an)-247(object)-247(of)-247(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F145 9.9626 Tf 1 0 0 1 420.354 428.986 Tm [(psb)]TJ ET q -1 0 0 1 436.673 415.699 cm +1 0 0 1 436.673 429.186 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 415.5 Td [(T)]TJ +/F145 9.9626 Tf 439.811 428.986 Td [(T)]TJ ET q -1 0 0 1 445.669 415.699 cm +1 0 0 1 445.669 429.186 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 415.5 Td [(vect)]TJ +/F145 9.9626 Tf 448.807 428.986 Td [(vect)]TJ ET q -1 0 0 1 470.356 415.699 cm +1 0 0 1 470.356 429.186 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 415.5 Td [(type)]TJ +/F145 9.9626 Tf 473.495 428.986 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F84 9.9626 Tf 1.016 0 0 1 175.611 417.031 Tm [(containing)-246(numbers)-247(of)-246(type)-247(speci\002ed)-246(in)-246(T)90(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(5)]TJ + [-246(13)]TJ 0 g 0 G - [(.)]TJ + [(.)-306(The)-247(rank)-246(of)]TJ/F78 9.9626 Tf 1 0 0 1 451.243 417.031 Tm [(y)]TJ/F84 9.9626 Tf 1.016 0 0 1 458.843 417.031 Tm [(must)-246(be)]TJ 1 0 0 1 175.611 405.076 Tm [(the)-250(same)-250(of)]TJ/F78 9.9626 Tf 53.116 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(desc)]TJ +/F75 9.9626 Tf -83.227 -20.65 Td [(desc)]TJ ET q -1 0 0 1 171.218 383.818 cm +1 0 0 1 171.218 384.625 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 383.619 Td [(a)]TJ +/F75 9.9626 Tf 174.207 384.426 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 
-11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 335.998 cm +1 0 0 1 324.173 336.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 335.798 Td [(desc)]TJ +/F145 9.9626 Tf 327.311 336.605 Td [(desc)]TJ ET q -1 0 0 1 348.86 335.998 cm +1 0 0 1 348.86 336.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 335.798 Td [(type)]TJ +/F145 9.9626 Tf 351.998 336.605 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.925 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -222.214 -20.649 Td [(trans)]TJ 0 g 0 G +/F84 9.9626 Tf 27.666 0 Td [(specify)-250(with)]TJ/F78 9.9626 Tf 56.398 0 Td [(unitd)]TJ/F84 9.9626 Tf 24.637 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ 0 g 0 G - 0 -19.925 Td [(res)]TJ +/F75 9.9626 Tf -83.795 -20.65 Td [(trans)-250(=)-250('N')]TJ +0 g 0 G +/F84 9.9626 Tf 52.523 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(transposed)-250(matrix)]TJ +0 g 0 G +/F75 9.9626 Tf -52.523 -16.303 Td [(trans)-250(=)-250('T')]TJ +0 g 0 G +/F84 9.9626 Tf 50.869 0 Td 
[(the)-250(operation)-250(is)-250(with)-250(transposed)-250(matrix.)]TJ +0 g 0 G +/F75 9.9626 Tf -50.869 -16.302 Td [(trans)-250(=)-250('C')]TJ +0 g 0 G +/F84 9.9626 Tf 51.417 0 Td [(the)-250(operation)-250(is)-250(with)-250(conjugate)-250(transposed)-250(matrix.)]TJ -51.417 -20.65 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F78 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F192 10.3811 Tf 25.193 0 Td [(=)]TJ/F78 9.9626 Tf 11.435 0 Td [(N)]TJ/F84 9.9626 Tf -75.268 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -20.65 Td [(unitd)]TJ +0 g 0 G +/F84 9.9626 Tf 29.878 0 Td [(specify)-250(with)]TJ/F78 9.9626 Tf 56.397 0 Td [(trans)]TJ/F84 9.9626 Tf 23.522 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ +0 g 0 G +/F75 9.9626 Tf -84.891 -20.649 Td [(unitd)-250(=)-250('U')]TJ +0 g 0 G +/F84 9.9626 Tf 54.187 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(scaling)]TJ +0 g 0 G +/F75 9.9626 Tf -54.187 -16.303 Td [(unitd)-250(=)-250('L)74(')]TJ +0 g 0 G +/F84 9.9626 Tf 51.786 0 Td [(the)-250(operation)-250(is)-250(with)-250(left)-250(scaling)]TJ +0 g 0 G +/F75 9.9626 Tf -51.786 -16.302 Td [(unitd)-250(=)-250('R')]TJ +0 g 0 G +/F84 9.9626 Tf 53.629 0 Td [(the)-250(operation)-250(is)-250(with)-250(right)-250(scaling.)]TJ +0 g 0 G + 88.339 -29.888 Td [(52)]TJ +0 g 0 G +ET + +endstream +endobj +1333 0 obj +<< +/Length 4640 +>> +stream +0 g 0 G +0 g 0 G +BT +/F84 9.9626 Tf 124.802 706.129 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td 
[(Default:)]TJ/F78 9.9626 Tf 38.64 0 Td [(u)-25(n)-18(i)-32(t)-25(d)]TJ/F192 10.3811 Tf 26.159 0 Td [(=)]TJ/F78 9.9626 Tf 10.927 0 Td [(U)]TJ/F84 9.9626 Tf -75.726 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.925 Td [(choice)]TJ +0 g 0 G +/F84 9.9626 Tf 33.754 0 Td [(speci\002es)-250(the)-250(update)-250(of)-250(overlap)-250(elements)-250(to)-250(be)-250(performed)-250(on)-250(exit:)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -3.866 -19.925 Td [(psb_none_)]TJ +0 g 0 G +0 g 0 G + 0 -15.941 Td [(psb_sum_)]TJ +0 g 0 G +0 g 0 G + 0 -15.94 Td [(psb_avg_)]TJ +0 g 0 G +0 g 0 G + 0 -15.94 Td [(psb_square_root_)]TJ/F84 9.9626 Tf -4.981 -19.925 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F145 9.9626 Tf 38.515 0 Td [(psb_avg_)]TJ/F84 9.9626 Tf -38.515 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.925 Td [(diag)]TJ +0 g 0 G +/F84 9.9626 Tf 24.907 0 Td [(the)-250(diagonal)-250(scaling)-250(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F78 9.9626 Tf 38.64 0 Td [(d)-18(i)-47(a)-47(g)]TJ/F192 10.3811 Tf 18.52 0 Td [(\050)]TJ/F84 9.9626 Tf 4.149 0 Td [(1)]TJ/F192 10.3811 Tf 5.106 0 Td [(\051)-289(=)]TJ/F84 9.9626 Tf 18.003 0 Td [(1)]TJ/F192 10.3811 Tf 5.106 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(n)-25(o)-35(s)-25(c)-40(a)-25(l)-48(i)-32(n)-47(g)]TJ/F192 10.3811 Tf 41.384 0 Td [(\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 124.802 423.19 Tm 
[(Speci\002ed)-253(as:)-316(a)-253(rank)-254(one)-253(array)-254(containing)-253(numbers)-253(of)-254(the)-253(type)-254(indicated)-253(in)]TJ 1 0 0 1 124.493 411.235 Tm [(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.598 -19.926 Td [(work)]TJ +0 g 0 G +/F84 9.9626 Tf 28.782 0 Td [(a)-250(work)-250(array)111(.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 1.02 0 0 1 124.802 343.489 Tm [(Speci\002ed)-304(as:)-423(a)-305(rank)-304(one)-304(array)-305(of)-304(the)-305(same)-304(type)-305(of)]TJ/F78 9.9626 Tf 1 0 0 1 354.298 343.489 Tm [(x)]TJ/F84 9.9626 Tf 1.02 0 0 1 362.597 343.489 Tm [(with)-304(the)-305(T)73(ARGET)]TJ 1 0 0 1 124.802 331.534 Tm [(attribute.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.926 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(y)]TJ +0 g 0 G +/F84 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.445 0 Td [(y)]TJ/F84 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 1.02 0 0 1 124.802 243.862 Tm [(Speci\002ed)-330(as:)-475(an)-331(array)-330(of)-331(rank)-330(one)-330(or)-331(two)-330(containing)-331(numbers)-330(of)-331(type)]TJ 1 0 0 1 124.802 231.907 Tm [(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ 0 g 0 G -/F54 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(the)-250(columns)-250(of)]TJ/F52 9.9626 Tf 166.26 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.82 -11.955 Td 
[(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(Speci\002ed)-330(as:)-470(a)-330(number)-330(or)-330(a)-330(rank-one)-330(array)-329(of)-330(long)-330(pr)18(ecision)-330(r)18(eal)-330(num-)]TJ 0 -11.955 Td [(bers.)]TJ + [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(info)]TJ +/F75 9.9626 Tf -24.907 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -89.943 Td [(38)]TJ + 142.357 -73.723 Td [(53)]TJ 0 g 0 G ET endstream endobj -1093 0 obj +1344 0 obj << -/Length 7104 +/Length 7721 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.6)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(4.14)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -18.964 Td 
[(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 206.349 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.46 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 125.989 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F83 10.3811 Tf 25.352 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.007 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F54 9.9626 Tf -195.526 -21.269 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 71.974 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F83 10.3811 Tf 25.353 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F85 10.3811 Tf 8.17 0 Td [(\050)]TJ/F52 9.9626 Tf 4.443 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F85 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(k)]TJ/F52 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F85 10.3811 Tf 11.088 0 Td [(\050)]TJ/F52 9.9626 Tf 4.444 0 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F59 9.9626 Tf -216.928 -19.411 Td [(psb_geasum\050x,)-525(desc_a,)-525(info)-525([,global]\051)-190(psb_norm1\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(gemlt)-250(\227)-250(Entrywise)-250(Product)]TJ/F84 9.9626 Tf -57.407 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F78 9.9626 Tf 299.677 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(and)]TJ/F78 9.9626 Tf 19.482 0 Td [(y)]TJ -187.61 -21.112 Td [(d)-25(o)-35(t)]TJ/F190 10.3811 Tf 16.337 0 Td [(\040)]TJ/F78 9.9626 Tf 13.566 0 Td [(x)]TJ/F192 10.3811 Tf 5.33 0 Td 
[(\050)]TJ/F78 9.9626 Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F78 9.9626 Tf 4.274 0 Td [(y)]TJ/F192 10.3811 Tf 5.23 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.089 0 Td [(\051)]TJ/F84 9.9626 Tf 4.149 0 Td [(.)]TJ/F145 9.9626 Tf -187.464 -21.111 Td [(psb_gemlt\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 128.44 576.025 cm -[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +1 0 0 1 233.844 630.896 cm +[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S Q BT -/F52 9.9626 Tf 134.691 567.457 Td [(a)-25(s)-25(u)-25(m)-7810(x)]TJ/F51 9.9626 Tf 220.765 0 Td [(Function)]TJ +/F78 9.9626 Tf 239.946 622.328 Td [(d)-25(o)-35(t)]TJ/F84 9.9626 Tf 13.445 0 Td [(,)]TJ/F78 9.9626 Tf 5.275 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(,)]TJ/F78 9.9626 Tf 5.106 0 Td [(y)]TJ/F75 9.9626 Tf 91.76 0 Td [(Function)]TJ ET q -1 0 0 1 128.44 563.671 cm -[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +1 0 0 1 233.844 618.542 cm +[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S Q BT -/F54 9.9626 Tf 134.417 555.103 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F84 9.9626 Tf 239.822 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 371.775 555.303 cm +1 0 0 1 377.055 610.173 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 555.103 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F84 9.9626 Tf 380.044 609.974 Td [(gemlt)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 371.775 543.347 cm +1 0 0 1 377.055 598.218 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 543.148 Td [(geasum)]TJ -240.346 -11.955 Td 
[(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F84 9.9626 Tf 380.044 598.019 Td [(gemlt)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 371.775 531.392 cm +1 0 0 1 377.055 586.263 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 531.193 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F84 9.9626 Tf 380.044 586.064 Td [(gemlt)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 371.775 519.437 cm +1 0 0 1 377.055 574.308 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 519.238 Td [(geasum)]TJ +/F84 9.9626 Tf 380.044 574.109 Td [(gemlt)]TJ ET q -1 0 0 1 128.44 515.452 cm -[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +1 0 0 1 233.844 570.323 cm +[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 229.958 487.074 Td [(T)92(able)-250(6:)-310(Data)-250(types)]TJ +/F84 9.9626 Tf 278.277 541.944 Td [(T)92(able)-250(14:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -33.561 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.665 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.666 Td [(x)]TJ + 0 -19.603 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F78 9.9626 Tf 174.059 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 175.611 421.578 Tm [(Speci\002ed)-306(as:)-425(an)-306(object)-306(of)-306(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F145 9.9626 Tf 1 0 0 1 314.513 421.578 Tm [(psb)]TJ ET q -1 0 0 1 385.864 366.56 cm +1 0 0 1 330.831 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 366.361 Td [(T)]TJ +/F145 9.9626 Tf 333.969 421.578 Td [(T)]TJ ET q -1 0 0 1 394.86 366.56 cm +1 0 0 1 339.827 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 366.361 Td [(vect)]TJ +/F145 9.9626 Tf 342.966 421.578 Td [(vect)]TJ ET q -1 0 0 1 419.547 366.56 cm +1 0 0 1 364.515 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 366.361 Td [(type)]TJ +/F145 9.9626 Tf 367.653 421.578 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F84 9.9626 Tf 1.02 0 0 1 391.683 421.578 Tm [(containing)-306(numbers)-306(of)]TJ 1 0 0 1 175.611 409.623 Tm [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(6)]TJ + [-250(2)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.603 Td [(y)]TJ +0 g 0 G +/F84 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F78 9.9626 Tf 173.891 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -164.611 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 
0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ 1.02 0 0 1 175.611 342.199 Tm [(Speci\002ed)-306(as:)-425(an)-306(object)-306(of)-306(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 314.513 342.199 Tm [(psb)]TJ +ET +q +1 0 0 1 330.831 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 333.969 342.199 Td [(T)]TJ +ET +q +1 0 0 1 339.827 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 342.966 342.199 Td [(vect)]TJ +ET +q +1 0 0 1 364.515 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 367.653 342.199 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 391.683 342.199 Tm [(containing)-306(numbers)-306(of)]TJ 1 0 0 1 175.611 330.244 Tm [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.666 Td [(desc)]TJ +/F75 9.9626 Tf -24.906 -19.603 Td [(desc)]TJ ET q -1 0 0 1 120.408 334.939 cm +1 0 0 1 171.218 310.84 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 334.74 Td [(a)]TJ +/F75 9.9626 Tf 174.207 310.641 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td 
[(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 287.119 cm +1 0 0 1 324.173 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 286.919 Td [(desc)]TJ +/F145 9.9626 Tf 327.311 262.82 Td [(desc)]TJ ET q -1 0 0 1 298.05 287.119 cm +1 0 0 1 348.86 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 286.919 Td [(type)]TJ +/F145 9.9626 Tf 351.998 262.82 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -19.665 Td [(global)]TJ +/F75 9.9626 Tf -222.214 -19.602 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.318 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(.true.)]TJ + 0 -19.603 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -31.621 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F78 9.9626 Tf 160.68 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 
Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 0.98 0 0 1 175.611 175.794 Tm [(Speci\002ed)-240(as:)-309(an)-240(object)-240(of)-240(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 304.709 175.794 Tm [(psb)]TJ +ET +q +1 0 0 1 321.027 175.993 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 324.166 175.794 Td [(T)]TJ +ET +q +1 0 0 1 330.023 175.993 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 333.162 175.794 Td [(vect)]TJ +ET +q +1 0 0 1 354.711 175.993 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 357.849 175.794 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 381.113 175.794 Tm [(containing)-240(numbers)-240(of)-240(the)]TJ 1 0 0 1 175.611 163.839 Tm [(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(14)]TJ 0 g 0 G - 0 -19.666 Td [(Function)-250(value)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(vector)]TJ/F52 9.9626 Tf 102.161 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -155.236 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F75 9.9626 Tf -24.906 -19.603 Td [(info)]TJ 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ 0 g 0 G - 141.968 -29.888 Td [(39)]TJ +/F84 9.9626 Tf 115.188 -29.888 Td [(54)]TJ 0 g 0 G ET endstream endobj -1098 0 obj +1349 0 obj << -/Length 2603 +/Length 312 >> stream 0 g 0 G 0 
g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-333(i)1(s)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ/F59 9.9626 Tf 20.922 -19.926 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geasum\050x1,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 31.382 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(2)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geasum\050x2,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - 
[-525(psb_geasum\050x3,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_sum\050ctxt,vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ +/F84 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - [(\051\051)]TJ/F54 9.9626 Tf -52.304 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ -0 g 0 G - 141.968 -402.49 Td [(40)]TJ + 142.357 -603.736 Td [(55)]TJ 0 g 0 G ET endstream endobj -1106 0 obj +1361 0 obj << -/Length 7308 +/Length 7700 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.7)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(4.15)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -20.192 Td [(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(1-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F52 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(:)]TJ/F52 9.9626 Tf 120.031 -13.856 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.293 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)]TJ/F54 9.9626 Tf 13.273 0 Td [(max)]TJ/F52 7.5716 Tf 7.76 -7.335 Td [(k)]TJ/F83 10.3811 Tf 12.944 7.335 Td [(j)]TJ/F52 9.9626 Tf 3.298 0 Td [(x)]TJ/F85 10.3811 Tf 
5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.273 0 Td [(k)]TJ/F54 9.9626 Tf 4.598 0 Td [(,)]TJ/F52 9.9626 Tf 4.206 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F54 9.9626 Tf -215.202 -24.535 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 206.349 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.46 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 125.227 -23.185 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.293 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.007 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F54 9.9626 Tf -196.039 -23.185 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 71.212 -23.185 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.294 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F85 10.3811 Tf 8.169 0 Td [(\050)]TJ/F52 9.9626 Tf 4.444 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F85 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(k)]TJ/F52 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F85 10.3811 Tf 11.088 0 Td [(\050)]TJ/F52 9.9626 Tf 4.443 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -212.21 -21.96 Td [(call)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ/F84 9.9626 Tf -57.407 -18.964 Td 
[(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(division)-250(between)-250(two)-250(vectors)]TJ/F78 9.9626 Tf 300.604 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(and)]TJ/F78 9.9626 Tf 19.481 0 Td [(y)]TJ/F84 9.9626 Tf -188.038 -21.112 Td [(/)]TJ/F190 10.3811 Tf 9.054 0 Td [(\040)]TJ/F78 9.9626 Tf 13.567 0 Td [(x)]TJ/F192 10.3811 Tf 5.33 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 4.274 0 Td [(/)]TJ/F78 9.9626 Tf 6.287 0 Td [(y)]TJ/F192 10.3811 Tf 5.23 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.089 0 Td [(\051)]TJ/F84 9.9626 Tf 4.149 0 Td [(.)]TJ/F145 9.9626 Tf -186.966 -21.111 Td [(psb_gediv\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_geasums\050res,)-525(x,)-525(desc_a,)-525(info\051)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([flag\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 126.328 514.627 cm -[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +1 0 0 1 234.008 630.896 cm +[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S Q BT -/F52 9.9626 Tf 132.33 506.059 Td [(r)-17(e)-25(s)-8868(x)]TJ/F51 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +/F84 9.9626 Tf 240.111 622.328 Td [(/)-12(,)]TJ/F78 9.9626 Tf 11.437 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(,)]TJ/F78 9.9626 Tf 5.106 0 Td [(y)]TJ/F75 9.9626 Tf 99.042 0 Td [(Function)]TJ ET q -1 0 0 1 126.328 502.274 cm -[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +1 0 0 1 234.008 618.542 cm +[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S Q BT -/F54 9.9626 Tf 132.305 493.706 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F84 9.9626 Tf 239.986 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 369.663 493.905 cm +1 0 0 1 377.22 610.173 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 493.706 Td [(geasums)]TJ 
-240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F84 9.9626 Tf 380.209 609.974 Td [(gediv)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 369.663 481.95 cm +1 0 0 1 377.22 598.218 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 481.751 Td [(geasums)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F84 9.9626 Tf 380.209 598.019 Td [(gediv)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 369.663 469.995 cm +1 0 0 1 377.22 586.263 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 469.795 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F84 9.9626 Tf 380.209 586.064 Td [(gediv)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 369.663 458.04 cm +1 0 0 1 377.22 574.308 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 457.84 Td [(geasums)]TJ +/F84 9.9626 Tf 380.209 574.109 Td [(gediv)]TJ ET q -1 0 0 1 126.328 454.054 cm -[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +1 0 0 1 234.008 570.323 cm +[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 229.958 425.676 Td [(T)92(able)-250(7:)-310(Data)-250(types)]TJ +/F84 9.9626 Tf 278.277 541.944 Td [(T)92(able)-250(15:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -37.636 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.46 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -22.459 Td [(x)]TJ + 0 -19.603 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td 
[(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F78 9.9626 Tf 174.059 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 175.611 421.578 Tm [(Speci\002ed)-306(as:)-425(an)-306(object)-306(of)-306(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F145 9.9626 Tf 1 0 0 1 314.513 421.578 Tm [(psb)]TJ ET q -1 0 0 1 385.864 295.5 cm +1 0 0 1 330.831 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 295.3 Td [(T)]TJ +/F145 9.9626 Tf 333.969 421.578 Td [(T)]TJ ET q -1 0 0 1 394.86 295.5 cm +1 0 0 1 339.827 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 295.3 Td [(vect)]TJ +/F145 9.9626 Tf 342.966 421.578 Td [(vect)]TJ ET q -1 0 0 1 419.547 295.5 cm +1 0 0 1 364.515 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 295.3 Td [(type)]TJ +/F145 9.9626 Tf 367.653 421.578 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F84 9.9626 Tf 1.02 0 0 1 391.683 421.578 Tm [(containing)-306(numbers)-306(of)]TJ 1 
0 0 1 175.611 409.623 Tm [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(7)]TJ + [-250(2)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.603 Td [(y)]TJ +0 g 0 G +/F84 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F78 9.9626 Tf 173.891 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -164.611 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ 1.02 0 0 1 175.611 342.199 Tm [(Speci\002ed)-306(as:)-425(an)-306(object)-306(of)-306(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 314.513 342.199 Tm [(psb)]TJ +ET +q +1 0 0 1 330.831 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 333.969 342.199 Td [(T)]TJ +ET +q +1 0 0 1 339.827 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 342.966 342.199 Td [(vect)]TJ +ET +q +1 0 0 1 364.515 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 367.653 342.199 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 391.683 342.199 Tm [(containing)-306(numbers)-306(of)]TJ 1 0 0 1 175.611 330.244 Tm [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.603 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 310.84 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 174.207 310.641 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td 
[(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 263.02 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 327.311 262.82 Td [(desc)]TJ +ET +q +1 0 0 1 348.86 263.02 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 351.998 262.82 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -222.214 -19.602 Td [(\003ag)]TJ +0 g 0 G +/F84 9.9626 Tf 0.994 0 0 1 172.294 243.218 Tm [(check)-252(if)-252(any)-252(of)-251(the)]TJ/F78 9.9626 Tf 1 0 0 1 254.535 243.218 Tm [(y)]TJ/F192 10.3811 Tf 5.231 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)-290(=)]TJ/F84 9.9626 Tf 0.994 0 0 1 285.074 243.218 Tm [(0,)-252(and)-252(in)-252(case)-252(r)19(eturns)-252(err)18(or)-252(halting)-252(the)-252(computa-)]TJ 1 0 0 1 175.611 231.262 Tm [(tion.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -108.662 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F145 9.9626 Tf 132.133 0 Td [(flag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.true.)]TJ +0 g 0 G +/F75 9.9626 Tf -157.039 -19.603 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.603 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F78 9.9626 Tf 160.85 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -151.111 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ +0 g 0 G + 85.819 -29.888 Td [(56)]TJ +0 g 0 G +ET + 
+endstream +endobj +1366 0 obj +<< +/Length 1343 +>> +stream 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.459 Td [(desc)]TJ +BT +/F84 9.9626 Tf 0.98 0 0 1 124.802 706.129 Tm [(Speci\002ed)-240(as:)-309(an)-240(object)-240(of)-240(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 253.899 706.129 Tm [(psb)]TJ ET q -1 0 0 1 120.408 261.085 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 270.218 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 123.397 260.886 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 273.356 706.129 Td [(T)]TJ ET q -1 0 0 1 273.363 213.264 cm +1 0 0 1 279.214 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 213.065 Td [(desc)]TJ +/F145 9.9626 Tf 282.352 706.129 Td [(vect)]TJ ET q -1 0 0 1 298.05 213.264 cm +1 0 0 1 303.901 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 213.065 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -22.459 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -22.46 Td [(res)]TJ -0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F52 9.9626 Tf 176.182 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td 
[(Short)-255(as:)-320(a)-255(long)-254(pr)18(ecision)-255(r)18(eal)-255(number)74(.)-325(Speci\002ed)-255(as:)-320(a)-254(long)-255(pr)18(ecision)-255(r)18(eal)]TJ 0 -11.955 Td [(number)74(.)]TJ -0 g 0 G - 141.968 -29.888 Td [(41)]TJ -0 g 0 G -ET - -endstream -endobj -1111 0 obj -<< -/Length 583 ->> -stream +/F145 9.9626 Tf 307.039 706.129 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 330.304 706.129 Tm [(containing)-240(numbers)-240(of)-240(the)]TJ 1 0 0 1 124.802 694.174 Tm [(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(14)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(info)]TJ +/F75 9.9626 Tf -24.907 -19.926 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -567.87 Td [(42)]TJ + 142.357 -535.99 Td [(57)]TJ 0 g 0 G ET endstream endobj -1118 0 obj +1375 0 obj << -/Length 6387 +/Length 7612 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.8)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(4.16)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q 
BT -/F51 11.9552 Tf 151.016 706.129 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -20.076 Td [(This)-250(function)-250(computes)-250(the)-250(2-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 206.349 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.46 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F52 9.9626 Tf 122.551 -25.46 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.788 0 Td [(2)]TJ/F83 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ -ET -q -1 0 0 1 287.432 658.569 cm -[]0 d 0 J 0.408 w 0 0 m 16.592 0 l S -Q -BT -/F52 9.9626 Tf 287.726 648.638 Td [(x)]TJ/F52 7.5716 Tf 5.399 2.88 Td [(T)]TJ/F52 9.9626 Tf 5.694 -2.88 Td [(x)]TJ/F54 9.9626 Tf -198.924 -23.065 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F52 9.9626 Tf 103.113 -25.46 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.789 0 Td [(2)]TJ/F83 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ -ET -q -1 0 0 1 286.694 610.044 cm -[]0 d 0 J 0.408 w 0 0 m 18.069 0 l S -Q -BT -/F52 9.9626 Tf 286.988 600.113 Td [(x)]TJ/F52 7.5716 Tf 5.588 2.88 Td [(H)]TJ/F52 9.9626 Tf 6.982 -2.88 Td [(x)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ/F84 9.9626 Tf -57.407 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(inverse)-250(of)-250(a)-250(vector)]TJ/F78 9.9626 Tf 252.096 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(and)-250(puts)-250(it)-250(into)]TJ/F78 9.9626 Tf 69.952 0 Td [(y)]TJ/F84 9.9626 Tf -184.093 -18.334 Td [(/)]TJ/F190 10.3811 Tf 9.054 0 Td [(\040)]TJ/F84 9.9626 Tf 13.273 0 Td [(1)-12(/)]TJ/F78 9.9626 Tf 11.562 0 Td [(x)]TJ/F192 10.3811 Tf 5.329 0 Td [(\050)]TJ/F78 9.9626 Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 4.149 0 Td 
[(.)]TJ/F145 9.9626 Tf -181.058 -18.334 Td [(psb_geinv\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([flag\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 128.689 575.464 cm -[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S +1 0 0 1 234.153 637.562 cm +[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S Q BT -/F52 9.9626 Tf 134.791 566.896 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.788 0 Td [(2)]TJ/F52 9.9626 Tf 82.505 0 Td [(x)]TJ/F51 9.9626 Tf 120.621 0 Td [(Function)]TJ +/F84 9.9626 Tf 240.255 628.995 Td [(/)-13(,)]TJ/F78 9.9626 Tf 11.437 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(,)]TJ/F78 9.9626 Tf 5.105 0 Td [(y)]TJ/F75 9.9626 Tf 99.043 0 Td [(Function)]TJ ET q -1 0 0 1 128.689 563.111 cm -[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S +1 0 0 1 234.153 625.209 cm +[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S Q BT -/F54 9.9626 Tf 134.667 554.543 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F84 9.9626 Tf 240.131 616.641 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 372.024 554.742 cm +1 0 0 1 377.364 616.84 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.012 554.543 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F84 9.9626 Tf 380.353 616.641 Td [(geinv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 372.024 542.787 cm +1 0 0 1 377.364 604.885 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.012 542.588 Td [(genrm2)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F84 9.9626 Tf 380.353 604.686 Td [(geinv)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 372.024 530.832 cm 
+1 0 0 1 377.364 592.93 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.012 530.632 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F84 9.9626 Tf 380.353 592.731 Td [(geinv)]TJ -140.222 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 372.024 518.876 cm +1 0 0 1 377.364 580.975 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.012 518.677 Td [(genrm2)]TJ +/F84 9.9626 Tf 380.353 580.775 Td [(geinv)]TJ ET q -1 0 0 1 128.689 514.891 cm -[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S +1 0 0 1 234.153 576.99 cm +[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 229.958 486.513 Td [(T)92(able)-250(8:)-310(Data)-250(types)]TJ +/F84 9.9626 Tf 278.277 548.611 Td [(T)92(able)-250(16:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -115.119 -27.631 Td [(psb_genrm2\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_norm2\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -36.169 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf -127.572 -29.451 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.221 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.828 -18.492 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -22.221 Td [(x)]TJ + 0 -18.491 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td 
[(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F78 9.9626 Tf 174.059 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ -164.321 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 175.611 434.356 Tm [(Speci\002ed)-306(as:)-425(an)-306(object)-306(of)-306(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F145 9.9626 Tf 1 0 0 1 314.513 434.356 Tm [(psb)]TJ ET q -1 0 0 1 385.864 318.695 cm +1 0 0 1 330.831 434.555 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 318.495 Td [(T)]TJ +/F145 9.9626 Tf 333.969 434.356 Td [(T)]TJ ET q -1 0 0 1 394.86 318.695 cm +1 0 0 1 339.827 434.555 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 318.495 Td [(vect)]TJ +/F145 9.9626 Tf 342.966 434.356 Td [(vect)]TJ ET q -1 0 0 1 419.547 318.695 cm +1 0 0 1 364.515 434.555 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 318.495 Td [(type)]TJ +/F145 9.9626 Tf 367.653 434.356 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F84 9.9626 Tf 1.02 0 0 1 391.683 434.356 Tm [(containing)-306(numbers)-306(of)]TJ 1 0 0 1 175.611 422.401 Tm [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(8)]TJ + [-250(2)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.221 Td [(desc)]TJ +/F75 9.9626 Tf -24.906 -18.492 Td [(desc)]TJ ET q -1 0 0 1 120.408 284.518 cm +1 0 0 1 171.218 404.108 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 284.319 Td [(a)]TJ +/F75 9.9626 Tf 174.207 403.909 Td [(a)]TJ 
0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 236.698 cm +1 0 0 1 324.173 356.288 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 236.499 Td [(desc)]TJ +/F145 9.9626 Tf 327.311 356.088 Td [(desc)]TJ ET q -1 0 0 1 298.05 236.698 cm +1 0 0 1 348.86 356.288 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 236.499 Td [(type)]TJ +/F145 9.9626 Tf 351.998 356.088 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -22.221 Td [(global)]TJ +/F75 9.9626 Tf -222.214 -18.491 Td [(\003ag)]TJ 0 g 0 G -/F54 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.956 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 
0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.318 0 Td [(global)]TJ +/F84 9.9626 Tf 0.993 0 0 1 172.294 337.597 Tm [(check)-252(if)-252(any)-252(of)-252(the)]TJ/F78 9.9626 Tf 1 0 0 1 254.633 337.597 Tm [(x)]TJ/F192 10.3811 Tf 5.329 0 Td [(\050)]TJ/F78 9.9626 Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)-290(=)]TJ/F84 9.9626 Tf 0.993 0 0 1 285.269 337.597 Tm [(0,)-252(and)-252(in)-252(case)-252(r)18(eturns)-252(err)18(or)-252(halting)-252(the)-252(computa-)]TJ 1 0 0 1 175.611 325.642 Tm [(tion.)]TJ 0 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -108.662 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F145 9.9626 Tf 132.133 0 Td [(flag)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG [(=)]TJ 0 g 0 G [(.true.)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -34.176 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -157.039 -18.492 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -18.491 Td [(y)]TJ +0 g 0 G +/F84 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F78 9.9626 Tf 160.85 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -151.669 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 0.98 0 0 1 175.611 204.972 Tm [(Speci\002ed)-240(as:)-309(an)-240(object)-240(of)-240(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 304.709 204.972 Tm [(psb)]TJ +ET +q +1 0 0 1 321.027 205.171 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT 
+/F145 9.9626 Tf 324.166 204.972 Td [(T)]TJ +ET +q +1 0 0 1 330.023 205.171 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 333.162 204.972 Td [(vect)]TJ +ET +q +1 0 0 1 354.711 205.171 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 357.849 204.972 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 381.113 204.972 Tm [(containing)-240(numbers)-240(of)-240(the)]TJ 1 0 0 1 175.611 193.017 Tm [(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(16)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -18.492 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 142.356 -36.266 Td [(58)]TJ +0 g 0 G +ET + +endstream +endobj +1381 0 obj +<< +/Length 655 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 14.3462 Tf 99.895 706.042 Td [(5)-1000(Communication)-250(routines)]TJ/F84 9.9626 Tf 0.98 0 0 1 99.587 683.34 Tm [(The)-234(r)19(outines)-234(in)-233(this)-234(chapter)-234(implem)1(ent)-234(various)-234(global)-233(communication)-234(operators)]TJ 0.995 0 0 1 99.895 671.385 Tm [(on)-251(vectors)-252(associated)-251(with)-251(a)-251(discr)18(etization)-251(mesh.)-312(For)-251(auxiliary)-252(communication)]TJ 1 0 0 1 99.895 659.43 Tm [(r)18(outines)-250(not)-250(tied)-250(to)-250(a)-250(discr)18(etization)-250(space)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(7)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(43)]TJ + 166.875 -568.992 Td [(59)]TJ 0 g 0 G ET endstream endobj -1011 0 obj +1278 0 obj << /Type /ObjStm /N 100 -/First 987 -/Length 12089 ->> -stream -216 0 1007 58 1013 165 1015 
282 220 340 1012 397 1024 478 1016 661 1017 807 1018 951 -1019 1097 1020 1243 1021 1387 1026 1533 224 1592 998 1650 1023 1708 1031 1855 1022 2012 1028 2159 -1029 2303 1033 2449 1030 2507 1041 2614 1035 2789 1036 2930 1037 3076 1038 3220 1039 3365 1043 3512 -228 3571 1044 3629 1040 3688 1047 3808 1045 3947 1049 4093 1050 4151 1046 4209 1058 4316 1051 4500 -1052 4644 1053 4790 1054 4934 1055 5079 1056 5226 1060 5370 232 5429 1061 5487 1057 5545 1063 5678 -1065 5796 1062 5854 1070 5935 1066 6092 1067 6236 1068 6382 1072 6529 236 6588 1073 6646 1069 6705 -1075 6838 1077 6956 1078 7014 1074 7071 1084 7165 1080 7322 1081 7466 1082 7612 1086 7759 240 7818 -1087 7876 1083 7935 1092 8068 1088 8225 1089 8369 1090 8512 1094 8659 244 8717 1095 8774 1091 8831 -1097 8964 1099 9082 1100 9141 1096 9199 1105 9293 1101 9450 1102 9594 1103 9740 1107 9887 248 9945 -1108 10002 1104 10060 1110 10193 1112 10311 1109 10370 1117 10451 1113 10608 1114 10751 1115 10897 1119 11044 -% 216 0 obj +/First 998 +/Length 13026 +>> +stream +1274 0 1271 148 1272 293 1276 440 264 498 1277 555 1273 612 1282 734 1279 882 1280 1027 +1284 1174 268 1233 1286 1291 1281 1349 1293 1486 1287 1652 1288 1799 1289 1944 1290 2086 1295 2233 +272 2291 1296 2348 1297 2406 1298 2465 1299 2524 1292 2582 1309 2733 1291 2935 1301 3082 1302 3226 +1303 3372 1304 3519 1305 3670 1306 3821 1307 3972 1311 4119 1308 4178 1315 4315 1312 4454 1317 4599 +276 4657 1318 4714 1314 4772 1326 4923 1313 5116 1319 5264 1320 5408 1321 5555 1322 5702 1323 5845 +1324 5992 1328 6137 1325 6196 1332 6333 1329 6481 1330 6627 1334 6773 1331 6831 1343 6953 1335 7146 +1336 7289 1337 7434 1338 7577 1339 7722 1340 7869 1341 8013 1345 8160 280 8219 1346 8277 1342 8336 +1348 8472 1350 8590 1347 8648 1360 8729 1352 8904 1353 9047 1354 9192 1355 9335 1356 9480 1362 9627 +284 9686 1363 9744 1359 9803 1365 9939 1357 10087 1358 10231 1367 10378 1364 10436 1374 10531 1368 10706 +1369 10847 1370 10992 1371 11139 1372 11283 1376 11430 288 11489 1377 11547 1373 
11606 1380 11742 1378 11881 +% 1274 0 obj << -/D [1008 0 R /XYZ 150.705 613.372 null] +/Type /Page +/Contents 1275 0 R +/Resources 1273 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1244 0 R +/Annots [ 1271 0 R 1272 0 R ] >> -% 1007 0 obj +% 1271 0 obj << -/Font << /F94 915 0 R /F54 586 0 R /F51 584 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 340.341 333.567 352.401] +/A << /S /GoTo /D (spdata) >> >> -% 1013 0 obj +% 1272 0 obj << -/Type /Page -/Contents 1014 0 R -/Resources 1012 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 991 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 272.595 323.106 284.655] +/A << /S /GoTo /D (descdata) >> >> -% 1015 0 obj +% 1276 0 obj << -/D [1013 0 R /XYZ 98.895 753.953 null] +/D [1274 0 R /XYZ 98.895 753.953 null] >> -% 220 0 obj +% 264 0 obj << -/D [1013 0 R /XYZ 99.895 716.092 null] +/D [1274 0 R /XYZ 99.895 716.092 null] >> -% 1012 0 obj +% 1277 0 obj +<< +/D [1274 0 R /XYZ 99.895 517.78 null] +>> +% 1273 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1024 0 obj +% 1282 0 obj << /Type /Page -/Contents 1025 0 R -/Resources 1023 0 R +/Contents 1283 0 R +/Resources 1281 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 1016 0 R 1017 0 R 1018 0 R 1019 0 R 1020 0 R 1021 0 R ] +/Parent 1244 0 R +/Annots [ 1279 0 R 1280 0 R ] >> -% 1016 0 obj +% 1279 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 413.274 435.942 425.334] -/A << /S /GoTo /D (table.1) >> +/Rect [306.858 340.341 384.376 352.401] +/A << /S /GoTo /D (spdata) >> >> -% 1017 0 obj +% 1280 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 345.063 495.412 357.123] -/A << /S /GoTo /D (vdata) >> +/Rect [306.858 272.595 373.916 284.655] +/A << /S /GoTo /D (descdata) >> >> -% 1018 0 obj +% 1284 0 
obj +<< +/D [1282 0 R /XYZ 149.705 753.953 null] +>> +% 268 0 obj +<< +/D [1282 0 R /XYZ 150.705 716.092 null] +>> +% 1286 0 obj +<< +/D [1282 0 R /XYZ 150.705 517.78 null] +>> +% 1281 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F243 1285 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1293 0 obj +<< +/Type /Page +/Contents 1294 0 R +/Resources 1292 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1300 0 R +/Annots [ 1287 0 R 1288 0 R 1289 0 R 1290 0 R ] +>> +% 1287 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.755 333.108 388.729 345.168] -/A << /S /GoTo /D (table.1) >> +/Rect [378.159 277.323 390.114 289.383] +/A << /S /GoTo /D (table.12) >> >> -% 1019 0 obj +% 1288 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 252.942 435.942 265.002] -/A << /S /GoTo /D (table.1) >> +/Rect [256.048 208.877 333.567 220.936] +/A << /S /GoTo /D (spdata) >> >> -% 1020 0 obj +% 1289 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 184.731 495.412 196.791] +/Rect [368.549 140.43 444.603 152.49] /A << /S /GoTo /D (vdata) >> >> -% 1021 0 obj +% 1290 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [397.868 172.776 404.842 184.835] -/A << /S /GoTo /D (table.1) >> +/Rect [329.477 128.475 341.581 140.535] +/A << /S /GoTo /D (table.12) >> >> -% 1026 0 obj +% 1295 0 obj << -/D [1024 0 R /XYZ 149.705 753.953 null] +/D [1293 0 R /XYZ 98.895 753.953 null] >> -% 224 0 obj +% 272 0 obj << -/D [1024 0 R /XYZ 150.705 716.092 null] +/D [1293 0 R /XYZ 99.895 716.092 null] >> -% 998 0 obj +% 1296 0 obj << -/D [1024 0 R /XYZ 150.705 558.947 null] +/D [1293 0 R /XYZ 239.918 674.17 null] >> -% 1023 0 obj +% 1297 0 obj +<< +/D [1293 0 R /XYZ 237.121 654.041 null] +>> +% 1298 0 obj +<< +/D [1293 0 R /XYZ 236.383 633.911 null] +>> +% 1299 0 obj +<< +/D [1293 0 R /XYZ 99.895 447.252 null] +>> +% 1292 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R 
/F83 813 0 R /F60 1027 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F147 1157 0 R /F192 942 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1031 0 obj +% 1309 0 obj << /Type /Page -/Contents 1032 0 R -/Resources 1030 0 R +/Contents 1310 0 R +/Resources 1308 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1022 0 R 1028 0 R 1029 0 R ] +/Parent 1300 0 R +/Annots [ 1291 0 R 1301 0 R 1302 0 R 1303 0 R 1304 0 R 1305 0 R 1306 0 R 1307 0 R ] >> -% 1022 0 obj +% 1291 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 690.368 323.106 702.428] -/A << /S /GoTo /D (descdata) >> +/Rect [428.968 654.503 440.924 666.562] +/A << /S /GoTo /D (table.12) >> >> -% 1028 0 obj +% 1301 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 600.704 444.603 612.764] +/Rect [419.358 588.085 495.412 600.145] /A << /S /GoTo /D (vdata) >> >> -% 1029 0 obj +% 1302 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [344.963 588.749 351.937 600.809] -/A << /S /GoTo /D (table.1) >> +/Rect [380.469 576.13 392.583 588.189] +/A << /S /GoTo /D (table.12) >> >> -% 1033 0 obj +% 1303 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 497.757 373.916 509.817] +/A << /S /GoTo /D (descdata) >> +>> +% 1304 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [388.949 460.563 395.923 472.623] +/A << /S /GoTo /D (equation.4.1) >> +>> +% 1305 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [387.295 445.951 394.269 458.011] +/A << /S /GoTo /D (equation.4.2) >> +>> +% 1306 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [387.843 431.339 394.817 443.399] +/A << /S /GoTo /D (equation.4.3) >> +>> +% 1307 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [253.329 189.579 265.284 201.639] +/A << /S /GoTo /D (table.12) >> +>> +% 1311 0 
obj +<< +/D [1309 0 R /XYZ 149.705 753.953 null] +>> +% 1308 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F147 1157 0 R /F78 686 0 R /F145 940 0 R /F192 942 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1315 0 obj +<< +/Type /Page +/Contents 1316 0 R +/Resources 1314 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1300 0 R +/Annots [ 1312 0 R ] +>> +% 1312 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [378.159 116.52 390.114 128.58] +/A << /S /GoTo /D (table.13) >> +>> +% 1317 0 obj +<< +/D [1315 0 R /XYZ 98.895 753.953 null] +>> +% 276 0 obj +<< +/D [1315 0 R /XYZ 99.895 716.092 null] +>> +% 1318 0 obj << -/D [1031 0 R /XYZ 98.895 753.953 null] +/D [1315 0 R /XYZ 99.895 268.704 null] >> -% 1030 0 obj +% 1314 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F147 1157 0 R /F192 942 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1041 0 obj +% 1326 0 obj << /Type /Page -/Contents 1042 0 R -/Resources 1040 0 R +/Contents 1327 0 R +/Resources 1325 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1035 0 R 1036 0 R 1037 0 R 1038 0 R 1039 0 R ] +/Parent 1300 0 R +/Annots [ 1313 0 R 1319 0 R 1320 0 R 1321 0 R 1322 0 R 1323 0 R 1324 0 R ] >> -% 1035 0 obj +% 1313 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 355.7 495.412 367.76] -/A << /S /GoTo /D (vdata) >> +/Rect [355.953 654.503 362.927 666.562] +/A << /S /GoTo /D (section.3) >> >> -% 1036 0 obj +% 1319 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.755 343.745 388.729 355.804] -/A << /S /GoTo /D (table.2) >> +/Rect [419.358 586.032 495.412 598.092] +/A << /S /GoTo /D (vdata) >> >> -% 1037 0 obj +% 1320 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 264.018 495.412 276.078] -/A << /S /GoTo /D (vdata) >> +/Rect [380.286 574.077 392.391 586.136] +/A << /S /GoTo /D (table.13) >> >> -% 1038 0 obj +% 
1321 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.88 252.063 388.854 264.123] -/A << /S /GoTo /D (table.2) >> +/Rect [428.968 493.651 440.924 505.711] +/A << /S /GoTo /D (table.13) >> >> -% 1039 0 obj +% 1322 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 172.336 373.916 184.396] -/A << /S /GoTo /D (descdata) >> +/Rect [419.358 425.181 495.412 437.24] +/A << /S /GoTo /D (vdata) >> >> -% 1043 0 obj +% 1323 0 obj << -/D [1041 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [380.469 413.225 392.583 425.285] +/A << /S /GoTo /D (table.13) >> >> -% 228 0 obj +% 1324 0 obj << -/D [1041 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 332.8 373.916 344.859] +/A << /S /GoTo /D (descdata) >> >> -% 1044 0 obj +% 1328 0 obj << -/D [1041 0 R /XYZ 150.705 499.951 null] +/D [1326 0 R /XYZ 149.705 753.953 null] >> -% 1040 0 obj +% 1325 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F145 940 0 R /F147 1157 0 R /F192 942 0 R >> /ProcSet [ /PDF /Text ] >> -% 1047 0 obj +% 1332 0 obj << /Type /Page -/Contents 1048 0 R -/Resources 1046 0 R +/Contents 1333 0 R +/Resources 1331 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1045 0 R ] +/Parent 1300 0 R +/Annots [ 1329 0 R 1330 0 R ] >> -% 1045 0 obj +% 1329 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.159 602.697 385.133 614.756] -/A << /S /GoTo /D (table.2) >> +/Rect [149.34 410.079 161.295 419.489] +/A << /S /GoTo /D (table.13) >> >> -% 1049 0 obj +% 1330 0 obj << -/D [1047 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [202.52 228.102 214.475 240.161] +/A << /S /GoTo /D (table.13) >> >> -% 1050 0 obj +% 1334 0 obj << -/D [1047 0 R /XYZ 99.895 512.639 null] +/D [1332 0 R /XYZ 98.895 
753.953 null] >> -% 1046 0 obj +% 1331 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F84 687 0 R /F75 685 0 R /F78 686 0 R /F192 942 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1058 0 obj +% 1343 0 obj << /Type /Page -/Contents 1059 0 R -/Resources 1057 0 R +/Contents 1344 0 R +/Resources 1342 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1051 0 R 1052 0 R 1053 0 R 1054 0 R 1055 0 R 1056 0 R ] +/Parent 1300 0 R +/Annots [ 1335 0 R 1336 0 R 1337 0 R 1338 0 R 1339 0 R 1340 0 R 1341 0 R ] >> -% 1051 0 obj +% 1335 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 384.684 495.412 396.744] +/Rect [313.516 417.772 389.57 429.832] /A << /S /GoTo /D (vdata) >> >> -% 1052 0 obj +% 1336 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.755 372.729 388.729 384.789] -/A << /S /GoTo /D (table.3) >> +/Rect [275.366 405.817 282.34 417.877] +/A << /S /GoTo /D (table.2) >> >> -% 1053 0 obj +% 1337 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 293.733 495.412 305.793] +/Rect [313.516 338.393 389.57 350.453] /A << /S /GoTo /D (vdata) >> >> -% 1054 0 obj +% 1338 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.88 281.778 388.854 293.837] -/A << /S /GoTo /D (table.3) >> +/Rect [275.366 326.438 282.34 338.498] +/A << /S /GoTo /D (table.2) >> >> -% 1055 0 obj +% 1339 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 202.781 373.916 214.841] +/Rect [306.858 259.015 373.916 271.074] /A << /S /GoTo /D (descdata) >> >> -% 1056 0 obj +% 1340 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [200.458 119.17 207.432 128.58] -/A << /S /GoTo /D (table.2) >> +/Rect [303.712 171.988 379.767 184.048] +/A << /S /GoTo /D (vdata) >> >> -% 1060 0 obj +% 1341 0 obj << -/D [1058 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] 
+/Rect [277.368 160.033 289.324 172.093] +/A << /S /GoTo /D (table.14) >> >> -% 232 0 obj +% 1345 0 obj << -/D [1058 0 R /XYZ 150.705 716.092 null] +/D [1343 0 R /XYZ 149.705 753.953 null] >> -% 1061 0 obj +% 280 0 obj << -/D [1058 0 R /XYZ 150.705 524.97 null] +/D [1343 0 R /XYZ 150.705 716.092 null] >> -% 1057 0 obj +% 1346 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> +/D [1343 0 R /XYZ 150.705 560.161 null] +>> +% 1342 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F192 942 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1063 0 obj +% 1348 0 obj << /Type /Page -/Contents 1064 0 R -/Resources 1062 0 R +/Contents 1349 0 R +/Resources 1347 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R +/Parent 1351 0 R >> -% 1065 0 obj +% 1350 0 obj << -/D [1063 0 R /XYZ 98.895 753.953 null] +/D [1348 0 R /XYZ 98.895 753.953 null] >> -% 1062 0 obj +% 1347 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F84 687 0 R /F75 685 0 R >> /ProcSet [ /PDF /Text ] >> -% 1070 0 obj +% 1360 0 obj << /Type /Page -/Contents 1071 0 R -/Resources 1069 0 R +/Contents 1361 0 R +/Resources 1359 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1066 0 R 1067 0 R 1068 0 R ] +/Parent 1351 0 R +/Annots [ 1352 0 R 1353 0 R 1354 0 R 1355 0 R 1356 0 R ] >> -% 1066 0 obj +% 1352 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 358.503 495.412 370.562] +/Rect [313.516 417.772 389.57 429.832] /A << /S /GoTo /D (vdata) >> >> -% 1067 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [377.462 346.547 384.436 358.607] -/A << /S /GoTo /D (table.4) >> ->> -% 1068 0 obj +% 1353 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 280.074 373.916 292.134] -/A << /S /GoTo /D (descdata) >> ->> -% 1072 0 obj -<< -/D [1070 0 R /XYZ 149.705 753.953 null] ->> -% 236 0 obj -<< -/D [1070 0 R /XYZ 150.705 716.092 null] ->> -% 
1073 0 obj -<< -/D [1070 0 R /XYZ 150.705 495.665 null] ->> -% 1069 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1075 0 obj -<< -/Type /Page -/Contents 1076 0 R -/Resources 1074 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R ->> -% 1077 0 obj -<< -/D [1075 0 R /XYZ 98.895 753.953 null] ->> -% 1078 0 obj -<< -/D [1075 0 R /XYZ 99.895 632.19 null] ->> -% 1074 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1084 0 obj -<< -/Type /Page -/Contents 1085 0 R -/Resources 1083 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R -/Annots [ 1080 0 R 1081 0 R 1082 0 R ] +/Rect [275.366 405.817 282.34 417.877] +/A << /S /GoTo /D (table.2) >> >> -% 1080 0 obj +% 1354 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 411.694 495.412 423.754] +/Rect [313.516 338.393 389.57 350.453] /A << /S /GoTo /D (vdata) >> >> -% 1081 0 obj +% 1355 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [377.462 399.739 384.436 411.798] -/A << /S /GoTo /D (table.5) >> +/Rect [275.366 326.438 282.34 338.498] +/A << /S /GoTo /D (table.2) >> >> -% 1082 0 obj +% 1356 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 331.993 373.916 344.052] +/Rect [306.858 259.015 373.916 271.074] /A << /S /GoTo /D (descdata) >> >> -% 1086 0 obj +% 1362 0 obj << -/D [1084 0 R /XYZ 149.705 753.953 null] +/D [1360 0 R /XYZ 149.705 753.953 null] >> -% 240 0 obj +% 284 0 obj << -/D [1084 0 R /XYZ 150.705 716.092 null] +/D [1360 0 R /XYZ 150.705 716.092 null] >> -% 1087 0 obj +% 1363 0 obj << -/D [1084 0 R /XYZ 150.705 555.856 null] +/D [1360 0 R /XYZ 150.705 560.161 null] >> -% 1083 0 obj +% 1359 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F192 942 0 R /F145 940 0 R >> /ProcSet [ /PDF 
/Text ] >> -% 1092 0 obj +% 1365 0 obj << /Type /Page -/Contents 1093 0 R -/Resources 1091 0 R +/Contents 1366 0 R +/Resources 1364 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R -/Annots [ 1088 0 R 1089 0 R 1090 0 R ] +/Parent 1351 0 R +/Annots [ 1357 0 R 1358 0 R ] >> -% 1088 0 obj +% 1357 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 362.555 444.603 374.615] +/Rect [252.903 702.323 328.957 714.383] /A << /S /GoTo /D (vdata) >> >> -% 1089 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 350.6 333.626 362.66] -/A << /S /GoTo /D (table.6) >> ->> -% 1090 0 obj +% 1358 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 283.114 323.106 295.173] -/A << /S /GoTo /D (descdata) >> ->> -% 1094 0 obj -<< -/D [1092 0 R /XYZ 98.895 753.953 null] ->> -% 244 0 obj -<< -/D [1092 0 R /XYZ 99.895 716.092 null] ->> -% 1095 0 obj -<< -/D [1092 0 R /XYZ 99.895 505.29 null] ->> -% 1091 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1097 0 obj -<< -/Type /Page -/Contents 1098 0 R -/Resources 1096 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R ->> -% 1099 0 obj -<< -/D [1097 0 R /XYZ 149.705 753.953 null] +/Rect [226.559 690.368 238.514 702.428] +/A << /S /GoTo /D (table.14) >> >> -% 1100 0 obj +% 1367 0 obj << -/D [1097 0 R /XYZ 150.705 632.19 null] +/D [1365 0 R /XYZ 98.895 753.953 null] >> -% 1096 0 obj +% 1364 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F84 687 0 R /F145 940 0 R /F75 685 0 R >> /ProcSet [ /PDF /Text ] >> -% 1105 0 obj +% 1374 0 obj << /Type /Page -/Contents 1106 0 R -/Resources 1104 0 R +/Contents 1375 0 R +/Resources 1373 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R -/Annots [ 1101 0 R 1102 0 R 1103 0 R ] +/Parent 1351 0 R +/Annots [ 1368 0 R 1369 0 R 1370 0 R 1371 0 R 1372 0 R ] >> -% 1101 0 obj +% 1368 0 obj << /Type /Annot 
/Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 291.495 444.603 303.554] +/Rect [313.516 430.55 389.57 442.61] /A << /S /GoTo /D (vdata) >> >> -% 1102 0 obj +% 1369 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 279.539 333.626 291.599] -/A << /S /GoTo /D (table.7) >> +/Rect [275.366 418.595 282.34 430.655] +/A << /S /GoTo /D (table.2) >> >> -% 1103 0 obj +% 1370 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 209.259 323.106 221.319] +/Rect [306.858 352.283 373.916 364.342] /A << /S /GoTo /D (descdata) >> >> -% 1107 0 obj -<< -/D [1105 0 R /XYZ 98.895 753.953 null] ->> -% 248 0 obj +% 1371 0 obj << -/D [1105 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [303.712 201.166 379.767 213.226] +/A << /S /GoTo /D (vdata) >> >> -% 1108 0 obj +% 1372 0 obj << -/D [1105 0 R /XYZ 99.895 443.893 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [277.368 189.211 289.324 201.271] +/A << /S /GoTo /D (table.16) >> >> -% 1104 0 obj +% 1376 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [1374 0 R /XYZ 149.705 753.953 null] >> -% 1110 0 obj +% 288 0 obj << -/Type /Page -/Contents 1111 0 R -/Resources 1109 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R +/D [1374 0 R /XYZ 150.705 716.092 null] >> -% 1112 0 obj +% 1377 0 obj << -/D [1110 0 R /XYZ 149.705 753.953 null] +/D [1374 0 R /XYZ 150.705 566.828 null] >> -% 1109 0 obj +% 1373 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F192 942 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1117 0 obj +% 1380 0 obj << /Type /Page -/Contents 1118 0 R -/Resources 1116 0 R +/Contents 1381 0 R +/Resources 1379 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1113 0 R 1114 0 R 1115 0 R ] ->> -% 1113 0 obj -<< -/Type /Annot 
-/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 314.69 444.603 326.749] -/A << /S /GoTo /D (vdata) >> ->> -% 1114 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 302.734 333.626 314.794] -/A << /S /GoTo /D (table.8) >> +/Parent 1351 0 R +/Annots [ 1378 0 R ] >> -% 1115 0 obj +% 1378 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 232.693 323.106 244.753] -/A << /S /GoTo /D (descdata) >> ->> -% 1119 0 obj -<< -/D [1117 0 R /XYZ 98.895 753.953 null] +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [297.94 655.624 304.914 667.684] +/A << /S /GoTo /D (section.7) >> >> endstream endobj -1125 0 obj +1390 0 obj << -/Length 4322 +/Length 6834 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(Function)-250(V)111(alue)]TJ -0 g 0 G -/F54 9.9626 Tf 73.882 0 Td [(is)-250(the)-250(2-norm)-250(of)-250(vector)]TJ/F52 9.9626 Tf 102.161 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -156.342 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(5.1)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 201.825 706.129 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ/F84 9.9626 Tf -51.429 -19.15 Td [(These)-250(subr)18(outines)-250(gathers)-250(the)-250(values)-250(of)-250(the)-250(halo)-250(elements:)]TJ/F78 9.9626 Tf 158.877 -25.014 Td [(x)]TJ/F190 10.3811 Tf 8.097 0 Td [(\040)]TJ/F78 9.9626 Tf 13.567 0 Td [(x)]TJ/F84 9.9626 Tf -180.651 -22.11 Td [(wher)18(e:)]TJ 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +/F78 9.9626 Tf 0.713 -20.212 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(info)]TJ +/F84 9.9626 Tf 10.186 0 Td [(is)-250(a)-250(global)-250(dense)-250(submatrix.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ 24.981 -17.933 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(1)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ ET q -1 0 0 1 278.034 495.12 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 230.392 596.326 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 281.627 494.921 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(1)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ +/F147 9.9626 Tf 236.494 587.758 Td [(a)]TJ/F84 9.9626 Tf 5.385 0 Td [(,)]TJ/F78 9.9626 Tf 5.275 0 Td [(x)]TJ/F75 9.9626 Tf 110.131 0 Td [(Subroutine)]TJ ET q -1 0 0 1 367.96 495.12 cm -[]0 d 0 J 
0.398 w 0 0 m 2.989 0 l S +1 0 0 1 230.392 583.972 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 371.922 494.921 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(2)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ +/F84 9.9626 Tf 236.369 575.404 Td [(Integer)-8983(psb)]TJ ET q -1 0 0 1 278.034 483.165 cm +1 0 0 1 373.603 575.603 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 281.627 482.966 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(2)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ +/F84 9.9626 Tf 376.592 575.404 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 367.96 483.165 cm +1 0 0 1 373.603 563.648 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 371.922 482.966 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(3)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ +/F84 9.9626 Tf 376.592 563.449 Td [(halo)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 278.034 471.21 cm +1 0 0 1 373.603 551.693 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 281.627 471.011 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(3)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ +/F84 9.9626 Tf 376.592 551.494 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 367.96 471.21 cm +1 0 0 1 373.603 539.738 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 371.922 471.011 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -170.658 -11.956 Td 
[(c)-175(a)-175(l)-174(l)-831(p)-56(s)-56(b)]TJ +/F84 9.9626 Tf 376.592 539.539 Td [(halo)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 247.952 459.255 cm +1 0 0 1 373.603 527.783 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 251.497 459.055 Td [(n)-56(r)-56(m)-55(2)-190(\050)-264(c)-132(t)-131(x)-131(t)-438(,)-283(v)-107(r)-107(e)-108(s)-300(\050)-193(1)-193(:)-193(3)-193(\051)-193(\051)]TJ +/F84 9.9626 Tf 376.592 527.583 Td [(halo)]TJ +ET +q +1 0 0 1 230.392 523.798 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q 0 g 0 G +BT +/F84 9.9626 Tf 278.277 495.419 Td [(T)92(able)-250(17:)-310(Data)-250(types)]TJ 0 g 0 G - -75.886 -21.917 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.956 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ 0 g 0 G - 141.968 -334.744 Td [(44)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -127.572 -24.102 Td [(call)]TJ 0 g 0 G -ET - -endstream -endobj -1138 0 obj -<< -/Length 6050 ->> -stream + [-525(psb_halo\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-525(psb_halo\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(work,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.9)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -18.964 Td 
[(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(2-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F52 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(:)]TJ/F52 9.9626 Tf 126.858 -11.955 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.294 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.006 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F52 9.9626 Tf 6.821 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.744 Td [(2)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -186.919 -16.189 Td [(call)]TJ + [-525(data)]TJ +0 g 0 G + [(\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.301 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-525(psb_genrm2s\050res,)-525(x,)-525(desc_a,)-525(info\051)]TJ +/F75 9.9626 Tf -29.828 -20.308 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -20.309 Td [(x)]TJ 0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 89.687 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -79.948 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 0.98 0 0 1 175.611 348.623 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-248(an)-247(object)-247(of)-247(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 420.354 348.623 Tm [(psb)]TJ ET q -1 0 0 1 126.577 630.954 cm -[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S +1 0 0 1 436.673 348.823 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 132.579 622.386 Td [(r)-17(e)-25(s)-8868(x)]TJ/F51 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +/F145 9.9626 Tf 
439.811 348.623 Td [(T)]TJ ET q -1 0 0 1 126.577 618.6 cm -[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S +1 0 0 1 445.669 348.823 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 132.554 610.032 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F145 9.9626 Tf 448.807 348.623 Td [(vect)]TJ ET q -1 0 0 1 369.912 610.231 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 470.356 348.823 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 372.9 610.032 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F145 9.9626 Tf 473.495 348.623 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(17)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -20.308 Td [(desc)]TJ ET q -1 0 0 1 369.912 598.276 cm +1 0 0 1 171.218 316.559 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.9 598.077 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F75 9.9626 Tf 174.207 316.36 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 369.912 586.321 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 268.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 372.9 586.122 Td [(genrm2s)]TJ -240.346 -11.955 Td 
[(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F145 9.9626 Tf 363.206 268.539 Td [(desc)]TJ ET q -1 0 0 1 369.912 574.366 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 384.755 268.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 372.9 574.167 Td [(genrm2s)]TJ +/F145 9.9626 Tf 387.893 268.539 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -20.308 Td [(work)]TJ +0 g 0 G +/F84 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F78 9.9626 Tf 218.453 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -248.566 -20.309 Td [(data)]TJ +0 g 0 G +/F84 9.9626 Tf 24.349 0 Td [(index)-250(list)-250(selector)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 0.98 0 0 1 175.611 144.236 Tm [(Speci\002ed)-194(as:)-286(an)-193(integer)75(.)-296(V)94(alues:)]TJ/F145 9.9626 Tf 1 0 0 1 309.544 144.236 Tm [(psb_comm_halo_)]TJ/F84 9.9626 Tf 0.98 0 0 1 382.769 144.236 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 385.21 144.236 Tm [(psb_comm_mov_)]TJ/F84 9.9626 Tf 0.98 0 0 1 453.204 144.236 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 457.658 144.236 Tm [(psb_comm_ext_)]TJ/F84 9.9626 Tf 0.98 0 0 1 525.652 144.236 Tm [(,)]TJ 0.98 0 0 1 175.611 132.281 Tm [(default:)]TJ/F145 9.9626 Tf 1 0 0 1 211.658 132.281 Tm [(psb_comm_halo_)]TJ/F84 9.9626 Tf 0.98 0 0 1 284.883 132.281 Tm 
[(.)-305(Chooses)-220(the)-221(index)-221(list)-220(on)-221(which)-220(to)-221(base)-221(the)-220(data)]TJ 1 0 0 1 175.611 120.326 Tm [(exchange.)]TJ +0 g 0 G + 141.968 -29.888 Td [(60)]TJ +0 g 0 G ET -q -1 0 0 1 126.577 570.381 cm -[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S -Q + +endstream +endobj +1397 0 obj +<< +/Length 3211 +>> +stream +0 g 0 G +0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 229.958 542.002 Td [(T)92(able)-250(9:)-310(Data)-250(types)]TJ +/F75 9.9626 Tf 99.895 706.129 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F78 9.9626 Tf 117.084 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 638.383 Tm [(Returned)-228(as:)-303(a)-227(rank)-228(one)-228(or)-228(two)-228(array)-228(containing)-228(numbers)-227(of)-228(type)-228(speci\002ed)]TJ 1 0 0 1 124.802 626.428 Tm [(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(17)]TJ 0 g 0 G -/F51 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F75 9.9626 Tf -24.907 -19.926 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 23.801 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F78 9.9626 Tf 160.68 0 Td [(y)]TJ/F84 9.9626 Tf 5.106 0 Td [(.)]TJ -164.68 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td 
[(An)-250(integer)-250(value)-250(that)-250(contains)-250(an)-250(err)18(or)-250(code.)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ ET +1 0 0 1 159.702 336.406 cm q -1 0 0 1 385.864 420.062 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 389.002 419.863 Td [(T)]TJ -ET +.45 0 0 .45 0 0 cm q -1 0 0 1 394.86 420.062 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 0 0 cm +/Im4 Do Q -BT -/F59 9.9626 Tf 397.998 419.863 Td [(vect)]TJ -ET -q -1 0 0 1 419.547 420.062 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q +0 g 0 G +1 0 0 1 -159.702 -336.406 cm BT -/F59 9.9626 Tf 422.685 419.863 Td [(type)]TJ +/F84 9.9626 Tf 189.276 304.526 Td [(Figur)18(e)-250(3:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 g 0 G +/F75 11.9552 Tf 1.02 0 0 1 99.895 280.616 Tm [(Usage)-276(Exam)1(ple)]TJ/F84 9.9626 Tf 1.02 0 0 1 188.024 280.616 Tm [(Consider)-276(the)-275(discr)18(etization)-276(mesh)-275(depicted)-276(in)-275(\002g.)]TJ 0 0 1 rg 0 0 1 RG - [-250(9)]TJ + [-276(3)]TJ 0 g 0 G - [(.)]TJ + [(,)-283(parti-)]TJ 0.989 0 0 1 99.895 268.66 Tm [(tioned)-252(among)-253(two)-252(pr)18(ocesses)-252(as)-252(shown)-252(by)-253(the)-252(dashed)-252(line;)-252(the)-253(data)-252(distribution)]TJ 1.02 0 0 1 
99.895 256.705 Tm [(is)-298(such)-299(that)-298(each)-298(pr)18(ocess)-299(will)-298(own)-298(32)-299(entries)-298(in)-298(the)-298(index)-299(space,)-311(with)-298(a)-299(halo)]TJ 1.02 0 0 1 99.895 244.75 Tm [(made)-312(of)-312(8)-312(entries)-312(placed)-312(at)-312(local)-312(indices)-312(33)-312(thr)17(ough)-312(40.)-504(If)-312(pr)17(ocess)-312(0)-312(assigns)]TJ 1.018 0 0 1 99.895 232.795 Tm [(an)-245(initial)-245(value)-246(of)-245(1)-245(to)-245(its)-245(entries)-246(i)1(n)-246(the)]TJ/F78 9.9626 Tf 1 0 0 1 273.331 232.795 Tm [(x)]TJ/F84 9.9626 Tf 1.018 0 0 1 281.023 232.795 Tm [(vector)73(,)-246(and)-245(pr)18(ocess)-245(1)-245(assigns)-246(a)-245(value)]TJ 1.02 0 0 1 99.895 220.84 Tm [(of)-277(2,)-285(then)-277(after)-277(a)-276(call)-277(to)]TJ/F145 9.9626 Tf 1 0 0 1 206.342 220.84 Tm [(psb_halo)]TJ/F84 9.9626 Tf 1.02 0 0 1 250.999 220.84 Tm [(the)-277(contents)-277(of)-277(the)-277(l)1(ocal)-277(vectors)-277(will)-277(be)-277(the)]TJ 1 0 0 1 99.895 208.885 Tm [(following:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 388.181 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 387.982 Td [(a)]TJ + 166.875 -118.447 Td [(61)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 340.361 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 340.161 Td [(desc)]TJ ET -q -1 0 0 1 298.05 340.361 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 301.189 340.161 Td [(type)]TJ -0 g 0 G -/F54 
9.9626 Tf 20.921 0 Td [(.)]TJ + +endstream +endobj +1393 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/try8x8.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 1401 0 R +/BBox [0 0 498 439] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 1402 0 R +>>/Font << /R8 1403 0 R/R10 1404 0 R>> +>> +/Length 3349 +/Filter /FlateDecode +>> +stream +xœ]›½Ž$¹„ý~ŠötkìÿªXtÒj½ÃY}Ð-q’¡×WWfDFr±Æ$ɬªo›1d%ç÷gù¨ÏrÿÃÏ×o¿Ìç¯ÿ}Œãù¿‡=¿üù1Žùœs<³hÔñü÷ã×^Ù½ŽÔ}ì®c©¿õÅþVzê_=ú©þ>⩽¦ÇŽí'ž»!ïˆw È;\)ònÒBBZÃ!-$äݤ…5=ÖOÈztRZHLk€Óâ•/©ÇDµX=&¬·œÖcâZ ¼× b!r+“ÈÙ@¶xå €ì1‘­d‰ì-Gö˜ÈÖ²Ç5ÙGù¬|‡|7ˆ|Ç+_@d‹ùnÙâ@¶-ä»Ed‹k ²¹×И…D¶-^ù {LdkÙc"{Ë‘=&²µ€ìqÍ@ö‘@>Cqò)ÍY¼òD>³ì¬Eä3 Ï[@>³ô¬Eä3‹ÏiŠy´Ÿ…aeMò³xå èf-ËÏZô³–åç-8ZËò³=­eù9M€>åyÝœÅFê@l#mÄH?Ki³h¤œy¤ž1ÒŽ–FêÙb¤®+Ô‘ ¬ü|cîÇÌ3FŒ9FŒ™#7³Fê#Æ#ÆŒcÖH1ÇÈ!€•ŸïÌ¥‰ùŽÅì#ÎÌgƈ1ÇH=cÄ™9âÌ>âÌ1RDàÌ9°òó¹+˜-fŒsŒ3GnfÔ3FŒ9FŒ#Ƭ‘"cŽ‘C+?ߘëÁlq0cĘcĘ9r3k¤ž1bÌ1bÌ1fsŒXùùÎ<¤A‹Å<¤Áqæ! j¤ž1âÌ#k#Î<²922Á!€•ŸoÌ—$xe^à•õwI~WVß%ñ]Y{—¤weå]Þ•tw…ì®MuIt›æ’ä6Å%ÁmzKrÛԖĶi-I-+MBK½ð9 Ë1>úó(çÇ,Ým;V¥¯Ç(ó£ñ7P?#¿Óz_þq"ûýmó»ÜÁëw¿›`ùÁòz|ÛíÌÚÑìwM,¤…fÍ~F~§Ã:Z?kF³ßE±Öš5í©ž, Í]ËÌ`ÍŒf¿ cí/4kÞÈï4R •¹¡U|ö$´»é3Ðü.ßvsò°ohSaAB»›†æù~éhíèÍ~ëÇŽFhÖ´§z~° dáµ2š-Eb‡%4kÞÈï´EG«kmh…Ÿ}éÚÝt€E4ÜåÛn5öM¶|Š=aBëò;Ýhc“­¯bß(4k:@Èwù¶¹‹Eï/õ}3^<1sÁ„˜Ýatε©Û³>øµI`AÌ&GR€0wªÆIØ¿ÿüúOQu}ýçN5h=u÷ |ù§¨z|ù·õœÅîOïžó¶û××cdg¶ÌóôXñ½°ø³y‰ìÛ¶Ö +;ãÍï,_0 Ž 7k‚3ãŠM·à¼¿Dz,íÙ·qž<à¬åõ|Ád8¬Ðlåã…ëͺâM༿Dz¬áçpŸ?,pw SpeŸV.Å<î®cB`jgý%Òc±æp0©È>ðùÃÔxó»åõ|Á$8®¹,vŸÂõf_ñNFpÞ_"=Ve/,ÐÖWøù»­în`\òµmqeqÏ‚0‹÷H ®CHåàÆ&3°x×pÖ€‘œ-¯§,„[áú‰ù˜›&ÔÀd.¹œŒfÅì…e1,jÙ’µ¤07®Æ©Ø•@!Lqu aî\ƒFT7¡ fâJî–÷±ÃÝŽ+ÜM/»Þ·×üÀ"Îw·ÈZh!öùA¾™¿ÂóùÄý­ß˜?´Cœõá¾/¬ƒÏûAúÈ~—ö•ÒïüúÉi™Y“âˆì 1 ÃÄ<qVÖÆ¢4"›,YIsCk1#›0B3¡õ¤‹¹¡ð§ºÙ +U1š\o{¯Wµçlµ…ëé}ùëÑJ“ë­´\ZŽ!¶Ybþín¨جòþÖ_µ«Oâã;¼ª'®7wÓ›þàs×[i¸´"óØ]Œù·»©|Þ_µµOæÃûºªÝ'®7wS"øÜõVZ .-ÊÏÌW9p=òYÕþV<‰ï檶 ä뜸ùÌõVZ .­ËïŸÚÿŠ'óÁõ¦6¨¸Þ]/ÊœÁçý‡Ö†ªp.¾Êy ëÏú§¶ÀâI|t½©=*ù:ç…®>ë?´6T5|t1䜺îoýS»`ñ$>ºÞÔ6×»ëEi9ø¼ÿÐÚP•‹«+󕘸ø¬j#,žÌ7¤ºø:ç…®¾N}0ÿÐ qÖ‡»[”̓Ïû§öÂâI|—ä®ç×ONËÌê˜Gd±J³0|Ìó'aem,J#²gÙ\O…Ÿõ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ 
M®·Õ}›v¬½•p=Uí_^K¸žWƒ‘_´FCl³ÄüÛÝpÁf•÷·þ¦±x+¹M;V\﵇ª¢žçý%%ŠÖh»‹1ÿv7H>ïoÚ‹'ó¡jÛ´cÅõ^€¨ªLàyÞ_Re¢h†xf¾Êy€ë‘Ïú›vÄâI|¬Ð6íXÉ×9/p=òYI剢5šÇîbÌ?8p=Þßú›vÄâI|¬Æ6íXq½—"ªjxž÷—T£(Z£!^™¯Ä<¸ë‘Ïú›vÄâÉ|Cú`á|ó×#_§>˜_´FCœõa7ÁçýM;bñ$¾Kòˆúƒ_?9-3«cR‘]b•f!} ù‹“°²6¥Ùmd×Ëe×–ö¬ñÕ‹oÞöÅã÷n&´žt17´þT7[¡*fBK®—ÎÁx ›ªZèàÏ]-r½SkC”yñÔŒÏòÍÝN.xëŸÚ‹'ñÑõ¦v¬¸WU.ð<ôkm¨S1࣋!ßÜ-5Ÿ÷OíˆÅ“ùàzS;V\²«Šxúµ6Ô òÁÅÀW9t½©ò¦øÊ>¿t½©+ù:ç…®>ï×ÚP§]ÀGCþÁy ëMU0ü¹ÈŸ×îzéÜŠÇp±© +†¸úµ6ÔÉ–O¹¬ÌWbàzSE 0¬àË®—Ψxܳ>P…Uƒ|úˆò¬Öhˆ³>ÜÝâð^ðyÿÔŽX<‰ï’<ÂÇfÔ1â@aÐMŠC5Ú¢"mZ½!qVÖÆ¢4"{^›ëéŠÏúÜÐZÌÈ&ŒÐÅLh=ébnh#ü©n¶BUÌ„&×]Õ ÝÅFS5Cg_QUÍðӂȯZ£!ö²{U5'"ýÈCS5 žßÛæzÖæI¿®+®÷ó'MÕ <Ïû«Ö†:%>A©ªfè\dðy׎X<™§úºv¬¸Þ¡4U3ð<ï¯ZêD ùp¥ªš¡â³þ®±xOðuíXÉ×9/<ŠRUÍÀ‰¿|p+q¥ªš¡³¨Áçý];bñ$>žÖëÚ±âz?ŽÒTÍÀó¼¿jm¨“~/žú[™¯Ä<¸ë‘Ïú»vÄâÉ|Cú€‹‘¯s^x(¥ªšá]ú€[!ÎúðS)MÕ ÞPÌïms=k_’} ×ONËÌê˜Gd×X¥YgSjT3âÔr°-J#²{Ë®ç_ŤŒ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®÷Ó[ÎÏŸŸåùËãý¤†¿õ8šÇ÷‰<ÿÓ”ÿüúøýQ=Æ×oÏ?~}üøåzÖöüúχÿeK}ŽãýÜò±ž_{üôCýô¹Ï£üÐ>}¥”:~ü<ðóÄωŸ×§Ÿ¿þåíÝíýŸšÇóë/Îñé뿟ߟvñvÿ®Ý¾k×ïÚeoë»öõ]{Þí·¯??·~¢ûCÞŸ#²ÚaYïá÷…–aXŽí)Ãüã—÷'W¯û³êíãýŸ¼žkà®3Ô{οþõÝ*ŸþðøÓ×Çßßÿþ{Ü +endstream +endobj +1407 0 obj +<< +/Filter /FlateDecode +/Subtype /Type1C +/Length 13073 +>> +stream +xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs +ÏÇo”dz%!)&cÒæ”¤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ +¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ +ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– +Â&a‡°GxEø¡ð¡ð ! 
ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ +Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 +«CÁ +»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w +v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7І%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ +ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ +èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ΜG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøÈª‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) +sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/Ì寿ªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²ã¶¢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( +Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ +ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï +;žÓ¸‹FÄd4½…‚ïLûí“‹=wް%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ +GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ר.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïØ¤žAïW—v²àeÌÕÒñùåÇYÏ 
BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ +‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ߕռÒ2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ +œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu +¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr +0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äóæ¾ž7á= +%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl +»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•ѵa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçÆ±€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 +ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ +ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( +½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. 
øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η + Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi +õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø +¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ +Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*ކŽ7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@˩ԷW‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥Ågrާt0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª +ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& +‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä +]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² + ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃƒlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ 
íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ +SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døÑª‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. +¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæØ‡ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« +g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 +Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E +G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r 
&%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA +à= +®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ +è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞÆ‡Å’¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ +endstream +endobj +1408 0 obj +<< +/Filter /FlateDecode +/Subtype /Type1C +/Length 11578 +>> +stream +xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? 
µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» +bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o +‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU +‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûξXËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ +´Õ= +«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið +ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É +|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajM肚Ém¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøÐ‰•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ +SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi +‹» xŸÃk\_¥OÌ +O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôѰ€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| +ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#а6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ +·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á 
¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²Gá§ŠRjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä +„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy +GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØÎ“ë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO +è ~nè¸Ó\“¡ÍÑeåf +Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QЦŽmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW +å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊᡞž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ +Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe +€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'à¶Š + iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ +Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ +(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígϰçÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ +œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ +Ë.ŠöUŸ 
ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} +lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒёРIòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L +³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië +H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ +Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú +?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” +å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ +Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, +¬Á®¶QU¯ާ_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm +êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ +ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ +Ê Å±»ò + Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄE鯶îÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG +mÙ_¨&òK + ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë +ó‹‹Ê„²êlŽG™ƒýUª€ÝW޶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| 
ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ +åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ +Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ +ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ +ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ +endstream +endobj +1411 0 obj +<< +/Length 3049 +>> +stream 0 g 0 G -/F51 9.9626 Tf -222.215 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(res)]TJ 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F52 9.9626 Tf 176.182 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +BT +/F84 8.9664 Tf 260.579 645.656 Td [(Pr)18(ocess)-250(0)-7729(Pr)18(ocess)-250(1)]TJ -31.696 -10.959 Td [(I)-1333(GLOB\050I\051)-1334(X\050I\051)-4663(I)-1333(GLOB\050I\051)-1333(X\050I\051)]TJ -1.461 -10.959 Td [(1)-4607(1)-1754(1.0)-4500(1)-4107(33)-1753(2.0)]TJ 0 -10.959 Td [(2)-4607(2)-1754(1.0)-4500(2)-4107(34)-1753(2.0)]TJ 0 
-10.959 Td [(3)-4607(3)-1754(1.0)-4500(3)-4107(35)-1753(2.0)]TJ 0 -10.959 Td [(4)-4607(4)-1754(1.0)-4500(4)-4107(36)-1753(2.0)]TJ 0 -10.959 Td [(5)-4607(5)-1754(1.0)-4500(5)-4107(37)-1753(2.0)]TJ 0 -10.959 Td [(6)-4607(6)-1754(1.0)-4500(6)-4107(38)-1753(2.0)]TJ 0 -10.959 Td [(7)-4607(7)-1754(1.0)-4500(7)-4107(39)-1753(2.0)]TJ 0 -10.958 Td [(8)-4607(8)-1754(1.0)-4500(8)-4107(40)-1753(2.0)]TJ 0 -10.959 Td [(9)-4607(9)-1754(1.0)-4500(9)-4107(41)-1753(2.0)]TJ -4.484 -10.959 Td [(10)-4107(10)-1754(1.0)-4000(10)-4107(42)-1753(2.0)]TJ 0 -10.959 Td [(11)-4107(11)-1754(1.0)-4000(11)-4107(43)-1753(2.0)]TJ 0 -10.959 Td [(12)-4107(12)-1754(1.0)-4000(12)-4107(44)-1753(2.0)]TJ 0 -10.959 Td [(13)-4107(13)-1754(1.0)-4000(13)-4107(45)-1753(2.0)]TJ 0 -10.959 Td [(14)-4107(14)-1754(1.0)-4000(14)-4107(46)-1753(2.0)]TJ 0 -10.959 Td [(15)-4107(15)-1754(1.0)-4000(15)-4107(47)-1753(2.0)]TJ 0 -10.959 Td [(16)-4107(16)-1754(1.0)-4000(16)-4107(48)-1753(2.0)]TJ 0 -10.959 Td [(17)-4107(17)-1754(1.0)-4000(17)-4107(49)-1753(2.0)]TJ 0 -10.958 Td [(18)-4107(18)-1754(1.0)-4000(18)-4107(50)-1753(2.0)]TJ 0 -10.959 Td [(19)-4107(19)-1754(1.0)-4000(19)-4107(51)-1753(2.0)]TJ 0 -10.959 Td [(20)-4107(20)-1754(1.0)-4000(20)-4107(52)-1753(2.0)]TJ 0 -10.959 Td [(21)-4107(21)-1754(1.0)-4000(21)-4107(53)-1753(2.0)]TJ 0 -10.959 Td [(22)-4107(22)-1754(1.0)-4000(22)-4107(54)-1753(2.0)]TJ 0 -10.959 Td [(23)-4107(23)-1754(1.0)-4000(23)-4107(55)-1753(2.0)]TJ 0 -10.959 Td [(24)-4107(24)-1754(1.0)-4000(24)-4107(56)-1753(2.0)]TJ 0 -10.959 Td [(25)-4107(25)-1754(1.0)-4000(25)-4107(57)-1753(2.0)]TJ 0 -10.959 Td [(26)-4107(26)-1754(1.0)-4000(26)-4107(58)-1753(2.0)]TJ 0 -10.959 Td [(27)-4107(27)-1754(1.0)-4000(27)-4107(59)-1753(2.0)]TJ 0 -10.958 Td [(28)-4107(28)-1754(1.0)-4000(28)-4107(60)-1753(2.0)]TJ 0 -10.959 Td [(29)-4107(29)-1754(1.0)-4000(29)-4107(61)-1753(2.0)]TJ 0 -10.959 Td [(30)-4107(30)-1754(1.0)-4000(30)-4107(62)-1753(2.0)]TJ 0 -10.959 Td [(31)-4107(31)-1754(1.0)-4000(31)-4107(63)-1753(2.0)]TJ 0 -10.959 Td 
[(32)-4107(32)-1754(1.0)-4000(32)-4107(64)-1753(2.0)]TJ 0 -10.959 Td [(33)-4107(33)-1754(2.0)-4000(33)-4107(25)-1753(1.0)]TJ 0 -10.959 Td [(34)-4107(34)-1754(2.0)-4000(34)-4107(26)-1753(1.0)]TJ 0 -10.959 Td [(35)-4107(35)-1754(2.0)-4000(35)-4107(27)-1753(1.0)]TJ 0 -10.959 Td [(36)-4107(36)-1754(2.0)-4000(36)-4107(28)-1753(1.0)]TJ 0 -10.959 Td [(37)-4107(37)-1754(2.0)-4000(37)-4107(29)-1753(1.0)]TJ 0 -10.958 Td [(38)-4107(38)-1754(2.0)-4000(38)-4107(30)-1753(1.0)]TJ 0 -10.959 Td [(39)-4107(39)-1754(2.0)-4000(39)-4107(31)-1753(1.0)]TJ 0 -10.959 Td [(40)-4107(40)-1754(2.0)-4000(40)-4107(32)-1753(1.0)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -106.261 Td [(45)]TJ +/F84 9.9626 Tf 94.641 -105.903 Td [(62)]TJ 0 g 0 G ET endstream endobj -1145 0 obj +1418 0 obj << -/Length 5385 +/Length 7845 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.10)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(5.2)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(matrix)]TJ/F52 9.9626 Tf 208.231 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(:)]TJ/F52 9.9626 Tf -74.65 -33.873 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.788 0 Td [(1)]TJ/F83 10.3811 Tf 7.873 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.335 0 Td [(A)]TJ/F83 10.3811 Tf 7.442 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td 
[(1)]TJ/F54 9.9626 Tf -198.652 -20.06 Td [(wher)18(e:)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ/F84 9.9626 Tf -51.429 -18.964 Td [(These)-250(subr)18(outines)-250(applies)-250(an)-250(overlap)-250(operator)-250(to)-250(the)-250(input)-250(vector:)]TJ/F78 9.9626 Tf 154.826 -23.824 Td [(x)]TJ/F190 10.3811 Tf 8.098 0 Td [(\040)]TJ/F78 9.9626 Tf 13.497 0 Td [(Q)-42(x)]TJ/F84 9.9626 Tf -176.531 -21.014 Td [(wher)18(e:)]TJ +0 g 0 G +/F78 9.9626 Tf 0.712 -19.203 Td [(x)]TJ 0 g 0 G -/F52 9.9626 Tf 0.622 -19.925 Td [(A)]TJ +/F84 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(dense)-250(submatrix)]TJ/F78 9.9626 Tf 131.351 0 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F52 9.9626 Tf 125.981 0 Td [(A)]TJ + -141.607 -19.564 Td [(Q)]TJ +0 g 0 G +/F84 9.9626 Tf 12.856 0 Td [(is)-250(the)-250(overlap)-250(operator;)-250(it)-250(is)-250(the)-250(composition)-250(of)-250(two)-250(operators)]TJ/F78 9.9626 Tf 271.932 0 Td [(P)]TJ/F78 7.5716 Tf 5.423 -1.494 Td [(a)]TJ/F84 9.9626 Tf 6.446 1.494 Td [(and)]TJ/F78 9.9626 Tf 19.681 0 Td [(P)]TJ/F78 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F84 9.9626 Tf 5.4 -3.616 Td [(.)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 229.61 588.515 cm -[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +1 0 0 1 179.582 581.71 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 236.21 579.947 Td [(A)]TJ/F51 9.9626 Tf 120.293 0 Td [(Function)]TJ +/F78 9.9626 Tf 185.854 573.142 Td [(x)]TJ/F75 9.9626 Tf 120.621 0 Td [(Subroutine)]TJ ET q -1 0 0 1 229.61 576.161 cm -[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +1 0 0 1 179.582 569.356 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 235.587 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F84 9.9626 Tf 185.56 560.788 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 372.821 567.793 cm +1 0 0 1 322.794 560.988 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 
567.594 Td [(spnrm1)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F84 9.9626 Tf 325.783 560.788 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 372.821 555.838 cm +1 0 0 1 322.794 549.032 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 555.638 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F84 9.9626 Tf 325.783 548.833 Td [(ovrl)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 372.821 543.882 cm +1 0 0 1 322.794 537.077 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 543.683 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F84 9.9626 Tf 325.783 536.878 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 372.821 531.927 cm +1 0 0 1 322.794 525.122 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 531.728 Td [(spnrm1)]TJ +/F84 9.9626 Tf 325.783 524.923 Td [(ovrl)]TJ ET q -1 0 0 1 229.61 527.942 cm -[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +1 0 0 1 179.582 521.137 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 278.277 499.564 Td [(T)92(able)-250(10:)-310(Data)-250(types)]TJ +/F84 9.9626 Tf 227.467 492.758 Td [(T)92(able)-250(18:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -127.572 -23.549 Td [(call)]TJ 0 g 0 G + [-525(psb_ovrl\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -127.572 -33.873 Td [(psb_spnrm1\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_norm1\050A,)-525(desc_a,)-525(info\051)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [-525(info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + 
[-525(psb_ovrl\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(update)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(update_type,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(work)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(work\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.014 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -19.564 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -19.926 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 194.722 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -187.095 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ + 0 -19.564 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 89.688 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -79.949 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 349.291 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.242 0 Td [(psb)]TJ +/F145 9.9626 Tf 1 0 0 1 369.545 349.291 Tm [(psb)]TJ ET q -1 0 0 1 324.173 344.346 cm +1 0 0 1 385.864 
349.49 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 344.147 Td [(Tspmat)]TJ +/F145 9.9626 Tf 389.002 349.291 Td [(T)]TJ ET q -1 0 0 1 359.321 344.346 cm +1 0 0 1 394.86 349.49 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 362.459 344.147 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -232.675 -19.926 Td [(desc)]TJ +/F145 9.9626 Tf 397.998 349.291 Td [(vect)]TJ ET q -1 0 0 1 171.218 324.421 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.547 349.49 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 174.207 324.221 Td [(a)]TJ +/F145 9.9626 Tf 422.685 349.291 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F84 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ + [-250(18)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.564 Td [(desc)]TJ ET q -1 0 0 1 324.173 276.6 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.408 317.971 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 327.311 276.401 Td [(desc)]TJ +/F75 9.9626 Tf 123.397 317.772 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 
Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 348.86 276.6 cm +1 0 0 1 309.258 270.151 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 276.401 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.214 -19.926 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F52 9.9626 Tf 150.4 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -205.587 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -54.456 Td [(46)]TJ -0 g 0 G -ET - -endstream -endobj -1152 0 obj -<< -/Length 5403 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.11)-1000(psb)]TJ +/F145 9.9626 Tf 312.397 269.951 Td [(desc)]TJ ET q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 333.945 270.151 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F54 9.9626 Tf -57.098 -18.964 Td 
[(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(matrix)]TJ/F52 9.9626 Tf 235.459 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(:)]TJ/F52 9.9626 Tf -102.327 -33.873 Td [(n)-15(r)-35(m)-18(i)]TJ/F83 10.3811 Tf 23.698 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.336 0 Td [(A)]TJ/F83 10.3811 Tf 7.442 0 Td [(k)]TJ/F96 7.5716 Tf 5.409 -1.494 Td [(\245)]TJ/F54 9.9626 Tf -196.335 -20.424 Td [(wher)18(e:)]TJ +/F145 9.9626 Tf 337.084 269.951 Td [(type)]TJ 0 g 0 G -/F52 9.9626 Tf 0.623 -19.925 Td [(A)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F52 9.9626 Tf 125.981 0 Td [(A)]TJ +/F75 9.9626 Tf -258.11 -19.564 Td [(update)]TJ 0 g 0 G +/F84 9.9626 Tf 36.523 0 Td [(Update)-250(operator)74(.)]TJ 0 g 0 G -0 g 0 G -ET -q -1 0 0 1 179.842 588.515 cm -[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S -Q -BT -/F52 9.9626 Tf 186.442 579.947 Td [(A)]TJ/F51 9.9626 Tf 120.292 0 Td [(Function)]TJ +/F75 9.9626 Tf -11.616 -31.519 Td [(update)-250(=)-250(psb)]TJ ET q -1 0 0 1 179.842 576.161 cm -[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S -Q -BT -/F54 9.9626 Tf 185.819 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 323.053 567.793 cm +1 0 0 1 184.558 219.067 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 567.594 Td [(spnrmi)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F75 9.9626 Tf 187.546 218.868 Td [(none)]TJ ET q -1 0 0 1 323.053 555.838 cm +1 0 0 1 210.839 219.067 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G BT -/F54 9.9626 Tf 326.042 555.638 Td [(spnrmi)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F84 9.9626 Tf 218.809 218.868 Td [(Do)-250(nothing;)]TJ +0 g 0 G +/F75 9.9626 Tf -94.007 -15.579 Td [(update)-250(=)-250(psb)]TJ ET q -1 0 0 1 323.053 543.882 cm +1 0 0 1 184.558 203.488 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 543.683 Td [(spnrmi)]TJ 
-140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F75 9.9626 Tf 187.546 203.289 Td [(add)]TJ ET q -1 0 0 1 323.053 531.927 cm +1 0 0 1 205.3 203.488 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -BT -/F54 9.9626 Tf 326.042 531.728 Td [(spnrmi)]TJ -ET -q -1 0 0 1 179.842 527.942 cm -[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S -Q 0 g 0 G BT -/F54 9.9626 Tf 227.467 499.564 Td [(T)92(able)-250(11:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -127.572 -33.873 Td [(psb_spnrmi\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_normi\050A,)-525(desc_a,)-525(info\051)]TJ +/F84 9.9626 Tf 213.27 203.289 Td [(Sum)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F78 9.9626 Tf 137.239 0 Td [(P)]TJ/F78 7.5716 Tf 6.404 3.617 Td [(T)]TJ/F84 9.9626 Tf 5.401 -3.617 Td [(;)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 194.722 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(.)]TJ -187.096 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F75 9.9626 Tf -237.512 -15.579 Td [(update)-250(=)-250(psb)]TJ ET q -1 0 0 1 273.363 344.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 184.558 187.91 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 276.501 344.147 Td [(Tspmat)]TJ +/F75 9.9626 Tf 187.546 187.71 Td [(avg)]TJ ET q -1 0 0 1 308.511 344.346 cm -[]0 d 0 J 0.398 w 0 0 
m 3.138 0 l S +1 0 0 1 204.204 187.91 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -BT -/F59 9.9626 Tf 311.649 344.147 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -232.676 -19.926 Td [(desc)]TJ +BT +/F84 9.9626 Tf 211.785 187.71 Td [(A)92(verage)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F78 9.9626 Tf 153.667 0 Td [(P)]TJ/F78 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F78 9.9626 Tf 4.278 1.494 Td [(P)]TJ/F78 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F84 9.9626 Tf 5.401 -3.617 Td [(;)]TJ -262.158 -19.564 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F78 9.9626 Tf 38.64 0 Td [(u)-80(p)-25(d)-40(a)-25(t)-25(e)]TJ ET q -1 0 0 1 120.408 324.421 cm +1 0 0 1 193.225 144.435 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 324.221 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F78 9.9626 Tf 196.338 144.236 Td [(t)-25(y)-80(p)-25(e)]TJ/F192 10.3811 Tf 21.467 0 Td [(=)]TJ/F78 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ ET q -1 0 0 1 273.363 276.6 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 244.129 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 276.501 276.401 Td [(desc)]TJ +/F78 9.9626 Tf 247.391 144.236 Td [(a)-25(v)-47(g)]TJ ET q -1 0 0 1 298.05 276.6 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 263.217 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 
9.9626 Tf 301.189 276.401 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -19.926 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(in\002nity-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F52 9.9626 Tf 177.627 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -232.814 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(info)]TJ +/F84 9.9626 Tf 124.802 132.281 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -54.456 Td [(47)]TJ + 141.968 -29.888 Td [(63)]TJ 0 g 0 G ET endstream endobj -1163 0 obj +1427 0 obj << -/Length 7234 +/Length 5934 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.12)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Product)]TJ/F54 9.9626 Tf -57.098 -19.303 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct:)]TJ/F52 9.9626 Tf 140.147 -24.611 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.397 0 Td 
[(a)]TJ/F52 9.9626 Tf 6.008 0 Td [(A)-42(x)]TJ/F85 10.3811 Tf 14.878 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 133.513 0 Td [(\0501\051)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(work)]TJ 0 g 0 G -/F52 9.9626 Tf -194.745 -20.13 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.398 0 Td [(a)]TJ/F52 9.9626 Tf 6.007 0 Td [(A)]TJ/F52 7.5716 Tf 7.511 4.115 Td [(T)]TJ/F52 9.9626 Tf 5.694 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ +/F84 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(dimensional)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F78 9.9626 Tf 252.795 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 130.715 0 Td [(\0502\051)]TJ +/F75 9.9626 Tf -282.906 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G -/F52 9.9626 Tf -195.482 -20.129 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.397 0 Td [(a)]TJ/F52 9.9626 Tf 6.008 0 Td [(A)]TJ/F52 7.5716 Tf 7.7 4.114 Td [(H)]TJ/F52 9.9626 Tf 6.981 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 129.977 0 Td [(\0503\051)]TJ + 0 -19.925 Td [(x)]TJ 0 g 0 G - -317.15 -18.633 Td [(wher)18(e:)]TJ +/F84 9.9626 Tf 9.962 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F78 9.9626 Tf 117.085 0 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 
9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 1.02 0 0 1 175.611 570.637 Tm [(Speci\002ed)-330(as:)-475(an)-331(array)-330(of)-331(rank)-330(one)-330(or)-331(two)-330(containing)-331(numbers)-330(of)-331(type)]TJ 1 0 0 1 175.611 558.682 Tm [(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(18)]TJ 0 g 0 G -/F52 9.9626 Tf -14.65 -20.451 Td [(x)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.61 0 Td [(x)]TJ/F54 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ +/F75 9.9626 Tf -24.906 -19.925 Td [(info)]TJ 0 g 0 G -/F52 9.9626 Tf -131.167 -19.132 Td [(y)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.441 0 Td [(y)]TJ/F54 7.5716 Tf 5.2 -1.494 Td [(:)-13(,)-12(:)]TJ +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F52 9.9626 Tf -130.23 -19.131 Td [(A)]TJ + 0.98 0 0 1 175.611 449.093 Tm [(If)-213(ther)18(e)-213(is)-214(no)-213(overlap)-214(in)-213(the)-213(data)-214(distribution)-213(associated)-214(with)-213(the)-213(descriptor)75(,)]TJ 1 0 0 1 175.611 437.138 Tm [(no)-250(operations)-250(ar)18(e)-250(performed;)]TJ 0 g 0 G -/F54 9.9626 Tf 12.299 0 Td [(is)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 118.41 0 Td [(A)]TJ + -12.453 -19.926 Td [(2.)]TJ 0 g 0 G + 1.017 0 0 1 175.303 417.212 Tm [(The)-245(operator)]TJ/F78 9.9626 Tf 1 0 0 1 235.937 
417.212 Tm [(P)]TJ/F78 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F84 9.9626 Tf 1.017 0 0 1 250.228 417.212 Tm [(performs)-245(the)-245(r)17(eduction)-245(sum)-245(of)-246(overlap)-245(elements;)-245(it)-245(is)-246(a)]TJ 1.009 0 0 1 174.117 405.257 Tm [(\223pr)18(olongation\224)-248(operator)]TJ/F78 9.9626 Tf 1 0 0 1 285.294 405.257 Tm [(P)]TJ/F78 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F84 9.9626 Tf 1.009 0 0 1 299.591 405.257 Tm [(that)-248(r)18(eplicates)-248(overlap)-247(elements,)-248(accounting)]TJ 1 0 0 1 175.611 393.302 Tm [(for)-250(the)-250(physical)-250(r)18(eplication)-250(of)-250(data;)]TJ 0 g 0 G + -12.453 -19.925 Td [(3.)]TJ 0 g 0 G -ET -q -1 0 0 1 230.392 517.986 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.992 509.418 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(,)]TJ/F60 9.9626 Tf 5.355 0 Td [(b)]TJ/F51 9.9626 Tf 76.437 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 505.633 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 236.369 497.065 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 373.603 497.264 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 497.065 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 373.603 485.309 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 485.11 Td [(spmm)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 373.603 473.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 473.154 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 373.603 461.398 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 461.199 Td [(spmm)]TJ -ET -q -1 0 0 1 230.392 457.413 cm -[]0 d 0 J 
0.398 w 0 0 m 184.337 0 l S -Q + 0.98 0 0 1 175.303 373.377 Tm [(The)-234(operator)]TJ/F78 9.9626 Tf 1 0 0 1 233.533 373.377 Tm [(P)]TJ/F78 7.5716 Tf 5.424 -1.495 Td [(a)]TJ/F84 9.9626 Tf 0.98 0 0 1 245.201 373.377 Tm [(performs)-234(a)-235(scaling)-235(on)-234(the)-235(overlap)-234(elements)-235(by)-234(the)-235(amount)]TJ 0.987 0 0 1 175.611 361.422 Tm [(of)-254(r)18(eplication;)-255(thus,)-255(when)-254(combined)-255(with)-254(the)-255(r)19(eduction)-255(operator)75(,)-254(it)-255(imple-)]TJ 1 0 0 1 175.611 349.466 Tm [(ments)-250(the)-250(average)-250(of)-250(r)18(eplicated)-250(elements)-250(over)-250(all)-250(of)-250(their)-250(instances.)]TJ/F75 11.9552 Tf 1.02 0 0 1 150.705 329.541 Tm [(Example)-276(of)-275(use)]TJ/F84 9.9626 Tf 1.02 0 0 1 238.812 329.541 Tm [(Consider)-276(the)-276(discr)18(etization)-276(mesh)-275(depicted)-276(in)-276(\002g.)]TJ +0 0 1 rg 0 0 1 RG + [-276(4)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 278.277 429.035 Td [(T)92(able)-250(12:)-310(Data)-250(types)]TJ + [(,)-283(parti-)]TJ 0.983 0 0 1 150.705 317.586 Tm [(tioned)-254(among)-254(two)-255(pr)19(ocesses)-254(as)-255(shown)-254(by)-254(the)-254(dashed)-255(lines,)-254(with)-254(an)-254(overlap)-254(of)-255(1)]TJ 1.02 0 0 1 150.705 305.631 Tm [(extra)-266(layer)-266(with)-266(r)17(esp)1(ect)-267(to)-266(the)-266(partition)-266(of)-266(\002g.)]TJ +0 0 1 rg 0 0 1 RG + [-266(3)]TJ 0 g 0 G + [(;)-276(the)-266(data)-267(distributi)1(on)-267(is)-266(such)]TJ 1.009 0 0 1 150.705 293.676 Tm [(that)-247(each)-247(pr)18(ocess)-247(will)-247(own)-247(40)-247(entries)-247(in)-247(the)-247(index)-247(space,)-247(with)-247(an)-247(overlap)-247(of)-247(16)]TJ 1 0 0 1 150.705 281.72 Tm [(entries)-250(placed)-251(at)-250(local)-251(indices)-250(25)-251(thr)18(ough)-250(40;)-251(the)-251(halo)-250(will)-251(r)8(un)-250(fr)18(om)-251(local)-250(index)]TJ 0.993 0 0 1 150.705 269.765 Tm 
[(41)-252(thr)18(ough)-252(local)-252(index)-252(48..)-313(If)-253(pr)19(ocess)-253(0)-252(assigns)-252(an)-252(initial)-252(value)-252(of)-252(1)-252(to)-252(its)-252(entries)]TJ 1.006 0 0 1 150.705 257.81 Tm [(in)-248(the)]TJ/F78 9.9626 Tf 1 0 0 1 178.629 257.81 Tm [(x)]TJ/F84 9.9626 Tf 1.006 0 0 1 186.324 257.81 Tm [(vector)74(,)-249(and)-248(pr)18(ocess)-249(1)-248(assigns)-249(a)-248(value)-248(of)-249(2,)-248(then)-249(after)-248(a)-249(call)-248(to)]TJ/F145 9.9626 Tf 1 0 0 1 452.573 257.81 Tm [(psb_ovrl)]TJ/F84 9.9626 Tf 1.006 0 0 1 150.286 245.855 Tm [(with)]TJ/F145 9.9626 Tf 1 0 0 1 173.159 245.855 Tm [(psb_avg_)]TJ/F84 9.9626 Tf 1.006 0 0 1 217.499 245.855 Tm [(and)-249(a)-249(call)-250(to)]TJ/F145 9.9626 Tf 1 0 0 1 273.502 245.855 Tm [(psb_halo_)]TJ/F84 9.9626 Tf 1.006 0 0 1 323.072 245.855 Tm [(the)-249(contents)-249(of)-250(t)1(he)-250(local)-249(vectors)-249(will)-249(be)]TJ 1 0 0 1 150.705 233.9 Tm [(the)-250(following)-250(\050showing)-250(a)-250(transition)-250(among)-250(the)-250(two)-250(subdomains\051)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -107.398 -24.261 Td [(call)]TJ + 166.874 -143.462 Td [(64)]TJ 0 g 0 G - [-525(psb_spmm\050alpha,)-525(a,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -14.944 -11.955 Td [(call)]TJ +ET + +endstream +endobj +1435 0 obj +<< +/Length 3551 +>> +stream 0 g 0 G - [-525(psb_spmm\050alpha,)-525(a,)-525(x,)-525(beta,)-525(y,desc_a,)-525(info,)-525(trans,)-525(work\051)]TJ 0 g 0 G -/F51 9.9626 Tf -5.23 -22.618 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.626 Td [(On)-250(Entry)]TJ 0 g 0 G +BT +/F84 7.9701 Tf 214.996 653.177 Td [(Pr)18(ocess)-250(0)-8396(Pr)18(ocess)-250(1)]TJ -31.163 -9.464 Td [(I)-1500(GLOB\050I\051)-1500(X\050I\051)-5163(I)-1500(GLOB\050I\051)-1500(X\050I\051)]TJ -1.299 -9.465 Td [(1)-4774(1)-1920(1.0)-5000(1)-4274(33)-1920(1.5)]TJ 0 -9.464 Td 
[(2)-4774(2)-1920(1.0)-5000(2)-4274(34)-1920(1.5)]TJ 0 -9.465 Td [(3)-4774(3)-1920(1.0)-5000(3)-4274(35)-1920(1.5)]TJ 0 -9.464 Td [(4)-4774(4)-1920(1.0)-5000(4)-4274(36)-1920(1.5)]TJ 0 -9.465 Td [(5)-4774(5)-1920(1.0)-5000(5)-4274(37)-1920(1.5)]TJ 0 -9.464 Td [(6)-4774(6)-1920(1.0)-5000(6)-4274(38)-1920(1.5)]TJ 0 -9.465 Td [(7)-4774(7)-1920(1.0)-5000(7)-4274(39)-1920(1.5)]TJ 0 -9.464 Td [(8)-4774(8)-1920(1.0)-5000(8)-4274(40)-1920(1.5)]TJ 0 -9.465 Td [(9)-4774(9)-1920(1.0)-5000(9)-4274(41)-1920(2.0)]TJ -3.985 -9.464 Td [(10)-4274(10)-1920(1.0)-4500(10)-4274(42)-1920(2.0)]TJ 0 -9.465 Td [(11)-4274(11)-1920(1.0)-4500(11)-4274(43)-1920(2.0)]TJ 0 -9.464 Td [(12)-4274(12)-1920(1.0)-4500(12)-4274(44)-1920(2.0)]TJ 0 -9.465 Td [(13)-4274(13)-1920(1.0)-4500(13)-4274(45)-1920(2.0)]TJ 0 -9.464 Td [(14)-4274(14)-1920(1.0)-4500(14)-4274(46)-1920(2.0)]TJ 0 -9.465 Td [(15)-4274(15)-1920(1.0)-4500(15)-4274(47)-1920(2.0)]TJ 0 -9.464 Td [(16)-4274(16)-1920(1.0)-4500(16)-4274(48)-1920(2.0)]TJ 0 -9.465 Td [(17)-4274(17)-1920(1.0)-4500(17)-4274(49)-1920(2.0)]TJ 0 -9.464 Td [(18)-4274(18)-1920(1.0)-4500(18)-4274(50)-1920(2.0)]TJ 0 -9.465 Td [(19)-4274(19)-1920(1.0)-4500(19)-4274(51)-1920(2.0)]TJ 0 -9.464 Td [(20)-4274(20)-1920(1.0)-4500(20)-4274(52)-1920(2.0)]TJ 0 -9.465 Td [(21)-4274(21)-1920(1.0)-4500(21)-4274(53)-1920(2.0)]TJ 0 -9.464 Td [(22)-4274(22)-1920(1.0)-4500(22)-4274(54)-1920(2.0)]TJ 0 -9.465 Td [(23)-4274(23)-1920(1.0)-4500(23)-4274(55)-1920(2.0)]TJ 0 -9.464 Td [(24)-4274(24)-1920(1.0)-4500(24)-4274(56)-1920(2.0)]TJ 0 -9.465 Td [(25)-4274(25)-1920(1.5)-4500(25)-4274(57)-1920(2.0)]TJ 0 -9.464 Td [(26)-4274(26)-1920(1.5)-4500(26)-4274(58)-1920(2.0)]TJ 0 -9.465 Td [(27)-4274(27)-1920(1.5)-4500(27)-4274(59)-1920(2.0)]TJ 0 -9.464 Td [(28)-4274(28)-1920(1.5)-4500(28)-4274(60)-1920(2.0)]TJ 0 -9.465 Td [(29)-4274(29)-1920(1.5)-4500(29)-4274(61)-1920(2.0)]TJ 0 -9.464 Td [(30)-4274(30)-1920(1.5)-4500(30)-4274(62)-1920(2.0)]TJ 0 -9.465 Td 
[(31)-4274(31)-1920(1.5)-4500(31)-4274(63)-1920(2.0)]TJ 0 -9.464 Td [(32)-4274(32)-1920(1.5)-4500(32)-4274(64)-1920(2.0)]TJ 0 -9.465 Td [(33)-4274(33)-1920(1.5)-4500(33)-4274(25)-1920(1.5)]TJ 0 -9.464 Td [(34)-4274(34)-1920(1.5)-4500(34)-4274(26)-1920(1.5)]TJ 0 -9.465 Td [(35)-4274(35)-1920(1.5)-4500(35)-4274(27)-1920(1.5)]TJ 0 -9.464 Td [(36)-4274(36)-1920(1.5)-4500(36)-4274(28)-1920(1.5)]TJ 0 -9.465 Td [(37)-4274(37)-1920(1.5)-4500(37)-4274(29)-1920(1.5)]TJ 0 -9.464 Td [(38)-4274(38)-1920(1.5)-4500(38)-4274(30)-1920(1.5)]TJ 0 -9.465 Td [(39)-4274(39)-1920(1.5)-4500(39)-4274(31)-1920(1.5)]TJ 0 -9.464 Td [(40)-4274(40)-1920(1.5)-4500(40)-4274(32)-1920(1.5)]TJ 0 -9.465 Td [(41)-4274(41)-1920(2.0)-4500(41)-4274(17)-1920(1.0)]TJ 0 -9.464 Td [(42)-4274(42)-1920(2.0)-4500(42)-4274(18)-1920(1.0)]TJ 0 -9.465 Td [(43)-4274(43)-1920(2.0)-4500(43)-4274(19)-1920(1.0)]TJ 0 -9.464 Td [(44)-4274(44)-1920(2.0)-4500(44)-4274(20)-1920(1.0)]TJ 0 -9.465 Td [(45)-4274(45)-1920(2.0)-4500(45)-4274(21)-1920(1.0)]TJ 0 -9.464 Td [(46)-4274(46)-1920(2.0)-4500(46)-4274(22)-1920(1.0)]TJ 0 -9.465 Td [(47)-4274(47)-1920(2.0)-4500(47)-4274(23)-1920(1.0)]TJ 0 -9.464 Td [(48)-4274(48)-1920(2.0)-4500(48)-4274(24)-1920(1.0)]TJ 0 g 0 G - 0 -20.626 Td [(alpha)]TJ 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.368 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(12)]TJ +/F84 9.9626 Tf 88.221 -98.979 Td [(65)]TJ 0 g 0 G - [(.)]TJ +ET + +endstream +endobj +1439 0 obj +<< +/Length 321 +>> +stream 0 g 0 G -/F51 9.9626 Tf -24.907 -20.626 Td [(a)]TJ 0 
g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 164.964 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -157.337 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.242 0 Td [(psb)]TJ -ET -q -1 0 0 1 324.173 212.882 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 327.311 212.682 Td [(Tspmat)]TJ -ET -q -1 0 0 1 359.321 212.882 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 362.459 212.682 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -232.675 -20.625 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 144.236 Td [(T)]TJ -ET +1 0 0 1 154.862 292.88 cm q -1 0 0 1 445.669 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 144.236 Td [(vect)]TJ -ET +.65 0 0 .65 0 0 cm q -1 0 0 1 470.356 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 0 0 cm +/Im5 Do Q +Q +0 g 0 
G +1 0 0 1 -154.862 -292.88 cm BT -/F59 9.9626 Tf 473.495 144.236 Td [(type)]TJ +/F84 9.9626 Tf 240.086 261 Td [(Figur)18(e)-250(4:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-278(12)]TJ 0 g 0 G - [(.)-395(The)-278(rank)-279(of)]TJ/F52 9.9626 Tf 275.498 0 Td [(x)]TJ/F54 9.9626 Tf 7.978 0 Td [(must)-278(be)]TJ -283.476 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G - 83.916 -29.888 Td [(48)]TJ + 77.493 -170.562 Td [(66)]TJ 0 g 0 G ET endstream endobj -1178 0 obj +1422 0 obj << -/Length 6532 +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/try8x8_ov.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 1441 0 R +/BBox [0 0 516 439] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 1442 0 R +>>/Font << /R8 1443 0 R/R10 1444 0 R>> +>> +/Length 3413 +/Filter /FlateDecode +>> +stream +xœ…›Aä¸ …ïõ+ê8s˜^Ë’-é SvÒ·Ážj‘éÃ&‡üýØä{|ÔI°‡¦(ÚõmÑ|%ZšßŸÛ[yn÷øûúxüôµ?û×£½Ï?lîùõO³ögïíùaV+íùÇ_Í.µÐ=äÝ¥Mù÷:éß·šü³†ÿèòןZKúضÅÇÖŸ[J'¥™Ä´8ÍžùºMTÕmÂúÈiÝ&®ÀëvÉ ö™@î%o3ï‘o{æ ˆlv ß#"›È6²Ù|ˆlvÉD¶"ï×Õ@6“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>È}òmò= òmÏ|‘Íä{Dd³ÙF@6;ï‘Í.€È6CäZ£âÌ$² €löÌÙm"ÛÈnÙGŽì6‘md·K²Ïòˆò33‡ÊÏì™/ òÈåg#"\~>òÈåg#"\~N³ [‹ò3“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>ßòãМqhÌtÌÔ3͇ÆL9óÌ.‡æL€CÚ ½·Ðf4f :f š37´fj‹ƒŽƒæL9óÌ.ƒŽ™.ƒÚ ]f h³33Í™Z3µÅŒAÇŒAs¦œyfAÇL€A Í › +ÑlA7bÌ8tS!j¦¶˜qè– ‘3åÌ3»º¥BäÄ– z¨G.á*¹‡jpäªÀ‘ p¨þF.¿¡ê©ø†jo,¥—*o)¼TwKÙ¥ª[Š.ÕÜRr©â–‚Kõ–Ë-U[rCïÌÜŽöVŸç8ßúV]¾c­úz£¿íü½?ZÄ”à+ÄÌ£¾õgD_œßå6^¸û=t‹–×ãûªjn–Œf¿9±¼š ñ…ÖÑêY2šý&Å +\h6´Oõø`Ih®]¦³g4û¡ŒŽ@h6¼!PN¶õ­à»7#¡ÝCèæwù¾*”›uA«H… íšÇMGÛšÑì×?ú¡ÙÐ>ÕãƒEhÐ!3ÇÌh¶$‰¾Kh6¼!P­Ì¹ müî·º ÝC˜DÃ]¾¯jcf]ÊÀ–QÑ)&´Š2@üAZ[ÊÀÖYÑM +͆e€»|_ôŬ롾oÆ‹;2Ñ—"è¨F:çš÷#}]Ëè‰/~.%0QŒ&Gª@_©v&a}þùøwQU=þ}¥j”ž²jþ.ªÿ²®3Ûõ©ìûu¹Ë–:Ù×ÕÉîý 2çË=ÄÇÒïÅe ¥†ñ·p¡?·TòþæwžÄÇ•šÛ%ñ™v©>÷ûÊ®ôÈm—)Æßê¥.=øÜoŸ‹øàÉ|X”Ù"È• +×›€éíAð¹ßÖCˆù\«ÈW˜hùÌï ]|Ûš_®¿Ü®™¯2/7ò™ß×k5ø Ln»`1þ` 
p¼¿ùís<‰K-³]³p½I™ÞØŸûm•ƒøX¦½¸d›™o‹<¸Ê‘ÏüÎ0ƒ/ÉܲÜ2»æú0=Ó«&ñUÖãcEF¾–ëÃMo£‚ÏýΠúHZ—×XfB¿p}gZz®ŽÎâˆh®ÃGCüdf®ÉÒˆh²äÒH•Ñ´=2²FÔEOh5ÕE_ÐZèSYd…UÑZR½Ôçº ë§T/^†]·=O©Þ©UŸ:á»bÏâMÝüŸg÷7¿3x|ð$>ªžÛ%ñ¹ºÅk¼àsÿ©¥Ÿš^ðQÅoê/ú‚Ïýö¹ˆžÌÕ³¦*æ×»ºÅ Èàsÿ©õŸú[òAÅÀW˜ªøÌï ]|Ûš_ªžÛ5óUæ…ª>óŸZª•U ñó@ÕÃýÍoŸ‹øàI|T=³¡b~½«[¼ô >÷ŸZ ªk}±ƒ™o‹<@õÀg~g˜Á—U/µŸn×\®nñ¶Z|•õÁøS+3ع>\Ýâ…vð¹ßTYõÔqºIóë;ÓÒsutGDŸ±@33tÌã'“0smL–FD“%—FªŒ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zË{¿ª^t¯[¨žÞ§¿û¾…êùÛ@Ä­Ñ`[–«v ,«¼¿ù«º]ñ$>¾È«jHq½©›v‚ÏýEkC½TŒñ·ºi¯ øÜ_ÕòŠ'óá]UWŠëMÝ´‡|î/Zê}ù\ÅÈW˜¨ùÌ_Õ÷Š'ññõ\UkJ¾Ê¼@õÈgþ¢µ¡^í*Æøƒy€êñþæ¯j~Å“øø&®ª?Åõ¦nÚ7 +>÷­ õïÅ7z3óm‘W=ò™¿ªOækª¨ù*óÕ#_e}0¾h;ׇ©›öÄ‚ÏýUm°xßPyPÇp}gZz®ŽÎâˆè«43©cˆŸLÂ̵1Y][V½üέ¦†5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½´â6Tl4©^lÉ]·íMª×µ6ÔNÉ‹»&ž%Ä›ºõ)ÕÃýÍ?Ô‹'ñQõ†:V\ïꛉÁ¿Ö†ÚU ñ¦n±Ý|îêˆÅ“ù zC+®wu‹mÐàƒ_kCí*¾Â\Ýb[=øÜ?Ô‹'ñ •Gè˜_ß™–ž«£³8"ºÇ*ÍÌÐ1ŸLÂ̵1Y=Æ¢zÚð¬÷mŒ,…uÑZMuÑ´úTYaUô„&Õ[ö…›:ÖZG¨žvõ_ºP=ß-Fü®5lËãouÃYË*ïoþ¦ŽX<‰½M+®7uÓy„àsÿ®µ¡6‰Ácü­n:±|îoêˆÅ“ù°§ÛÔ±âzS7¤>÷ïZj?˜|®bä+ÌT|æoêˆÅ“ø¸}ÛÔ±’¯2/P=ò™×ÚP[¿àƒŠ1þ` z¼¿ù›:bñ$>îÔ6u¬¸ÞÔM§W‚Ïý»Ö†Úå}qÇwf¾-òàªG>ó7uÄâÉ|Mõ#_e^ z䫬ÆïZ£ÁÎõaꦓ9Áçþ¦ŽX<‰o¨<¨c¸¾3-=WGgqDô«43©cˆŸLÂ̵1YÝö¬zy?¶¥ž5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½tNÆm¨ØÔn†]·ÚÍðÓ3ˆïZ£Áö, ífณguj7 ?ÏUõÒ‘Ø%ñ¹º ífàóÜßµ6Ô¡ðQņv3tè)øÜ?Õ‹'óAõ¦:V\ïê6´›Ïs×ÚPçcÈÚÍÐq-ñ™ª#Oâ£êMu¬ä«Ì Uoh7Ç_jðQõºÖnŒ?˜ªÞÔn†.â繪^:Óâ6Tlj7Cà‚Ïý]kCzyñÌÌ|[äª7µ›†|YõÒñ·k®W·¡Ý òUÖã»Öh°s}¸º ífðþõÁøy®ª§“*nRÇfìfÄàë,Žˆî±J33tlÄnFI ¶ÉÒˆèy.ª§Ó)žõ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zíÐn†Û®b­i7Cg _Vµ›á§ _µFƒmYbü­n81iYåýͨ#OâãAÀC+®7uÓ©ÈàsÕÚP‡Ácü­n:7|î?Ô‹'óáÌß¡Ž×›ºé®?Œ÷Æå‡ñ¶ŽùÃxü0î÷øúz~ÙëDõ¹žDí‡E]Ó×…aXŽí!Í>øïŸ×düoõ–‘¿Ö«\¯ç­ÜýÌ•çë»ýô—w-ÿ/Iÿõv×ï!o'ÈŸÿ`[G. 
+endstream +endobj +1447 0 obj +<< +/Filter /FlateDecode +/Subtype /Type1C +/Length 13073 +>> +stream +xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs +ÏÇo”dz%!)&cÒæ”¤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ +¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ +ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– +Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ +Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 +«CÁ +»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w +v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7І%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ +ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ +èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ΜG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøÈª‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) +sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/Ì寿ªr"˜g6Qh> NÎ߬ 
K*ÀrSe©©Œî.Í`G²ã¶¢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( +Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ +ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï +;žÓ¸‹FÄd4½…‚ïLûí“‹=wް%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ +GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ר.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïØ¤žAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ +‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ߕռÒ2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ +œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu +¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr +0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äóæ¾ž7á= +%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl +»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? 
èrg¡ÖÉ6äa O¢×/Ý>•ѵa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçÆ±€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 +ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ +ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( +½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η + Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi +õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø +¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ +Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*ކŽ7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ 
S{Ä„@˩ԷW‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥Ågrާt0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª +ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& +‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä +]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² + ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃƒlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ +SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døÑª‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. 
+¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæØ‡ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« +g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 +Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E +G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA +à= +®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ +è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 
8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞÆ‡Å’¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ +endstream +endobj +1448 0 obj +<< +/Filter /FlateDecode +/Subtype /Type1C +/Length 11578 +>> +stream +xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» +bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o +‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU +‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûξXËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ +´Õ= +«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið +ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! 
ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É +|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajM肚Ém¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøÐ‰•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ +SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi +‹» xŸÃk\_¥OÌ +O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôѰ€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| +ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#а6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ +·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á ¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²Gá§ŠRjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä +„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy +GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØÎ“ë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO +è ~nè¸Ó\“¡ÍÑeåf +Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QЦŽmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW +å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊᡞž! 
¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ +Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe +€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'à¶Š + iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ +Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ +(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígϰçÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ +œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ +Ë.ŠöUŸ ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} +lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒёРIòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L +³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië +H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ 
+Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú +?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” +å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ +Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, +¬Á®¶QU¯ާ_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm +êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ +ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ +Ê Å±»ò + Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄE鯶îÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG +mÙ_¨&òK + ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë +ó‹‹Ê„²êlŽG™ƒýUª€ÝW޶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ +åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ +Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø 
ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ +ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ +ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ +endstream +endobj +1454 0 obj +<< +/Length 8610 >> stream -0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(beta)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.617 0 Td [(b)]TJ/F54 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(12)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.597 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(5.3)-1000(psb)]TJ ET q -1 0 0 1 385.864 592.09 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 389.002 591.891 Td [(T)]TJ +/F75 11.9552 Tf 
151.016 706.129 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ/F84 9.9626 Tf 1.011 0 0 1 99.587 686.688 Tm [(These)-247(subr)17(outines)-247(collect)-247(the)-248(portions)-247(of)-248(global)-247(dense)-247(matrix)-248(distributed)-247(over)]TJ 1 0 0 1 99.895 674.733 Tm [(all)-250(pr)18(ocess)-250(into)-250(one)-250(single)-250(array)-250(stor)18(ed)-250(on)-250(one)-250(pr)18(ocess.)]TJ/F78 9.9626 Tf 120.161 -25.465 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 394.86 592.09 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 238.711 649.467 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 397.998 591.891 Td [(vect)]TJ +/F78 9.9626 Tf 241.994 649.268 Td [(x)]TJ/F190 10.3811 Tf 8.097 0 Td [(\040)]TJ/F78 9.9626 Tf 13.397 0 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ/F192 10.3811 Tf 27.706 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 419.547 592.09 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 308.334 649.467 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 422.685 591.891 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.883 -11.956 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-280(12)]TJ -0 g 0 G - [(.)-400(The)-280(rank)-280(of)]TJ/F52 9.9626 Tf 275.562 0 Td [(y)]TJ/F54 9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.115 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ +/F78 9.9626 Tf 311.617 649.268 Td [(x)]TJ/F78 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F192 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F84 9.9626 Tf -220.163 -22.41 Td [(wher)18(e:)]TJ 0 g 0 G -/F51 9.9626 Tf -83.228 -18.597 Td [(desc)]TJ +/F78 9.9626 Tf 0.762 -20.664 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 120.408 549.583 cm +1 0 0 1 118.894 606.393 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 549.383 Td [(a)]TJ +/F78 9.9626 Tf 122.176 606.194 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td 
[(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F84 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(submatrix)]TJ/F78 9.9626 Tf 103.256 0 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 273.363 501.762 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 254.274 606.393 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 276.501 501.563 Td [(desc)]TJ +/F78 9.9626 Tf 257.557 606.194 Td [(x)]TJ/F84 7.5716 Tf 5.106 -1.858 Td [(1)-12(:)]TJ/F78 7.5716 Tf 5.962 0 Td [(m)]TJ/F84 7.5716 Tf 5.986 0 Td [(,1)-12(:)]TJ/F78 7.5716 Tf 7.855 0 Td [(n)]TJ +0 g 0 G +/F78 9.9626 Tf -182.446 -19.051 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 298.05 501.762 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 112.886 585.484 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 301.189 501.563 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -18.597 Td [(trans)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(indicates)-250(what)-250(kind)-250(of)-250(operation)-250(to)-250(perform.)]TJ -0 g 0 G -/F51 9.9626 Tf -2.759 -18.597 Td [(trans)-250(=)-250(N)]TJ -0 g 0 G -/F54 9.9626 Tf 46.983 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -46.983 -14.612 Td [(trans)-250(=)-250(T)]TJ -0 g 0 G -/F54 9.9626 Tf 45.33 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -45.33 -14.612 Td [(trans)-250(=)-250(C)]TJ -0 g 0 G -/F54 9.9626 
Tf 45.878 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ -0 0 1 rg 0 0 1 RG - [-250(3)]TJ -0 g 0 G - -45.878 -18.597 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F85 10.3811 Tf 25.193 0 Td [(=)]TJ/F52 9.9626 Tf 11.434 0 Td [(N)]TJ/F54 9.9626 Tf -75.267 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.596 Td [(work)]TJ -0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-270(as:)-351(a)-270(rank)-270(one)-270(array)-271(of)-270(the)-270(same)-270(type)-271(of)]TJ/F52 9.9626 Tf 220.875 0 Td [(x)]TJ/F54 9.9626 Tf 7.898 0 Td [(and)]TJ/F52 9.9626 Tf 19.684 0 Td [(y)]TJ/F54 9.9626 Tf 7.798 0 Td [(with)-270(the)-270(T)74(AR-)]TJ -256.255 -11.955 Td [(GET)-250(attribute.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.597 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -18.597 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(matrix)]TJ/F52 9.9626 Tf 144.939 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -135.659 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td 
[(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(12)]TJ -0 g 0 G - [(.)]TJ +/F78 9.9626 Tf 116.169 585.285 Td [(x)]TJ/F78 7.5716 Tf 5.147 -1.96 Td [(i)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.597 Td [(info)]TJ +/F84 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F78 9.9626 Tf 234.035 0 Td [(i)]TJ/F84 9.9626 Tf 2.964 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F78 9.9626 Tf -266.027 -20.91 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ 0 g 0 G - 141.968 -36.529 Td [(49)]TJ +/F84 9.9626 Tf 32.562 0 Td [(is)-250(the)-250(collect)-250(function.)]TJ 0 g 0 G -ET - -endstream -endobj -1185 0 obj -<< -/Length 7154 ->> -stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.13)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 179.582 543.107 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(spsm)-250(\227)-250(T)111(riangular)-250(System)-250(Solve)]TJ/F54 9.9626 Tf -57.098 -19.83 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(T)90(riangular)-250(System)-250(Solve:)]TJ/F52 9.9626 Tf 122.724 -35.213 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F54 7.5716 Tf 6.228 0 Td [(1)]TJ/F52 9.9626 Tf 
4.577 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -82.196 -16.139 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F83 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F54 7.5716 Tf 6.227 0 Td [(1)]TJ/F52 9.9626 Tf 4.578 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.139 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F54 7.5716 Tf 6.228 0 Td [(1)]TJ/F52 9.9626 Tf 4.607 -4.114 Td [(D)-52(x)]TJ/F85 10.3811 Tf 15.536 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F52 7.5716 Tf 6.421 0 Td [(T)]TJ/F52 9.9626 Tf 5.694 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -83.506 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F83 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F52 7.5716 Tf 6.42 0 Td [(T)]TJ/F52 9.9626 Tf 5.695 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F52 7.5716 Tf 6.421 0 Td [(T)]TJ/F52 9.9626 Tf 5.724 -4.114 Td [(D)-52(x)]TJ/F85 10.3811 Tf 15.536 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.091 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td 
[(T)]TJ/F83 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F52 7.5716 Tf 6.61 0 Td [(H)]TJ/F52 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.648 0 Td [(y)]TJ -84.982 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F83 7.8896 Tf 14.775 4.115 Td [(\000)]TJ/F52 7.5716 Tf 6.61 0 Td [(H)]TJ/F52 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.504 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -93.281 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F52 7.5716 Tf 6.61 0 Td [(H)]TJ/F52 9.9626 Tf 7.012 -4.115 Td [(D)-52(x)]TJ/F85 10.3811 Tf 15.536 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.648 0 Td [(y)]TJ/F54 9.9626 Tf -201.061 -38.202 Td [(wher)18(e:)]TJ -0 g 0 G -/F52 9.9626 Tf -14.65 -21.265 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.61 0 Td [(x)]TJ/F54 7.5716 Tf 5.201 -1.495 Td [(:)-12(,)-13(:)]TJ -0 g 0 G -/F52 9.9626 Tf -131.167 -20.218 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.441 0 Td [(y)]TJ/F54 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ -0 g 0 G -/F52 9.9626 Tf -130.599 -20.218 Td [(T)]TJ -0 g 0 G -/F54 9.9626 Tf 11.432 0 Td [(is)-250(the)-250(global)-250(sparse)-250(block)-250(triangular)-250(submatrix)]TJ/F52 9.9626 Tf 206.797 0 Td [(T)]TJ -0 g 0 G - -218.159 -21.712 Td [(D)]TJ -0 g 0 G -/F54 9.9626 Tf 12.956 0 Td [(is)-250(the)-250(scaling)-250(diagonal)-250(matrix.)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 6.894 -21.266 Td [(call)]TJ -0 g 0 G - [-525(psb_spsm\050alpha,)-525(t,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -14.944 
-11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_spsm\050alpha,)-525(t,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info,)-525(trans,)-525(unit,)-525(choice,)-525(diag,)-525(work\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G +/F78 9.9626 Tf 185.854 534.539 Td [(x)]TJ/F78 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F84 9.9626 Tf 2.751 1.96 Td [(,)]TJ/F78 9.9626 Tf 4.275 0 Td [(y)]TJ/F75 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ ET q -1 0 0 1 230.392 339.439 cm +1 0 0 1 179.582 530.753 cm []0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 236.623 330.871 Td [(T)]TJ/F54 9.9626 Tf 6.451 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(,)]TJ/F52 9.9626 Tf 5.306 0 Td [(D)]TJ/F54 9.9626 Tf 7.975 0 Td [(,)]TJ/F60 9.9626 Tf 5.105 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(,)]TJ/F60 9.9626 Tf 5.355 0 Td [(b)]TJ/F51 9.9626 Tf 64.393 0 Td [(Subroutine)]TJ +/F84 9.9626 Tf 185.56 522.185 Td [(Integer)-8983(psb)]TJ ET q -1 0 0 1 230.392 327.085 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 322.794 522.385 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 236.369 318.517 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F84 9.9626 Tf 325.783 522.185 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 373.603 318.716 cm +1 0 0 1 322.794 510.429 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 318.517 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F84 9.9626 Tf 325.783 510.23 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 373.603 306.761 cm +1 0 0 1 322.794 498.474 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 306.562 Td [(spsm)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F84 9.9626 Tf 325.783 498.275 Td [(gather)]TJ -140.223 -11.955 Td 
[(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 373.603 294.806 cm +1 0 0 1 322.794 486.519 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 294.607 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F84 9.9626 Tf 325.783 486.32 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 373.603 282.851 cm +1 0 0 1 322.794 474.564 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 282.652 Td [(spsm)]TJ +/F84 9.9626 Tf 325.783 474.365 Td [(gather)]TJ ET q -1 0 0 1 230.392 278.866 cm +1 0 0 1 179.582 470.579 cm []0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 278.277 250.487 Td [(T)92(able)-250(13:)-310(Data)-250(types)]TJ -0 g 0 G +/F84 9.9626 Tf 227.467 442.2 Td [(T)92(able)-250(19:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -127.572 -38.916 Td [(T)90(ype:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -127.572 -27.052 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-525(psb_gather\050glob_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -21.712 Td [(On)-250(Entry)]TJ + [-525(loc_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -21.713 Td [(alpha)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.368 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - 
[-250(13)]TJ + [-525(root\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-190(call)]TJ 0 g 0 G - [(.)]TJ + [-525(psb_gather\050glob_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.967 -29.888 Td [(50)]TJ + [-525(loc_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1196 0 obj -<< -/Length 7295 ->> -stream + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(root\051)]TJ 0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(t)]TJ +/F75 9.9626 Tf 0 -22.902 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 8.299 0 Td [(the)-250(global)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 171.221 0 Td [(T)]TJ/F54 9.9626 Tf 6.451 0 Td [(.)]TJ -161.064 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(type)-250(speci\002ed)-250(in)-250(\247)]TJ -0 0 1 rg 0 0 1 RG - [-250(3)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [(.)]TJ +/F75 9.9626 Tf -29.828 -20.91 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.65 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 
244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.864 590.037 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 389.002 589.838 Td [(T)]TJ + 0 -20.909 Td [(loc)]TJ ET q -1 0 0 1 394.86 590.037 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 113.773 350.626 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 397.998 589.838 Td [(vect)]TJ +/F75 9.9626 Tf 116.762 350.427 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 419.547 590.037 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 321.043 350.626 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 422.685 589.838 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-278(13)]TJ -0 g 0 G - [(.)-395(The)-278(rank)-278(of)]TJ/F52 9.9626 Tf 275.498 0 Td [(x)]TJ/F54 9.9626 Tf 7.977 0 Td [(must)-278(be)]TJ -283.475 -11.956 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -82.959 -20.649 Td [(beta)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.617 0 Td [(b)]TJ/F54 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -20.65 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td 
[(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F78 9.9626 Tf 324.326 350.427 Td [(x)]TJ/F84 9.9626 Tf 5.205 0 Td [(.)]TJ -204.729 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 302.606 Tm [(Speci\002ed)-247(as:)-313(a)-247(rank)-247(one)-247(or)-248(two)-247(array)-247(or)-247(an)-248(object)-247(of)-247(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F145 9.9626 Tf 1 0 0 1 369.545 302.606 Tm [(psb)]TJ ET q -1 0 0 1 385.864 429.186 cm +1 0 0 1 385.864 302.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 428.986 Td [(T)]TJ +/F145 9.9626 Tf 389.002 302.606 Td [(T)]TJ ET q -1 0 0 1 394.86 429.186 cm +1 0 0 1 394.86 302.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 428.986 Td [(vect)]TJ +/F145 9.9626 Tf 397.998 302.606 Td [(vect)]TJ ET q -1 0 0 1 419.547 429.186 cm +1 0 0 1 419.547 302.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 428.986 Td [(type)]TJ +/F145 9.9626 Tf 422.685 302.606 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ +/F84 9.9626 Tf -297.883 -11.955 Td [(indicated)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-280(13)]TJ + [-250(19)]TJ 0 g 0 G - [(.)-400(The)-280(rank)-280(of)]TJ/F52 9.9626 Tf 275.562 0 Td [(y)]TJ/F54 
9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.115 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ + [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -83.228 -20.65 Td [(desc)]TJ +/F75 9.9626 Tf -24.907 -20.91 Td [(desc)]TJ ET q -1 0 0 1 120.408 384.625 cm +1 0 0 1 120.408 269.941 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 384.426 Td [(a)]TJ +/F75 9.9626 Tf 123.397 269.741 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 336.805 cm +1 0 0 1 309.258 222.12 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 336.605 Td [(desc)]TJ +/F145 9.9626 Tf 312.397 221.921 Td [(desc)]TJ ET q -1 0 0 1 298.05 336.805 cm +1 0 0 1 333.945 222.12 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 336.605 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -20.649 Td [(trans)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td 
[(specify)-250(with)]TJ/F52 9.9626 Tf 56.398 0 Td [(unitd)]TJ/F54 9.9626 Tf 24.637 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ +/F145 9.9626 Tf 337.084 221.921 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -83.794 -20.65 Td [(trans)-250(=)-250('N')]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 52.522 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(transposed)-250(matrix)]TJ +/F75 9.9626 Tf -258.11 -20.91 Td [(root)]TJ 0 g 0 G -/F51 9.9626 Tf -52.522 -16.303 Td [(trans)-250(=)-250('T')]TJ +/F84 9.9626 Tf 1.004 0 0 1 122.839 201.011 Tm [(The)-248(pr)18(ocess)-248(that)-248(holds)-248(the)-248(global)-248(copy)111(.)-308(If)]TJ/F78 9.9626 Tf 1 0 0 1 305.722 201.011 Tm [(r)-17(o)-35(o)-35(t)]TJ/F192 10.3811 Tf 19.923 0 Td [(=)]TJ/F190 10.3811 Tf 11.086 0 Td [(\000)]TJ/F84 9.9626 Tf 1.004 0 0 1 344.925 201.011 Tm [(1)-248(all)-248(the)-248(pr)18(ocesses)-248(will)]TJ 1 0 0 1 124.802 189.056 Tm [(have)-250(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F190 10.3811 Tf 142.419 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1)]TJ/F190 10.3811 Tf 7.873 0 Td [(\024)]TJ/F78 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F190 10.3811 Tf 19.923 0 Td [(\024)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.503 0 Td [(\000)]TJ/F84 9.9626 Tf 10.132 0 Td [(1,)-250(default)]TJ/F190 10.3811 Tf 43.889 0 Td [(\000)]TJ/F84 9.9626 Tf 8.195 0 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 50.869 0 Td [(the)-250(operation)-250(is)-250(with)-250(transposed)-250(matrix.)]TJ +/F75 9.9626 Tf -301.108 -20.909 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -50.869 -16.302 Td [(trans)-250(=)-250('C')]TJ 0 g 0 G 
-/F54 9.9626 Tf 51.417 0 Td [(the)-250(operation)-250(is)-250(with)-250(conjugate)-250(transposed)-250(matrix.)]TJ -51.417 -20.65 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F85 10.3811 Tf 25.193 0 Td [(=)]TJ/F52 9.9626 Tf 11.434 0 Td [(N)]TJ/F54 9.9626 Tf -75.267 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -20.65 Td [(unitd)]TJ -0 g 0 G -/F54 9.9626 Tf 29.878 0 Td [(specify)-250(with)]TJ/F52 9.9626 Tf 56.398 0 Td [(trans)]TJ/F54 9.9626 Tf 23.521 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ -0 g 0 G -/F51 9.9626 Tf -84.89 -20.649 Td [(unitd)-250(=)-250('U')]TJ -0 g 0 G -/F54 9.9626 Tf 54.186 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(scaling)]TJ -0 g 0 G -/F51 9.9626 Tf -54.186 -16.303 Td [(unitd)-250(=)-250('L)74(')]TJ -0 g 0 G -/F54 9.9626 Tf 51.785 0 Td [(the)-250(operation)-250(is)-250(with)-250(left)-250(scaling)]TJ -0 g 0 G -/F51 9.9626 Tf -51.785 -16.302 Td [(unitd)-250(=)-250('R')]TJ -0 g 0 G -/F54 9.9626 Tf 53.628 0 Td [(the)-250(operation)-250(is)-250(with)-250(right)-250(scaling.)]TJ -0 g 0 G - 88.34 -29.888 Td [(51)]TJ +/F84 9.9626 Tf 166.875 -29.888 Td [(67)]TJ 0 g 0 G ET endstream endobj -1202 0 obj +1460 0 obj << -/Length 4541 +/Length 1417 >> stream 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 175.611 706.129 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(u)-25(n)-18(i)-32(t)-25(d)]TJ/F85 10.3811 Tf 26.159 
0 Td [(=)]TJ/F52 9.9626 Tf 10.927 0 Td [(U)]TJ/F54 9.9626 Tf -75.726 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(choice)]TJ -0 g 0 G -/F54 9.9626 Tf 33.753 0 Td [(speci\002es)-250(the)-250(update)-250(of)-250(overlap)-250(elements)-250(to)-250(be)-250(performed)-250(on)-250(exit:)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -3.865 -19.925 Td [(psb_none_)]TJ -0 g 0 G -0 g 0 G - 0 -15.941 Td [(psb_sum_)]TJ -0 g 0 G -0 g 0 G - 0 -15.94 Td [(psb_avg_)]TJ -0 g 0 G -0 g 0 G - 0 -15.94 Td [(psb_square_root_)]TJ/F54 9.9626 Tf -4.982 -19.925 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F59 9.9626 Tf 38.515 0 Td [(psb_avg_)]TJ/F54 9.9626 Tf -38.515 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(diag)]TJ -0 g 0 G -/F54 9.9626 Tf 24.906 0 Td [(the)-250(diagonal)-250(scaling)-250(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(d)-18(i)-47(a)-47(g)]TJ/F85 10.3811 Tf 18.52 0 Td [(\050)]TJ/F54 9.9626 Tf 4.15 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)-289(=)]TJ/F54 9.9626 Tf 18.002 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)-25(o)-35(s)-25(c)-40(a)-25(l)-48(i)-32(n)-47(g)]TJ/F85 10.3811 Tf 41.384 0 Td [(\051)]TJ/F54 9.9626 Tf -135.182 -11.955 Td 
[(Speci\002ed)-293(as:)-395(a)-293(rank)-293(one)-293(array)-292(containing)-293(numbers)-293(of)-293(the)-292(type)-293(indicated)]TJ 0 -11.955 Td [(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(work)]TJ -0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(a)-250(work)-250(array)111(.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-344(as:)-498(a)-344(rank)-343(one)-344(array)-344(of)-344(the)-344(same)-344(type)-344(of)]TJ/F52 9.9626 Tf 229.679 0 Td [(x)]TJ/F54 9.9626 Tf 8.631 0 Td [(with)-344(the)-344(T)74(ARGET)]TJ -238.31 -11.955 Td [(attribute.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(y)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(glob)]TJ +ET +q +1 0 0 1 171.786 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 174.774 706.129 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ +/F84 9.9626 Tf 9.654 0 Td 
[(The)-250(array)-250(wher)18(e)-250(the)-250(local)-250(parts)-250(must)-250(be)-250(gather)18(ed.)]TJ -8.817 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)-250(with)-250(the)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 202.459 0 Td [(ALLOCATABLE)]TJ 0 g 0 G - [(.)]TJ +/F84 9.9626 Tf 60.025 0 Td [(attribute.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(info)]TJ +/F75 9.9626 Tf -287.39 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -73.723 Td [(52)]TJ + 142.356 -500.124 Td [(68)]TJ 0 g 0 G ET endstream endobj -1213 0 obj +1466 0 obj << -/Length 7400 +/Length 7629 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.14)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(5.4)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 
147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(gemlt)-250(\227)-250(Entrywise)-250(Product)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F52 9.9626 Tf 299.677 0 Td [(x)]TJ/F54 9.9626 Tf 7.697 0 Td [(and)]TJ/F52 9.9626 Tf 19.481 0 Td [(y)]TJ -187.918 -21.112 Td [(d)-25(o)-35(t)]TJ/F83 10.3811 Tf 16.336 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F52 9.9626 Tf 4.274 0 Td [(y)]TJ/F85 10.3811 Tf 5.231 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ/F59 9.9626 Tf -187.465 -21.111 Td [(psb_gemlt\050x,)-525(y,)-525(desc_a,)-525(info\051)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.587 685.766 Tm [(These)-354(subr)18(outines)-353(scatters)-354(the)-354(portions)-353(of)-354(global)-354(dense)-353(matrix)-354(owned)-353(by)-354(a)]TJ 1 0 0 1 99.596 673.811 Tm [(pr)18(ocess)-250(to)-250(all)-250(the)-250(pr)18(ocesses)-250(in)-250(the)-250(pr)18(ocesses)-250(grid.)]TJ/F78 9.9626 Tf 119.778 -26.893 Td [(l)-55(o)-35(c)]TJ +ET +q +1 0 0 1 232.24 647.117 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 235.523 646.918 Td [(x)]TJ/F78 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F190 10.3811 Tf 5.642 1.96 Td [(\040)]TJ/F78 9.9626 Tf 13.397 0 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ/F192 10.3811 Tf 28.633 0 Td [(\050)]TJ/F78 9.9626 Tf 4.493 0 Td [(g)-25(l)-55(o)-35(b)]TJ +ET +q +1 0 0 1 311.49 647.117 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 314.773 646.918 Td [(x)]TJ/F192 10.3811 Tf 5.33 0 Td [(\051)]TJ/F84 9.9626 Tf -220.626 -23.362 Td [(wher)18(e:)]TJ +0 g 0 G +/F78 9.9626 Tf 0.762 -22.091 Td [(g)-25(l)-55(o)-35(b)]TJ +ET 
+q +1 0 0 1 118.894 601.664 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 122.176 601.465 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(matrix)]TJ/F78 9.9626 Tf 87.516 0 Td [(g)-25(l)-55(o)-35(b)]TJ +ET +q +1 0 0 1 238.533 601.664 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 241.816 601.465 Td [(x)]TJ/F84 7.5716 Tf 5.106 -1.858 Td [(1)-12(:)]TJ/F78 7.5716 Tf 5.962 0 Td [(m)]TJ/F84 7.5716 Tf 5.986 0 Td [(,1)-12(:)]TJ/F78 7.5716 Tf 7.855 0 Td [(n)]TJ +0 g 0 G +/F78 9.9626 Tf -166.705 -20.955 Td [(l)-55(o)-35(c)]TJ +ET +q +1 0 0 1 112.886 578.851 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 116.169 578.652 Td [(x)]TJ/F78 7.5716 Tf 5.147 -1.96 Td [(i)]TJ +0 g 0 G +/F84 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F78 9.9626 Tf 234.035 0 Td [(i)]TJ/F84 9.9626 Tf 2.964 0 Td [(.)]TJ +0 g 0 G +/F78 9.9626 Tf -266.027 -22.813 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ +0 g 0 G +/F84 9.9626 Tf 33.489 0 Td [(is)-250(the)-250(scatter)-250(function.)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 183.035 630.896 cm -[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S +1 0 0 1 179.582 532.667 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 189.137 622.328 Td [(d)-25(o)-35(t)]TJ/F54 9.9626 Tf 13.444 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 91.759 0 Td [(Function)]TJ +/F78 9.9626 Tf 185.854 524.099 Td [(x)]TJ/F78 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F84 9.9626 Tf 2.751 1.96 Td [(,)]TJ/F78 9.9626 Tf 4.275 0 Td [(y)]TJ/F75 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ ET q -1 0 0 1 183.035 618.542 cm -[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S +1 0 0 1 179.582 520.313 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 189.012 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F84 9.9626 Tf 185.56 511.745 Td 
[(Integer)-8983(psb)]TJ ET q -1 0 0 1 326.246 610.173 cm +1 0 0 1 322.794 511.945 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 329.235 609.974 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F84 9.9626 Tf 325.783 511.745 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 326.246 598.218 cm +1 0 0 1 322.794 499.989 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 329.235 598.019 Td [(gemlt)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F84 9.9626 Tf 325.783 499.79 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 326.246 586.263 cm +1 0 0 1 322.794 488.034 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 329.235 586.064 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F84 9.9626 Tf 325.783 487.835 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 326.246 574.308 cm +1 0 0 1 322.794 476.079 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 329.235 574.109 Td [(gemlt)]TJ +/F84 9.9626 Tf 325.783 475.88 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 183.035 570.323 cm -[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S +1 0 0 1 322.794 464.124 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 325.783 463.925 Td [(scatter)]TJ +ET +q +1 0 0 1 179.582 460.139 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(14:)-310(Data)-250(types)]TJ +/F84 9.9626 Tf 227.467 431.76 Td [(T)92(able)-250(20:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -112.628 -28.004 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ + [-525(psb_scatter\050glob_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 
-/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-525(loc_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.603 Td [(x)]TJ + [-525(root,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 174.06 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ + [-525(mold\051)]TJ +0 g 0 G +/F75 9.9626 Tf -14.944 -24.806 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -22.813 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.813 Td [(glob)]TJ ET q -1 0 0 1 280.646 421.777 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.976 333.523 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.965 333.324 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(array)-250(that)-250(must)-250(be)-250(scatter)18(ed)-250(into)-250(local)-250(pieces.)]TJ -8.817 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -22.814 Td [(desc)]TJ +ET +q +1 0 0 1 
120.408 262.89 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 283.785 421.578 Td [(T)]TJ +/F75 9.9626 Tf 123.397 262.69 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 289.642 421.777 cm +1 0 0 1 309.258 215.069 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 292.781 421.578 Td [(vect)]TJ +/F145 9.9626 Tf 312.397 214.87 Td [(desc)]TJ ET q -1 0 0 1 314.33 421.777 cm +1 0 0 1 333.945 215.069 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 317.468 421.578 Td [(type)]TJ +/F145 9.9626 Tf 337.084 214.87 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(.)]TJ +/F75 9.9626 Tf -258.11 -22.813 Td [(root)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(y)]TJ +/F84 9.9626 Tf 0.987 0 0 1 122.839 192.057 Tm [(The)-253(pr)18(ocess)-254(that)-253(holds)-254(the)-253(global)-254(copy)113(.)-315(If)]TJ/F78 9.9626 Tf 1 0 0 1 303.049 192.057 Tm [(r)-17(o)-35(o)-35(t)]TJ/F192 10.3811 Tf 19.927 0 Td [(=)]TJ/F190 10.3811 Tf 11.09 0 Td [(\000)]TJ/F84 9.9626 Tf 0.987 0 0 1 342.26 192.057 Tm [(1)-253(all)-254(the)-253(pr)18(ocesses)-254(have)]TJ 1 0 0 1 124.802 180.101 Tm [(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 
9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.015 0 0 1 124.802 132.281 Tm [(Speci\002ed)-246(as:)-305(an)-246(integer)-246(variable)]TJ/F190 10.3811 Tf 1 0 0 1 269.144 132.281 Tm [(\000)]TJ/F84 9.9626 Tf 1.015 0 0 1 277.338 132.281 Tm [(1)]TJ/F190 10.3811 Tf 1 0 0 1 285.286 132.281 Tm [(\024)]TJ/F78 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F190 10.3811 Tf 19.922 0 Td [(\024)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.501 0 Td [(\000)]TJ/F84 9.9626 Tf 1.015 0 0 1 350.91 132.281 Tm [(1,)-246(default)]TJ/F145 9.9626 Tf 1 0 0 1 395.251 132.281 Tm [(psb_root_)]TJ/F84 9.9626 Tf 1.015 0 0 1 442.324 132.281 Tm [(,)]TJ 1 0 0 1 124.802 120.326 Tm [(i.e.)-310(pr)18(ocess)-250(0.)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 173.89 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ + 141.968 -29.888 Td [(69)]TJ +0 g 0 G +ET + +endstream +endobj +1473 0 obj +<< +/Length 4073 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 150.705 706.129 Td [(mold)]TJ +0 g 0 G +/F84 9.9626 Tf 28.473 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.567 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 
31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ 0.987 0 0 1 175.611 658.308 Tm [(Speci\002ed)-254(as:)-315(an)-254(object)-254(of)-254(a)-254(class)-254(derived)-254(fr)18(om)]TJ/F145 9.9626 Tf 1 0 0 1 374.749 658.308 Tm [(psb)]TJ ET q -1 0 0 1 280.646 342.398 cm +1 0 0 1 391.068 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 283.785 342.199 Td [(T)]TJ +/F145 9.9626 Tf 394.206 658.308 Td [(T)]TJ ET q -1 0 0 1 289.642 342.398 cm +1 0 0 1 400.064 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 292.781 342.199 Td [(vect)]TJ +/F145 9.9626 Tf 403.202 658.308 Td [(base)]TJ ET q -1 0 0 1 314.33 342.398 cm +1 0 0 1 424.751 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 317.468 342.199 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ +/F145 9.9626 Tf 427.89 658.308 Td [(vect)]TJ +ET +q +1 0 0 1 449.439 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 452.577 658.308 Td [(type)]TJ/F84 9.9626 Tf 0.987 0 0 1 473.498 658.308 Tm [(;)-254(this)]TJ 1 0 0 1 175.611 646.353 Tm [(is)-250(only)-250(allowed)-250(when)-250(loc)]TJ ET q -1 0 0 1 120.408 310.84 cm +1 0 0 1 285.797 646.552 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 310.641 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F84 9.9626 Tf 288.786 646.353 Td 
[(x)-250(is)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 50.53 0 Td [(psb)]TJ +ET +q +1 0 0 1 355.634 646.552 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 358.773 646.353 Td [(T)]TJ ET q -1 0 0 1 273.363 263.02 cm +1 0 0 1 364.63 646.552 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 262.82 Td [(desc)]TJ +/F145 9.9626 Tf 367.769 646.353 Td [(vect)]TJ ET q -1 0 0 1 298.05 263.02 cm +1 0 0 1 389.318 646.552 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 262.82 Td [(type)]TJ +/F145 9.9626 Tf 392.456 646.353 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -19.602 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -262.672 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.603 Td [(y)]TJ + 0 -19.926 Td [(loc)]TJ +ET +q +1 0 0 1 164.583 606.702 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 167.571 606.502 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.68 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F78 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ +ET +q +1 0 0 1 371.853 606.702 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 375.135 606.502 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ -204.73 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 
9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 0.985 0 0 1 175.611 558.682 Tm [(Speci\002ed)-253(as:)-315(a)-253(rank)-253(one)-253(or)-253(two)-254(ALLOCA)76(T)75(ABLE)-254(array)-253(or)-253(an)-253(object)-253(of)-253(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ +/F145 9.9626 Tf 1 0 0 1 175.611 546.727 Tm [(psb)]TJ ET q -1 0 0 1 280.646 175.993 cm +1 0 0 1 191.93 546.926 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 283.785 175.794 Td [(T)]TJ +/F145 9.9626 Tf 195.068 546.727 Td [(T)]TJ ET q -1 0 0 1 289.642 175.993 cm +1 0 0 1 200.926 546.926 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 292.781 175.794 Td [(vect)]TJ +/F145 9.9626 Tf 204.065 546.727 Td [(vect)]TJ ET q -1 0 0 1 314.33 175.993 cm +1 0 0 1 225.613 546.926 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 317.468 175.794 Td [(type)]TJ +/F145 9.9626 Tf 228.752 546.727 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +/F84 9.9626 Tf 23.412 0 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(14)]TJ + [-250(20)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ -0 g 0 G -/F54 9.9626 Tf 114.88 -29.888 Td [(53)]TJ -0 g 0 G -ET - -endstream -endobj -1218 0 obj -<< -/Length 314 ->> -stream -0 g 0 G +/F75 9.9626 Tf -101.459 -19.926 Td [(info)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td 
[(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -603.736 Td [(54)]TJ + 142.356 -388.543 Td [(70)]TJ 0 g 0 G ET endstream endobj -1229 0 obj +1477 0 obj << -/Length 7318 +/Length 6702 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.15)-1000(psb)]TJ +/F75 14.3462 Tf 99.895 706.042 Td [(6)-1000(Data)-250(management)-250(routines)]TJ/F75 11.9552 Tf 0 -24.694 Td [(6.1)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 147.429 681.547 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(division)-250(between)-250(two)-250(vectors)]TJ/F52 9.9626 Tf 300.604 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf -188.347 -21.112 Td [(/)]TJ/F83 10.3811 Tf 9.054 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.274 0 Td [(/)]TJ/F52 9.9626 Tf 6.286 0 Td [(y)]TJ/F85 10.3811 Tf 5.231 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ/F59 9.9626 Tf -186.967 -21.111 Td [(psb_gediv\050x,)-525(y,)-525(desc_a,)-525(info,)-525([flag\051)]TJ +/F75 11.9552 Tf 151.016 681.348 Td 
[(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G 0 g 0 G +/F145 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,parts=parts\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vg=vg,[mg=mg,flag=flag]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vl=vl,[nl=nl,globalcheck=.false.,lidx=lidx]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,nl=nl\051)]TJ 0 -11.956 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,repl=.true.\051)]TJ/F84 9.9626 Tf 0.986 0 0 1 114.839 594.792 Tm [(This)-254(subr)19(outine)-254(initializes)-254(the)-253(communication)-254(descriptor)-253(associated)-254(with)-254(an)]TJ 1.018 0 0 1 99.895 582.836 Tm [(index)-245(space.)-305(One)-245(of)-245(the)-245(optional)-245(ar)17(guments)]TJ/F145 9.9626 Tf 1 0 0 1 297.448 582.836 Tm [(parts)]TJ/F84 9.9626 Tf 1.018 0 0 1 323.599 582.836 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 328.623 582.836 Tm [(vg)]TJ/F84 9.9626 Tf 1.018 0 0 1 339.083 582.836 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 344.106 582.836 Tm [(vl)]TJ/F84 9.9626 Tf 1.018 0 0 1 354.567 582.836 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 359.59 582.836 Tm [(nl)]TJ/F84 9.9626 Tf 1.018 0 0 1 372.538 582.836 Tm [(or)]TJ/F145 9.9626 Tf 1 0 0 1 384.568 582.836 Tm [(repl)]TJ/F84 9.9626 Tf 1.018 0 0 1 407.976 582.836 Tm [(must)-245(be)]TJ 1 0 0 1 99.895 570.881 Tm [(speci\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(speci\002c)-250(initialization)-250(strategy)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -18.208 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.067 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -19.067 Td [(icontxt)]TJ +0 g 0 G +/F84 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -57.434 
-11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.066 Td [(vg)]TJ +0 g 0 G +/F84 9.9626 Tf 16.06 0 Td [(Data)-250(allocation:)-310(each)-250(index)]TJ/F78 9.9626 Tf 121.707 0 Td [(i)]TJ/F190 10.3811 Tf 5.856 0 Td [(2)-290(f)]TJ/F84 9.9626 Tf 15.245 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F78 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F190 10.3811 Tf 13.449 0 Td [(g)]TJ/F84 9.9626 Tf 7.806 0 Td [(is)-250(allocated)-250(to)-250(pr)18(ocess)]TJ/F78 9.9626 Tf 98.455 0 Td [(v)-47(g)]TJ/F192 10.3811 Tf 10.679 0 Td [(\050)]TJ/F78 9.9626 Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 4.149 0 Td [(.)]TJ -295.759 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -57.434 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -61.877 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.067 Td [(\003ag)]TJ +0 g 0 G +/F84 9.9626 Tf 21.589 0 Td [(Speci\002es)-250(whether)-250(entries)-250(in)]TJ/F78 9.9626 Tf 123.401 0 Td [(v)-47(g)]TJ/F84 9.9626 Tf 13.046 0 Td [(ar)18(e)-250(zer)18(o-)-250(or)-250(one-based.)]TJ -133.129 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -57.434 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -61.877 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0,)-167(1,)-250(default)-250(0.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 
-19.067 Td [(mg)]TJ +0 g 0 G +/F84 9.9626 Tf 19.377 0 Td [(the)-250(\050global\051)-250(number)-250(of)-250(r)18(ows)-250(of)-250(the)-250(pr)18(oblem.)]TJ 5.53 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -57.434 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -61.877 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.014 0 0 1 124.802 266.056 Tm [(Speci\002ed)-246(as:)-305(an)-246(integer)-246(value.)-305(It)-246(is)-246(r)18(equir)17(ed)-245(if)]TJ/F145 9.9626 Tf 1 0 0 1 328.848 266.056 Tm [(parts)]TJ/F84 9.9626 Tf 1.014 0 0 1 357.483 266.056 Tm [(or)]TJ/F145 9.9626 Tf 1 0 0 1 369.473 266.056 Tm [(repl)]TJ/F84 9.9626 Tf 1.014 0 0 1 392.878 266.056 Tm [(is)-246(speci\002ed,)]TJ 1 0 0 1 124.802 254.101 Tm [(it)-250(is)-250(optional)-250(if)]TJ/F145 9.9626 Tf 66.141 0 Td [(vg)]TJ/F84 9.9626 Tf 12.951 0 Td [(is)-250(speci\002ed.)]TJ +0 g 0 G +/F75 9.9626 Tf -103.999 -19.067 Td [(parts)]TJ +0 g 0 G +/F84 9.9626 Tf 27.666 0 Td [(the)-250(subr)18(outine)-250(that)-250(de\002nes)-250(the)-250(partitioning)-250(scheme.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -57.434 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(subr)18(outine.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.067 Td [(vl)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 113.733 180.101 Tm [(Data)-254(allocation:)-322(the)-255(set)-254(of)-254(global)-254(indices)]TJ/F78 9.9626 Tf 1 0 0 1 295.435 180.101 Tm [(v)-25(l)]TJ/F192 10.3811 Tf 8.548 0 Td [(\050)]TJ/F84 9.9626 Tf 1.02 0 0 1 308.132 180.101 Tm [(1)]TJ 1 0 0 1 316.277 180.101 Tm [(:)]TJ/F78 9.9626 Tf 5.679 0 Td [(n)-25(l)]TJ/F192 10.3811 Tf 9.106 0 Td [(\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 337.795 180.101 Tm [(belonging)-254(to)-254(the)-255(calling)]TJ 1 0 
0 1 124.503 168.146 Tm [(pr)18(ocess.)]TJ 0.299 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -61.877 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ +0 g 0 G + 141.968 -29.888 Td [(71)]TJ 0 g 0 G ET -q -1 0 0 1 183.199 630.896 cm -[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S -Q -BT -/F54 9.9626 Tf 189.301 622.328 Td [(/)-13(,)]TJ/F52 9.9626 Tf 11.437 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(,)]TJ/F52 9.9626 Tf 5.105 0 Td [(y)]TJ/F51 9.9626 Tf 99.043 0 Td [(Function)]TJ -ET -q -1 0 0 1 183.199 618.542 cm -[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S -Q -BT -/F54 9.9626 Tf 189.177 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 326.41 610.173 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 609.974 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 326.41 598.218 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 598.019 Td [(gediv)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 326.41 586.263 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 586.064 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 326.41 574.308 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 574.109 Td [(gediv)]TJ -ET -q -1 0 0 1 183.199 570.323 cm -[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S -Q + +endstream +endobj +1482 0 obj +<< +/Length 6818 +>> +stream +0 g 0 G +0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(15:)-310(Data)-250(types)]TJ +/F75 9.9626 Tf 150.705 706.129 Td [(nl)]TJ 0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 165.091 706.129 Tm 
[(Data)-391(allocation:)-596(in)-391(a)-391(generalized)-391(block-r)17(ow)-391(distribution)-391(the)-391(number)-391(of)]TJ 1 0 0 1 175.611 694.174 Tm [(indices)-250(belonging)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -61.878 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(May)-250(be)-250(speci\002ed)-250(together)-250(with)]TJ/F145 9.9626 Tf 272.944 0 Td [(vl)]TJ/F84 9.9626 Tf 10.46 0 Td [(.)]TJ 0 g 0 G +/F75 9.9626 Tf -308.31 -20.135 Td [(repl)]TJ 0 g 0 G -/F51 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ +/F84 9.9626 Tf 1.018 0 0 1 173.948 626.218 Tm [(Data)-246(allocation:)-306(build)-246(a)-247(r)18(eplicated)-246(index)-246(space)-247(\050i.e.)-306(all)-246(pr)17(ocesses)-246(own)-246(all)]TJ 1 0 0 1 175.611 614.263 Tm [(indices\051.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -57.434 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -61.878 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F145 9.9626 Tf 132.133 0 Td [(.true.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F75 9.9626 Tf -157.039 -20.135 Td [(globalcheck)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 59.765 0 Td [(Data)-250(allocation:)-310(do)-250(global)-250(checks)-250(on)-250(the)-250(local)-250(index)-250(lists)]TJ/F145 9.9626 Tf 247.788 0 Td [(vl)]TJ/F84 9.9626 Tf -282.647 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td 
[(.)]TJ -57.434 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -61.878 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value,)-250(default:)]TJ/F145 9.9626 Tf 162.678 0 Td [(.false.)]TJ 0 g 0 G +/F75 9.9626 Tf -187.584 -20.135 Td [(lidx)]TJ 0 g 0 G - 0 -19.603 Td [(x)]TJ +/F84 9.9626 Tf 1.02 0 0 1 173.39 478.351 Tm [(Data)-273(allocation:)-361(the)-274(set)-273(of)-274(local)-273(indices)]TJ/F78 9.9626 Tf 1 0 0 1 349.9 478.351 Tm [(l)-48(i)-32(d)-42(x)]TJ/F192 10.3811 Tf 17.066 0 Td [(\050)]TJ/F84 9.9626 Tf 1.02 0 0 1 371.116 478.351 Tm [(1)]TJ 1 0 0 1 379.623 478.351 Tm [(:)]TJ/F78 9.9626 Tf 6.042 0 Td [(n)-25(l)]TJ/F192 10.3811 Tf 9.106 0 Td [(\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 401.699 478.351 Tm [(to)-274(be)-273(assigned)-274(to)-273(the)]TJ 1 0 0 1 175.611 466.396 Tm [(global)-250(indices)]TJ/F78 9.9626 Tf 63.477 0 Td [(v)-25(l)]TJ/F84 9.9626 Tf 8.423 0 Td [(.)]TJ -71.9 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -61.878 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 174.06 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 
RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ +/F75 9.9626 Tf -24.906 -22.128 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.135 Td [(desc)]TJ ET q -1 0 0 1 280.646 421.777 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.218 376.512 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 283.785 421.578 Td [(T)]TJ +/F75 9.9626 Tf 174.207 376.313 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.137 0 Td [(psb)]TJ ET q -1 0 0 1 289.642 421.777 cm +1 0 0 1 360.068 328.692 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 292.781 421.578 Td [(vect)]TJ +/F145 9.9626 Tf 363.206 328.492 Td [(desc)]TJ ET q -1 0 0 1 314.33 421.777 cm +1 0 0 1 384.755 328.692 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 317.468 421.578 Td [(type)]TJ +/F145 9.9626 Tf 387.893 328.492 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - [(.)]TJ +/F75 9.9626 Tf -258.11 -20.135 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(y)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td 
[(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -22.128 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -20.082 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 175.611 218.327 Tm [(One)-236(of)-236(the)-235(optional)-236(ar)18(guments)]TJ/F145 9.9626 Tf 1 0 0 1 309.412 218.327 Tm [(parts)]TJ/F84 9.9626 Tf 0.98 0 0 1 335.564 218.327 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 340.344 218.327 Tm [(vg)]TJ/F84 9.9626 Tf 0.98 0 0 1 350.805 218.327 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 355.585 218.327 Tm [(vl)]TJ/F84 9.9626 Tf 0.98 0 0 1 366.046 218.327 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 370.827 218.327 Tm [(nl)]TJ/F84 9.9626 Tf 0.98 0 0 1 383.589 218.327 Tm [(or)]TJ/F145 9.9626 Tf 1 0 0 1 395.078 218.327 Tm [(repl)]TJ/F84 9.9626 Tf 0.98 0 0 1 418.302 218.327 Tm [(must)-236(be)-235(speci\002ed,)]TJ 1 0 0 1 175.611 206.371 Tm [(ther)18(eby)-250(choosing)-250(the)-250(initialization)-250(strategy)-250(as)-250(follows:)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -20.135 Td [(parts)]TJ +0 g 0 G +/F84 9.9626 Tf 0.997 0 0 1 203.278 186.236 Tm [(In)-251(this)-251(case)-251(we)-251(have)-251(a)-251(sub)1(r)18(outine)-251(specifying)-251(the)-251(mapping)-251(between)]TJ 1.02 0 0 1 197.529 174.281 Tm [(global)-273(indices)-272(and)-273(pr)18(ocess/local)-273(index)-273(pairs.)-386(If)-273(this)-273(optional)-272(ar)17(gu-)]TJ 0.991 0 0 1 197.529 162.326 Tm [(ment)-252(is)-252(speci\002ed,)-252(then)-252(it)-253(is)-252(mandatory)-252(to)-252(specify)-252(the)-252(ar)18(gument)]TJ/F145 9.9626 Tf 1 0 0 1 472.343 162.326 Tm [(mg)]TJ/F84 9.9626 Tf 0.991 0 0 1 485.293 162.326 Tm [(as)]TJ 1 0 0 1 197.111 150.371 Tm [(well.)-310(The)-250(subr)18(outine)-250(must)-250(conform)-250(to)-250(the)-250(following)-250(interface:)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf 10.879 -18.09 Td [(interface)]TJ 15.691 -11.955 Td [(subroutine)-525(psb_parts\050glob_index,mg,np,pv,nv\051)]TJ +0 g 0 G +/F84 9.9626 Tf 93.898 -29.888 Td [(72)]TJ 0 g 0 G -/F54 9.9626 Tf 
10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 173.89 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 342.398 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 342.199 Td [(T)]TJ ET -q -1 0 0 1 289.642 342.398 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1488 0 obj +<< +/Length 11618 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 292.781 342.199 Td [(vect)]TJ +/F145 9.9626 Tf 183.332 706.129 Td [(integer,)-525(intent)-525(\050in\051)-1050(::)-525(glob_index,np,mg)]TJ 0 -11.955 Td [(integer,)-525(intent)-525(\050out\051)-525(::)-525(nv,)-525(pv\050*\051)]TJ -10.46 -11.955 Td [(end)-525(subroutine)-525(psb_parts)]TJ -15.691 -11.956 Td [(end)-525(interface)]TJ/F84 9.9626 Tf -10.77 -17.586 Td [(The)-250(input)-250(ar)18(guments)-250(ar)18(e:)]TJ +0 g 0 G +/F75 9.9626 Tf 0.309 -15.594 Td [(glob)]TJ ET q -1 0 0 1 314.33 342.398 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 167.801 637.283 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 317.468 342.199 Td [(type)]TJ +/F75 9.9626 Tf 170.789 637.083 Td [(index)]TJ +0 g 0 G +/F84 9.9626 Tf 30.127 0 Td [(The)-250(global)-250(index)-250(to)-250(be)-250(mapped;)]TJ +0 g 0 G +/F75 9.9626 Tf -54.196 -13.774 Td [(np)]TJ +0 g 0 G +/F84 9.9626 Tf 16.847 0 Td [(The)-250(number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(mapping;)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F75 9.9626 
Tf -16.847 -13.774 Td [(mg)]TJ +0 g 0 G +/F84 9.9626 Tf 19.068 0 Td [(The)-250(total)-250(number)-250(of)-250(global)-250(r)18(ows)-250(in)-250(the)-250(mapping;)]TJ -19.377 -15.594 Td [(The)-250(output)-250(ar)18(guments)-250(ar)18(e:)]TJ +0 g 0 G +/F75 9.9626 Tf 0.309 -15.594 Td [(nv)]TJ +0 g 0 G +/F84 9.9626 Tf 16.299 0 Td [(The)-250(number)-250(of)-250(entries)-250(in)]TJ/F145 9.9626 Tf 111.052 0 Td [(pv)]TJ/F84 9.9626 Tf 10.46 0 Td [(;)]TJ +0 g 0 G +/F75 9.9626 Tf -137.811 -13.774 Td [(pv)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 162.939 564.573 Tm [(A)-342(vector)-342(containing)-342(the)-342(indices)-343(of)-342(the)-342(pr)18(ocesses)-342(to)-343(which)-342(the)]TJ 1.02 0 0 1 165.35 552.618 Tm [(global)-309(index)-310(should)-309(be)-310(assigend;)-341(each)-310(entry)-309(must)-310(satisfy)-309(0)]TJ/F190 10.3811 Tf 1 0 0 1 435.412 552.618 Tm [(\024)]TJ/F78 9.9626 Tf -269.39 -11.956 Td [(p)-25(v)]TJ/F192 10.3811 Tf 10.461 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.089 0 Td [(\051)]TJ/F148 10.3811 Tf 8.296 0 Td [(<)]TJ/F78 9.9626 Tf 12.342 0 Td [(n)-80(p)]TJ/F84 9.9626 Tf 1.02 0 0 1 215.856 540.662 Tm [(;)-345(if)]TJ/F78 9.9626 Tf 1 0 0 1 231.538 540.662 Tm [(n)-25(v)]TJ/F148 10.3811 Tf 15.042 0 Td [(>)]TJ/F84 9.9626 Tf 1.02 0 0 1 258.797 540.662 Tm [(1)-312(we)-312(have)-311(an)-312(index)-312(assigned)-312(to)-312(multiple)]TJ 1 0 0 1 165.051 528.707 Tm [(pr)18(ocesses,)-250(i.e.)-310(we)-250(have)-250(an)-250(overlap)-250(among)-250(the)-250(subdomains.)]TJ +0 g 0 G +/F75 9.9626 Tf -40.249 -15.593 Td [(vg)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 140.862 513.114 Tm [(In)-244(this)-244(case)-244(the)-244(association)-244(between)-244(an)-244(index)-244(and)-244(a)-245(pr)19(ocess)-244(is)-244(speci\002ed)]TJ 0.986 0 0 1 146.441 501.158 Tm [(via)-254(an)-254(integer)-253(vector)]TJ/F145 9.9626 Tf 1 0 0 1 237.956 501.158 Tm [(vg\0501:mg\051)]TJ/F84 9.9626 Tf 0.986 0 0 1 279.798 501.158 Tm [(;)-254(each)-254(index)]TJ/F78 9.9626 Tf 1 0 0 
1 333.837 501.158 Tm [(i)]TJ/F190 10.3811 Tf 5.861 0 Td [(2)-290(f)]TJ/F84 9.9626 Tf 0.986 0 0 1 354.949 501.158 Tm [(1)]TJ 1 0 0 1 361.645 501.158 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.201 0 Td [(m)-47(g)]TJ/F190 10.3811 Tf 13.449 0 Td [(g)]TJ/F84 9.9626 Tf 0.986 0 0 1 396.104 501.158 Tm [(is)-254(assigned)]TJ 1.02 0 0 1 146.72 489.203 Tm [(to)-260(pr)18(ocess)]TJ/F78 9.9626 Tf 1 0 0 1 194.476 489.203 Tm [(v)-47(g)]TJ/F192 10.3811 Tf 10.68 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 216.598 489.203 Tm [(.)-349(The)-260(vector)]TJ/F145 9.9626 Tf 1 0 0 1 272.971 489.203 Tm [(vg)]TJ/F84 9.9626 Tf 1.02 0 0 1 286.073 489.203 Tm [(must)-260(be)-260(identical)-260(on)-260(all)-260(calling)-260(pr)18(o-)]TJ 1.02 0 0 1 146.72 477.248 Tm [(cesses;)-427(its)-366(entries)-366(may)-367(have)-366(the)-366(ranges)]TJ/F192 10.3811 Tf 1 0 0 1 329.283 477.248 Tm [(\050)]TJ/F84 9.9626 Tf 1.02 0 0 1 333.433 477.248 Tm [(0)]TJ 1 0 0 1 340.299 477.248 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.2 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.96 0 Td [(\000)]TJ/F84 9.9626 Tf 1.02 0 0 1 378.046 477.248 Tm [(1)]TJ/F192 10.3811 Tf 1 0 0 1 383.252 477.248 Tm [(\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 391.123 477.248 Tm [(or)]TJ/F192 10.3811 Tf 1 0 0 1 404.532 477.248 Tm [(\050)]TJ/F84 9.9626 Tf 1.02 0 0 1 408.681 477.248 Tm [(1)]TJ 1 0 0 1 415.547 477.248 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.2 0 Td [(n)-80(p)]TJ/F192 10.3811 Tf 11.567 0 Td [(\051)]TJ/F84 9.9626 Tf 0.993 0 0 1 146.72 465.293 Tm [(accor)18(ding)-252(to)-253(the)-252(value)-252(of)]TJ/F145 9.9626 Tf 1 0 0 1 257.41 465.293 Tm [(flag)]TJ/F84 9.9626 Tf 0.993 0 0 1 278.331 465.293 Tm [(.)-314(The)-253(size)]TJ/F78 9.9626 Tf 1 0 0 1 322.35 465.293 Tm [(m)-47(g)]TJ/F84 9.9626 Tf 0.993 0 0 1 338.172 465.293 Tm [(may)-252(be)-253(speci\002ed)-252(via)-253(the)]TJ 0.999 0 0 1 146.72 453.338 Tm [(optional)-250(ar)18(gument)]TJ/F145 9.9626 Tf 1 0 0 1 231.215 453.338 Tm 
[(mg)]TJ/F84 9.9626 Tf 0.999 0 0 1 241.676 453.338 Tm [(;)-250(the)-251(default)-250(is)-251(to)-250(use)-250(the)-251(entir)18(e)-250(vector)]TJ/F145 9.9626 Tf 1 0 0 1 408.907 453.338 Tm [(vg)]TJ/F84 9.9626 Tf 0.999 0 0 1 419.367 453.338 Tm [(,)-251(t)1(hus)]TJ 1 0 0 1 146.72 441.383 Tm [(having)]TJ/F145 9.9626 Tf 33.135 0 Td [(mg=size\050vg\051)]TJ/F84 9.9626 Tf 57.534 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -112.587 -15.594 Td [(vl)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 138.64 425.789 Tm [(In)-336(this)-335(case)-336(we)-335(ar)17(e)-335(specifying)-336(the)-335(list)-336(of)-335(indices)]TJ/F145 9.9626 Tf 1 0 0 1 359.058 425.789 Tm [(vl\0501:nl\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 404.311 425.789 Tm [(assigned)]TJ 1.018 0 0 1 146.72 413.834 Tm [(to)-245(the)-246(curr)18(ent)-245(pr)17(ocess;)-245(thus,)-246(th)1(e)-246(global)-245(pr)17(oblem)-245(size)]TJ/F78 9.9626 Tf 1 0 0 1 379.625 413.834 Tm [(m)-47(g)]TJ/F84 9.9626 Tf 1.018 0 0 1 395.438 413.834 Tm [(is)-245(given)-246(by)]TJ 1.02 0 0 1 146.72 401.878 Tm [(the)-255(range)-255(of)-255(the)-255(aggr)18(egate)-255(of)-255(the)-255(individual)-255(vectors)]TJ/F145 9.9626 Tf 1 0 0 1 379.117 401.878 Tm [(vl)]TJ/F84 9.9626 Tf 1.02 0 0 1 392.168 401.878 Tm [(speci\002ed)-255(in)]TJ 1.02 0 0 1 146.72 389.923 Tm [(the)-346(calling)-347(pr)18(ocesses.)-608(The)-347(size)-346(may)-347(be)-346(speci\002ed)-347(via)-346(the)-347(optional)]TJ 1.02 0 0 1 146.72 377.968 Tm [(ar)18(gument)]TJ/F145 9.9626 Tf 1 0 0 1 193.05 377.968 Tm [(nl)]TJ/F84 9.9626 Tf 1.02 0 0 1 203.511 377.968 Tm [(;)-257(the)-253(default)-252(is)-253(to)-253(use)-253(the)-253(entir)18(e)-253(vector)]TJ/F145 9.9626 Tf 1 0 0 1 374.525 377.968 Tm [(vl)]TJ/F84 9.9626 Tf 1.02 0 0 1 384.986 377.968 Tm [(,)-255(thus)-253(having)]TJ/F145 9.9626 Tf 1 0 0 1 146.72 366.013 Tm [(nl=size\050vl\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 204.254 366.013 Tm [(.)-306(If)]TJ/F145 9.9626 Tf 1 0 0 1 218.412 366.013 Tm [(globalcheck=.true.)]TJ/F84 9.9626 Tf 0.98 0 0 1 314.747 366.013 
Tm [(the)-224(subr)18(outine)-224(will)-224(check)-224(how)]TJ 0.98 0 0 1 146.72 354.058 Tm [(many)-208(times)-209(each)-208(entry)-209(in)-208(the)-209(global)-208(index)-209(space)]TJ/F192 10.3811 Tf 1 0 0 1 352.531 354.058 Tm [(\050)]TJ/F84 9.9626 Tf 0.98 0 0 1 356.68 354.058 Tm [(1)]TJ 1 0 0 1 363.347 354.058 Tm [(.)-192(.)-191(.)]TJ/F78 9.9626 Tf 13.2 0 Td [(m)-47(g)]TJ/F192 10.3811 Tf 13.45 0 Td [(\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 396.182 354.058 Tm [(is)-208(speci\002ed)]TJ 0.999 0 0 1 146.72 342.103 Tm [(in)-249(the)-250(input)-249(lists)]TJ/F145 9.9626 Tf 1 0 0 1 220.533 342.103 Tm [(vl)]TJ/F84 9.9626 Tf 0.999 0 0 1 230.994 342.103 Tm [(,)-250(thus)-249(allowing)-249(for)-250(the)-249(pr)18(esence)-249(of)-250(overlap)-249(in)-249(the)]TJ 1.005 0 0 1 146.72 330.147 Tm [(input,)-248(and)-249(checking)-248(for)-249(\223orphan\224)-248(indices.)-308(If)]TJ/F145 9.9626 Tf 1 0 0 1 342.972 330.147 Tm [(globalcheck=.false.)]TJ/F84 9.9626 Tf 1.005 0 0 1 442.349 330.147 Tm [(,)]TJ 1.018 0 0 1 146.72 318.192 Tm [(the)-246(subr)18(outine)-246(will)-247(not)-246(check)-246(for)-246(overlap,)-246(and)-246(may)-246(be)-247(signi\002)1(cantly)]TJ 1.02 0 0 1 146.72 306.237 Tm [(faster)73(,)-256(but)-253(the)-253(user)-253(is)-254(implicitly)-253(guaranteeing)-253(that)-253(ther)17(e)-253(ar)18(e)-253(neither)]TJ 1 0 0 1 146.72 294.282 Tm [(orphan)-250(nor)-250(overlap)-250(indices.)]TJ +0 g 0 G +/F75 9.9626 Tf -21.918 -15.594 Td [(lidx)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 147.178 278.688 Tm [(The)-326(optional)-325(ar)18(gument)]TJ/F145 9.9626 Tf 1 0 0 1 255.3 278.688 Tm [(lidx)]TJ/F84 9.9626 Tf 1.02 0 0 1 279.53 278.688 Tm [(is)-326(available)-325(for)-326(those)-325(cases)-326(in)-325(which)]TJ 1.02 0 0 1 146.72 266.733 Tm [(the)-305(user)-305(has)-305(alr)18(eady)-306(established)-305(a)-305(global-to-local)-305(mapping;)-335(if)-305(it)-305(is)]TJ 1.001 0 0 1 146.72 254.778 Tm [(speci\002ed,)-250(each)-250(index)-250(in)]TJ/F145 9.9626 Tf 1 0 0 1 251.841 254.778 Tm 
[(vl\050i\051)]TJ/F84 9.9626 Tf 1.001 0 0 1 280.486 254.778 Tm [(will)-250(be)-250(mapped)-250(to)-250(the)-250(corr)18(esponding)]TJ 1.02 0 0 1 146.72 242.823 Tm [(local)-275(index)]TJ/F145 9.9626 Tf 1 0 0 1 198.551 242.823 Tm [(lidx\050i\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 235.163 242.823 Tm [(.)-393(When)-275(specifying)-275(the)-274(ar)17(gument)]TJ/F145 9.9626 Tf 1 0 0 1 383.887 242.823 Tm [(lidx)]TJ/F84 9.9626 Tf 1.02 0 0 1 407.601 242.823 Tm [(the)-275(user)]TJ 1.02 0 0 1 146.301 230.868 Tm [(would)-297(also)-297(likely)-297(employ)]TJ/F145 9.9626 Tf 1 0 0 1 264.721 230.868 Tm [(lidx)]TJ/F84 9.9626 Tf 1.02 0 0 1 288.659 230.868 Tm [(in)-297(calls)-297(to)]TJ/F145 9.9626 Tf 1 0 0 1 335.257 230.868 Tm [(psb_cdins)]TJ/F84 9.9626 Tf 1.02 0 0 1 385.346 230.868 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 405.567 230.868 Tm [(local)]TJ/F84 9.9626 Tf 1.02 0 0 1 434.735 230.868 Tm [(in)]TJ 1 0 0 1 146.72 218.912 Tm [(calls)-250(to)]TJ/F145 9.9626 Tf 33.095 0 Td [(psb_spins)]TJ/F84 9.9626 Tf 49.564 0 Td [(and)]TJ/F145 9.9626 Tf 19.357 0 Td [(psb_geins)]TJ/F84 9.9626 Tf 47.073 0 Td [(;)-250(see)-250(also)-250(sec.)]TJ 0 0 1 rg 0 0 1 RG - [-250(2)]TJ + [-250(2.3.1)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 310.84 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 310.641 Td [(a)]TJ +/F75 9.9626 Tf -171.007 -15.593 Td [(nl)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 139.188 203.319 Tm [(If)-247(this)-248(ar)19(gument)-247(is)-248(speci\002ed)-247(alone)-247(\050i.e.)-314(without)]TJ/F145 9.9626 Tf 1 0 0 1 342.277 203.319 Tm [(vl)]TJ/F84 9.9626 Tf 0.98 0 0 1 352.737 203.319 Tm [(\051)-247(the)-248(r)19(esult)-247(is)-248(a)-247(gener)18(-)]TJ 0.98 0 0 1 146.72 191.364 Tm [(alized)-250(r)18(ow-block)-250(distribution)-250(in)-250(which)-250(each)-251(pr)19(ocess)]TJ/F78 9.9626 Tf 1 0 0 1 372.364 191.364 Tm [(I)]TJ/F84 9.9626 Tf 0.98 0 0 1 378.657 191.364 Tm [(gets)-250(assigned)-250(a)]TJ 1 0 0 1 146.72 179.408 Tm 
[(consecutive)-250(chunk)-250(of)]TJ/F78 9.9626 Tf 95.904 0 Td [(N)]TJ/F78 7.5716 Tf 7.85 -1.808 Td [(I)]TJ/F192 10.3811 Tf 6.317 1.808 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-25(l)]TJ/F84 9.9626 Tf 11.472 0 Td [(global)-250(indices.)]TJ +0 g 0 G +/F75 9.9626 Tf -154.547 -15.593 Td [(repl)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 147.736 163.815 Tm [(This)-242(ar)19(guments)-242(speci\002es)-241(to)-242(r)18(eplicate)-241(all)-242(indices)-242(o)1(n)-242(all)-242(pr)19(ocesses.)-312(This)]TJ 0.998 0 0 1 146.72 151.86 Tm [(is)-252(a)-251(special)-252(purpose)-252(data)-252(allocation)-251(that)-252(is)-252(useful)-251(in)-252(the)-252(constr)8(uction)]TJ 1 0 0 1 146.72 139.904 Tm [(of)-250(some)-250(multilevel)-250(pr)18(econditioners.)]TJ +0 g 0 G + -34.371 -19.578 Td [(2.)]TJ +0 g 0 G + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ +0 g 0 G + 154.421 -29.888 Td [(73)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 263.02 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 262.82 Td [(desc)]TJ ET -q -1 0 0 1 298.05 263.02 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1495 0 obj +<< +/Length 2982 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 301.189 262.82 Td [(type)]TJ +/F84 9.9626 Tf 163.158 706.129 Td [(3.)]TJ +0 g 0 G + 1.005 0 0 1 175.611 706.129 Tm [(Calling)-248(the)-249(r)18(outine)-248(with)]TJ/F145 9.9626 Tf 1 0 0 1 
284.117 706.129 Tm [(vg)]TJ/F84 9.9626 Tf 1.005 0 0 1 297.063 706.129 Tm [(or)]TJ/F145 9.9626 Tf 1 0 0 1 308.971 706.129 Tm [(parts)]TJ/F84 9.9626 Tf 1.005 0 0 1 337.608 706.129 Tm [(implies)-248(that)-249(eve)1(ry)-249(pr)18(ocess)-248(will)-248(scan)]TJ 1 0 0 1 175.611 694.174 Tm [(the)-250(entir)18(e)-250(index)-250(space)-250(to)-250(\002gur)18(e)-250(out)-250(the)-250(local)-250(indices.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + -12.453 -19.926 Td [(4.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -19.602 Td [(\003ag)]TJ + [-500(Overlapped)-250(indices)-250(ar)18(e)-250(possible)-250(with)-250(both)]TJ/F145 9.9626 Tf 201.094 0 Td [(parts)]TJ/F84 9.9626 Tf 28.642 0 Td [(and)]TJ/F145 9.9626 Tf 19.357 0 Td [(vl)]TJ/F84 9.9626 Tf 12.952 0 Td [(invocations.)]TJ 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(check)-280(if)-280(any)-280(of)-280(the)]TJ/F52 9.9626 Tf 84.137 0 Td [(y)]TJ/F85 10.3811 Tf 5.23 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)-343(=)]TJ/F54 9.9626 Tf 19.108 0 Td [(0,)-287(and)-280(in)-280(case)-280(r)18(eturns)-280(err)18(or)-280(halting)-280(the)-280(compu-)]TJ -112.449 -11.956 Td [(tation.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F59 9.9626 Tf 132.133 0 Td [(flag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + -262.045 -19.925 Td [(5.)]TJ 0 g 0 G - [(.true.)]TJ + 0.98 0 0 1 175.113 654.323 Tm [(When)-194(the)-194(subr)19(outine)-194(is)-194(invoked)-194(with)]TJ/F145 9.9626 Tf 1 0 0 1 334.791 654.323 Tm [(vl)]TJ/F84 9.9626 Tf 0.98 0 0 1 347.144 654.323 Tm [(in)-194(conjunction)-194(with)]TJ/F145 9.9626 Tf 1 0 0 1 431.769 654.323 Tm [(globalcheck=.true.)]TJ/F84 9.9626 Tf 0.98 0 0 1 525.915 654.323 Tm [(,)]TJ 
1.015 0 0 1 175.611 642.368 Tm [(it)-247(will)-246(perform)-247(a)-246(scan)-247(of)-246(the)-247(index)-247(space)-246(to)-247(sear)18(ch)-247(for)-246(overlap)-247(or)-246(orphan)]TJ 1 0 0 1 175.611 630.413 Tm [(indices.)]TJ 0 g 0 G -/F51 9.9626 Tf -157.04 -19.603 Td [(On)-250(Return)]TJ + -12.453 -19.925 Td [(6.)]TJ 0 g 0 G + 0.98 0 0 1 175.113 610.488 Tm [(When)-194(the)-194(subr)19(outine)-194(is)-194(invoked)-194(with)]TJ/F145 9.9626 Tf 1 0 0 1 334.791 610.488 Tm [(vl)]TJ/F84 9.9626 Tf 0.98 0 0 1 347.144 610.488 Tm [(in)-194(conjunction)-194(with)]TJ/F145 9.9626 Tf 1 0 0 1 431.769 610.488 Tm [(globalcheck=.false.)]TJ/F84 9.9626 Tf 0.98 0 0 1 531.145 610.488 Tm [(,)]TJ 1.02 0 0 1 175.611 598.532 Tm [(no)-295(index)-295(space)-295(scan)-296(wil)1(l)-296(take)-295(place.)-454(Thus)-295(it)-295(is)-295(the)-295(r)17(esponsib)1(ility)-296(of)-295(the)]TJ 1.02 0 0 1 175.611 586.577 Tm [(user)-277(to)-278(make)-277(sur)17(e)-277(that)-277(the)-278(indices)-277(speci\002ed)-278(in)]TJ/F145 9.9626 Tf 1 0 0 1 385.16 586.577 Tm [(vl)]TJ/F84 9.9626 Tf 1.02 0 0 1 398.439 586.577 Tm [(have)-277(neither)-278(orphans)]TJ 1 0 0 1 175.611 574.622 Tm [(nor)-250(overlaps;)-250(if)-250(this)-250(assumption)-250(fails,)-250(r)18(esults)-250(will)-250(be)-250(unpr)18(edictable.)]TJ 0 g 0 G - 0 -19.603 Td [(x)]TJ + -12.453 -19.925 Td [(7.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.849 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -151.111 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ + 1.02 0 0 1 175.611 554.697 Tm [(Orphan)-347(and)-347(overlap)-346(indices)-347(ar)17(e)-346(impossible)-347(by)-347(constr)8(uction)-347(when)-347(the)]TJ 1 0 0 1 175.611 542.742 Tm 
[(subr)18(outine)-250(is)-250(invoked)-250(with)]TJ/F145 9.9626 Tf 121.164 0 Td [(nl)]TJ/F84 9.9626 Tf 12.952 0 Td [(\050alone\051,)-250(or)]TJ/F145 9.9626 Tf 47.372 0 Td [(vg)]TJ/F84 9.9626 Tf 10.46 0 Td [(.)]TJ 0 g 0 G - 85.819 -29.888 Td [(55)]TJ + -49.98 -452.304 Td [(74)]TJ 0 g 0 G ET endstream endobj -1122 0 obj +1383 0 obj << /Type /ObjStm /N 100 -/First 989 -/Length 12429 ->> -stream -252 0 1120 57 1116 114 1124 234 1126 352 1127 411 1128 470 1129 529 1130 588 1131 647 -1132 706 1123 765 1137 872 1133 1029 1134 1173 1135 1319 1139 1466 256 1524 1140 1581 1136 1639 -1144 1772 1141 1920 1142 2065 1146 2212 260 2271 1147 2329 1143 2387 1151 2507 1148 2655 1149 2800 -1153 2947 264 3005 1155 3062 1150 3119 1162 3253 1156 3419 1157 3566 1158 3711 1159 3853 1164 3999 -268 4058 1165 4116 1166 4174 1167 4233 1168 4292 1161 4351 1177 4498 1160 4700 1169 4847 1170 4991 -1171 5137 1172 5284 1173 5435 1174 5586 1175 5737 1179 5883 1176 5941 1184 6075 1181 6214 1186 6359 -272 6418 1187 6476 1183 6535 1195 6682 1182 6875 1188 7023 1189 7167 1190 7314 1191 7461 1192 7604 -1193 7751 1197 7896 1194 7954 1201 8088 1198 8236 1199 8383 1203 8530 1200 8589 1212 8709 1204 8902 -1205 9046 1206 9191 1207 9335 1208 9480 1209 9627 1210 9771 1214 9918 276 9976 1215 10033 1211 10091 -1217 10224 1219 10342 1216 10401 1228 10482 1220 10657 1221 10801 1222 10946 1223 11090 1224 11235 1230 11382 -% 252 0 obj +/First 992 +/Length 12161 +>> +stream +1382 0 292 58 1379 115 1389 196 1384 353 1385 497 1386 644 1391 791 296 850 1392 908 +1388 967 1396 1104 1401 1252 1402 1379 1403 1422 1404 1629 1405 1867 1406 2143 1387 2379 1394 2526 +1398 2672 1399 2730 1395 2788 1410 2925 1412 3043 1409 3102 1417 3170 1413 3327 1414 3471 1415 3616 +1419 3763 300 3821 1420 3878 1416 3936 1426 4072 1421 4229 1423 4376 1424 4521 1428 4667 1429 4726 +1430 4785 1431 4844 1425 4903 1434 5011 1436 5129 1433 5187 1438 5255 1441 5373 1442 5500 1443 5543 +1444 5750 1445 5988 1446 6264 1440 6500 1432 6559 1437 6618 1453 
6715 1449 6872 1450 7013 1451 7160 +1455 7307 304 7365 1456 7422 1452 7480 1459 7616 1461 7734 1458 7793 1465 7888 1462 8027 1467 8174 +308 8232 1468 8289 1464 8347 1472 8483 1463 8640 1469 8784 1470 8928 1474 9074 1471 9133 1476 9241 +1478 9359 312 9417 316 9474 1475 9530 1481 9666 1479 9805 1483 9952 1484 10011 1480 10070 1487 10192 +1485 10331 1489 10489 1491 10547 1486 10605 1494 10756 1496 10874 1497 10933 1498 10992 1499 11051 1500 11110 +% 1382 0 obj << -/D [1117 0 R /XYZ 99.895 716.092 null] +/D [1380 0 R /XYZ 98.895 753.953 null] >> -% 1120 0 obj +% 292 0 obj << -/D [1117 0 R /XYZ 99.895 504.73 null] +/D [1380 0 R /XYZ 99.895 716.092 null] >> -% 1116 0 obj +% 1379 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1124 0 obj +% 1389 0 obj << /Type /Page -/Contents 1125 0 R -/Resources 1123 0 R +/Contents 1390 0 R +/Resources 1388 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R +/Parent 1351 0 R +/Annots [ 1384 0 R 1385 0 R 1386 0 R ] >> -% 1126 0 obj -<< -/D [1124 0 R /XYZ 149.705 753.953 null] ->> -% 1127 0 obj +% 1384 0 obj << -/D [1124 0 R /XYZ 150.705 564.444 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 344.818 495.412 356.877] +/A << /S /GoTo /D (vdata) >> >> -% 1128 0 obj +% 1385 0 obj << -/D [1124 0 R /XYZ 150.705 504.067 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [377.462 332.863 389.417 344.922] +/A << /S /GoTo /D (table.17) >> >> -% 1129 0 obj +% 1386 0 obj << -/D [1124 0 R /XYZ 175.611 506.876 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 264.733 409.811 276.793] +/A << /S /GoTo /D (descdata) >> >> -% 1130 0 obj +% 1391 0 obj << -/D [1124 0 R /XYZ 175.611 494.921 null] +/D [1389 0 R /XYZ 149.705 753.953 null] >> -% 1131 0 obj +% 296 0 obj << -/D [1124 0 R /XYZ 175.611 482.966 null] +/D [1389 0 R /XYZ 150.705 716.092 null] >> -% 1132 0 obj 
+% 1392 0 obj << -/D [1124 0 R /XYZ 175.611 471.011 null] +/D [1389 0 R /XYZ 150.705 513.636 null] >> -% 1123 0 obj +% 1388 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F147 1157 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1137 0 obj +% 1396 0 obj << /Type /Page -/Contents 1138 0 R -/Resources 1136 0 R +/Contents 1397 0 R +/Resources 1395 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1133 0 R 1134 0 R 1135 0 R ] +/Parent 1400 0 R +/Annots [ 1387 0 R 1394 0 R ] >> -% 1133 0 obj +% 1401 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 416.057 444.603 428.117] -/A << /S /GoTo /D (vdata) >> +/Producer (GPL Ghostscript 9.22) +/CreationDate (D:20180323100645Z00'00') +/ModDate (D:20180323100645Z00'00') +>> +% 1402 0 obj +<< +/Type /ExtGState +/OPM 1 +>> +% 1403 0 obj +<< +/BaseFont /XYUGDR+Times-Roman +/FontDescriptor 1405 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 500 500 500 500 500] +/Encoding /WinAnsiEncoding +/Subtype /Type1 +>> +% 1404 0 obj +<< +/BaseFont /XISTAL+Times-Bold +/FontDescriptor 1406 0 R +/Type /Font +/FirstChar 48 +/LastChar 80 +/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] +/Encoding /WinAnsiEncoding +/Subtype /Type1 +>> +% 1405 0 obj +<< +/Type /FontDescriptor +/FontName /XYUGDR+Times-Roman +/FontBBox [ 0 -14 476 688] +/Flags 65568 +/Ascent 688 +/CapHeight 688 +/Descent -14 +/ItalicAngle 0 +/StemV 71 +/MissingWidth 250 +/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) +/FontFile3 1407 0 R +>> +% 1406 0 obj +<< +/Type /FontDescriptor +/FontName /XISTAL+Times-Bold +/FontBBox [ 0 -13 600 688] +/Flags 65568 +/Ascent 688 +/CapHeight 676 +/Descent -13 +/ItalicAngle 0 +/StemV 90 +/MissingWidth 250 +/CharSet (/P/one/zero) +/FontFile3 1408 0 R >> -% 1134 0 obj +% 1387 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 
0 0] -/Rect [326.652 404.102 333.626 416.161] -/A << /S /GoTo /D (table.9) >> +/Rect [160.836 625.272 172.792 634.682] +/A << /S /GoTo /D (table.17) >> >> -% 1135 0 obj +% 1394 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 336.356 323.106 348.415] -/A << /S /GoTo /D (descdata) >> +/Rect [408.91 276.439 415.983 290.202] +/A << /S /GoTo /D (figure.3) >> >> -% 1139 0 obj +% 1398 0 obj << -/D [1137 0 R /XYZ 98.895 753.953 null] +/D [1396 0 R /XYZ 98.895 753.953 null] >> -% 256 0 obj +% 1399 0 obj << -/D [1137 0 R /XYZ 99.895 716.092 null] +/D [1396 0 R /XYZ 99.895 326.444 null] >> -% 1140 0 obj +% 1395 0 obj << -/D [1137 0 R /XYZ 99.895 560.219 null] +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F145 940 0 R >> +/XObject << /Im4 1393 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1136 0 obj +% 1410 0 obj +<< +/Type /Page +/Contents 1411 0 R +/Resources 1409 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1400 0 R +>> +% 1412 0 obj +<< +/D [1410 0 R /XYZ 149.705 753.953 null] +>> +% 1409 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1144 0 obj +% 1417 0 obj << /Type /Page -/Contents 1145 0 R -/Resources 1143 0 R +/Contents 1418 0 R +/Resources 1416 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1141 0 R 1142 0 R ] +/Parent 1400 0 R +/Annots [ 1413 0 R 1414 0 R 1415 0 R ] >> -% 1141 0 obj +% 1413 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 340.341 384.376 352.401] -/A << /S /GoTo /D (spdata) >> +/Rect [368.549 345.485 444.603 357.545] +/A << /S /GoTo /D (vdata) >> >> -% 1142 0 obj +% 1414 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 272.595 373.916 284.655] +/Rect [326.652 333.53 338.608 345.59] +/A << /S /GoTo /D (table.18) >> +>> +% 1415 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 266.146 359.001 278.205] 
/A << /S /GoTo /D (descdata) >> >> -% 1146 0 obj +% 1419 0 obj << -/D [1144 0 R /XYZ 149.705 753.953 null] +/D [1417 0 R /XYZ 98.895 753.953 null] >> -% 260 0 obj +% 300 0 obj << -/D [1144 0 R /XYZ 150.705 716.092 null] +/D [1417 0 R /XYZ 99.895 716.092 null] >> -% 1147 0 obj +% 1420 0 obj << -/D [1144 0 R /XYZ 150.705 517.78 null] +/D [1417 0 R /XYZ 99.895 510.975 null] >> -% 1143 0 obj +% 1416 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F145 940 0 R /F192 942 0 R >> /ProcSet [ /PDF /Text ] >> -% 1151 0 obj +% 1426 0 obj << /Type /Page -/Contents 1152 0 R -/Resources 1150 0 R +/Contents 1427 0 R +/Resources 1425 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1148 0 R 1149 0 R ] +/Parent 1400 0 R +/Annots [ 1421 0 R 1423 0 R 1424 0 R ] >> -% 1148 0 obj +% 1421 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 340.341 333.567 352.401] -/A << /S /GoTo /D (spdata) >> +/Rect [253.329 554.876 265.284 566.936] +/A << /S /GoTo /D (table.18) >> +>> +% 1423 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [459.716 325.46 466.79 339.127] +/A << /S /GoTo /D (figure.4) >> >> -% 1149 0 obj +% 1424 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 272.595 323.106 284.655] -/A << /S /GoTo /D (descdata) >> +/Rect [357.03 301.825 364.103 313.885] +/A << /S /GoTo /D (figure.3) >> >> -% 1153 0 obj +% 1428 0 obj << -/D [1151 0 R /XYZ 98.895 753.953 null] +/D [1426 0 R /XYZ 149.705 753.953 null] >> -% 264 0 obj +% 1429 0 obj << -/D [1151 0 R /XYZ 99.895 716.092 null] +/D [1426 0 R /XYZ 150.705 464.818 null] >> -% 1155 0 obj +% 1430 0 obj << -/D [1151 0 R /XYZ 99.895 517.78 null] +/D [1426 0 R /XYZ 150.705 430.343 null] >> -% 1150 0 obj +% 1431 0 obj +<< +/D [1426 0 R /XYZ 150.705 386.508 null] +>> +% 1425 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R 
/F96 1154 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1162 0 obj +% 1434 0 obj << /Type /Page -/Contents 1163 0 R -/Resources 1161 0 R +/Contents 1435 0 R +/Resources 1433 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1156 0 R 1157 0 R 1158 0 R 1159 0 R ] +/Parent 1400 0 R >> -% 1156 0 obj +% 1436 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 277.323 440.924 289.383] -/A << /S /GoTo /D (table.12) >> +/D [1434 0 R /XYZ 98.895 753.953 null] >> -% 1157 0 obj +% 1433 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 208.877 384.376 220.936] -/A << /S /GoTo /D (spdata) >> +/Font << /F84 687 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1158 0 obj +% 1438 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 140.43 495.412 152.49] -/A << /S /GoTo /D (vdata) >> +/Type /Page +/Contents 1439 0 R +/Resources 1437 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1400 0 R >> -% 1159 0 obj +% 1441 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [379.43 128.475 391.385 140.535] -/A << /S /GoTo /D (table.12) >> +/Producer (GPL Ghostscript 9.22) +/CreationDate (D:20180323100658Z00'00') +/ModDate (D:20180323100658Z00'00') >> -% 1164 0 obj +% 1442 0 obj << -/D [1162 0 R /XYZ 149.705 753.953 null] +/Type /ExtGState +/OPM 1 >> -% 268 0 obj +% 1443 0 obj +<< +/BaseFont /XYUGDR+Times-Roman +/FontDescriptor 1445 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 500 500 500 500 500] +/Encoding /WinAnsiEncoding +/Subtype /Type1 +>> +% 1444 0 obj << -/D [1162 0 R /XYZ 150.705 716.092 null] +/BaseFont /XISTAL+Times-Bold +/FontDescriptor 1446 0 R +/Type /Font +/FirstChar 48 +/LastChar 80 +/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 1165 0 obj +% 1445 0 obj << -/D [1162 0 R 
/XYZ 290.728 674.17 null] +/Type /FontDescriptor +/FontName /XYUGDR+Times-Roman +/FontBBox [ 0 -14 476 688] +/Flags 65568 +/Ascent 688 +/CapHeight 688 +/Descent -14 +/ItalicAngle 0 +/StemV 71 +/MissingWidth 250 +/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) +/FontFile3 1447 0 R >> -% 1166 0 obj +% 1446 0 obj << -/D [1162 0 R /XYZ 287.931 654.041 null] +/Type /FontDescriptor +/FontName /XISTAL+Times-Bold +/FontBBox [ 0 -13 600 688] +/Flags 65568 +/Ascent 688 +/CapHeight 676 +/Descent -13 +/ItalicAngle 0 +/StemV 90 +/MissingWidth 250 +/CharSet (/P/one/zero) +/FontFile3 1448 0 R >> -% 1167 0 obj +% 1440 0 obj << -/D [1162 0 R /XYZ 287.193 633.911 null] +/D [1438 0 R /XYZ 149.705 753.953 null] >> -% 1168 0 obj +% 1432 0 obj << -/D [1162 0 R /XYZ 150.705 447.252 null] +/D [1438 0 R /XYZ 150.705 282.918 null] >> -% 1161 0 obj +% 1437 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F84 687 0 R >> +/XObject << /Im5 1422 0 R >> /ProcSet [ /PDF /Text ] >> -% 1177 0 obj +% 1453 0 obj << /Type /Page -/Contents 1178 0 R -/Resources 1176 0 R +/Contents 1454 0 R +/Resources 1452 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1160 0 R 1169 0 R 1170 0 R 1171 0 R 1172 0 R 1173 0 R 1174 0 R 1175 0 R ] ->> -% 1160 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.159 654.503 390.114 666.562] -/A << /S /GoTo /D (table.12) >> +/Parent 1457 0 R +/Annots [ 1449 0 R 1450 0 R 1451 0 R ] >> -% 1169 0 obj +% 1449 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 588.085 444.603 600.145] +/Rect [368.549 298.8 444.603 310.86] /A << /S /GoTo /D (vdata) >> >> -% 1170 0 obj +% 1450 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [328.746 576.13 340.701 588.189] -/A << /S /GoTo /D (table.12) >> +/Rect [204.522 289.495 216.477 298.905] +/A << /S /GoTo /D (table.19) >> >> -% 1171 0 obj +% 1451 0 obj << /Type 
/Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 497.757 323.106 509.817] +/Rect [291.943 218.115 359.001 230.175] /A << /S /GoTo /D (descdata) >> >> -% 1172 0 obj +% 1455 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [338.139 460.563 345.113 472.623] -/A << /S /GoTo /D (equation.4.1) >> +/D [1453 0 R /XYZ 98.895 753.953 null] >> -% 1173 0 obj +% 304 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [336.486 445.951 343.459 458.011] -/A << /S /GoTo /D (equation.4.2) >> +/D [1453 0 R /XYZ 99.895 716.092 null] >> -% 1174 0 obj +% 1456 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [337.034 431.339 344.007 443.399] -/A << /S /GoTo /D (equation.4.3) >> +/D [1453 0 R /XYZ 99.895 460.417 null] >> -% 1175 0 obj +% 1452 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [202.52 189.579 214.475 201.639] -/A << /S /GoTo /D (table.12) >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F192 942 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1179 0 obj +% 1459 0 obj << -/D [1177 0 R /XYZ 98.895 753.953 null] +/Type /Page +/Contents 1460 0 R +/Resources 1458 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1457 0 R >> -% 1176 0 obj +% 1461 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F60 1027 0 R /F52 585 0 R /F59 812 0 R /F85 814 0 R >> +/D [1459 0 R /XYZ 149.705 753.953 null] +>> +% 1458 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1184 0 obj +% 1465 0 obj << /Type /Page -/Contents 1185 0 R -/Resources 1183 0 R +/Contents 1466 0 R +/Resources 1464 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1181 0 R ] +/Parent 1457 0 R +/Annots [ 1462 0 R ] >> -% 1181 0 obj +% 1462 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 116.52 440.924 128.58] -/A << /S /GoTo /D (table.13) >> +/Rect [291.943 211.064 359.001 223.124] +/A << /S /GoTo /D (descdata) >> 
>> -% 1186 0 obj +% 1467 0 obj << -/D [1184 0 R /XYZ 149.705 753.953 null] +/D [1465 0 R /XYZ 98.895 753.953 null] >> -% 272 0 obj +% 308 0 obj << -/D [1184 0 R /XYZ 150.705 716.092 null] +/D [1465 0 R /XYZ 99.895 716.092 null] >> -% 1187 0 obj +% 1468 0 obj << -/D [1184 0 R /XYZ 150.705 268.704 null] +/D [1465 0 R /XYZ 99.895 449.977 null] >> -% 1183 0 obj +% 1464 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F192 942 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1195 0 obj +% 1472 0 obj << /Type /Page -/Contents 1196 0 R -/Resources 1194 0 R +/Contents 1473 0 R +/Resources 1471 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1182 0 R 1188 0 R 1189 0 R 1190 0 R 1191 0 R 1192 0 R 1193 0 R ] ->> -% 1182 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [305.144 654.503 312.117 666.562] -/A << /S /GoTo /D (section.3) >> +/Parent 1457 0 R +/Annots [ 1463 0 R 1469 0 R 1470 0 R ] >> -% 1188 0 obj +% 1463 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 586.032 444.603 598.092] +/Rect [338.319 642.547 414.374 654.607] /A << /S /GoTo /D (vdata) >> >> -% 1189 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [328.621 574.077 340.576 586.136] -/A << /S /GoTo /D (table.13) >> ->> -% 1190 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.159 493.651 390.114 505.711] -/A << /S /GoTo /D (table.13) >> ->> -% 1191 0 obj +% 1469 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 425.181 444.603 437.24] +/Rect [174.615 542.921 250.669 554.981] /A << /S /GoTo /D (vdata) >> >> -% 1192 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [328.746 413.225 340.701 425.285] -/A << /S /GoTo /D (table.13) >> ->> -% 1193 0 obj +% 1470 0 obj << /Type /Annot /Subtype /Link 
/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 332.8 323.106 344.859] -/A << /S /GoTo /D (descdata) >> +/Rect [472.325 542.921 484.28 554.981] +/A << /S /GoTo /D (table.20) >> >> -% 1197 0 obj +% 1474 0 obj << -/D [1195 0 R /XYZ 98.895 753.953 null] +/D [1472 0 R /XYZ 149.705 753.953 null] >> -% 1194 0 obj +% 1471 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R /F60 1027 0 R /F85 814 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1201 0 obj +% 1476 0 obj << /Type /Page -/Contents 1202 0 R -/Resources 1200 0 R +/Contents 1477 0 R +/Resources 1475 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1198 0 R 1199 0 R ] +/Parent 1457 0 R >> -% 1198 0 obj +% 1478 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [211.646 410.079 223.601 419.489] -/A << /S /GoTo /D (table.13) >> +/D [1476 0 R /XYZ 98.895 753.953 null] >> -% 1199 0 obj +% 312 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [253.329 228.102 265.284 240.161] -/A << /S /GoTo /D (table.13) >> +/D [1476 0 R /XYZ 99.895 716.092 null] >> -% 1203 0 obj +% 316 0 obj << -/D [1201 0 R /XYZ 149.705 753.953 null] +/D [1476 0 R /XYZ 99.895 691.48 null] >> -% 1200 0 obj +% 1475 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F52 585 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R /F192 942 0 R >> /ProcSet [ /PDF /Text ] >> -% 1212 0 obj +% 1481 0 obj << /Type /Page -/Contents 1213 0 R -/Resources 1211 0 R +/Contents 1482 0 R +/Resources 1480 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1204 0 R 1205 0 R 1206 0 R 1207 0 R 1208 0 R 1209 0 R 1210 0 R ] +/Parent 1457 0 R +/Annots [ 1479 0 R ] >> -% 1204 0 obj +% 1479 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 417.772 339.385 429.832] -/A << /S /GoTo /D (vdata) >> +/Rect [342.753 324.687 409.811 336.746] +/A << /S /GoTo /D 
(descdata) >> >> -% 1205 0 obj +% 1483 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 405.817 231.53 417.877] -/A << /S /GoTo /D (table.2) >> +/D [1481 0 R /XYZ 149.705 753.953 null] >> -% 1206 0 obj +% 1484 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 338.393 339.385 350.453] -/A << /S /GoTo /D (vdata) >> +/D [1481 0 R /XYZ 150.705 234.157 null] >> -% 1207 0 obj +% 1480 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 326.438 231.53 338.498] -/A << /S /GoTo /D (table.2) >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R /F192 942 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1487 0 obj +<< +/Type /Page +/Contents 1488 0 R +/Resources 1486 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1492 0 R +/Annots [ 1485 0 R ] >> -% 1208 0 obj +% 1485 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 259.015 323.106 271.074] -/A << /S /GoTo /D (descdata) >> +/Rect [354.489 215.702 376.407 227.166] +/A << /S /GoTo /D (subsubsection.2.3.1) >> >> -% 1209 0 obj +% 1489 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 171.988 339.385 184.048] -/A << /S /GoTo /D (vdata) >> +/D [1487 0 R /XYZ 98.895 753.953 null] >> -% 1210 0 obj +% 1491 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [242.868 160.033 254.823 172.093] -/A << /S /GoTo /D (table.14) >> +/D [1487 0 R /XYZ 99.895 133.283 null] >> -% 1214 0 obj +% 1486 0 obj << -/D [1212 0 R /XYZ 98.895 753.953 null] +/Font << /F145 940 0 R /F84 687 0 R /F75 685 0 R /F190 941 0 R /F78 686 0 R /F192 942 0 R /F148 1490 0 R >> +/ProcSet [ /PDF /Text ] >> -% 276 0 obj +% 1494 0 obj << -/D [1212 0 R /XYZ 99.895 716.092 null] +/Type /Page +/Contents 1495 0 R +/Resources 1493 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1492 0 R >> -% 1215 0 obj +% 1496 0 obj << -/D [1212 0 R /XYZ 99.895 560.161 null] +/D [1494 0 R /XYZ 149.705 753.953 
null] >> -% 1211 0 obj +% 1497 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [1494 0 R /XYZ 150.705 716.092 null] >> -% 1217 0 obj +% 1498 0 obj << -/Type /Page -/Contents 1218 0 R -/Resources 1216 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R +/D [1494 0 R /XYZ 150.705 687.379 null] >> -% 1219 0 obj +% 1499 0 obj << -/D [1217 0 R /XYZ 149.705 753.953 null] +/D [1494 0 R /XYZ 150.705 667.454 null] >> -% 1216 0 obj +% 1500 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> -/ProcSet [ /PDF /Text ] +/D [1494 0 R /XYZ 150.705 626.268 null] >> -% 1228 0 obj + +endstream +endobj +1506 0 obj << -/Type /Page -/Contents 1229 0 R -/Resources 1227 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1220 0 R 1221 0 R 1222 0 R 1223 0 R 1224 0 R ] +/Length 7189 >> -% 1220 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(6.2)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 151.016 706.129 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(routine)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_cdins\050nz,)-525(ia,)-525(ja,)-525(desc_a,)-525(info)-525([,ila,jla]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdins\050nz,ja,desc,info[,jla,mask,lidx]\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 114.839 654.844 Tm [(This)-292(subr)18(outine)-292(examine)1(s)-292(the)-292(edges)-291(of)-292(the)-291(graph)-292(associated)-292(with)-291(the)-292(dis-)]TJ 1.02 0 0 1 99.895 642.889 Tm [(cr)18(etization)-343(mesh)-343(\050and)-343(isomorphic)-342(to)-343(the)-343(sparsity)-343(pattern)-342(of)-343(a)-343(linear)-343(system)]TJ 0.98 0 0 1 99.895 630.934 Tm [(coef)18(\002cient)-226(matrix\051,)-232(storing)-226(them)-226(as)-227(necessary)-226(into)-226(the)-226(communication)-226(descriptor)75(.)]TJ 0.98 0 0 1 99.895 618.979 Tm 
[(In)-225(the)-225(\002rst)-225(form)-225(the)-226(edge)1(s)-226(ar)19(e)-225(speci\002ed)-225(as)-226(pairs)-225(of)-225(indices)]TJ/F78 9.9626 Tf 1 0 0 1 346.727 618.979 Tm [(i)-47(a)]TJ/F192 10.3811 Tf 7.91 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 366.079 618.979 Tm [(,)]TJ/F78 9.9626 Tf 1 0 0 1 370.653 618.979 Tm [(j)-40(a)]TJ/F192 10.3811 Tf 7.841 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 389.936 618.979 Tm [(;)-235(the)-225(starting)]TJ 1.02 0 0 1 99.895 607.023 Tm [(index)]TJ/F78 9.9626 Tf 1 0 0 1 127.77 607.023 Tm [(i)-47(a)]TJ/F192 10.3811 Tf 7.91 0 Td [(\050)]TJ/F78 9.9626 Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 149.752 607.023 Tm [(should)-259(belong)-258(to)-259(the)-259(curr)18(ent)-259(pr)18(ocess.)-345(In)-259(the)-258(second)-259(form)-259(only)-258(the)]TJ 1 0 0 1 99.895 595.068 Tm [(r)18(emote)-250(indices)]TJ/F78 9.9626 Tf 67.342 0 Td [(j)-40(a)]TJ/F192 10.3811 Tf 7.84 0 Td [(\050)]TJ/F78 9.9626 Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 6.64 0 Td [(ar)18(e)-250(speci\002ed.)]TJ +0 g 0 G +/F75 9.9626 Tf -89.115 -20.366 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -19.304 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.305 Td [(nz)]TJ +0 g 0 G +/F84 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(points)-250(being)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.305 Td 
[(ia)]TJ +0 g 0 G +/F84 9.9626 Tf 13.281 0 Td [(the)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F78 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F84 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -207.221 -19.304 Td [(ja)]TJ +0 g 0 G +/F84 9.9626 Tf 13.281 0 Td [(the)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F78 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F84 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -207.221 -19.304 Td [(mask)]TJ +0 g 0 G +/F84 9.9626 Tf 29.33 0 Td [(Mask)-250(entries)-250(in)]TJ/F145 9.9626 Tf 69.983 0 Td [(ja)]TJ/F84 9.9626 Tf 10.461 0 Td [(,)-250(they)-250(ar)18(e)-250(inserted)-249(only)-250(when)-250(the)-250(corr)18(esponding)]TJ/F145 9.9626 Tf 213.278 0 Td [(mask)]TJ/F84 9.9626 Tf -298.145 -11.955 Td [(entries)-250(ar)18(e)]TJ/F145 9.9626 Tf 48.139 0 Td [(.true.)]TJ/F84 9.9626 Tf -48.139 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 
-11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(array)-250(of)-250(length)]TJ/F78 9.9626 Tf 164.297 0 Td [(n)-25(z)]TJ/F84 9.9626 Tf 10.336 0 Td [(,)-250(default)]TJ/F145 9.9626 Tf 38.784 0 Td [(.true.)]TJ/F84 9.9626 Tf 31.382 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -269.706 -19.305 Td [(lidx)]TJ +0 g 0 G +/F84 9.9626 Tf 22.685 0 Td [(User)-250(de\002ned)-250(local)-250(indices)-250(for)]TJ/F145 9.9626 Tf 131.117 0 Td [(ja)]TJ/F84 9.9626 Tf 10.461 0 Td [(.)]TJ -139.356 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F78 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F84 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -207.221 -20.366 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.305 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 168.346 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.397 168.146 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(updated)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 120.525 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 120.326 Td [(desc)]TJ +ET +q 
+1 0 0 1 333.945 120.525 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 337.084 120.326 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G + -91.235 -29.888 Td [(75)]TJ +0 g 0 G +ET + +endstream +endobj +1511 0 obj +<< +/Length 3083 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 150.705 706.129 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.518 -19.925 Td [(ila)]TJ +0 g 0 G +/F84 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.965 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F78 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F84 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -207.221 -19.925 Td [(jla)]TJ +0 g 0 G +/F84 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.965 -11.956 Td [(Intent:)]TJ/F75 
9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F78 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F84 9.9626 Tf 10.336 0 Td [(.)]TJ/F75 11.9552 Tf -207.221 -21.918 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-469(This)-250(r)18(outine)-250(may)-250(only)-250(be)-250(called)-250(if)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state;)]TJ +0 g 0 G + 0 -19.925 Td [(2.)]TJ +0 g 0 G + 0.997 0 0 1 175.303 461.048 Tm [(This)-250(r)18(outine)-250(automatically)-249(ignor)18(es)-250(edges)-250(that)-250(do)-250(not)-250(insist)-250(on)-250(the)-250(curr)19(ent)]TJ 0.98 0 0 1 175.313 449.093 Tm [(pr)18(ocess,)-249(i.e.)-314(edges)-248(for)-248(which)-248(neither)-248(the)-248(starting)-248(nor)-248(the)-248(end)-248(vertex)-248(belong)]TJ 1 0 0 1 175.611 437.138 Tm [(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ +0 g 0 G + -12.453 -19.926 Td [(3.)]TJ +0 g 0 G + 1.02 0 0 1 175.303 417.212 Tm [(The)-286(second)-286(form)-287(of)-286(this)-286(r)18(outine)-286(will)-287(be)-286(useful)-286(when)-286(dealing)-286(with)-286(user)17(-)]TJ 1 0 0 1 175.611 405.257 Tm [(speci\002ed)-250(index)-250(mappings;)-250(see)-250(also)]TJ +0 0 1 rg 0 0 1 RG + [-250(2.3.1)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G + 141.968 -314.819 Td [(76)]TJ +0 g 0 G +ET + +endstream +endobj +1520 0 obj +<< +/Length 6186 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(6.3)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 151.016 706.129 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(routine)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_cdasb\050desc_a,)-525(info)-525([,)-525(mold]\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 
9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 625.596 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.397 625.397 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 577.576 Td [(desc)]TJ +ET +q +1 0 0 1 333.945 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 337.084 577.576 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -19.925 Td [(mold)]TJ +0 g 0 G +/F84 9.9626 Tf 28.473 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(index)-250(storage.)]TJ -3.566 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 509.83 Tm [(Speci\002ed)-212(as:)-295(a)-212(object)-212(of)-212(type)-213(der)1(ived)-213(fr)19(om)-212(\050integer\051)]TJ/F145 9.9626 Tf 1 0 0 1 344.16 509.83 Tm [(psb)]TJ +ET +q +1 0 0 1 360.479 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.617 509.83 Td [(T)]TJ +ET +q +1 0 0 1 369.475 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 
372.613 509.83 Td [(base)]TJ +ET +q +1 0 0 1 394.162 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 397.301 509.83 Td [(vect)]TJ +ET +q +1 0 0 1 418.849 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 421.988 509.83 Td [(type)]TJ/F84 9.9626 Tf 0.98 0 0 1 442.909 509.83 Tm [(.)]TJ +0 g 0 G +/F75 9.9626 Tf 1 0 0 1 99.895 487.912 Tm [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 468.186 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.397 467.987 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 420.366 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 420.166 Td [(desc)]TJ +ET +q +1 0 0 1 333.945 420.366 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 337.084 420.166 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -19.925 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.917 Td [(Notes)]TJ +0 g 0 
G +/F84 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ +0 g 0 G + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(assembled)-250(state.)]TJ 1.017 0 0 1 99.587 290.652 Tm [(This)-246(call)-245(will)-246(set)-246(up)-245(all)-246(the)-246(necessary)-245(information)-246(for)-246(the)-246(halo)-245(data)-246(exchanges.)]TJ 1.02 0 0 1 99.895 278.697 Tm [(In)-289(doing)-290(so,)-300(the)-289(library)-289(will)-289(need)-290(to)-289(identify)-289(the)-289(set)-290(of)-289(pr)18(ocesses)-289(owning)-290(the)]TJ 1.02 0 0 1 99.895 266.742 Tm [(halo)-332(indices)-332(thr)18(ough)-332(the)-331(use)-332(of)-332(the)]TJ/F145 9.9626 Tf 1 0 0 1 263.448 266.742 Tm [(desc%fnd_owner\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 350.505 266.742 Tm [(method;)-375(the)-332(owning)]TJ 1.02 0 0 1 99.596 254.786 Tm [(pr)18(ocesses)-361(ar)18(e)-361(the)-361(topological)-360(neighbours)-361(of)-361(the)-361(calling)-360(pr)17(ocess.)-650(If)-361(the)-361(user)]TJ 1.007 0 0 1 99.895 242.831 Tm [(has)-249(some)-249(backgr)18(ound)-249(information)-249(on)-249(the)-249(pr)18(ocesses)-249(that)-249(ar)18(e)-249(neighbours)-249(of)-249(the)]TJ 0.989 0 0 1 99.895 230.876 Tm [(curr)18(ent)-253(one,)-253(it)-253(is)-253(possible)-253(to)-253(specify)-253(explicitly)-253(the)-253(list)-253(of)-253(adjacent)-253(pr)18(ocesses)-253(with)]TJ 1.003 0 0 1 99.895 218.921 Tm [(a)-249(call)-249(to)]TJ/F145 9.9626 Tf 1 0 0 1 136.323 218.921 Tm [(desc%set_p_adjcncy\050list\051)]TJ/F84 9.9626 Tf 1.003 0 0 1 261.851 218.921 Tm [(;)-249(this)-249(will)-249(speed)-250(up)-249(the)-249(subsequent)-249(call)-249(to)]TJ/F145 9.9626 Tf 1 0 0 1 99.895 206.966 Tm [(psb_cdasb)]TJ/F84 9.9626 Tf 47.073 0 Td [(.)]TJ +0 g 0 G + 119.802 -116.528 Td [(77)]TJ +0 g 0 G +ET + +endstream +endobj +1527 0 obj +<< +/Length 3186 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(6.4)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l 
S +Q +BT +/F75 11.9552 Tf 201.825 706.129 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdcpy\050desc_in,)-525(desc_out,)-525(info\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 625.596 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 174.207 625.397 Td [(in)]TJ +0 g 0 G +/F84 9.9626 Tf 14.386 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -12.982 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 387.893 577.576 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 535.932 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 174.207 535.733 Td [(out)]TJ +0 g 0 G +/F84 9.9626 Tf 19.925 0 Td [(the)-250(communication)-250(descriptor)-250(copy)111(.)]TJ -18.521 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ 
-62.984 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.137 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 488.112 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 487.912 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 488.112 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 387.893 487.912 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -19.925 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 142.356 -329.728 Td [(78)]TJ +0 g 0 G +ET + +endstream +endobj +1532 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 417.772 339.385 429.832] -/A << /S /GoTo /D (vdata) >> +/Length 2169 >> -% 1221 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(6.5)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 151.016 706.129 Td [(cdfree)-250(\227)-250(Frees)-250(a)-250(communication)-250(descriptor)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_cdfree\050desc_a,)-525(info\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 625.596 cm +[]0 d 0 J 0.398 w 0 0 m 
2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.397 625.397 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)-250(to)-250(be)-250(fr)18(eed.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 577.576 Td [(desc)]TJ +ET +q +1 0 0 1 333.945 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 337.084 577.576 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 142.357 -397.474 Td [(79)]TJ +0 g 0 G +ET + +endstream +endobj +1539 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 405.817 231.53 417.877] -/A << /S /GoTo /D (table.2) >> +/Length 5958 >> -% 1222 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(6.6)]TJ 0.984 0 0 1 177.604 706.129 Tm [(psb)]TJ +ET +q +1 0 0 1 197.92 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 0.984 0 0 1 
201.506 706.129 Tm [(cdbldext)-253(\227)-253(Build)-253(an)-253(extended)-253(communication)-253(descrip-)]TJ 1 0 0 1 177.604 692.181 Tm [(tor)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -26.899 -19.693 Td [(call)-525(psb_cdbldext\050a,desc_a,nl,desc_out,)-525(info,)-525(extype\051)]TJ/F84 9.9626 Tf 0.982 0 0 1 165.649 649.066 Tm [(This)-254(subr)19(outine)-254(builds)-254(an)-254(extended)-253(communication)-254(descriptor)75(,)-254(b)1(ased)-254(on)-254(the)]TJ 1.019 0 0 1 150.705 637.111 Tm [(input)-244(descriptor)]TJ/F145 9.9626 Tf 1 0 0 1 225.863 637.111 Tm [(desc_a)]TJ/F84 9.9626 Tf 1.019 0 0 1 259.725 637.111 Tm [(and)-244(on)-244(the)-245(stencil)-244(speci\002ed)-244(thr)18(ough)-245(the)-244(input)-244(sparse)]TJ 1 0 0 1 150.705 625.156 Tm [(matrix)]TJ/F145 9.9626 Tf 31.491 0 Td [(a)]TJ/F84 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -36.722 -21.054 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -21.429 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.43 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.574 0 Td [(A)-250(sparse)-250(matrix)-250(Scope:)]TJ/F75 9.9626 Tf 100.691 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -107.246 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(type.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -21.429 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 504.147 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 174.207 503.948 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ 
-62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 456.326 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 456.127 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 456.326 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 398.354 456.127 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.57 -21.43 Td [(nl)]TJ +0 g 0 G +/F84 9.9626 Tf 14.386 0 Td [(the)-250(number)-250(of)-250(additional)-250(layers)-250(desir)18(ed.)]TJ 10.52 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -57.434 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F78 9.9626 Tf 131.102 0 Td [(n)-25(l)]TJ/F190 10.3811 Tf 11.873 0 Td [(\025)]TJ/F84 9.9626 Tf 10.962 0 Td [(0.)]TJ +0 g 0 G +/F75 9.9626 Tf -178.843 -21.43 Td [(extype)]TJ +0 g 0 G +/F84 9.9626 Tf 34.869 0 Td [(the)-250(kind)-250(of)-250(estension)-250(r)18(equir)18(ed.)]TJ -9.963 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -57.434 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 40.678 0 Td [(.)]TJ -64.368 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 175.611 317.626 Tm [(Speci\002ed)-317(as:)-447(an)-317(i)1(nteger)-317(value)]TJ/F145 9.9626 Tf 1 0 0 1 313.312 317.626 Tm [(psb_ovt_xhal_)]TJ/F84 9.9626 Tf 1.02 0 0 1 381.307 317.626 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 387.247 317.626 Tm [(psb_ovt_asov_)]TJ/F84 9.9626 Tf 1.02 0 0 
1 455.242 317.626 Tm [(,)-335(default:)]TJ/F145 9.9626 Tf 1 0 0 1 175.611 305.671 Tm [(psb_ovt_xhal_)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -23.422 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.43 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 261.018 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 174.207 260.819 Td [(out)]TJ +0 g 0 G +/F84 9.9626 Tf 19.925 0 Td [(the)-250(extended)-250(communication)-250(descriptor)74(.)]TJ -18.521 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 213.198 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 212.998 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 213.198 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 387.893 212.998 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -21.429 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -23.422 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 166.874 -29.888 Td [(80)]TJ +0 g 0 G +ET + +endstream +endobj +1543 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 338.393 339.385 350.453] -/A << /S /GoTo 
/D (vdata) >> +/Length 1748 >> -% 1223 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F84 9.9626 Tf 112.349 706.129 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 124.802 706.129 Tm [(Specifying)]TJ/F145 9.9626 Tf 1 0 0 1 172.776 706.129 Tm [(psb_ovt_xhal_)]TJ/F84 9.9626 Tf 0.98 0 0 1 243.267 706.129 Tm [(for)-256(the)]TJ/F145 9.9626 Tf 1 0 0 1 274.24 706.129 Tm [(extype)]TJ/F84 9.9626 Tf 0.98 0 0 1 308.119 706.129 Tm [(ar)18(gument)-255(the)-256(user)-256(will)-255(obtain)-256(a)]TJ 0.98 0 0 1 124.802 694.174 Tm [(descriptor)-209(for)-209(a)-209(domain)-209(partition)-209(in)-209(which)-209(the)-209(additional)-209(layers)-209(ar)18(e)-209(fetched)]TJ 1.02 0 0 1 124.802 682.219 Tm [(as)-244(part)-244(of)-244(an)-244(\050extended\051)-244(halo;)-244(however)-244(the)-244(index-to-pr)18(ocess)-244(mapping)-244(is)]TJ 1 0 0 1 124.802 670.263 Tm [(identical)-250(to)-250(that)-250(of)-250(the)-250(base)-250(descriptor;)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + 1.018 0 0 1 124.802 650.338 Tm [(Specifying)]TJ/F145 9.9626 Tf 1 0 0 1 174.542 650.338 Tm [(psb_ovt_asov_)]TJ/F84 9.9626 Tf 1.018 0 0 1 245.035 650.338 Tm [(for)-246(the)]TJ/F145 9.9626 Tf 1 0 0 1 277.02 650.338 Tm [(extype)]TJ/F84 9.9626 Tf 1.018 0 0 1 310.901 650.338 Tm [(ar)18(gument)-247(the)-246(user)-246(will)-247(obtain)]TJ 1.02 0 0 1 124.802 638.383 Tm [(a)-267(descriptor)-267(with)-268(an)-267(overlapped)-267(decomposition:)-348(the)-267(additional)-267(layer)-268(is)]TJ 1.02 0 0 1 124.802 626.428 Tm [(aggr)18(egated)-278(to)-278(the)-278(local)-278(subdomain)-278(\050and)-278(thus)-278(is)-278(an)-278(overlap\051,)-286(and)-278(a)-278(new)]TJ 1 0 0 1 124.802 614.473 Tm [(halo)-250(extending)-250(beyond)-250(the)-250(last)-250(additional)-250(layer)-250(is)-250(formed.)]TJ +0 g 0 G + 141.968 -524.035 Td [(81)]TJ +0 g 0 G +ET + +endstream +endobj +1551 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 326.438 231.53 338.498] -/A << /S /GoTo /D (table.2) >> +/Length 5951 >> 
-% 1224 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(6.7)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 201.825 706.129 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -51.12 -19.277 Td [(call)-525(psb_spall\050a,)-525(desc_a,)-525(info)-525([,)-525(nnz,)-525(dupl,)-525(bldmode]\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -22.403 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -20.571 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.572 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 623.505 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 174.207 623.306 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 575.684 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 575.485 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 575.684 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 387.893 575.485 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -20.572 Td [(nnz)]TJ +0 g 0 G +/F84 9.9626 Tf 0.992 0 0 1 172.453 554.913 Tm [(An)-253(estimate)-253(of)-253(the)-254(number)-253(of)-253(nonzer)18(oes)-253(in)-253(the)-253(local)-253(part)-253(of)-254(the)-253(assembled)]TJ 1 0 0 1 175.611 542.958 Tm [(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td 
[(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -20.572 Td [(dupl)]TJ +0 g 0 G +/F84 9.9626 Tf 26.56 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ 1.006 0 0 1 175.611 426.745 Tm [(Speci\002ed)-248(as:)-308(integer)74(,)-248(possible)-248(values:)]TJ/F145 9.9626 Tf 1 0 0 1 341.716 426.745 Tm [(psb_dupl_ovwrt_)]TJ/F84 9.9626 Tf 1.006 0 0 1 420.171 426.745 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 425.161 426.745 Tm [(psb_dupl_add_)]TJ/F84 9.9626 Tf 1.006 0 0 1 493.156 426.745 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 175.611 414.79 Tm [(psb_dupl_err_)]TJ/F84 9.9626 Tf 67.995 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -92.901 -20.572 Td [(bldmode)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 196.144 394.218 Tm [(Whether)-327(to)-327(keep)-327(track)-327(of)-327(matrix)-327(entries)-327(that)-327(do)-327(not)-327(belong)-327(to)-327(the)]TJ 1 0 0 1 175.611 382.263 Tm [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ 0.98 0 0 1 175.611 334.443 Tm [(Speci\002ed)-194(as:)-286(an)-193(integer)-194(value)]TJ/F145 9.9626 Tf 1 0 0 1 301.54 334.443 Tm 
[(psb_matbld_noremote_)]TJ/F84 9.9626 Tf 0.98 0 0 1 406.147 334.443 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 410.601 334.443 Tm [(psb_matbld_remote_)]TJ/F84 9.9626 Tf 0.98 0 0 1 504.747 334.443 Tm [(.)]TJ 1 0 0 1 175.611 322.487 Tm [(Default:)]TJ/F145 9.9626 Tf 38.516 0 Td [(psb_matbld_noremote_)]TJ/F84 9.9626 Tf 104.606 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -168.028 -22.402 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.572 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf -23.691 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 231.892 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 231.692 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 231.892 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 398.354 231.692 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.57 -20.571 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -22.564 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -20.41 Td [(1.)]TJ +0 g 0 G + 
[-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ +0 g 0 G + 154.421 -29.888 Td [(82)]TJ +0 g 0 G +ET + +endstream +endobj +1556 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 259.015 323.106 271.074] -/A << /S /GoTo /D (descdata) >> +/Length 1305 >> -% 1230 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F84 9.9626 Tf 112.349 706.129 Td [(2.)]TJ +0 g 0 G + [-469(The)-250(descriptor)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(assembled)-250(state.)]TJ +0 g 0 G + 0 -19.925 Td [(3.)]TJ +0 g 0 G + 0.993 0 0 1 124.802 686.204 Tm [(Pr)18(oviding)-250(a)-251(good)-251(estimate)-250(for)-251(the)-251(number)-250(of)-251(nonzer)18(oes)]TJ/F78 9.9626 Tf 1 0 0 1 369.235 686.204 Tm [(n)-25(n)-25(z)]TJ/F84 9.9626 Tf 0.993 0 0 1 387.839 686.204 Tm [(in)-251(the)-250(assem-)]TJ 1.014 0 0 1 124.802 674.248 Tm [(bled)-245(matrix)-246(may)-245(substantially)-245(impr)17(ove)-245(performance)-245(in)-246(the)-245(matrix)-245(build)]TJ 1.02 0 0 1 124.503 662.293 Tm [(phase,)-315(as)-302(it)-301(will)-301(r)18(educe)-302(or)-301(eliminate)-301(the)-301(need)-302(for)-301(\050potentially)-301(multiple\051)]TJ 1 0 0 1 124.802 650.338 Tm [(data)-250(r)18(eallocations;)]TJ +0 g 0 G + -12.453 -19.925 Td [(4.)]TJ +0 g 0 G + 1.02 0 0 1 124.802 630.413 Tm [(Using)]TJ/F145 9.9626 Tf 1 0 0 1 154.449 630.413 Tm [(psb_matbld_remote_)]TJ/F84 9.9626 Tf 1.02 0 0 1 251.507 630.413 Tm [(is)-287(likely)-286(to)-287(cause)-286(a)-287(r)8(untime)-286(over)17(head)-286(at)-287(as-)]TJ 1 0 0 1 124.802 618.458 Tm [(sembly)-250(time;)]TJ +0 g 0 G + 141.968 -528.02 Td [(83)]TJ +0 g 0 G +ET + +endstream +endobj +1564 0 obj << -/D [1228 0 R /XYZ 98.895 753.953 null] +/Length 5490 >> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(6.8)]TJ 0.994 0 0 1 177.604 706.129 Tm [(psb)]TJ +ET +q +1 0 0 1 198.119 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 
3.587 0 l S +Q +BT +/F75 11.9552 Tf 0.994 0 0 1 201.706 706.129 Tm [(spins)-251(\227)-252(Insert)-251(a)-252(set)-251(of)-251(coef)18(\002cients)-252(into)-251(a)-251(sparse)-252(matrix)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf 1 0 0 1 150.705 685.756 Tm [(call)-525(psb_spins\050nz,)-525(ia,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ 0 -11.956 Td [(call)-525(psb_spins\050nr,)-525(irw,)-525(irp,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -24.099 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -22.835 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.834 Td [(nz)]TJ +0 g 0 G +/F84 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 8.856 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -22.834 Td [(nr)]TJ +0 g 0 G +/F84 9.9626 Tf 14.944 0 Td [(the)-250(number)-250(of)-250(r)18(ows)-250(to)-250(be)-250(inserted.)]TJ 9.963 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.983 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -22.834 Td [(irw)]TJ +0 g 0 G +/F84 9.9626 Tf 20.473 0 Td [(the)-250(\002rst)-250(r)18(ow)-250(to)-250(be)-250(inserted.)]TJ 4.434 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 
28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -22.834 Td [(ia)]TJ +0 g 0 G +/F84 9.9626 Tf 13.28 0 Td [(the)-250(r)18(ow)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.627 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F78 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F84 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -196.043 -22.834 Td [(irp)]TJ +0 g 0 G +/F84 9.9626 Tf 18.261 0 Td [(the)-250(r)18(ow)-250(pointers)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.646 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F78 9.9626 Tf 160.8 0 Td [(n)-15(r)]TJ/F192 10.3811 Tf 11.85 0 Td [(+)]TJ/F84 9.9626 Tf 10.131 0 Td [(1.)]TJ +0 g 0 G +/F75 9.9626 Tf -207.688 -22.835 Td [(ja)]TJ +0 g 0 G +/F84 9.9626 Tf 13.28 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td 
[(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F78 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F84 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -196.043 -22.835 Td [(val)]TJ +0 g 0 G +/F84 9.9626 Tf 18.819 0 Td [(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.088 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.983 0 0 1 175.611 132.281 Tm [(Speci\002ed)-254(as:)-315(an)-253(array)-254(of)-254(size)]TJ/F78 9.9626 Tf 1 0 0 1 301.024 132.281 Tm [(n)-25(z)]TJ/F84 9.9626 Tf 0.983 0 0 1 311.36 132.281 Tm [(.)-315(Must)-254(be)-253(of)-254(the)-254(same)-253(type)-254(and)-254(kind)-253(of)-254(the)]TJ 1 0 0 1 175.611 120.326 Tm [(coef)18(\002cients)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F78 9.9626 Tf 141.593 0 Td [(a)]TJ/F84 9.9626 Tf 4.548 0 Td [(.)]TJ +0 g 0 G + -4.173 -29.888 Td [(84)]TJ +0 g 0 G +ET endstream endobj -1236 0 obj +1570 0 obj << -/Length 1288 +/Length 7379 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F54 9.9626 Tf 175.611 706.129 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +/F75 9.9626 Tf 99.895 706.129 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.397 706.129 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.249 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 
Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ +/F145 9.9626 Tf 136.327 0 Td [(psb)]TJ +ET +q +1 0 0 1 277.448 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 280.586 658.308 Td [(desc)]TJ ET q -1 0 0 1 331.456 706.328 cm +1 0 0 1 302.135 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 334.594 706.129 Td [(T)]TJ +/F145 9.9626 Tf 305.273 658.308 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -226.3 -33.398 Td [(local)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 125.957 624.91 Tm [(Whether)-378(the)-378(entries)-378(in)-377(the)-378(indices)-378(vectors)]TJ/F145 9.9626 Tf 1 0 0 1 323.219 624.91 Tm [(ia)]TJ/F84 9.9626 Tf 1.02 0 0 1 333.679 624.91 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 340.397 624.91 Tm [(ja)]TJ/F84 9.9626 Tf 1.02 0 0 1 354.698 624.91 Tm [(ar)18(e)-378(alr)17(eady)-377(in)-378(local)]TJ 1 0 0 1 124.802 612.954 Tm [(numbering.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -61.877 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F145 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F84 9.9626 Tf 36.612 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -224.197 -23.056 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.444 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(into)-250(which)-250(coef)18(\002cients)-250(will)-250(be)-250(inserted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf -23.691 -11.955 Td 
[(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 340.452 706.328 cm +1 0 0 1 309.258 484.968 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 343.59 706.129 Td [(vect)]TJ +/F145 9.9626 Tf 312.397 484.768 Td [(Tspmat)]TJ ET q -1 0 0 1 365.139 706.328 cm +1 0 0 1 344.406 484.968 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 368.277 706.129 Td [(type)]TJ +/F145 9.9626 Tf 347.544 484.768 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.571 -21.443 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 463.524 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.397 463.325 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 24.453 0 Td [(containing)-354(numbers)-355(of)]TJ -217.119 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +/F84 9.9626 Tf 9.654 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.249 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG - [-250(14)]TJ +/F145 9.9626 Tf 136.327 0 Td [(psb)]TJ +ET +q +1 0 0 1 277.448 415.704 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 280.586 415.504 Td [(desc)]TJ +ET +q +1 0 0 1 302.135 415.704 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 305.273 415.504 Td [(type)]TJ 0 g 0 G - [(.)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -226.3 -33.398 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 
-11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -23.436 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.454 -21.064 Td [(1.)]TJ +0 g 0 G + 1.02 0 0 1 124.802 289.785 Tm [(On)-386(entry)-386(to)-385(this)-386(r)18(outine)-386(the)-386(descriptor)-386(may)-385(be)-386(in)-386(either)-386(the)-386(build)-385(or)]TJ 1 0 0 1 124.802 277.83 Tm [(assembled)-250(state.)]TJ +0 g 0 G + -12.453 -21.443 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 124.802 256.387 Tm [(On)-271(entry)-271(to)-271(this)-271(r)18(outine)-271(the)-271(sparse)-271(matrix)-271(may)-271(be)-271(in)-271(either)-270(the)-271(build)-271(or)]TJ 1 0 0 1 124.802 244.432 Tm [(update)-250(state.)]TJ +0 g 0 G + -12.453 -21.444 Td [(3.)]TJ +0 g 0 G + 1.006 0 0 1 124.802 222.988 Tm [(If)-249(the)-250(descriptor)-249(is)-250(in)-249(the)-250(build)-249(state,)-250(then)-249(the)-250(sparse)-249(matrix)-249(must)-250(also)-249(be)]TJ 0.98 0 0 1 124.802 211.033 Tm [(in)-256(the)-256(build)-256(state;)-256(the)-256(action)-256(of)-256(the)-256(r)18(outine)-256(is)-256(to)-256(\050implicitly\051)-256(call)]TJ/F145 9.9626 Tf 1 0 0 1 396.533 211.033 Tm [(psb_cdins)]TJ/F84 9.9626 Tf 1.005 0 0 1 124.802 199.078 Tm [(to)-248(add)-249(entries)-248(to)-249(the)-248(sparsity)-249(pattern;)-248(each)-249(sparse)-248(matrix)-248(entry)-249(implicitly)]TJ 1.02 0 0 1 124.802 187.123 Tm [(de\002nes)-377(a)-377(graph)-378(edge,)-410(that)-377(is)-377(passed)-377(to)-378(the)-377(descriptor)-377(r)18(outine)-377(for)-378(the)]TJ 1 0 0 1 124.802 175.168 Tm [(appr)18(opriate)-250(pr)18(ocessing;)]TJ +0 g 0 G + -12.453 -21.444 Td [(4.)]TJ +0 g 0 G + 
[-469(The)-250(input)-250(data)-250(can)-250(be)-250(passed)-250(in)-250(either)-250(COO)-250(or)-250(CSR)-250(formats;)]TJ +0 g 0 G + 0 -21.443 Td [(5.)]TJ +0 g 0 G + 1.02 0 0 1 124.802 132.281 Tm [(In)-268(COO)-268(format)-268(the)-268(coef)18(\002cients)-268(to)-268(be)-268(inserted)-268(ar)18(e)-268(r)17(epr)18(esented)-268(by)-268(the)-268(or)18(-)]TJ 0.985 0 0 1 124.802 120.326 Tm [(der)18(ed)-253(triples)]TJ/F78 9.9626 Tf 1 0 0 1 182.455 120.326 Tm [(i)-47(a)]TJ/F192 10.3811 Tf 7.91 0 Td [(\050)]TJ/F78 9.9626 Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 0.985 0 0 1 201.807 120.326 Tm [(,)]TJ/F78 9.9626 Tf 1 0 0 1 206.394 120.326 Tm [(j)-40(a)]TJ/F192 10.3811 Tf 7.841 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 0.985 0 0 1 225.677 120.326 Tm [(,)]TJ/F78 9.9626 Tf 1 0 0 1 229.915 120.326 Tm [(v)-40(a)-25(l)]TJ/F192 10.3811 Tf 13.37 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 0.985 0 0 1 254.727 120.326 Tm [(,)-253(for)]TJ/F78 9.9626 Tf 1 0 0 1 274.702 120.326 Tm [(i)]TJ/F192 10.3811 Tf 5.856 0 Td [(=)]TJ/F84 9.9626 Tf 0.985 0 0 1 291.52 120.326 Tm [(1,)]TJ 1 0 0 1 300.664 120.326 Tm [(.)-192(.)-191(.)]TJ 0.985 0 0 1 313.74 120.326 Tm [(,)]TJ/F78 9.9626 Tf 1 0 0 1 317.978 120.326 Tm [(n)-25(z)]TJ/F84 9.9626 Tf 0.985 0 0 1 328.315 120.326 Tm [(;)-253(these)-253(triples)-253(ar)18(e)-253(arbitrary;)]TJ +0 g 0 G + 1 0 0 1 266.77 90.438 Tm [(85)]TJ +0 g 0 G +ET + +endstream +endobj +1580 0 obj +<< +/Length 5304 +>> +stream 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td 
[(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -535.99 Td [(56)]TJ +BT +/F84 9.9626 Tf 163.158 706.129 Td [(6.)]TJ +0 g 0 G + 1.02 0 0 1 175.611 706.129 Tm [(In)-389(CSR)-388(format)-389(the)-388(coef)17(\002)1(cients)-389(to)-388(be)-389(inserted)-388(for)-389(each)-389(inpu)1(t)-389(r)18(ow)]TJ/F78 9.9626 Tf 1 0 0 1 477.666 706.129 Tm [(i)]TJ/F192 10.3811 Tf 8.556 0 Td [(=)]TJ/F84 9.9626 Tf 1.017 0 0 1 175.113 694.174 Tm [(1,)]TJ/F78 9.9626 Tf 1 0 0 1 184.497 694.174 Tm [(n)-15(r)]TJ/F84 9.9626 Tf 1.017 0 0 1 196.778 694.174 Tm [(ar)18(e)-246(r)17(epr)18(esented)-246(by)-246(the)-246(or)18(der)18(ed)-246(triples)]TJ/F192 10.3811 Tf 1 0 0 1 367.425 694.174 Tm [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 5.026 0 Td [(+)]TJ/F78 9.9626 Tf 10.186 0 Td [(i)-22(r)-35(w)]TJ/F190 10.3811 Tf 16.593 0 Td [(\000)]TJ/F84 9.9626 Tf 1.017 0 0 1 413.566 694.174 Tm [(1)]TJ/F192 10.3811 Tf 1 0 0 1 418.756 694.174 Tm [(\051)]TJ/F84 9.9626 Tf 1.017 0 0 1 422.906 694.174 Tm [(,)]TJ/F78 9.9626 Tf 1 0 0 1 427.572 694.174 Tm [(j)-40(a)]TJ/F192 10.3811 Tf 7.841 0 Td [(\050)]TJ/F78 9.9626 Tf 4.622 0 Td [(j)]TJ/F192 10.3811 Tf 3.019 0 Td [(\051)]TJ/F84 9.9626 Tf 1.017 0 0 1 447.203 694.174 Tm [(,)]TJ/F78 9.9626 Tf 1 0 0 1 451.521 694.174 Tm [(v)-40(a)-25(l)]TJ/F192 10.3811 Tf 13.37 0 Td [(\050)]TJ/F78 9.9626 Tf 4.623 0 Td [(j)]TJ/F192 10.3811 Tf 3.018 0 Td [(\051)]TJ/F84 9.9626 Tf 1.017 0 0 1 476.682 694.174 Tm [(,)-246(for)]TJ/F78 9.9626 Tf 1 0 0 1 176.085 682.219 Tm [(j)]TJ/F192 10.3811 Tf 6.885 0 Td [(=)]TJ/F78 9.9626 Tf 12.116 0 Td [(i)-22(r)-90(p)]TJ/F192 10.3811 Tf 12.991 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 219.519 682.219 Tm [(,)]TJ 1 0 0 1 223.844 682.219 Tm [(.)-192(.)-191(.)]TJ 1.02 0 0 1 236.92 682.219 Tm [(,)]TJ/F78 9.9626 Tf 1 0 0 1 241.175 682.219 Tm [(i)-22(r)-90(p)]TJ/F192 10.3811 Tf 12.991 0 Td 
[(\050)]TJ/F78 9.9626 Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 5.245 0 Td [(+)]TJ/F84 9.9626 Tf 1.02 0 0 1 273.967 682.219 Tm [(1)]TJ/F192 10.3811 Tf 1 0 0 1 279.173 682.219 Tm [(\051)]TJ/F190 10.3811 Tf 6.431 0 Td [(\000)]TJ/F84 9.9626 Tf 1.02 0 0 1 295.955 682.219 Tm [(1;)-333(these)-303(triples)-304(should)-303(belong)-304(to)-303(the)-304(curr)18(ent)]TJ 0.98 0 0 1 175.313 670.263 Tm [(pr)18(ocess,)-219(i.e.)]TJ/F78 9.9626 Tf 1 0 0 1 227.459 670.263 Tm [(i)]TJ/F192 10.3811 Tf 4.622 0 Td [(+)]TJ/F78 9.9626 Tf 9.782 0 Td [(i)-22(r)-35(w)]TJ/F190 10.3811 Tf 16.189 0 Td [(\000)]TJ/F84 9.9626 Tf 0.98 0 0 1 267.779 670.263 Tm [(1)-211(shou)1(ld)-211(be)-210(one)-211(of)-210(the)-211(local)-210(indices,)-220(but)-210(ar)18(e)-210(otherwise)]TJ 1 0 0 1 175.611 658.308 Tm [(arbitrary;)]TJ +0 g 0 G + -12.453 -19.925 Td [(7.)]TJ +0 g 0 G + 0.991 0 0 1 175.303 638.383 Tm [(Ther)18(e)-253(is)-253(no)-253(r)19(equir)18(ement)-253(that)-253(a)-253(given)-253(r)18(ow)-253(must)-253(be)-252(passed)-253(in)-253(its)-253(entir)18(ety)-253(to)]TJ 0.98 0 0 1 175.611 626.428 Tm [(a)-241(single)-241(call)-241(to)-241(this)-242(r)19(outine:)-309(the)-242(bui)1(ldup)-242(of)-241(a)-241(r)19(ow)-242(may)-241(be)-241(split)-241(into)-241(as)-241(many)]TJ 1 0 0 1 175.611 614.473 Tm [(calls)-250(as)-250(desir)18(ed)-250(\050even)-250(in)-250(the)-250(CSR)-250(format\051;)]TJ +0 g 0 G + -12.453 -19.926 Td [(8.)]TJ +0 g 0 G + 1.016 0 0 1 175.611 594.547 Tm [(Coef)18(\002cients)-246(fr)17(om)-246(dif)18(fer)18(ent)-246(r)17(ows)-246(may)-246(also)-246(be)-247(mixed)-246(up)-246(fr)18(eely)-247(in)-246(a)-246(single)]TJ 1 0 0 1 175.611 582.592 Tm [(call,)-250(accor)18(ding)-250(to)-250(the)-250(application)-250(needs;)]TJ +0 g 0 G + -12.453 -19.925 Td [(9.)]TJ +0 g 0 G + 0.98 0 0 1 175.611 562.667 Tm [(Coef)18(\002cients)-229(fr)18(om)-228(matrix)-229(r)18(ows)-229(not)-229(owned)-229(by)-229(the)-229(calling)-229(p)1(r)18(ocess)-229(ar)18(e)-229(tr)19(eated)]TJ 1.002 0 0 1 175.611 550.712 Tm 
[(accor)18(ding)-250(to)-249(the)-249(value)-250(of)]TJ/F145 9.9626 Tf 1 0 0 1 287.159 550.712 Tm [(bldmode)]TJ/F84 9.9626 Tf 1.002 0 0 1 326.262 550.712 Tm [(speci\002ed)-249(at)-250(allocation)-249(time;)-250(if)]TJ/F145 9.9626 Tf 1 0 0 1 457.804 550.712 Tm [(bldmode)]TJ/F84 9.9626 Tf 1.02 0 0 1 175.193 538.757 Tm [(was)-272(chosen)-273(as)]TJ/F145 9.9626 Tf 1 0 0 1 241.813 538.757 Tm [(psb_matbld_remote_)]TJ/F84 9.9626 Tf 1.02 0 0 1 338.726 538.757 Tm [(the)-272(library)-273(wi)1(ll)-273(keep)-272(track)-272(of)-273(them,)]TJ 1 0 0 1 175.611 526.801 Tm [(otherwise)-250(they)-250(ar)18(e)-250(silently)-250(ignor)18(ed;)]TJ +0 g 0 G + -17.434 -19.925 Td [(10.)]TJ +0 g 0 G + 1.02 0 0 1 175.611 506.876 Tm [(If)-247(the)-247(descriptor)-248(is)-247(in)-247(the)-247(assembled)-247(state,)-248(then)-247(any)-247(entries)-247(in)-248(the)-247(sparse)]TJ 1.008 0 0 1 175.611 494.921 Tm [(matrix)-248(that)-247(would)-248(generate)-248(additional)-247(communication)-248(r)18(equir)18(ements)-248(ar)18(e)]TJ 1 0 0 1 175.611 482.966 Tm [(ignor)18(ed;)]TJ +0 g 0 G + -17.434 -19.926 Td [(11.)]TJ +0 g 0 G + 1.009 0 0 1 175.611 463.04 Tm [(If)-248(the)-248(matrix)-248(is)-248(in)-248(the)-248(update)-248(state,)-248(any)-248(entries)-248(in)-248(positions)-248(that)-248(wer)18(e)-248(not)]TJ 1 0 0 1 175.313 451.085 Tm [(pr)18(esent)-250(in)-250(the)-250(original)-250(matrix)-250(ar)18(e)-250(ignor)18(ed.)]TJ +0 g 0 G + 142.266 -360.647 Td [(86)]TJ 0 g 0 G ET endstream endobj -1245 0 obj +1593 0 obj << -/Length 7234 +/Length 6893 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.16)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(6.9)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ/F54 9.9626 Tf -57.098 -18.964 Td 
[(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(inverse)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 252.097 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)-250(puts)-250(it)-250(into)]TJ/F52 9.9626 Tf 69.951 0 Td [(y)]TJ/F54 9.9626 Tf -184.401 -18.334 Td [(/)]TJ/F83 10.3811 Tf 9.054 0 Td [(\040)]TJ/F54 9.9626 Tf 13.272 0 Td [(1)-13(/)]TJ/F52 9.9626 Tf 11.562 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)]TJ/F59 9.9626 Tf -181.059 -18.334 Td [(psb_geinv\050x,)-525(y,)-525(desc_a,)-525(info,)-525([flag\051)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(routine)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -51.121 -19.204 Td [(call)-525(psb_spasb\050a,)-525(desc_a,)-525(info)-525([,)-525(afmt,)-525(upd,)-1050(mold]\051)]TJ 0 g 0 G +/F75 9.9626 Tf 0 -22.289 Td [(T)90(ype:)]TJ 0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.828 -20.421 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.421 Td [(desc)]TJ ET q -1 0 0 1 183.343 637.562 cm -[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S +1 0 0 1 120.408 623.994 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 189.446 628.995 Td [(/)-12(,)]TJ/F52 9.9626 Tf 11.437 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 99.042 0 Td [(Function)]TJ +/F75 9.9626 Tf 123.397 623.794 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in/out)]TJ/F84 9.9626 Tf 27.298 0 Td [(.)]TJ -59.098 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 
0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 183.343 625.209 cm -[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S +1 0 0 1 309.258 576.173 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 189.321 616.641 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F145 9.9626 Tf 312.397 575.974 Td [(desc)]TJ ET q -1 0 0 1 326.555 616.84 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 333.945 576.173 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 337.084 575.974 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -20.421 Td [(afmt)]TJ +0 g 0 G +/F84 9.9626 Tf 26.013 0 Td [(the)-250(storage)-250(format)-250(for)-250(the)-250(sparse)-250(matrix.)]TJ -1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(array)-250(of)-250(characters.)-310(Defalt:)-310('CSR'.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.42 Td [(upd)]TJ +0 g 0 G +/F84 9.9626 Tf 23.243 0 Td [(Pr)18(ovide)-250(for)-250(updates)-250(to)-250(the)-250(matrix)-250(coef)18(\002cients.)]TJ 1.664 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integer)74(,)-250(possible)-250(values:)]TJ/F145 9.9626 Tf 165.218 0 Td [(psb_upd_srch_)]TJ/F84 9.9626 Tf 67.995 0 Td [(,)]TJ/F145 9.9626 Tf 4.981 0 Td [(psb_upd_perm_)]TJ +0 g 0 G +/F75 9.9626 Tf -263.101 -20.421 Td [(mold)]TJ +0 g 0 G +/F84 9.9626 Tf 28.473 0 Td 
[(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -3.566 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F145 9.9626 Tf 201.393 0 Td [(psb)]TJ +ET +q +1 0 0 1 342.513 371.449 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 329.544 616.641 Td [(geinv)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F145 9.9626 Tf 345.652 371.249 Td [(T)]TJ ET q -1 0 0 1 326.555 604.885 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 351.51 371.449 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 329.544 604.686 Td [(geinv)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F145 9.9626 Tf 354.648 371.249 Td [(base)]TJ ET q -1 0 0 1 326.555 592.93 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 376.197 371.449 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 379.335 371.249 Td [(sparse)]TJ +ET +q +1 0 0 1 411.345 371.449 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 414.483 371.249 Td [(mat)]TJ/F84 9.9626 Tf 15.691 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -330.279 -22.289 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.421 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf -23.691 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 280.918 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 280.719 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 280.918 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 329.544 592.731 Td [(geinv)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F145 9.9626 Tf 347.544 280.719 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.571 -20.421 Td [(desc)]TJ ET q -1 0 0 1 326.555 580.975 cm +1 0 0 1 120.408 260.497 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 329.544 580.775 Td [(geinv)]TJ +/F75 9.9626 Tf 123.397 260.298 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in/out)]TJ/F84 9.9626 Tf 27.298 0 Td [(.)]TJ 1.02 0 0 1 124.802 212.477 Tm [(Speci\002ed)-253(as:)-320(a)-253(str)8(uctur)17(ed)-253(data)-253(of)-253(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 296.601 212.477 Tm [(psb)]TJ +ET +q +1 0 0 1 312.92 212.677 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 316.058 212.477 Td [(desc)]TJ +ET +q +1 0 0 1 337.607 212.677 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 340.745 212.477 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 361.667 212.477 Tm [(.)-328(If)-253(the)-253(matrix)-253(was)]TJ 0.984 0 0 1 124.802 200.522 Tm [(allocated)-253(with)]TJ/F145 9.9626 Tf 1 0 0 1 188.786 200.522 Tm [(bldmode=psb_matbld_remote_)]TJ/F84 9.9626 Tf 0.984 0 0 1 324.774 200.522 Tm [(,)-253(then)-253(the)-253(descriptor)-253(will)-253(be)]TJ 1 0 0 1 
124.802 188.567 Tm [(r)18(eassembled.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.421 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 142.357 -29.888 Td [(87)]TJ +0 g 0 G +ET + +endstream +endobj +1597 0 obj +<< +/Length 3496 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + 0.996 0 0 1 175.611 686.204 Tm [(On)-250(entry)-251(to)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-251(must)-250(be)-250(in)-250(the)-251(assembled)-250(state,)-250(i.e.)]TJ/F145 9.9626 Tf 1 0 0 1 175.611 674.248 Tm [(psb_cdasb)]TJ/F84 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ +0 g 0 G + -62.017 -19.925 Td [(2.)]TJ +0 g 0 G + [-469(The)-250(sparse)-250(matrix)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(update)-250(state;)]TJ +0 g 0 G + 0 -19.925 Td [(3.)]TJ +0 g 0 G + 0.98 0 0 1 175.611 634.398 Tm [(Duplicate)-244(entries)-245(ar)18(e)-244(detected)-244(and)-245(handled)-244(in)-245(both)-244(build)-245(and)-244(update)-245(state,)]TJ 1.002 0 0 1 175.193 622.443 Tm [(with)-249(the)-250(exception)-249(of)-249(the)-250(err)18(or)-249(action)-249(that)-250(is)-249(only)-250(taken)-249(in)-249(the)-250(build)-249(state,)]TJ 1 0 0 1 175.611 610.488 Tm [(i.e.)-310(on)-250(the)-250(\002rst)-250(assembly;)]TJ +0 g 0 G + -12.453 -19.926 Td [(4.)]TJ +0 g 0 G + 0.98 0 0 1 175.611 590.562 Tm [(If)-211(the)-210(update)-211(choice)-211(is)]TJ/F145 9.9626 Tf 1 0 0 1 270.622 590.562 Tm [(psb_upd_perm_)]TJ/F84 
9.9626 Tf 0.98 0 0 1 338.616 590.562 Tm [(,)-220(then)-210(subsequent)-211(calls)-211(to)]TJ/F145 9.9626 Tf 1 0 0 1 447.343 590.562 Tm [(psb_spins)]TJ/F84 9.9626 Tf 1.02 0 0 1 175.611 578.607 Tm [(to)-386(update)-387(the)-386(matrix)-387(must)-386(be)-386(arranged)-387(in)-386(such)-387(a)-386(way)-387(as)-386(to)-386(pr)17(oduce)]TJ 0.992 0 0 1 175.611 566.652 Tm [(exactly)-252(the)-253(same)-252(sequence)-253(of)-252(coef)18(\002cient)-252(values)-253(as)-252(encounter)18(ed)-252(at)-253(the)-252(\002rst)]TJ 1 0 0 1 175.611 554.697 Tm [(assembly;)]TJ +0 g 0 G + -12.453 -19.926 Td [(5.)]TJ +0 g 0 G + [-469(The)-250(output)-250(storage)-250(format)-250(need)-250(not)-250(be)-250(the)-250(same)-250(on)-250(all)-250(pr)18(ocesses;)]TJ +0 g 0 G + 0 -19.925 Td [(6.)]TJ +0 g 0 G + [-500(On)-249(exit)-249(fr)18(om)-250(this)-249(r)18(outine)-249(the)-249(matrix)-249(is)-250(in)-249(the)-249(assembled)-249(state,)-250(and)-249(thus)-249(is)]TJ 12.453 -11.955 Td [(suitable)-250(for)-250(the)-250(computational)-250(r)18(outines;)]TJ +0 g 0 G + -12.453 -19.925 Td [(7.)]TJ +0 g 0 G + 1.02 0 0 1 175.611 482.966 Tm [(If)-380(the)]TJ/F145 9.9626 Tf 1 0 0 1 204.239 482.966 Tm [(bldmode=psb_matbld_remote_)]TJ/F84 9.9626 Tf 1.02 0 0 1 344.09 482.966 Tm [(value)-380(was)-380(speci\002ed)-380(at)-380(allocation)]TJ 1.02 0 0 1 175.611 471.011 Tm [(time,)-381(cont)1(ributions)-354(de\002ned)-353(on)-354(the)-353(curr)18(ent)-354(pr)18(ocess)-354(but)-353(belonging)-353(to)-354(a)]TJ 0.98 0 0 1 175.611 459.055 Tm [(r)18(emote)-252(pr)18(ocess)-253(will)-252(be)-253(handled)-253(accor)19(dingly)113(.)-315(This)-253(is)-253(most)-252(likely)-253(to)-253(occur)-252(in)]TJ 1.016 0 0 1 175.611 447.1 Tm [(\002nite)-247(element)-247(applications,)-247(with)]TJ/F145 9.9626 Tf 1 0 0 1 322.076 447.1 Tm [(dupl=psb_dupl_add_)]TJ/F84 9.9626 Tf 1.016 0 0 1 416.222 447.1 Tm [(;)-247(it)-247(is)-247(necessary)-247(to)]TJ 0.994 0 0 1 175.611 435.145 Tm 
[(check)-252(for)-252(possible)-252(updates)-252(needed)-252(in)-252(the)-252(descriptor)74(,)-252(hence)-252(ther)18(e)-252(will)-252(be)-252(a)]TJ 1 0 0 1 175.611 423.19 Tm [(r)8(untime)-250(over)18(head.)]TJ +0 g 0 G + 141.968 -332.752 Td [(88)]TJ +0 g 0 G +ET + +endstream +endobj +1610 0 obj +<< +/Length 2988 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(6.10)-1000(psb)]TJ ET q -1 0 0 1 183.343 576.99 cm -[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 227.467 548.611 Td [(T)92(able)-250(16:)-310(Data)-250(types)]TJ +/F75 11.9552 Tf 156.993 706.129 Td [(spfree)-250(\227)-250(Frees)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G 0 g 0 G +/F145 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_spfree\050a,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -127.572 -29.451 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -18.492 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -18.491 Td [(x)]TJ + 0 -19.925 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 174.06 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 
Td [(required)]TJ/F84 9.9626 Tf -23.691 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 434.555 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 434.356 Td [(T)]TJ +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 289.642 434.555 cm +1 0 0 1 309.258 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 292.781 434.356 Td [(vect)]TJ +/F145 9.9626 Tf 312.397 577.576 Td [(Tspmat)]TJ ET q -1 0 0 1 314.33 434.555 cm +1 0 0 1 344.406 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 317.468 434.356 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F145 9.9626 Tf 347.544 577.576 Td [(type)]TJ 0 g 0 G - [(.)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.492 Td [(desc)]TJ +/F75 9.9626 Tf -268.571 -19.925 Td [(desc)]TJ ET q -1 0 0 1 120.408 404.108 cm +1 0 0 1 120.408 557.85 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 403.909 Td [(a)]TJ +/F75 9.9626 Tf 123.397 557.651 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 
21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 356.288 cm +1 0 0 1 309.258 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 356.088 Td [(desc)]TJ +/F145 9.9626 Tf 312.397 509.83 Td [(desc)]TJ ET q -1 0 0 1 298.05 356.288 cm +1 0 0 1 333.945 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 356.088 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -18.491 Td [(\003ag)]TJ -0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(check)-278(if)-279(any)-278(of)-278(the)]TJ/F52 9.9626 Tf 84.227 0 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)-340(=)]TJ/F54 9.9626 Tf 19.049 0 Td [(0,)-285(and)-279(in)-278(case)-279(r)18(eturns)-278(err)18(or)-278(halting)-279(the)-278(compu-)]TJ -112.58 -11.955 Td [(tation.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F59 9.9626 Tf 132.133 0 Td [(flag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F145 9.9626 Tf 337.084 509.83 Td [(type)]TJ 0 g 0 G - [(.true.)]TJ -0 g 0 G -/F51 9.9626 Tf -157.04 -18.492 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -18.491 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.849 0 Td 
[(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -151.669 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 205.171 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 204.972 Td [(T)]TJ -ET -q -1 0 0 1 289.642 205.171 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 204.972 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 205.171 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 317.468 204.972 Td [(type)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(16)]TJ +/F75 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.492 Td [(info)]TJ + 0 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 
-47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -36.266 Td [(57)]TJ + 142.357 -329.728 Td [(89)]TJ 0 g 0 G ET endstream endobj -1251 0 obj +1502 0 obj +<< +/Type /ObjStm +/N 100 +/First 979 +/Length 10335 +>> +stream +1501 0 1493 59 1505 141 1503 280 1507 425 320 483 1504 540 1510 662 1508 801 1512 959 +1513 1018 1514 1077 1515 1136 1509 1195 1519 1289 1516 1437 1517 1582 1521 1728 324 1786 1522 1843 +1518 1901 1526 1996 1523 2144 1524 2289 1528 2436 328 2495 1525 2553 1531 2648 1529 2787 1533 2932 +332 2990 1530 3047 1538 3142 1535 3290 1536 3435 1540 3582 336 3641 1537 3699 1542 3821 1544 3939 +1545 3997 1546 4055 1541 4113 1550 4195 1547 4343 1548 4490 1552 4635 340 4694 1553 4752 1549 4811 +1555 4906 1557 5024 1558 5082 1559 5140 1560 5198 1554 5256 1563 5351 1565 5469 344 5528 1562 5586 +1569 5708 1561 5865 1566 6012 1567 6157 1571 6304 1572 6362 1573 6419 1574 6477 1575 6535 1576 6593 +1568 6651 1579 6773 1581 6891 1582 6950 1583 7009 1584 7068 1585 7127 1586 7186 1587 7245 1578 7304 +1592 7427 1588 7584 1589 7731 1590 7876 1594 8023 348 8081 1591 8138 1596 8233 1598 8351 1599 8410 +1600 8469 1601 8528 1602 8587 1603 8646 1604 8705 1605 8764 1595 8823 1609 8918 1606 9066 1607 9209 +% 1501 0 obj +<< +/D [1494 0 R /XYZ 150.705 567.828 null] +>> +% 1493 0 obj +<< +/Font << /F84 687 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1505 0 obj +<< +/Type /Page +/Contents 1506 0 R +/Resources 1504 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1492 0 R +/Annots [ 1503 0 R ] +>> +% 1503 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 116.52 359.001 128.58] +/A << /S /GoTo /D (descdata) >> +>> +% 1507 0 obj +<< +/D [1505 0 R /XYZ 98.895 753.953 null] +>> +% 320 0 obj +<< +/D [1505 0 R /XYZ 99.895 716.092 null] +>> +% 1504 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R >> +/ProcSet [ /PDF /Text 
] +>> +% 1510 0 obj +<< +/Type /Page +/Contents 1511 0 R +/Resources 1509 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1492 0 R +/Annots [ 1508 0 R ] +>> +% 1508 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [328.975 401.451 350.892 413.511] +/A << /S /GoTo /D (subsubsection.2.3.1) >> +>> +% 1512 0 obj +<< +/D [1510 0 R /XYZ 149.705 753.953 null] +>> +% 1513 0 obj +<< +/D [1510 0 R /XYZ 150.705 496.698 null] +>> +% 1514 0 obj +<< +/D [1510 0 R /XYZ 150.705 474.179 null] +>> +% 1515 0 obj +<< +/D [1510 0 R /XYZ 150.705 430.343 null] +>> +% 1509 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1519 0 obj +<< +/Type /Page +/Contents 1520 0 R +/Resources 1518 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1492 0 R +/Annots [ 1516 0 R 1517 0 R ] +>> +% 1516 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 573.77 359.001 585.83] +/A << /S /GoTo /D (descdata) >> +>> +% 1517 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 416.361 359.001 428.42] +/A << /S /GoTo /D (descdata) >> +>> +% 1521 0 obj +<< +/D [1519 0 R /XYZ 98.895 753.953 null] +>> +% 324 0 obj +<< +/D [1519 0 R /XYZ 99.895 716.092 null] +>> +% 1522 0 obj +<< +/D [1519 0 R /XYZ 99.895 326.302 null] +>> +% 1518 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1526 0 obj +<< +/Type /Page +/Contents 1527 0 R +/Resources 1525 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1492 0 R +/Annots [ 1523 0 R 1524 0 R ] +>> +% 1523 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 573.77 409.811 585.83] +/A << /S /GoTo /D (descdata) >> +>> +% 1524 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 484.107 409.811 496.166] +/A << /S /GoTo /D (descdata) >> +>> +% 1528 0 obj +<< +/D [1526 0 R /XYZ 149.705 753.953 null] +>> +% 328 0 obj +<< +/D [1526 0 R /XYZ 150.705 716.092 
null] +>> +% 1525 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1531 0 obj +<< +/Type /Page +/Contents 1532 0 R +/Resources 1530 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1534 0 R +/Annots [ 1529 0 R ] +>> +% 1529 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 573.77 359.001 585.83] +/A << /S /GoTo /D (descdata) >> +>> +% 1533 0 obj +<< +/D [1531 0 R /XYZ 98.895 753.953 null] +>> +% 332 0 obj +<< +/D [1531 0 R /XYZ 99.895 716.092 null] +>> +% 1530 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1538 0 obj +<< +/Type /Page +/Contents 1539 0 R +/Resources 1537 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1534 0 R +/Annots [ 1535 0 R 1536 0 R ] +>> +% 1535 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 452.321 420.271 464.381] +/A << /S /GoTo /D (spdata) >> +>> +% 1536 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 209.193 409.811 221.252] +/A << /S /GoTo /D (descdata) >> +>> +% 1540 0 obj +<< +/D [1538 0 R /XYZ 149.705 753.953 null] +>> +% 336 0 obj +<< +/D [1538 0 R /XYZ 150.705 716.092 null] +>> +% 1537 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1542 0 obj +<< +/Type /Page +/Contents 1543 0 R +/Resources 1541 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1534 0 R +>> +% 1544 0 obj +<< +/D [1542 0 R /XYZ 98.895 753.953 null] +>> +% 1545 0 obj +<< +/D [1542 0 R /XYZ 99.895 716.092 null] +>> +% 1546 0 obj +<< +/D [1542 0 R /XYZ 99.895 663.469 null] +>> +% 1541 0 obj +<< +/Font << /F84 687 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1550 0 obj +<< +/Type /Page +/Contents 1551 0 R +/Resources 1549 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1534 0 R +/Annots [ 1547 0 R 1548 0 R ] +>> +% 1547 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 
571.679 409.811 583.739] +/A << /S /GoTo /D (descdata) >> +>> +% 1548 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 227.887 420.271 239.946] +/A << /S /GoTo /D (spdata) >> +>> +% 1552 0 obj +<< +/D [1550 0 R /XYZ 149.705 753.953 null] +>> +% 340 0 obj << -/Length 623 +/D [1550 0 R /XYZ 150.705 716.092 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 150.705 706.042 Td [(5)-1000(Communication)-250(routines)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-303(r)18(outines)-302(in)-303(this)-303(chapter)-302(implement)-303(various)-303(global)-302(communication)-303(opera-)]TJ 0 -11.955 Td [(tors)-271(on)-271(vectors)-271(associated)-271(with)-271(a)-272(discr)18(etization)-271(mesh.)-373(For)-271(auxiliary)-271(communi-)]TJ 0 -11.955 Td [(cation)-250(r)18(outines)-250(not)-250(tied)-250(to)-250(a)-250(discr)18(etization)-250(space)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(6)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G - 166.874 -568.992 Td [(58)]TJ -0 g 0 G -ET - -endstream -endobj -1259 0 obj +% 1553 0 obj << -/Length 6340 +/D [1550 0 R /XYZ 150.705 136.374 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(5.1)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ/F54 9.9626 Tf -51.121 -19.15 Td [(These)-250(subr)18(outines)-250(gathers)-250(the)-250(values)-250(of)-250(the)-250(halo)-250(elements:)]TJ/F52 9.9626 Tf 158.568 -25.014 Td [(x)]TJ/F83 10.3811 Tf 8.097 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F54 9.9626 Tf -180.232 -22.11 Td [(wher)18(e:)]TJ -0 g 0 G -/F52 9.9626 Tf 0.294 -20.212 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 10.187 0 Td [(is)-250(a)-250(global)-250(dense)-250(submatrix.)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 179.582 596.326 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F60 9.9626 Tf 185.685 587.758 Td [(a)]TJ/F54 9.9626 Tf 5.384 0 Td [(,)]TJ/F52 
9.9626 Tf 5.276 0 Td [(x)]TJ/F51 9.9626 Tf 110.13 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 179.582 583.972 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 185.56 575.404 Td [(Integer)-8983(psb)]TJ -ET -q -1 0 0 1 322.794 575.603 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 575.404 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 322.794 563.648 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 563.449 Td [(halo)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 322.794 551.693 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 551.494 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 322.794 539.738 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 539.539 Td [(halo)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 322.794 527.783 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 527.583 Td [(halo)]TJ -ET -q -1 0 0 1 179.582 523.798 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 227.467 495.419 Td [(T)92(able)-250(17:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -127.572 -24.102 Td [(call)]TJ -0 g 0 G - [-525(psb_halo\050x,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_halo\050x,)-525(desc_a,)-525(info,)-525(work,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(data)]TJ -0 g 0 G - [(\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.301 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.308 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.309 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 89.688 0 Td [(x)]TJ/F54 
9.9626 Tf 5.205 0 Td [(.)]TJ -79.949 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.864 348.823 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 389.002 348.623 Td [(T)]TJ -ET -q -1 0 0 1 394.86 348.823 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 397.998 348.623 Td [(vect)]TJ -ET -q -1 0 0 1 419.547 348.823 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.685 348.623 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(17)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -20.308 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 316.559 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 316.36 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 268.738 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 268.539 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 268.738 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT 
-/F59 9.9626 Tf 337.084 268.539 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -20.308 Td [(work)]TJ -0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F52 9.9626 Tf 218.454 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -248.566 -20.309 Td [(data)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(index)-250(list)-250(selector)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)74(.)-290(V)92(alues:)]TJ/F59 9.9626 Tf 136.507 0 Td [(psb_comm_halo_)]TJ/F54 9.9626 Tf 73.224 0 Td [(,)]TJ/F59 9.9626 Tf 2.491 0 Td [(psb_comm_mov_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_comm_ext_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ -352.714 -11.955 Td [(default:)]TJ/F59 9.9626 Tf 39.042 0 Td [(psb_comm_halo_)]TJ/F54 9.9626 Tf 73.225 0 Td [(.)-634(Chooses)-358(the)-358(index)-358(list)-358(on)-357(which)-358(to)-358(base)-358(the)]TJ -112.267 -11.955 Td [(data)-250(exchange.)]TJ -0 g 0 G - 141.968 -29.888 Td [(59)]TJ -0 g 0 G -ET - -endstream -endobj -1266 0 obj +% 1549 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1555 0 obj +<< +/Type /Page +/Contents 1556 0 R +/Resources 1554 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1534 0 R +>> +% 1557 0 obj +<< +/D [1555 0 R /XYZ 98.895 753.953 null] +>> +% 1558 0 obj +<< +/D [1555 0 R /XYZ 99.895 
716.092 null] +>> +% 1559 0 obj +<< +/D [1555 0 R /XYZ 99.895 699.334 null] +>> +% 1560 0 obj +<< +/D [1555 0 R /XYZ 99.895 644.819 null] +>> +% 1554 0 obj +<< +/Font << /F84 687 0 R /F78 686 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1563 0 obj +<< +/Type /Page +/Contents 1564 0 R +/Resources 1562 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1534 0 R +>> +% 1565 0 obj +<< +/D [1563 0 R /XYZ 149.705 753.953 null] +>> +% 344 0 obj +<< +/D [1563 0 R /XYZ 150.705 716.092 null] +>> +% 1562 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1569 0 obj +<< +/Type /Page +/Contents 1570 0 R +/Resources 1568 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1577 0 R +/Annots [ 1561 0 R 1566 0 R 1567 0 R ] +>> +% 1561 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [260.133 654.503 327.191 666.562] +/A << /S /GoTo /D (descdata) >> +>> +% 1566 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 480.963 369.462 493.022] +/A << /S /GoTo /D (spdata) >> +>> +% 1567 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [260.133 411.699 327.191 423.758] +/A << /S /GoTo /D (descdata) >> +>> +% 1571 0 obj +<< +/D [1569 0 R /XYZ 98.895 753.953 null] +>> +% 1572 0 obj +<< +/D [1569 0 R /XYZ 99.895 306.27 null] +>> +% 1573 0 obj +<< +/D [1569 0 R /XYZ 99.895 272.927 null] +>> +% 1574 0 obj +<< +/D [1569 0 R /XYZ 99.895 236.878 null] +>> +% 1575 0 obj +<< +/D [1569 0 R /XYZ 99.895 167.614 null] +>> +% 1576 0 obj +<< +/D [1569 0 R /XYZ 99.895 146.171 null] +>> +% 1568 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R /F192 942 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1579 0 obj +<< +/Type /Page +/Contents 1580 0 R +/Resources 1578 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1577 0 R +>> +% 1581 0 obj +<< +/D [1579 0 R /XYZ 149.705 753.953 null] +>> +% 1582 0 obj +<< +/D [1579 0 R /XYZ 150.705 716.092 null] +>> 
+% 1583 0 obj +<< +/D [1579 0 R /XYZ 150.705 651.514 null] +>> +% 1584 0 obj +<< +/D [1579 0 R /XYZ 150.705 608.346 null] +>> +% 1585 0 obj +<< +/D [1579 0 R /XYZ 150.705 575.798 null] +>> +% 1586 0 obj +<< +/D [1579 0 R /XYZ 150.705 520.007 null] +>> +% 1587 0 obj +<< +/D [1579 0 R /XYZ 150.705 476.171 null] +>> +% 1578 0 obj +<< +/Font << /F84 687 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1592 0 obj << -/Length 3039 +/Type /Page +/Contents 1593 0 R +/Resources 1591 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1577 0 R +/Annots [ 1588 0 R 1589 0 R 1590 0 R ] >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F52 9.9626 Tf 117.085 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Returned)-285(as:)-381(a)-285(rank)-285(one)-286(or)-285(two)-285(array)-285(containing)-285(numbers)-286(of)-285(type)-285(speci-)]TJ 0 -11.955 Td [(\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(17)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.68 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -164.68 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td 
[(An)-250(integer)-250(value)-250(that)-250(contains)-250(an)-250(err)18(or)-250(code.)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -1 0 0 1 210.511 336.406 cm -q -.45 0 0 .45 0 0 cm -q -1 0 0 1 0 0 cm -/Im4 Do -Q -Q -0 g 0 G -1 0 0 1 -210.511 -336.406 cm -BT -/F54 9.9626 Tf 240.086 304.526 Td [(Figur)18(e)-250(3:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ -0 g 0 G -0 g 0 G -/F51 11.9552 Tf -89.381 -23.91 Td [(Usage)-325(Example)]TJ/F54 9.9626 Tf 87.482 0 Td [(Consider)-325(the)-325(discr)18(etization)-324(mesh)-325(depicted)-325(in)-325(\002g.)]TJ -0 0 1 rg 0 0 1 RG - [-325(3)]TJ -0 g 0 G - [(,)-343(parti-)]TJ -87.482 -11.956 Td [(tioned)-219(among)-220(two)-219(pr)18(ocesses)-220(as)-219(shown)-220(b)1(y)-220(the)-219(dashed)-220(line;)-229(the)-220(data)-219(distribution)]TJ 0 -11.955 Td [(is)-343(such)-342(that)-343(each)-343(pr)18(ocess)-343(will)-342(own)-343(32)-343(entries)-343(in)-342(the)-343(index)-343(space,)-366(with)-342(a)-343(halo)]TJ 0 -11.955 Td [(made)-355(of)-355(8)-355(entries)-355(place)1(d)-355(at)-355(local)-355(indices)-355(33)-355(thr)18(ough)-355(40.)-624(If)-355(pr)18(ocess)-355(0)-355(assigns)]TJ 0 -11.955 Td [(an)-280(initial)-280(value)-280(of)-281(1)-280(to)-280(its)-280(entries)-280(in)-280(the)]TJ/F52 9.9626 Tf 173.857 0 Td [(x)]TJ/F54 9.9626 Tf 7.997 0 Td [(vector)74(,)-288(and)-280(pr)18(ocess)-280(1)-280(assigns)-280(a)-280(value)]TJ -181.854 -11.955 Td [(of)-314(2,)-329(then)-313(after)-314(a)-314(c)1(a)-1(l)1(l)-314(to)]TJ/F59 9.9626 Tf 106.994 0 Td [(psb_halo)]TJ/F54 9.9626 Tf 44.966 0 Td [(the)-314(conten)1(ts)-314(of)-314(the)-313(local)-314(vectors)-313(will)-314(be)-313(the)]TJ -151.96 -11.955 Td [(following:)]TJ -0 g 0 G - 166.874 -118.447 Td [(60)]TJ -0 g 0 G -ET - -endstream -endobj -1262 0 obj +% 1588 0 obj << -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/try8x8.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 1269 0 R -/BBox [0 0 498 439] -/Resources << +/Type /Annot +/Subtype /Link +/Border[0 0 
0]/H/I/C[1 0 0] +/Rect [291.943 572.168 359.001 584.228] +/A << /S /GoTo /D (descdata) >> +>> +% 1589 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 276.913 369.462 288.973] +/A << /S /GoTo /D (spdata) >> +>> +% 1590 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [295.605 208.672 362.663 220.731] +/A << /S /GoTo /D (descdata) >> +>> +% 1594 0 obj +<< +/D [1592 0 R /XYZ 98.895 753.953 null] +>> +% 348 0 obj +<< +/D [1592 0 R /XYZ 99.895 716.092 null] +>> +% 1591 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] -/ExtGState << -/R7 1270 0 R ->>/Font << /R8 1271 0 R/R10 1272 0 R>> >> -/Length 3349 -/Filter /FlateDecode +% 1596 0 obj +<< +/Type /Page +/Contents 1597 0 R +/Resources 1595 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1577 0 R >> -stream -xœ]›½Ž$¹„ý~ŠötkìÿªXtÒj½ÃY}Ð-q’¡×WWfDFr±Æ$ɬªo›1d%ç÷gù¨ÏrÿÃÏ×o¿Ìç¯ÿ}Œãù¿‡=¿üù1Žùœs<³hÔñü÷ã×^Ù½ŽÔ}ì®c©¿õÅþVzê_=ú©þ>⩽¦ÇŽí'ž»!ïˆw È;\)ònÒBBZÃ!-$äݤ…5=ÖOÈztRZHLk€Óâ•/©ÇDµX=&¬·œÖcâZ ¼× b!r+“ÈÙ@¶xå €ì1‘­d‰ì-Gö˜ÈÖ²Ç5ÙGù¬|‡|7ˆ|Ç+_@d‹ùnÙâ@¶-ä»Ed‹k ²¹×И…D¶-^ù {LdkÙc"{Ë‘=&²µ€ìqÍ@ö‘@>Cqò)ÍY¼òD>³ì¬Eä3 Ï[@>³ô¬Eä3‹ÏiŠy´Ÿ…aeMò³xå èf-ËÏZô³–åç-8ZËò³=­eù9M€>åyÝœÅFê@l#mÄH?Ki³h¤œy¤ž1ÒŽ–FêÙb¤®+Ô‘ ¬ü|cîÇÌ3FŒ9FŒ™#7³Fê#Æ#ÆŒcÖH1ÇÈ!€•ŸïÌ¥‰ùŽÅì#ÎÌgƈ1ÇH=cÄ™9âÌ>âÌ1RDàÌ9°òó¹+˜-fŒsŒ3GnfÔ3FŒ9FŒ#Ƭ‘"cŽ‘C+?ߘëÁlq0cĘcĘ9r3k¤ž1bÌ1bÌ1fsŒXùùÎ<¤A‹Å<¤Áqæ! 
j¤ž1âÌ#k#Î<²922Á!€•ŸoÌ—$xe^à•õwI~WVß%ñ]Y{—¤weå]Þ•tw…ì®MuIt›æ’ä6Å%ÁmzKrÛԖĶi-I-+MBK½ð9 Ë1>úó(çÇ,Ým;V¥¯Ç(ó£ñ7P?#¿Óz_þq"ûýmó»ÜÁëw¿›`ùÁòz|ÛíÌÚÑìwM,¤…fÍ~F~§Ã:Z?kF³ßE±Öš5í©ž, Í]ËÌ`ÍŒf¿ cí/4kÞÈï4R •¹¡U|ö$´»é3Ðü.ßvsò°ohSaAB»›†æù~éhíèÍ~ëÇŽFhÖ´§z~° dáµ2š-Eb‡%4kÞÈï´EG«kmh…Ÿ}éÚÝt€E4ÜåÛn5öM¶|Š=aBëò;Ýhc“­¯bß(4k:@Èwù¶¹‹Eï/õ}3^<1sÁ„˜Ýatε©Û³>øµI`AÌ&GR€0wªÆIØ¿ÿüúOQu}ýçN5h=u÷ |ù§¨z|ù·õœÅîOïžó¶û××cdg¶ÌóôXñ½°ø³y‰ìÛ¶Ö -;ãÍï,_0 Ž 7k‚3ãŠM·à¼¿Dz,íÙ·qž<à¬åõ|Ád8¬Ðlåã…ëͺâM༿Dz¬áçpŸ?,pw SpeŸV.Å<î®cB`jgý%Òc±æp0©È>ðùÃÔxó»åõ|Á$8®¹,vŸÂõf_ñNFpÞ_"=Ve/,ÐÖWøù»­în`\òµmqeqÏ‚0‹÷H ®CHåàÆ&3°x×pÖ€‘œ-¯§,„[áú‰ù˜›&ÔÀd.¹œŒfÅì…e1,jÙ’µ¤07®Æ©Ø•@!Lqu aî\ƒFT7¡ fâJî–÷±ÃÝŽ+ÜM/»Þ·×üÀ"Îw·ÈZh!öùA¾™¿ÂóùÄý­ß˜?´Cœõá¾/¬ƒÏûAúÈ~—ö•ÒïüúÉi™Y“âˆì 1 ÃÄ<qVÖÆ¢4"›,YIsCk1#›0B3¡õ¤‹¹¡ð§ºÙ -U1š\o{¯Wµçlµ…ëé}ùëÑJ“ë­´\ZŽ!¶Ybþín¨جòþÖ_µ«Oâã;¼ª'®7wÓ›þàs×[i¸´"óØ]Œù·»©|Þ_µµOæÃûºªÝ'®7wS"øÜõVZ .-ÊÏÌW9p=òYÕþV<‰ï檶 ä뜸ùÌõVZ .­ËïŸÚÿŠ'óÁõ¦6¨¸Þ]/ÊœÁçý‡Ö†ªp.¾Êy ëÏú§¶ÀâI|t½©=*ù:ç…®>ë?´6T5|t1䜺îoýS»`ñ$>ºÞÔ6×»ëEi9ø¼ÿÐÚP•‹«+󕘸ø¬j#,žÌ7¤ºø:ç…®¾N}0ÿÐ qÖ‡»[”̓Ïû§öÂâI|—ä®ç×ONËÌê˜Gd±J³0|Ìó'aem,J#²gÙ\O…Ÿõ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®·Õ}›v¬½•p=Uí_^K¸žWƒ‘_´FCl³ÄüÛÝpÁf•÷·þ¦±x+¹M;V\﵇ª¢žçý%%ŠÖh»‹1ÿv7H>ïoÚ‹'ó¡jÛ´cÅõ^€¨ªLàyÞ_Re¢h†xf¾Êy€ë‘Ïú›vÄâI|¬Ð6íXÉ×9/p=òYI剢5šÇîbÌ?8p=Þßú›vÄâI|¬Æ6íXq½—"ªjxž÷—T£(Z£!^™¯Ä<¸ë‘Ïú›vÄâÉ|Cú`á|ó×#_§>˜_´FCœõa7ÁçýM;bñ$¾Kòˆúƒ_?9-3«cR‘]b•f!} ù‹“°²6¥Ùmd×Ëe×–ö¬ñÕ‹oÞöÅã÷n&´žt17´þT7[¡*fBK®—ÎÁx ›ªZèàÏ]-r½SkC”yñÔŒÏòÍÝN.xëŸÚ‹'ñÑõ¦v¬¸WU.ð<ôkm¨S1࣋!ßÜ-5Ÿ÷OíˆÅ“ùàzS;V\²«Šxúµ6Ô òÁÅÀW9t½©ò¦øÊ>¿t½©+ù:ç…®>ï×ÚP§]ÀGCþÁy ëMU0ü¹ÈŸ×îzéÜŠÇp±© -†¸úµ6ÔÉ–O¹¬ÌWbàzSE 0¬àË®—Ψxܳ>P…Uƒ|úˆò¬Öhˆ³>ÜÝâð^ðyÿÔŽX<‰ï’<ÂÇfÔ1â@aÐMŠC5Ú¢"mZ½!qVÖÆ¢4"{^›ëéŠÏúÜÐZÌÈ&ŒÐÅLh=ébnh#ü©n¶BUÌ„&×]Õ ÝÅFS5Cg_QUÍðӂȯZ£!ö²{U5'"ýÈCS5 žßÛæzÖæI¿®+®÷ó'MÕ <Ïû«Ö†:%>A©ªfè\dðy׎X<™§úºv¬¸Þ¡4U3ð<ï¯ZêD ùp¥ªš¡â³þ®±xOðuíXÉ×9/<ŠRUÍÀ‰¿|p+q¥ªš¡³¨Áçý];bñ$>žÖëÚ±âz?ŽÒTÍÀó¼¿jm¨“~/žú[™¯Ä<¸ë‘Ïú»vÄâÉ|Cú€‹‘¯s^x(¥ªšá]ú€[!ÎúðS)MÕ ÞPÌïms=k_’} ×ONËÌê˜Gd×X¥YgSjT3âÔr°-J#²{Ë®ç_ŤŒ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ 
M®÷Ó[ÎÏŸŸåùËãý¤†¿õ8šÇ÷‰<ÿÓ”ÿüúøýQ=Æ×oÏ?~}üøåzÖöüúχÿeK}ŽãýÜò±ž_{üôCýô¹Ï£üÐ>}¥”:~ü<ðóÄωŸ×§Ÿ¿þåíÝíýŸšÇóë/Îñé뿟ߟvñvÿ®Ý¾k×ïÚeoë»öõ]{Þí·¯??·~¢ûCÞŸ#²ÚaYïá÷…–aXŽí)Ãüã—÷'W¯û³êíãýŸ¼žkà®3Ô{οþõÝ*ŸþðøÓ×Çßßÿþ{Ü -endstream -endobj -1275 0 obj +% 1598 0 obj << -/Filter /FlateDecode -/Subtype /Type1C -/Length 13073 +/D [1596 0 R /XYZ 149.705 753.953 null] >> -stream -xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs -ÏÇo”dz%!)&cÒæ”¤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ -¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ -ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– -Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ -Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 -«CÁ -»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w -v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7І%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ -ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ -èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ 
o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ΜG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøÈª‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) -sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/Ì寿ªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²ã¶¢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( -Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ -ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï -;žÓ¸‹FÄd4½…‚ïLûí“‹=wް%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ -GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ר.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïØ¤žAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ -‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ߕռÒ2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ -œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu -¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr -0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äóæ¾ž7á= -%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl -»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? 
èrg¡ÖÉ6äa O¢×/Ý>•ѵa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçÆ±€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 -ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ -ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( -½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η - Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi -õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø -¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ -Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*ކŽ7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ 
S{Ä„@˩ԷW‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥Ågrާt0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª -ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& -‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä -]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² - ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃƒlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ -SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døÑª‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. 
-¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæØ‡ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« -g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 -Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E -G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA -à= -®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ -è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 
8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞÆ‡Å’¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ -endstream -endobj -1276 0 obj +% 1599 0 obj << -/Filter /FlateDecode -/Subtype /Type1C -/Length 11578 +/D [1596 0 R /XYZ 150.705 701.929 null] >> -stream -xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? 
µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» -bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o -‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU -‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûξXËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ -´Õ= -«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið -ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É -|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajM肚Ém¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøÐ‰•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ -SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi -‹» xŸÃk\_¥OÌ -O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôѰ€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| -ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#а6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ -·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á 
¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²Gá§ŠRjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä -„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy -GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØÎ“ë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO -è ~nè¸Ó\“¡ÍÑeåf -Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QЦŽmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW -å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊᡞž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ -Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe -€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'à¶Š - iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ -Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ -(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígϰçÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ -œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ -Ë.ŠöUŸ 
ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} -lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒёРIòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L -³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië -H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ -Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú -?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” -å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ -Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, -¬Á®¶QU¯ާ_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm -êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ -ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ -Ê Å±»ò - Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄE鯶îÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG -mÙ_¨&òK - ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë -ó‹‹Ê„²êlŽG™ƒýUª€ÝW޶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| 
ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ -åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ -Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ -ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ -ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ -endstream -endobj -1279 0 obj +% 1600 0 obj +<< +/D [1596 0 R /XYZ 150.705 667.454 null] +>> +% 1601 0 obj +<< +/D [1596 0 R /XYZ 150.705 647.529 null] +>> +% 1602 0 obj +<< +/D [1596 0 R /XYZ 150.705 603.693 null] +>> +% 1603 0 obj +<< +/D [1596 0 R /XYZ 150.705 547.902 null] +>> +% 1604 0 obj +<< +/D [1596 0 R /XYZ 150.705 527.977 null] +>> +% 1605 0 obj +<< +/D [1596 0 R /XYZ 150.705 496.097 null] +>> +% 1595 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1609 0 obj +<< +/Type /Page +/Contents 1610 0 R +/Resources 1608 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1577 0 R +/Annots [ 1606 0 R 1607 0 R ] +>> +% 1606 0 obj << -/Length 3048 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 573.77 369.462 585.83] +/A << /S /GoTo /D (spdata) >> +>> +% 1607 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 506.024 359.001 518.084] +/A << /S /GoTo /D (descdata) >> >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F54 
8.9664 Tf 209.77 645.656 Td [(Pr)18(ocess)-250(0)-7729(Pr)18(ocess)-250(1)]TJ -31.696 -10.959 Td [(I)-1333(GLOB\050I\051)-1334(X\050I\051)-4663(I)-1333(GLOB\050I\051)-1333(X\050I\051)]TJ -1.462 -10.959 Td [(1)-4607(1)-1754(1.0)-4500(1)-4107(33)-1753(2.0)]TJ 0 -10.959 Td [(2)-4607(2)-1754(1.0)-4500(2)-4107(34)-1753(2.0)]TJ 0 -10.959 Td [(3)-4607(3)-1754(1.0)-4500(3)-4107(35)-1753(2.0)]TJ 0 -10.959 Td [(4)-4607(4)-1754(1.0)-4500(4)-4107(36)-1753(2.0)]TJ 0 -10.959 Td [(5)-4607(5)-1754(1.0)-4500(5)-4107(37)-1753(2.0)]TJ 0 -10.959 Td [(6)-4607(6)-1754(1.0)-4500(6)-4107(38)-1753(2.0)]TJ 0 -10.959 Td [(7)-4607(7)-1754(1.0)-4500(7)-4107(39)-1753(2.0)]TJ 0 -10.958 Td [(8)-4607(8)-1754(1.0)-4500(8)-4107(40)-1753(2.0)]TJ 0 -10.959 Td [(9)-4607(9)-1754(1.0)-4500(9)-4107(41)-1753(2.0)]TJ -4.483 -10.959 Td [(10)-4107(10)-1754(1.0)-4000(10)-4107(42)-1753(2.0)]TJ 0 -10.959 Td [(11)-4107(11)-1754(1.0)-4000(11)-4107(43)-1753(2.0)]TJ 0 -10.959 Td [(12)-4107(12)-1754(1.0)-4000(12)-4107(44)-1753(2.0)]TJ 0 -10.959 Td [(13)-4107(13)-1754(1.0)-4000(13)-4107(45)-1753(2.0)]TJ 0 -10.959 Td [(14)-4107(14)-1754(1.0)-4000(14)-4107(46)-1753(2.0)]TJ 0 -10.959 Td [(15)-4107(15)-1754(1.0)-4000(15)-4107(47)-1753(2.0)]TJ 0 -10.959 Td [(16)-4107(16)-1754(1.0)-4000(16)-4107(48)-1753(2.0)]TJ 0 -10.959 Td [(17)-4107(17)-1754(1.0)-4000(17)-4107(49)-1753(2.0)]TJ 0 -10.958 Td [(18)-4107(18)-1754(1.0)-4000(18)-4107(50)-1753(2.0)]TJ 0 -10.959 Td [(19)-4107(19)-1754(1.0)-4000(19)-4107(51)-1753(2.0)]TJ 0 -10.959 Td [(20)-4107(20)-1754(1.0)-4000(20)-4107(52)-1753(2.0)]TJ 0 -10.959 Td [(21)-4107(21)-1754(1.0)-4000(21)-4107(53)-1753(2.0)]TJ 0 -10.959 Td [(22)-4107(22)-1754(1.0)-4000(22)-4107(54)-1753(2.0)]TJ 0 -10.959 Td [(23)-4107(23)-1754(1.0)-4000(23)-4107(55)-1753(2.0)]TJ 0 -10.959 Td [(24)-4107(24)-1754(1.0)-4000(24)-4107(56)-1753(2.0)]TJ 0 -10.959 Td [(25)-4107(25)-1754(1.0)-4000(25)-4107(57)-1753(2.0)]TJ 0 -10.959 Td [(26)-4107(26)-1754(1.0)-4000(26)-4107(58)-1753(2.0)]TJ 0 -10.959 Td 
[(27)-4107(27)-1754(1.0)-4000(27)-4107(59)-1753(2.0)]TJ 0 -10.958 Td [(28)-4107(28)-1754(1.0)-4000(28)-4107(60)-1753(2.0)]TJ 0 -10.959 Td [(29)-4107(29)-1754(1.0)-4000(29)-4107(61)-1753(2.0)]TJ 0 -10.959 Td [(30)-4107(30)-1754(1.0)-4000(30)-4107(62)-1753(2.0)]TJ 0 -10.959 Td [(31)-4107(31)-1754(1.0)-4000(31)-4107(63)-1753(2.0)]TJ 0 -10.959 Td [(32)-4107(32)-1754(1.0)-4000(32)-4107(64)-1753(2.0)]TJ 0 -10.959 Td [(33)-4107(33)-1754(2.0)-4000(33)-4107(25)-1753(1.0)]TJ 0 -10.959 Td [(34)-4107(34)-1754(2.0)-4000(34)-4107(26)-1753(1.0)]TJ 0 -10.959 Td [(35)-4107(35)-1754(2.0)-4000(35)-4107(27)-1753(1.0)]TJ 0 -10.959 Td [(36)-4107(36)-1754(2.0)-4000(36)-4107(28)-1753(1.0)]TJ 0 -10.959 Td [(37)-4107(37)-1754(2.0)-4000(37)-4107(29)-1753(1.0)]TJ 0 -10.958 Td [(38)-4107(38)-1754(2.0)-4000(38)-4107(30)-1753(1.0)]TJ 0 -10.959 Td [(39)-4107(39)-1754(2.0)-4000(39)-4107(31)-1753(1.0)]TJ 0 -10.959 Td [(40)-4107(40)-1754(2.0)-4000(40)-4107(32)-1753(1.0)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 94.641 -105.903 Td [(61)]TJ -0 g 0 G -ET endstream endobj -1287 0 obj +1617 0 obj << -/Length 7519 +/Length 3932 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(5.2)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(6.11)]TJ 1.009 0 0 1 183.582 706.129 Tm [(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 204.395 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(These)-250(subr)18(outines)-250(applies)-250(an)-250(overlap)-250(operator)-250(to)-250(the)-250(input)-250(vector:)]TJ/F52 9.9626 Tf 154.518 -23.824 Td [(x)]TJ/F83 10.3811 Tf 8.097 0 Td [(\040)]TJ/F52 9.9626 Tf 13.497 0 Td [(Q)-42(x)]TJ/F54 9.9626 Tf -176.112 -21.014 Td [(wher)18(e:)]TJ +/F75 11.9552 Tf 1.009 0 0 1 207.982 706.129 Tm [(sprn)-246(\227)-247(Reinit)-246(sparse)-246(matrix)-247(structure)-246(for)-246(psblas)-247(rou-)]TJ 1 0 0 1 183.582 692.181 Tm [(tines.)]TJ 0 g 0 G -/F52 9.9626 Tf 
0.294 -19.203 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(submatrix)]TJ/F52 9.9626 Tf 131.351 0 Td [(x)]TJ +/F145 9.9626 Tf -32.877 -18.964 Td [(call)-525(psb_sprn\050a,)-525(decsc_a,)-525(info,)-525(clear\051)]TJ 0 g 0 G - -141.607 -19.564 Td [(Q)]TJ +/F75 9.9626 Tf 0 -21.917 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 12.857 0 Td [(is)-250(the)-250(overlap)-250(operator;)-250(it)-250(is)-250(the)-250(composition)-250(of)-250(two)-250(operators)]TJ/F52 9.9626 Tf 271.931 0 Td [(P)]TJ/F52 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F54 9.9626 Tf 6.445 1.494 Td [(and)]TJ/F52 9.9626 Tf 19.681 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.616 Td [(.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.828 -19.926 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -19.925 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(r)18(einitialized.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf -23.69 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.137 0 Td [(psb)]TJ ET q -1 0 0 1 230.392 581.71 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.663 573.142 Td [(x)]TJ/F51 9.9626 Tf 120.622 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 569.356 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 360.068 563.828 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 236.369 560.788 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F145 9.9626 Tf 363.206 563.628 Td [(Tspmat)]TJ ET q -1 0 0 1 373.603 560.988 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 395.216 563.828 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 
9.9626 Tf 376.592 560.788 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F145 9.9626 Tf 398.354 563.628 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ ET q -1 0 0 1 373.603 549.032 cm +1 0 0 1 171.218 543.902 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 548.833 Td [(ovrl)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F75 9.9626 Tf 174.207 543.703 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 537.077 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 496.082 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 536.878 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F145 9.9626 Tf 363.206 495.882 Td [(desc)]TJ ET q -1 0 0 1 373.603 525.122 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 384.755 496.082 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 524.923 Td [(ovrl)]TJ -ET -q -1 0 0 1 230.392 521.137 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q +/F145 9.9626 Tf 387.893 495.882 Td [(type)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 278.277 492.758 Td [(T)92(able)-250(18:)-310(Data)-250(types)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G +/F75 9.9626 Tf -258.11 -19.925 Td [(clear)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -127.572 -23.549 Td [(call)]TJ +/F84 9.9626 Tf 26.56 
0 Td [(Choose)-250(whether)-250(to)-250(zer)18(o)-250(out)-250(matrix)-250(coef)18(\002cients)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -61.878 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)-310(tr)8(ue.)]TJ 0 g 0 G - [-525(psb_ovrl\050x,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +/F75 9.9626 Tf -24.906 -21.917 Td [(On)-250(Return)]TJ 0 g 0 G - [-525(psb_ovrl\050x,)-525(desc_a,)-525(info,)-525(update)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(update_type,)-525(work)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + 0 -19.926 Td [(info)]TJ 0 g 0 G - [(work\051)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.014 Td [(T)90(ype:)]TJ +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(update)-250(state.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.564 Td [(On)-250(Entry)]TJ + 154.421 -206.192 Td [(90)]TJ 0 g 0 G +ET + +endstream +endobj +1624 0 obj +<< +/Length 6512 +>> +stream 0 g 0 G - 0 -19.564 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 89.687 0 Td 
[(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -79.948 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 349.49 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 439.811 349.291 Td [(T)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(6.12)-1000(psb)]TJ ET q -1 0 0 1 445.669 349.49 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 349.291 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 349.49 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 473.495 349.291 Td [(type)]TJ +/F75 11.9552 Tf 156.993 706.129 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(18)]TJ 0 g 0 G - [(.)]TJ +/F145 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geall\050x,)-525(desc_a,)-525(info[,)-525(dupl,)-525(bldmode,)-525(n,)-525(lb]\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -19.627 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.564 Td [(desc)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -19.01 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.009 Td [(desc)]TJ ET q -1 0 0 1 171.218 317.971 cm +1 0 0 1 120.408 629.719 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 317.772 Td [(a)]TJ +/F75 9.9626 Tf 123.397 629.519 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ 
-8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.654 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.249 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F145 9.9626 Tf 136.327 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 270.151 cm +1 0 0 1 277.448 581.898 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 269.951 Td [(desc)]TJ +/F145 9.9626 Tf 280.586 581.699 Td [(desc)]TJ ET q -1 0 0 1 384.755 270.151 cm +1 0 0 1 302.135 581.898 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 269.951 Td [(type)]TJ +/F145 9.9626 Tf 305.273 581.699 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.564 Td [(update)]TJ +/F75 9.9626 Tf -226.3 -30.965 Td [(n)]TJ 0 g 0 G -/F54 9.9626 Tf 36.523 0 Td [(Update)-250(operator)74(.)]TJ +/F84 9.9626 Tf 10.76 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(the)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.147 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 502.914 Tm 
[(Speci\002ed)-262(as:)-339(Integer)-262(scalar)73(,)-267(default)-262(1.)-356(It)-262(is)-263(not)-262(a)-262(valid)-263(ar)18(gument)-262(if)]TJ/F78 9.9626 Tf 1 0 0 1 420.723 502.914 Tm [(x)]TJ/F84 9.9626 Tf 1.02 0 0 1 428.594 502.914 Tm [(is)-262(a)]TJ 1 0 0 1 124.802 490.959 Tm [(rank-1)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -11.617 -31.519 Td [(update)-250(=)-250(psb)]TJ -ET -q -1 0 0 1 235.367 219.067 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 238.356 218.868 Td [(none)]TJ -ET -q -1 0 0 1 261.648 219.067 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +/F75 9.9626 Tf -24.907 -19.01 Td [(lb)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 269.619 218.868 Td [(Do)-250(nothing;)]TJ +/F84 9.9626 Tf 1.02 0 0 1 113.973 471.949 Tm [(The)-359(lower)-359(bound)-359(for)-359(the)-359(column)-359(index)-359(range)-359(of)-359(the)-359(dense)-359(matrix)-359(to)-359(be)]TJ 1 0 0 1 124.802 459.994 Tm [(allocated.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 412.174 Tm [(Speci\002ed)-262(as:)-339(Integer)-262(scalar)73(,)-267(default)-262(1.)-356(It)-262(is)-263(not)-262(a)-262(valid)-263(ar)18(gument)-262(if)]TJ/F78 9.9626 Tf 1 0 0 1 420.723 412.174 Tm [(x)]TJ/F84 9.9626 Tf 1.02 0 0 1 428.594 412.174 Tm [(is)-262(a)]TJ 1 0 0 1 124.802 400.218 Tm [(rank-1)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -94.008 -15.579 Td [(update)-250(=)-250(psb)]TJ -ET -q -1 0 0 1 235.367 203.488 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 238.356 203.289 Td [(add)]TJ -ET -q -1 0 0 1 256.109 203.488 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +/F75 9.9626 Tf -24.907 -19.009 Td [(dupl)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 264.079 203.289 Td [(Sum)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F52 9.9626 Tf 137.239 0 Td [(P)]TJ/F52 
7.5716 Tf 6.405 3.617 Td [(T)]TJ/F54 9.9626 Tf 5.4 -3.617 Td [(;)]TJ +/F84 9.9626 Tf 26.561 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.006 0 0 1 124.802 333.389 Tm [(Speci\002ed)-248(as:)-308(inte)1(ger)73(,)-248(possible)-248(values:)]TJ/F145 9.9626 Tf 1 0 0 1 290.906 333.389 Tm [(psb_dupl_ovwrt_)]TJ/F84 9.9626 Tf 1.006 0 0 1 369.361 333.389 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 374.352 333.389 Tm [(psb_dupl_add_)]TJ/F84 9.9626 Tf 1.006 0 0 1 442.346 333.389 Tm [(;)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 321.433 Tm [(psb_dupl_err_)]TJ/F84 9.9626 Tf 70.485 0 Td [(has)-250(no)-250(ef)18(fect.)]TJ 0 g 0 G -/F51 9.9626 Tf -237.512 -15.579 Td [(update)-250(=)-250(psb)]TJ -ET -q -1 0 0 1 235.367 187.91 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 238.356 187.71 Td [(avg)]TJ -ET -q -1 0 0 1 255.013 187.91 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +/F75 9.9626 Tf -95.392 -19.009 Td [(bldmode)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 262.983 187.71 Td [(A)92(verage)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F52 9.9626 Tf 153.667 0 Td [(P)]TJ/F52 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F52 9.9626 Tf 4.278 1.494 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.617 Td [(;)]TJ -262.547 -19.564 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(u)-80(p)-25(d)-40(a)-25(t)-25(e)]TJ +/F84 9.9626 Tf 1.02 0 0 1 145.335 302.424 Tm 
[(Whether)-327(to)-327(keep)-327(track)-327(of)-327(matrix)-327(entries)-327(that)-327(do)-327(not)-327(belong)-327(to)-327(the)]TJ 1 0 0 1 124.802 290.469 Tm [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 242.648 Tm [(Speci\002ed)-194(as:)-286(an)-193(integer)-194(value)]TJ/F145 9.9626 Tf 1 0 0 1 250.731 242.648 Tm [(psb_matbld_noremote_)]TJ/F84 9.9626 Tf 0.98 0 0 1 355.338 242.648 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 359.791 242.648 Tm [(psb_matbld_remote_)]TJ/F84 9.9626 Tf 0.98 0 0 1 453.937 242.648 Tm [(.)]TJ 1 0 0 1 124.802 230.693 Tm [(Default:)]TJ/F145 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F84 9.9626 Tf 104.607 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -168.029 -19.627 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.009 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 15.253 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 1.01 0 0 1 124.802 144.236 Tm [(Speci\002ed)-247(as:)-306(a)-247(rank)-246(one)-247(or)-246(two)-247(array)-246(with)-247(the)-246(ALLOCA)73(T)73(ABLE)-246(attribute)]TJ 1 0 0 1 124.802 132.281 Tm [(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 86.634 0 Td [(psb)]TJ ET q -1 0 0 1 244.034 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 227.755 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 247.147 144.236 Td [(t)-25(y)-80(p)-25(e)]TJ/F85 10.3811 Tf 21.467 0 Td [(=)]TJ/F52 9.9626 Tf 
11.634 0 Td [(p)-25(s)-25(b)]TJ +/F145 9.9626 Tf 230.893 132.281 Td [(T)]TJ ET q -1 0 0 1 294.938 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 236.751 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 298.201 144.236 Td [(a)-25(v)-47(g)]TJ +/F145 9.9626 Tf 239.889 132.281 Td [(vect)]TJ ET q -1 0 0 1 314.026 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 261.438 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 175.611 132.281 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(integer)-250(variable.)]TJ +/F145 9.9626 Tf 264.576 132.281 Td [(type)]TJ 0 g 0 G - 141.968 -29.888 Td [(62)]TJ +/F84 9.9626 Tf 20.922 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +0 g 0 G + -18.728 -41.843 Td [(91)]TJ 0 g 0 G ET endstream endobj -1296 0 obj +1629 0 obj << -/Length 5447 +/Length 991 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(work)]TJ -0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(dimensional)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F52 9.9626 Tf 252.794 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -282.907 -19.925 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(x)]TJ +/F75 9.9626 Tf 150.705 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F52 9.9626 Tf 117.084 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td 
[(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(18)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(If)-241(ther)18(e)-240(is)-241(no)-241(overlap)-240(in)-241(the)-241(data)-240(distribution)-241(associated)-241(with)-240(the)-241(descrip-)]TJ 12.453 -11.955 Td [(tor)74(,)-250(no)-250(operations)-250(ar)18(e)-250(performed;)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(The)-284(operator)]TJ/F52 9.9626 Tf 72.855 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F54 9.9626 Tf 8.232 -3.617 Td 
[(performs)-284(the)-284(r)18(eduction)-285(sum)-284(of)-284(overlap)-284(elements;)-302(it)-284(is)-284(a)]TJ -75.039 -11.955 Td [(\223pr)18(olongation\224)-265(operator)]TJ/F52 9.9626 Tf 110.535 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F54 9.9626 Tf 8.044 -3.616 Td [(that)-265(r)18(eplicates)-266(ov)1(erlap)-266(elements,)-269(accounting)]TJ -124.984 -11.955 Td [(for)-250(the)-250(physical)-250(r)18(eplication)-250(of)-250(data;)]TJ -0 g 0 G - -12.453 -19.925 Td [(3.)]TJ -0 g 0 G - [-500(The)-190(operator)]TJ/F52 9.9626 Tf 70.978 0 Td [(P)]TJ/F52 7.5716 Tf 5.423 -1.495 Td [(a)]TJ/F54 9.9626 Tf 5.848 1.495 Td [(performs)-190(a)-190(scaling)-190(on)-190(the)-190(overlap)-190(elements)-190(by)-190(the)-190(amount)]TJ -69.796 -11.956 Td [(of)-325(r)18(eplication;)-363(thus,)-343(when)-325(combined)-325(with)-325(the)-325(r)18(eduction)-325(operator)74(,)-344(it)-325(im-)]TJ 0 -11.955 Td [(plements)-250(the)-250(average)-250(of)-250(r)18(eplicated)-250(elements)-250(over)-250(all)-250(of)-250(their)-250(instances.)]TJ/F51 11.9552 Tf -24.907 -19.925 Td [(Example)-320(of)-320(use)]TJ/F54 9.9626 Tf 87.879 0 Td [(Consider)-320(the)-320(discr)18(etization)-320(mesh)-320(depicted)-320(in)-320(\002g.)]TJ -0 0 1 rg 0 0 1 RG - [-320(4)]TJ -0 g 0 G - [(,)-337(parti-)]TJ -87.879 -11.955 Td [(tioned)-262(among)-262(two)-263(pr)18(ocesse)1(s)-263(as)-262(shown)-262(by)-262(the)-262(dashed)-263(li)1(nes,)-266(with)-262(an)-262(overlap)-262(of)]TJ 0 -11.955 Td [(1)-261(extr)1(a)-261(layer)-260(with)-261(r)18(espect)-260(to)-261(the)-260(partition)-261(of)-260(\002g.)]TJ -0 0 1 rg 0 0 1 RG - [-261(3)]TJ +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [(;)-265(the)-261(data)-260(distribution)-261(is)-260(such)]TJ 0 -11.956 Td [(that)-267(each)-268(pr)18(ocess)-267(will)-267(own)-267(40)-268(entries)-267(in)-267(the)-267(index)-268(space,)-271(with)-267(an)-268(overlap)-267(of)-267(16)]TJ 0 -11.955 Td 
[(entries)-249(placed)-248(at)-249(local)-249(i)1(ndices)-249(25)-249(thr)18(ough)-248(40;)-249(the)-249(halo)-249(will)-248(r)8(un)-249(fr)18(om)-249(local)-248(index)]TJ 0 -11.955 Td [(41)-236(thr)18(ough)-237(local)-236(index)-237(48..)-305(If)-236(pr)18(ocess)-237(0)-236(assigns)-237(an)-236(initial)-236(value)-237(of)-236(1)-236(to)-237(its)-236(entries)]TJ 0 -11.955 Td [(in)-259(the)]TJ/F52 9.9626 Tf 27.963 0 Td [(x)]TJ/F54 9.9626 Tf 7.782 0 Td [(vector)74(,)-261(and)-258(pr)18(ocess)-259(1)-259(assigns)-258(a)-259(value)-259(of)-258(2,)-261(then)-259(after)-258(a)-259(call)-259(to)]TJ/F59 9.9626 Tf 266.124 0 Td [(psb_ovrl)]TJ/F54 9.9626 Tf -301.869 -11.955 Td [(with)]TJ/F59 9.9626 Tf 22.816 0 Td [(psb_avg_)]TJ/F54 9.9626 Tf 44.404 0 Td [(and)-257(a)-257(call)-257(to)]TJ/F59 9.9626 Tf 55.983 0 Td [(psb_halo_)]TJ/F54 9.9626 Tf 49.635 0 Td [(the)-257(contents)-257(of)-257(the)-257(local)-257(vectors)-257(will)-258(b)1(e)]TJ -172.838 -11.955 Td [(the)-250(following)-250(\050showing)-250(a)-250(transition)-250(among)-250(the)-250(two)-250(subdomains\051)]TJ + 1.02 0 0 1 175.611 616.465 Tm [(Using)]TJ/F145 9.9626 Tf 1 0 0 1 205.259 616.465 Tm [(psb_matbld_remote_)]TJ/F84 9.9626 Tf 1.02 0 0 1 302.317 616.465 Tm [(is)-287(li)1(kely)-287(to)-287(cause)-286(a)-287(r)8(untime)-286(over)17(head)-286(at)-287(as-)]TJ 1 0 0 1 175.611 604.51 Tm [(sembly)-250(time;)]TJ 0 g 0 G - 166.875 -143.462 Td [(63)]TJ + 141.968 -514.072 Td [(92)]TJ 0 g 0 G ET endstream endobj -1304 0 obj +1636 0 obj << -/Length 3551 +/Length 6572 >> stream 0 g 0 G 0 g 0 G -0 g 0 G -0 g 0 G -0 g 0 G BT -/F54 7.9701 Tf 265.805 653.177 Td [(Pr)18(ocess)-250(0)-8396(Pr)18(ocess)-250(1)]TJ -31.163 -9.464 Td [(I)-1500(GLOB\050I\051)-1500(X\050I\051)-5163(I)-1500(GLOB\050I\051)-1500(X\050I\051)]TJ -1.299 -9.465 Td [(1)-4774(1)-1920(1.0)-5000(1)-4274(33)-1920(1.5)]TJ 0 -9.464 Td [(2)-4774(2)-1920(1.0)-5000(2)-4274(34)-1920(1.5)]TJ 0 -9.465 Td [(3)-4774(3)-1920(1.0)-5000(3)-4274(35)-1920(1.5)]TJ 0 -9.464 Td 
[(4)-4774(4)-1920(1.0)-5000(4)-4274(36)-1920(1.5)]TJ 0 -9.465 Td [(5)-4774(5)-1920(1.0)-5000(5)-4274(37)-1920(1.5)]TJ 0 -9.464 Td [(6)-4774(6)-1920(1.0)-5000(6)-4274(38)-1920(1.5)]TJ 0 -9.465 Td [(7)-4774(7)-1920(1.0)-5000(7)-4274(39)-1920(1.5)]TJ 0 -9.464 Td [(8)-4774(8)-1920(1.0)-5000(8)-4274(40)-1920(1.5)]TJ 0 -9.465 Td [(9)-4774(9)-1920(1.0)-5000(9)-4274(41)-1920(2.0)]TJ -3.985 -9.464 Td [(10)-4274(10)-1920(1.0)-4500(10)-4274(42)-1920(2.0)]TJ 0 -9.465 Td [(11)-4274(11)-1920(1.0)-4500(11)-4274(43)-1920(2.0)]TJ 0 -9.464 Td [(12)-4274(12)-1920(1.0)-4500(12)-4274(44)-1920(2.0)]TJ 0 -9.465 Td [(13)-4274(13)-1920(1.0)-4500(13)-4274(45)-1920(2.0)]TJ 0 -9.464 Td [(14)-4274(14)-1920(1.0)-4500(14)-4274(46)-1920(2.0)]TJ 0 -9.465 Td [(15)-4274(15)-1920(1.0)-4500(15)-4274(47)-1920(2.0)]TJ 0 -9.464 Td [(16)-4274(16)-1920(1.0)-4500(16)-4274(48)-1920(2.0)]TJ 0 -9.465 Td [(17)-4274(17)-1920(1.0)-4500(17)-4274(49)-1920(2.0)]TJ 0 -9.464 Td [(18)-4274(18)-1920(1.0)-4500(18)-4274(50)-1920(2.0)]TJ 0 -9.465 Td [(19)-4274(19)-1920(1.0)-4500(19)-4274(51)-1920(2.0)]TJ 0 -9.464 Td [(20)-4274(20)-1920(1.0)-4500(20)-4274(52)-1920(2.0)]TJ 0 -9.465 Td [(21)-4274(21)-1920(1.0)-4500(21)-4274(53)-1920(2.0)]TJ 0 -9.464 Td [(22)-4274(22)-1920(1.0)-4500(22)-4274(54)-1920(2.0)]TJ 0 -9.465 Td [(23)-4274(23)-1920(1.0)-4500(23)-4274(55)-1920(2.0)]TJ 0 -9.464 Td [(24)-4274(24)-1920(1.0)-4500(24)-4274(56)-1920(2.0)]TJ 0 -9.465 Td [(25)-4274(25)-1920(1.5)-4500(25)-4274(57)-1920(2.0)]TJ 0 -9.464 Td [(26)-4274(26)-1920(1.5)-4500(26)-4274(58)-1920(2.0)]TJ 0 -9.465 Td [(27)-4274(27)-1920(1.5)-4500(27)-4274(59)-1920(2.0)]TJ 0 -9.464 Td [(28)-4274(28)-1920(1.5)-4500(28)-4274(60)-1920(2.0)]TJ 0 -9.465 Td [(29)-4274(29)-1920(1.5)-4500(29)-4274(61)-1920(2.0)]TJ 0 -9.464 Td [(30)-4274(30)-1920(1.5)-4500(30)-4274(62)-1920(2.0)]TJ 0 -9.465 Td [(31)-4274(31)-1920(1.5)-4500(31)-4274(63)-1920(2.0)]TJ 0 -9.464 Td [(32)-4274(32)-1920(1.5)-4500(32)-4274(64)-1920(2.0)]TJ 0 -9.465 Td 
[(33)-4274(33)-1920(1.5)-4500(33)-4274(25)-1920(1.5)]TJ 0 -9.464 Td [(34)-4274(34)-1920(1.5)-4500(34)-4274(26)-1920(1.5)]TJ 0 -9.465 Td [(35)-4274(35)-1920(1.5)-4500(35)-4274(27)-1920(1.5)]TJ 0 -9.464 Td [(36)-4274(36)-1920(1.5)-4500(36)-4274(28)-1920(1.5)]TJ 0 -9.465 Td [(37)-4274(37)-1920(1.5)-4500(37)-4274(29)-1920(1.5)]TJ 0 -9.464 Td [(38)-4274(38)-1920(1.5)-4500(38)-4274(30)-1920(1.5)]TJ 0 -9.465 Td [(39)-4274(39)-1920(1.5)-4500(39)-4274(31)-1920(1.5)]TJ 0 -9.464 Td [(40)-4274(40)-1920(1.5)-4500(40)-4274(32)-1920(1.5)]TJ 0 -9.465 Td [(41)-4274(41)-1920(2.0)-4500(41)-4274(17)-1920(1.0)]TJ 0 -9.464 Td [(42)-4274(42)-1920(2.0)-4500(42)-4274(18)-1920(1.0)]TJ 0 -9.465 Td [(43)-4274(43)-1920(2.0)-4500(43)-4274(19)-1920(1.0)]TJ 0 -9.464 Td [(44)-4274(44)-1920(2.0)-4500(44)-4274(20)-1920(1.0)]TJ 0 -9.465 Td [(45)-4274(45)-1920(2.0)-4500(45)-4274(21)-1920(1.0)]TJ 0 -9.464 Td [(46)-4274(46)-1920(2.0)-4500(46)-4274(22)-1920(1.0)]TJ 0 -9.465 Td [(47)-4274(47)-1920(2.0)-4500(47)-4274(23)-1920(1.0)]TJ 0 -9.464 Td [(48)-4274(48)-1920(2.0)-4500(48)-4274(24)-1920(1.0)]TJ -0 g 0 G +/F75 11.9552 Tf 99.895 706.129 Td [(6.13)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(routine)]TJ 0 g 0 G -/F54 9.9626 Tf 88.221 -98.979 Td [(64)]TJ 0 g 0 G -ET - -endstream -endobj -1308 0 obj -<< -/Length 321 ->> -stream +/F145 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geins\050m,)-525(irw,)-525(val,)-525(x,)-525(desc_a,)-525(info)-525([,local]\051)]TJ 0 g 0 G +/F75 9.9626 Tf 0 -20.57 Td [(T)90(ype:)]TJ 0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.44 -19.386 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G -1 0 0 1 104.053 292.88 cm -q -.65 0 0 .65 0 0 cm -q -1 0 0 1 0 0 cm -/Im5 Do -Q -Q + 0 -19.386 Td [(m)]TJ 0 g 0 G -1 0 0 1 -104.053 -292.88 cm -BT -/F54 9.9626 Tf 189.276 261 Td 
[(Figur)18(e)-250(4:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ +/F84 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows)-250(in)]TJ/F78 9.9626 Tf 86.57 0 Td [(v)-40(a)-25(l)]TJ/F84 9.9626 Tf 15.736 0 Td [(to)-250(be)-250(inserted.)]TJ -91.237 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ 0 g 0 G +/F75 9.9626 Tf -24.907 -19.386 Td [(irw)]TJ 0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 120.368 560.616 Tm [(Indices)-354(of)-353(the)-354(r)18(ows)-354(to)-353(be)-354(inserted.)-629(Speci\002cally)109(,)-381(r)18(ow)]TJ/F78 9.9626 Tf 1 0 0 1 361.94 560.616 Tm [(i)]TJ/F84 9.9626 Tf 1.02 0 0 1 368.497 560.616 Tm [(of)]TJ/F78 9.9626 Tf 1 0 0 1 381.147 560.616 Tm [(v)-40(a)-25(l)]TJ/F84 9.9626 Tf 1.02 0 0 1 397.986 560.616 Tm [(will)-354(be)-353(in-)]TJ 1.02 0 0 1 124.802 548.661 Tm [(serted)-307(into)-307(the)-307(local)-308(r)18(ow)-307(corr)18(esponding)-308(to)-307(the)-307(global)-307(r)18(ow)-308(index)]TJ/F78 9.9626 Tf 1 0 0 1 416.713 548.661 Tm [(i)-22(r)-35(w)]TJ/F192 10.3811 Tf 14.654 0 Td [(\050)]TJ/F78 9.9626 Tf 4.205 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 442.809 548.661 Tm [(.)]TJ 1 0 0 1 124.802 536.706 Tm [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ 0 g 0 G - 77.494 -170.562 Td [(65)]TJ +/F75 9.9626 Tf -24.907 -19.386 Td [(val)]TJ 0 g 0 G -ET - -endstream -endobj -1291 0 
obj -<< -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/try8x8_ov.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 1310 0 R -/BBox [0 0 516 439] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 1311 0 R ->>/Font << /R8 1312 0 R/R10 1313 0 R>> ->> -/Length 3413 -/Filter /FlateDecode ->> -stream -xœ…›Aä¸ …ïõ+ê8s˜^Ë’-é SvÒ·Ážj‘éÃ&‡üýØä{|ÔI°‡¦(ÚõmÑ|%ZšßŸÛ[yn÷øûúxüôµ?û×£½Ï?lîùõO³ögïíùaV+íùÇ_Í.µÐ=äÝ¥Mù÷:éß·šü³†ÿèòןZKúضÅÇÖŸ[J'¥™Ä´8ÍžùºMTÕmÂúÈiÝ&®ÀëvÉ ö™@î%o3ï‘o{æ ˆlv ß#"›È6²Ù|ˆlvÉD¶"ï×Õ@6“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>È}òmò= òmÏ|‘Íä{Dd³ÙF@6;ï‘Í.€È6CäZ£âÌ$² €löÌÙm"ÛÈnÙGŽì6‘md·K²Ïòˆò33‡ÊÏì™/ òÈåg#"\~>òÈåg#"\~N³ [‹ò3“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>ßòãМqhÌtÌÔ3͇ÆL9óÌ.‡æL€CÚ ½·Ðf4f :f š37´fj‹ƒŽƒæL9óÌ.ƒŽ™.ƒÚ ]f h³33Í™Z3µÅŒAÇŒAs¦œyfAÇL€A Í › -ÑlA7bÌ8tS!j¦¶˜qè– ‘3åÌ3»º¥BäÄ– z¨G.á*¹‡jpäªÀ‘ p¨þF.¿¡ê©ø†jo,¥—*o)¼TwKÙ¥ª[Š.ÕÜRr©â–‚Kõ–Ë-U[rCïÌÜŽöVŸç8ßúV]¾c­úz£¿íü½?ZÄ”à+ÄÌ£¾õgD_œßå6^¸û=t‹–×ãûªjn–Œf¿9±¼š ñ…ÖÑêY2šý&Å -\h6´Oõø`Ih®]¦³g4û¡ŒŽ@h6¼!PN¶õ­à»7#¡ÝCèæwù¾*”›uA«H… íšÇMGÛšÑì×?ú¡ÙÐ>ÕãƒEhÐ!3ÇÌh¶$‰¾Kh6¼!P­Ì¹ müî·º ÝC˜DÃ]¾¯jcf]ÊÀ–QÑ)&´Š2@üAZ[ÊÀÖYÑM -͆e€»|_ôŬ롾oÆ‹;2Ñ—"è¨F:çš÷#}]Ëè‰/~.%0QŒ&Gª@_©v&a}þùøwQU=þ}¥j”ž²jþ.ªÿ²®3Ûõ©ìûu¹Ë–:Ù×ÕÉîý 2çË=ÄÇÒïÅe ¥†ñ·p¡?·TòþæwžÄÇ•šÛ%ñ™v©>÷ûÊ®ôÈm—)Æßê¥.=øÜoŸ‹øàÉ|X”Ù"È• -×›€éíAð¹ßÖCˆù\«ÈW˜hùÌï ]|Ûš_®¿Ü®™¯2/7ò™ß×k5ø Ln»`1þ` p¼¿ùís<‰K-³]³p½I™ÞØŸûm•ƒøX¦½¸d›™o‹<¸Ê‘ÏüÎ0ƒ/ÉܲÜ2»æú0=Ó«&ñUÖãcEF¾–ëÃMo£‚ÏýΠúHZ—×XfB¿p}gZz®ŽÎâˆh®ÃGCüdf®ÉÒˆh²äÒH•Ñ´=2²FÔEOh5ÕE_ÐZèSYd…UÑZR½Ôçº ë§T/^†]·=O©Þ©UŸ:á»bÏâMÝüŸg÷7¿3x|ð$>ªžÛ%ñ¹ºÅk¼àsÿ©¥Ÿš^ðQÅoê/ú‚Ïýö¹ˆžÌÕ³¦*æ×»ºÅ Èàsÿ©õŸú[òAÅÀW˜ªøÌï ]|Ûš_ªžÛ5óUæ…ª>óŸZª•U ñó@ÕÃýÍoŸ‹øàI|T=³¡b~½«[¼ô >÷ŸZ ªk}±ƒ™o‹<@õÀg~g˜Á—U/µŸn×\®nñ¶Z|•õÁøS+3ع>\Ýâ…vð¹ßTYõÔqºIóë;ÓÒsutGDŸ±@33tÌã'“0smL–FD“%—FªŒ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zË{¿ª^t¯[¨žÞ§¿û¾…êùÛ@Ä­Ñ`[–«v ,«¼¿ù«º]ñ$>¾È«jHq½©›v‚ÏýEkC½TŒñ·ºi¯ øÜ_ÕòŠ'óá]UWŠëMÝ´‡|î/Zê}ù\ÅÈW˜¨ùÌ_Õ÷Š'ññõ\UkJ¾Ê¼@õÈgþ¢µ¡^í*Æøƒy€êñþæ¯j~Å“øø&®ª?Åõ¦nÚ7 ->÷­ õïÅ7z3óm‘W=ò™¿ªOækª¨ù*óÕ#_e}0¾h;ׇ©›öÄ‚ÏýUm°xßPyPÇp}gZz®ŽÎâˆè«43©cˆŸLÂ̵1Y][V½üέ¦†5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½´â6Tl4©^lÉ]·íMª×µ6ÔNÉ‹»&ž%Ä›ºõ)ÕÃýÍ?Ô‹'ñQõ†:V\ïꛉÁ¿Ö†ÚU ñ¦n±Ý|îêˆÅ“ù 
zC+®wu‹mÐàƒ_kCí*¾Â\Ýb[=øÜ?Ô‹'ñ •Gè˜_ß™–ž«£³8"ºÇ*ÍÌÐ1ŸLÂ̵1Y=Æ¢zÚð¬÷mŒ,…uÑZMuÑ´úTYaUô„&Õ[ö…›:ÖZG¨žvõ_ºP=ß-Fü®5lËãouÃYË*ïoþ¦ŽX<‰½M+®7uÓy„àsÿ®µ¡6‰Ácü­n:±|îoêˆÅ“ù°§ÛÔ±âzS7¤>÷ïZj?˜|®bä+ÌT|æoêˆÅ“ø¸}ÛÔ±’¯2/P=ò™×ÚP[¿àƒŠ1þ` z¼¿ù›:bñ$>îÔ6u¬¸ÞÔM§W‚Ïý»Ö†Úå}qÇwf¾-òàªG>ó7uÄâÉ|Mõ#_e^ z䫬ÆïZ£ÁÎõaꦓ9Áçþ¦ŽX<‰o¨<¨c¸¾3-=WGgqDô«43©cˆŸLÂ̵1YÝö¬zy?¶¥ž5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½tNÆm¨ØÔn†]·ÚÍðÓ3ˆïZ£Áö, ífณguj7 ?ÏUõÒ‘Ø%ñ¹º ífàóÜßµ6Ô¡ðQņv3tè)øÜ?Õ‹'óAõ¦:V\ïê6´›Ïs×ÚPçcÈÚÍÐq-ñ™ª#Oâ£êMu¬ä«Ì Uoh7Ç_jðQõºÖnŒ?˜ªÞÔn†.â繪^:Óâ6Tlj7Cà‚Ïý]kCzyñÌÌ|[äª7µ›†|YõÒñ·k®W·¡Ý òUÖã»Öh°s}¸º ífðþõÁøy®ª§“*nRÇfìfÄàë,Žˆî±J33tlÄnFI ¶ÉÒˆèy.ª§Ó)žõ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zíÐn†Û®b­i7Cg _Vµ›á§ _µFƒmYbü­n81iYåýͨ#OâãAÀC+®7uÓ©ÈàsÕÚP‡Ácü­n:7|î?Ô‹'óáÌß¡Ž×›ºé®?Œ÷Æå‡ñ¶ŽùÃxü0î÷øúz~ÙëDõ¹žDí‡E]Ó×…aXŽí!Í>øïŸ×düoõ–‘¿Ö«\¯ç­ÜýÌ•çë»ýô—w-ÿ/Iÿõv×ï!o'ÈŸÿ`[G. -endstream -endobj -1316 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 13073 ->> -stream -xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs -ÏÇo”dz%!)&cÒæ”¤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ -¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ -ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– -Â&a‡°GxEø¡ð¡ð ! 
ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ -Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 -«CÁ -»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w -v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7І%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ -ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ -èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ΜG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøÈª‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) -sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/Ì寿ªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²ã¶¢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( -Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ -ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï -;žÓ¸‹FÄd4½…‚ïLûí“‹=wް%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ -GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ר.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïØ¤žAïW—v²àeÌÕÒñùåÇYÏ 
BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ -‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ߕռÒ2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ -œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu -¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr -0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äóæ¾ž7á= -%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl -»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•ѵa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçÆ±€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 -ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ -ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( -½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. 
øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η - Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi -õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø -¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ -Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*ކŽ7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@˩ԷW‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥Ågrާt0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª -ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& -‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä -]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² - ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃƒlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ 
íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ -SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døÑª‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. -¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæØ‡ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« -g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 -Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E -G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r 
&%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA -à= -®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ -è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞÆ‡Å’¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ +/F84 9.9626 Tf 18.82 0 Td [(the)-250(dense)-250(submatrix)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)-310(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.386 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 414.446 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.397 414.247 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 
Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 366.626 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 366.426 Td [(desc)]TJ +ET +q +1 0 0 1 333.945 366.626 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 337.084 366.426 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -19.386 Td [(local)]TJ +0 g 0 G +/F84 9.9626 Tf 1.004 0 0 1 125.957 347.04 Tm [(Whether)-248(the)-248(entries)-249(in)-248(the)-248(index)-248(vector)]TJ/F145 9.9626 Tf 1 0 0 1 300.374 347.04 Tm [(irw)]TJ/F84 9.9626 Tf 1.004 0 0 1 316.065 347.04 Tm [(,)-248(ar)18(e)-249(alr)18(eady)-248(in)-248(local)-248(number)18(-)]TJ 1 0 0 1 124.802 335.085 Tm [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -61.877 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F145 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F84 9.9626 Tf 36.612 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -224.197 -20.57 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.387 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(output)-250(dense)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 
124.802 211.443 Tm [(Speci\002ed)-237(as:)-308(a)-237(rank)-238(one)-237(or)-238(two)-237(array)-238(or)-237(an)-238(object)-237(of)-238(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 368.349 211.443 Tm [(psb)]TJ +ET +q +1 0 0 1 384.668 211.642 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 387.806 211.443 Td [(T)]TJ +ET +q +1 0 0 1 393.664 211.642 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 396.802 211.443 Td [(vect)]TJ +ET +q +1 0 0 1 418.351 211.642 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 421.49 211.443 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 442.411 211.443 Tm [(,)]TJ 1 0 0 1 124.802 199.488 Tm [(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -31.342 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 142.357 -29.888 Td [(93)]TJ +0 g 0 G +ET + endstream endobj -1317 0 obj +1640 0 obj << -/Filter /FlateDecode -/Subtype /Type1C -/Length 11578 +/Length 578 >> stream -xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? 
µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» -bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o -‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU -‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûξXËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ -´Õ= -«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið -ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É -|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajM肚Ém¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøÐ‰•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ -SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi -‹» xŸÃk\_¥OÌ -O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôѰ€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| -ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#а6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ -·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á 
¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²Gá§ŠRjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä -„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy -GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØÎ“ë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO -è ~nè¸Ó\“¡ÍÑeåf -Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QЦŽmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW -å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊᡞž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ -Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe -€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'à¶Š - iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ -Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ -(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígϰçÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ -œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ -Ë.ŠöUŸ 
ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} -lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒёРIòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L -³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië -H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ -Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú -?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” -å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ -Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, -¬Á®¶QU¯ާ_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm -êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ -ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ -Ê Å±»ò - Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄE鯶îÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG -mÙ_¨&òK - ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë -ó‹‹Ê„²êlŽG™ƒýUª€ÝW޶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| 
ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ -åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ -Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ -ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ -ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(Dense)-250(vectors/matrices)-250(do)-250(not)-250(have)-250(an)-250(associated)-250(state;)]TJ +0 g 0 G + 0 -19.926 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 175.611 666.278 Tm [(Duplicate)-264(entries)-265(ar)18(e)-265(eithe)1(r)-265(overwritten)-264(or)-265(added,)-269(ther)18(e)-265(is)-264(no)-264(pr)17(ovision)]TJ 1 0 0 1 175.611 654.323 Tm [(for)-250(raising)-250(an)-250(err)18(or)-250(condition.)]TJ +0 g 0 G + 141.968 -563.885 Td [(94)]TJ +0 g 0 G +ET + endstream endobj -1323 0 obj +1649 0 obj << -/Length 8093 +/Length 6324 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(5.3)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(6.14)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ/F54 9.9626 Tf -51.12 -19.441 Td 
[(These)-280(subr)18(outines)-280(collect)-280(the)-280(portions)-280(of)-280(g)1(lobal)-280(dense)-280(matrix)-280(distributed)-280(over)]TJ 0 -11.955 Td [(all)-250(pr)18(ocess)-250(into)-250(one)-250(single)-250(array)-250(stor)18(ed)-250(on)-250(one)-250(pr)18(ocess.)]TJ/F52 9.9626 Tf 120.161 -25.465 Td [(g)-25(l)-55(o)-35(b)]TJ +/F75 11.9552 Tf 156.993 706.129 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geasb\050x,)-525(desc_a,)-525(info,)-525(mold\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ ET q -1 0 0 1 289.521 649.467 cm +1 0 0 1 120.408 625.596 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 292.803 649.268 Td [(x)]TJ/F83 10.3811 Tf 8.097 0 Td [(\040)]TJ/F52 9.9626 Tf 13.398 0 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ/F85 10.3811 Tf 27.705 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(l)-55(o)-35(c)]TJ +/F75 9.9626 Tf 123.397 625.397 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.249 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 136.327 0 Td [(psb)]TJ ET q -1 0 0 1 359.144 649.467 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 277.448 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 362.427 649.268 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F85 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F54 9.9626 Tf -219.744 -22.41 Td [(wher)18(e:)]TJ +/F145 9.9626 Tf 280.586 
577.576 Td [(desc)]TJ +ET +q +1 0 0 1 302.135 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 305.273 577.576 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F52 9.9626 Tf 0.344 -20.664 Td [(g)-25(l)-55(o)-35(b)]TJ +/F75 9.9626 Tf -226.3 -31.88 Td [(mold)]TJ +0 g 0 G +/F84 9.9626 Tf 28.473 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.566 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.987 0 0 1 124.802 497.875 Tm [(Speci\002ed)-254(as:)-315(an)-254(object)-254(of)-254(a)-254(class)-254(derived)-254(fr)18(om)]TJ/F145 9.9626 Tf 1 0 0 1 323.94 497.875 Tm [(psb)]TJ ET q -1 0 0 1 169.703 606.393 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 340.259 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 343.397 497.875 Td [(T)]TJ +ET +q +1 0 0 1 349.255 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 352.393 497.875 Td [(base)]TJ +ET +q +1 0 0 1 373.942 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 377.08 497.875 Td [(vect)]TJ +ET +q +1 0 0 1 398.629 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 401.767 497.875 Td [(type)]TJ/F84 9.9626 Tf 0.987 0 0 1 422.689 497.875 Tm [(;)-254(this)]TJ 1 0 0 1 124.802 485.92 Tm [(is)-250(only)-250(allowed)-250(when)]TJ/F78 9.9626 Tf 97.12 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 42.898 0 Td [(psb)]TJ +ET +q +1 0 0 1 288.835 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 291.973 485.92 Td [(T)]TJ +ET +q +1 0 0 1 297.831 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 
300.969 485.92 Td [(vect)]TJ +ET +q +1 0 0 1 322.518 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 172.986 606.194 Td [(x)]TJ +/F145 9.9626 Tf 325.657 485.92 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(submatrix)]TJ/F52 9.9626 Tf 103.256 0 Td [(g)-25(l)-55(o)-35(b)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -246.683 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 15.253 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 396.256 Tm [(Speci\002ed)-220(as:)-299(a)-220(rank)-220(one)-220(or)-220(two)-221(array)-220(with)-220(the)-220(ALLOCA)76(T)75(ABLE)-220(or)-220(an)-220(object)]TJ 1 0 0 1 124.802 384.301 Tm [(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 33.285 0 Td [(psb)]TJ ET q -1 0 0 1 305.084 606.393 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 174.405 384.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 308.366 606.194 Td [(x)]TJ/F54 7.5716 Tf 5.106 -1.858 Td [(1)-13(:)]TJ/F52 7.5716 Tf 5.963 0 Td [(m)]TJ/F54 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F52 7.5716 Tf 7.856 0 Td [(n)]TJ +/F145 9.9626 Tf 177.544 384.301 Td [(T)]TJ +ET +q +1 0 0 1 183.402 384.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 186.54 384.301 Td [(vect)]TJ +ET +q +1 0 0 1 208.089 384.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 211.227 384.301 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -132.253 -31.881 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 
1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + 0.996 0 0 1 124.802 262.757 Tm [(On)-250(entry)-251(to)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-251(must)-250(be)-250(in)-250(the)-251(assembled)-250(state,)-250(i.e.)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 250.801 Tm [(psb_cdasb)]TJ/F84 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ +0 g 0 G + -62.017 -19.925 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 124.802 230.876 Tm [(If)-380(the)]TJ/F145 9.9626 Tf 1 0 0 1 153.429 230.876 Tm [(bldmode=psb_matbld_remote_)]TJ/F84 9.9626 Tf 1.02 0 0 1 293.28 230.876 Tm [(value)-380(was)-380(speci\002ed)-380(at)-380(allocation)]TJ 1.02 0 0 1 124.802 218.921 Tm [(time,)-380(contributions)-354(de\002ned)-353(on)-354(the)-353(curr)18(ent)-354(pr)18(ocess)-353(but)-354(belonging)-353(to)-354(a)]TJ 0.98 0 0 1 124.802 206.966 Tm [(r)18(emote)-252(pr)18(ocess)-253(will)-252(be)-253(handled)-252(accor)18(dingly)113(.)-315(This)-253(is)-253(most)-252(likely)-253(to)-253(occur)-252(in)]TJ 1 0 0 1 124.802 195.011 Tm [(\002nite)-250(element)-250(applications,)-250(with)]TJ/F145 9.9626 Tf 144.277 0 Td [(dupl=psb_dupl_add_)]TJ/F84 9.9626 Tf 94.146 0 Td [(.)]TJ +0 g 0 G + -96.455 -104.573 Td [(95)]TJ +0 g 0 G +ET + +endstream +endobj +1657 0 obj +<< +/Length 3253 +>> +stream +0 g 0 G 0 g 0 G -/F52 9.9626 Tf -182.447 -19.051 Td [(l)-55(o)-35(c)]TJ +BT +/F75 11.9552 Tf 150.705 706.129 Td [(6.15)-1000(psb)]TJ ET q -1 0 0 1 163.696 585.484 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 
J 0.398 w 0 0 m 3.587 0 l S Q BT -/F52 9.9626 Tf 166.979 585.285 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(gefree)-250(\227)-250(Frees)-250(a)-250(dense)-250(matrix)]TJ 0 g 0 G -/F54 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 234.034 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(.)]TJ 0 g 0 G -/F52 9.9626 Tf -266.027 -20.91 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ +/F145 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gefree\050x,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 32.563 0 Td [(is)-250(the)-250(collect)-250(function.)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 15.252 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 0.98 0 0 1 175.611 577.576 Tm [(Speci\002ed)-220(as:)-299(a)-220(rank)-220(one)-220(or)-220(two)-221(array)-220(with)-220(the)-220(ALLOCA)76(T)75(ABLE)-220(or)-220(an)-220(object)]TJ 1 0 0 1 175.611 565.621 Tm [(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 33.285 0 Td [(psb)]TJ ET q -1 0 0 1 230.392 543.107 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 225.215 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 236.663 534.539 Td [(x)]TJ/F52 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F54 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(y)]TJ/F51 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ +/F145 9.9626 Tf 228.353 565.621 Td [(T)]TJ ET q -1 0 0 1 230.392 530.753 cm -[]0 d 0 J 0.398 w 0 0 m 
184.337 0 l S +1 0 0 1 234.211 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 236.369 522.185 Td [(Integer)-8983(psb)]TJ +/F145 9.9626 Tf 237.349 565.621 Td [(vect)]TJ ET q -1 0 0 1 373.603 522.385 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 258.898 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 522.185 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F145 9.9626 Tf 262.036 565.621 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -132.253 -31.881 Td [(desc)]TJ ET q -1 0 0 1 373.603 510.429 cm +1 0 0 1 171.218 533.94 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 510.23 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F75 9.9626 Tf 174.207 533.74 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.653 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.249 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 136.328 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 498.474 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 328.257 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 498.275 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F145 9.9626 Tf 331.395 485.92 Td [(desc)]TJ ET q -1 0 0 1 373.603 486.519 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 352.944 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 486.32 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ 
+/F145 9.9626 Tf 356.083 485.92 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -226.299 -33.873 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 142.356 -293.863 Td [(96)]TJ +0 g 0 G +ET + +endstream +endobj +1661 0 obj +<< +/Length 3208 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(6.16)-1000(psb)]TJ ET q -1 0 0 1 373.603 474.564 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 376.592 474.365 Td [(gather)]TJ -ET -q -1 0 0 1 230.392 470.579 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q +/F75 11.9552 Tf 156.993 706.129 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 278.277 442.2 Td [(T)92(able)-250(19:)-310(Data)-250(types)]TJ 0 g 0 G +/F145 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gelp\050trans,)-525(iperm,)-525(x,)-525(info\051)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -127.572 -27.052 Td [(call)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - [-525(psb_gather\050glob_x,)-525(loc_x,)-525(desc_a,)-525(info,)-525(root\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-190(call)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-525(psb_gather\050glob_x,)-525(loc_x,)-525(desc_a,)-525(info,)-525(root\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.902 Td 
[(T)90(ype:)]TJ + 0 -19.925 Td [(trans)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 27.278 0 Td [(A)-250(character)-250(that)-250(speci\002es)-250(whether)-250(to)-250(permute)]TJ/F78 9.9626 Tf 203.748 0 Td [(A)]TJ/F84 9.9626 Tf 9.808 0 Td [(or)]TJ/F78 9.9626 Tf 12.488 0 Td [(A)]TJ/F78 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F84 9.9626 Tf 5.401 -3.616 Td [(.)]TJ -241.327 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(single)-250(character)-250(with)-250(value)-250('N')-250(for)]TJ/F78 9.9626 Tf 218.194 0 Td [(A)]TJ/F84 9.9626 Tf 9.808 0 Td [(or)-250('T')-250(for)]TJ/F78 9.9626 Tf 41.808 0 Td [(A)]TJ/F78 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F84 9.9626 Tf 5.4 -3.616 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.91 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -307.628 -31.88 Td [(iperm)]TJ 0 g 0 G +/F84 9.9626 Tf 31.711 0 Td [(An)-250(integer)-250(array)-250(containing)-250(permutation)-250(information.)]TJ -6.804 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(one-dimensional)-250(array)111(.)]TJ 0 g 0 G - 0 -20.909 Td [(loc)]TJ -ET -q -1 0 0 1 164.583 350.626 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 167.571 350.427 Td [(x)]TJ +/F75 9.9626 Tf -24.907 -31.881 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(permuted.)]TJ 15.253 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td 
[(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(or)-250(two)-250(dimensional)-250(array)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -33.873 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 142.357 -226.117 Td [(97)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ ET -q -1 0 0 1 371.853 350.626 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1667 0 obj +<< +/Length 6625 +>> +stream +0 g 0 G +0 g 0 G BT -/F52 9.9626 Tf 375.135 350.427 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -204.73 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(6.17)-1000(psb)]TJ ET q -1 0 0 1 436.673 302.805 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 439.811 
302.606 Td [(T)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(glob)]TJ ET q -1 0 0 1 445.669 302.805 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 233.1 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 448.807 302.606 Td [(vect)]TJ +/F75 11.9552 Tf 236.686 706.129 Td [(to)]TJ ET q -1 0 0 1 470.356 302.805 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 248.031 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 473.495 302.606 Td [(type)]TJ +/F75 11.9552 Tf 251.618 706.129 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(19)]TJ 0 g 0 G - [(.)]TJ +/F145 9.9626 Tf -100.913 -18.964 Td [(call)-525(psb_glob_to_loc\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ 0 -11.955 Td [(call)-525(psb_glob_to_loc\050x,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.109 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.602 Td [(On)-250(Entry)]TJ +0 g 0 G 0 g 0 G -/F51 9.9626 Tf -24.906 -20.91 Td [(desc)]TJ + 0 -19.601 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.574 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 15.332 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in,)-250(inout)]TJ/F84 9.9626 Tf 38.734 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -31.557 Td [(desc)]TJ ET q -1 0 0 1 171.218 269.941 cm +1 0 0 1 171.218 535.72 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 269.741 Td [(a)]TJ +/F75 9.9626 Tf 174.207 535.52 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 
0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 222.12 cm +1 0 0 1 360.068 487.899 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 221.921 Td [(desc)]TJ +/F145 9.9626 Tf 363.206 487.7 Td [(desc)]TJ ET q -1 0 0 1 384.755 222.12 cm +1 0 0 1 384.755 487.899 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 221.921 Td [(type)]TJ +/F145 9.9626 Tf 387.893 487.7 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -19.602 Td [(iact)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F75 9.9626 Tf 253.795 0 Td [(global)]TJ/F84 9.9626 Tf -250.218 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.983 0 0 1 175.611 432.232 Tm 
[(Speci\002ed)-254(as:)-316(a)-254(character)-254(variable)]TJ/F145 9.9626 Tf 1 0 0 1 319.381 432.232 Tm [(I)]TJ/F84 9.9626 Tf 0.983 0 0 1 324.612 432.232 Tm [(gnor)18(e,)]TJ/F145 9.9626 Tf 1 0 0 1 354.422 432.232 Tm [(W)]TJ/F84 9.9626 Tf 0.983 0 0 1 359.653 432.232 Tm [(arning)-254(or)]TJ/F145 9.9626 Tf 1 0 0 1 402.308 432.232 Tm [(A)]TJ/F84 9.9626 Tf 0.983 0 0 1 407.539 432.232 Tm [(bort,)-254(default)]TJ/F145 9.9626 Tf 1 0 0 1 463.572 432.232 Tm [(I)]TJ/F84 9.9626 Tf 0.983 0 0 1 468.802 432.232 Tm [(gnor)18(e.)]TJ +0 g 0 G +/F75 9.9626 Tf 1 0 0 1 150.705 412.631 Tm [(owned)]TJ +0 g 0 G +/F84 9.9626 Tf 35.975 0 Td [(Spec\002es)-250(valid)-250(range)-250(of)-250(input)-250(Scope:)]TJ/F75 9.9626 Tf 159.54 0 Td [(global)]TJ/F84 9.9626 Tf -170.917 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.99 0 0 1 175.611 376.765 Tm [(If)-252(tr)8(ue,)-252(then)-252(on)1(ly)-252(indices)-252(strictly)-252(owned)-252(by)-251(the)-252(curr)18(ent)-252(pr)18(ocess)-251(ar)18(e)-252(consid-)]TJ 1 0 0 1 175.611 364.81 Tm [(er)18(ed)-250(valid,)-250(if)-250(false)-250(then)-250(halo)-250(indices)-250(ar)18(e)-250(also)-250(accepted.)-310(Default:)-310(false.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F75 9.9626 Tf -24.906 -21.109 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.91 Td [(root)]TJ 0 g 0 G -/F54 9.9626 Tf 23.252 0 Td [(The)-253(pr)18(ocess)-254(that)-253(holds)-253(the)-253(global)-254(copy)111(.)-319(If)]TJ/F52 9.9626 Tf 182.635 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F85 10.3811 Tf 19.983 0 Td [(=)]TJ/F83 10.3811 Tf 11.147 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-253(all)-254(t)1(he)-254(pr)18(ocesses)-253(will)]TJ -220.305 -11.955 Td [(have)-250(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 
9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F83 10.3811 Tf 142.42 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)]TJ/F83 10.3811 Tf 7.873 0 Td [(\024)]TJ/F52 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F83 10.3811 Tf 19.923 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)]TJ/F83 10.3811 Tf 43.89 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1.)]TJ + 0 -19.601 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 160.667 324.1 Tm [(If)]TJ/F78 9.9626 Tf 1 0 0 1 170.229 324.1 Tm [(y)]TJ/F84 9.9626 Tf 1.02 0 0 1 177.963 324.1 Tm [(is)-259(not)-258(pr)17(esent,)-262(then)]TJ/F78 9.9626 Tf 1 0 0 1 266.607 324.1 Tm [(x)]TJ/F84 9.9626 Tf 1.02 0 0 1 274.441 324.1 Tm [(is)-259(overwritten)-258(with)-259(the)-258(translated)-259(integer)-259(indices.)]TJ 1 0 0 1 175.611 312.145 Tm [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.602 Td [(y)]TJ +0 g 0 G +/F84 9.9626 Tf 0.983 0 0 1 161.225 256.677 Tm [(If)]TJ/F78 9.9626 Tf 1 0 0 1 170.403 256.677 Tm [(y)]TJ/F84 9.9626 Tf 0.983 0 0 1 178 256.677 Tm [(is)-254(pr)18(esent,)-254(then)]TJ/F78 9.9626 Tf 1 0 0 1 246.332 256.677 Tm [(y)]TJ/F84 9.9626 Tf 0.983 0 0 1 253.929 256.677 Tm [(is)-254(overwritten)-255(with)-254(the)-254(translated)-255(integer)-254(indices,)-255(and)]TJ/F78 9.9626 Tf 1 0 0 1 489.211 256.677 Tm [(x)]TJ/F84 9.9626 Tf -313.6 -11.955 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F75 9.9626 Tf 112.557 0 
Td [(global)]TJ/F84 9.9626 Tf -112.865 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -301.107 -20.909 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -24.906 -19.602 Td [(info)]TJ 0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.108 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 166.874 -29.888 Td [(66)]TJ +/F84 9.9626 Tf 166.874 -29.888 Td [(98)]TJ 0 g 0 G ET endstream endobj -1328 0 obj +1671 0 obj << -/Length 1417 +/Length 775 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(glob)]TJ -ET -q -1 0 0 1 120.976 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.965 706.129 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(wher)18(e)-250(the)-250(local)-250(parts)-250(must)-250(be)-250(gather)18(ed.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)-250(with)-250(the)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 202.459 0 Td [(ALLOCATABLE)]TJ +/F84 9.9626 Tf 112.349 706.129 Td [(1.)]TJ 0 g 0 G 
-/F54 9.9626 Tf 60.024 0 Td [(attribute.)]TJ + 1.01 0 0 1 124.802 706.129 Tm [(If)-248(an)-249(input)-248(index)-249(is)-248(out)-248(of)-249(range,)-248(then)-249(the)-248(corr)18(esponding)-249(output)-248(index)-248(is)]TJ 1 0 0 1 124.802 694.174 Tm [(set)-250(to)-250(a)-250(negative)-250(number;)]TJ 0 g 0 G -/F51 9.9626 Tf -287.39 -19.925 Td [(info)]TJ + -12.453 -19.926 Td [(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + 0.98 0 0 1 124.493 674.248 Tm [(The)-234(default)]TJ/F145 9.9626 Tf 1 0 0 1 176.098 674.248 Tm [(I)]TJ/F84 9.9626 Tf 0.98 0 0 1 181.328 674.248 Tm [(gnor)18(e)-234(means)-234(that)-235(the)-234(negative)-234(output)-234(is)-235(the)-234(only)-234(action)-235(taken)]TJ 1 0 0 1 124.802 662.293 Tm [(on)-250(an)-250(out-of-range)-250(input.)]TJ 0 g 0 G - 141.968 -500.124 Td [(67)]TJ + 141.968 -571.855 Td [(99)]TJ 0 g 0 G ET endstream endobj -1335 0 obj +1678 0 obj << -/Length 7178 +/Length 5816 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(5.4)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(6.18)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ/F54 9.9626 Tf -51.12 -20.363 Td [(These)-223(subr)18(outines)-223(scatters)-224(the)-223(portions)-223(of)-224(global)-223(dense)-223(matrix)-223(owned)-224(by)-223(a)-223(pr)18(o-)]TJ 0 -11.955 Td 
[(cess)-250(to)-250(all)-250(the)-250(pr)18(ocesses)-250(in)-250(the)-250(pr)18(ocesses)-250(grid.)]TJ/F52 9.9626 Tf 119.478 -26.893 Td [(l)-55(o)-35(c)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(loc)]TJ ET q -1 0 0 1 283.05 647.117 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 224.456 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F52 9.9626 Tf 286.333 646.918 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 5.642 1.96 Td [(\040)]TJ/F52 9.9626 Tf 13.398 0 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ/F85 10.3811 Tf 28.632 0 Td [(\050)]TJ/F52 9.9626 Tf 4.493 0 Td [(g)-25(l)-55(o)-35(b)]TJ +/F75 11.9552 Tf 228.043 706.129 Td [(to)]TJ ET q -1 0 0 1 362.3 647.117 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 239.388 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F52 9.9626 Tf 365.583 646.918 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\051)]TJ/F54 9.9626 Tf -220.207 -23.362 Td [(wher)18(e:)]TJ +/F75 11.9552 Tf 242.974 706.129 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -92.269 -18.964 Td [(call)-525(psb_loc_to_glob\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact\051)]TJ 0 -11.955 Td [(call)-525(psb_loc_to_glob\050x,)-525(desc_a,)-525(info,)-525(iact\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F52 9.9626 Tf 0.344 -22.091 Td [(g)-25(l)-55(o)-35(b)]TJ +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.574 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 15.332 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in,)-250(inout)]TJ/F84 9.9626 Tf 38.734 0 Td [(.)]TJ -70.535 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -31.881 Td [(desc)]TJ ET q -1 0 0 1 169.703 601.664 cm +1 0 0 1 171.218 533.94 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 172.986 601.465 Td [(x)]TJ +/F75 9.9626 Tf 174.207 533.74 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(matrix)]TJ/F52 9.9626 Tf 87.515 0 Td [(g)-25(l)-55(o)-35(b)]TJ +/F84 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 289.343 601.664 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 292.626 601.465 Td [(x)]TJ/F54 7.5716 Tf 5.105 -1.858 Td [(1)-13(:)]TJ/F52 7.5716 Tf 5.963 0 Td [(m)]TJ/F54 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F52 7.5716 Tf 7.856 0 Td [(n)]TJ -0 g 0 G -/F52 9.9626 Tf -166.706 -20.955 Td [(l)-55(o)-35(c)]TJ +/F145 9.9626 Tf 363.206 485.92 Td [(desc)]TJ ET q -1 0 0 1 163.696 578.851 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 384.755 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 166.979 578.652 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ +/F145 9.9626 Tf 387.893 485.92 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -19.926 Td [(iact)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F75 9.9626 Tf 253.795 
0 Td [(global)]TJ/F84 9.9626 Tf -250.218 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.983 0 0 1 175.611 430.129 Tm [(Speci\002ed)-254(as:)-316(a)-254(character)-254(variable)]TJ/F145 9.9626 Tf 1 0 0 1 319.381 430.129 Tm [(I)]TJ/F84 9.9626 Tf 0.983 0 0 1 324.612 430.129 Tm [(gnor)18(e,)]TJ/F145 9.9626 Tf 1 0 0 1 354.422 430.129 Tm [(W)]TJ/F84 9.9626 Tf 0.983 0 0 1 359.653 430.129 Tm [(arning)-254(or)]TJ/F145 9.9626 Tf 1 0 0 1 402.308 430.129 Tm [(A)]TJ/F84 9.9626 Tf 0.983 0 0 1 407.539 430.129 Tm [(bort,)-254(default)]TJ/F145 9.9626 Tf 1 0 0 1 463.572 430.129 Tm [(I)]TJ/F84 9.9626 Tf 0.983 0 0 1 468.802 430.129 Tm [(gnor)18(e.)]TJ +0 g 0 G +/F75 9.9626 Tf 1 0 0 1 150.705 408.211 Tm [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 160.667 388.286 Tm [(If)]TJ/F78 9.9626 Tf 1 0 0 1 170.229 388.286 Tm [(y)]TJ/F84 9.9626 Tf 1.02 0 0 1 177.963 388.286 Tm [(is)-259(not)-258(pr)17(esent,)-262(then)]TJ/F78 9.9626 Tf 1 0 0 1 266.607 388.286 Tm [(x)]TJ/F84 9.9626 Tf 1.02 0 0 1 274.441 388.286 Tm [(is)-259(overwritten)-258(with)-259(the)-258(translated)-259(integer)-259(indices.)]TJ 1 0 0 1 175.611 376.331 Tm [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 234.034 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(.)]TJ +/F75 9.9626 Tf -24.906 -19.925 Td [(y)]TJ 0 g 0 G -/F52 9.9626 Tf -266.027 -22.813 Td 
[(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ +/F84 9.9626 Tf 1.02 0 0 1 161.225 320.54 Tm [(If)]TJ/F78 9.9626 Tf 1 0 0 1 170.727 320.54 Tm [(y)]TJ/F84 9.9626 Tf 1.02 0 0 1 178.402 320.54 Tm [(is)-253(not)-253(pr)18(esent,)-255(then)]TJ/F78 9.9626 Tf 1 0 0 1 266.624 320.54 Tm [(y)]TJ/F84 9.9626 Tf 1.02 0 0 1 274.299 320.54 Tm [(is)-253(overwritten)-253(with)-252(the)-253(translated)-253(integer)-253(indices,)]TJ 1 0 0 1 175.611 308.585 Tm [(and)]TJ/F78 9.9626 Tf 19.652 0 Td [(x)]TJ/F84 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F75 9.9626 Tf 112.556 0 Td [(global)]TJ/F84 9.9626 Tf -140.212 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 33.489 0 Td [(is)-250(the)-250(scatter)-250(function.)]TJ +/F75 9.9626 Tf -24.907 -19.925 Td [(info)]TJ 0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.107 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G + 139.865 -114.535 Td [(100)]TJ 0 g 0 G ET -q -1 0 0 1 230.392 532.667 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q + +endstream +endobj +1683 0 obj +<< +/Length 3295 +>> +stream +0 g 0 G +0 g 0 G BT -/F52 9.9626 Tf 236.663 524.099 Td [(x)]TJ/F52 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F54 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(y)]TJ/F51 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(6.19)-1000(psb)]TJ ET q -1 0 0 1 230.392 520.313 cm -[]0 d 0 J 
0.398 w 0 0 m 184.337 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 236.369 511.745 Td [(Integer)-8983(psb)]TJ +/F75 11.9552 Tf 156.993 706.129 Td [(is)]TJ ET q -1 0 0 1 373.603 511.945 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 166.999 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 376.592 511.745 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F75 11.9552 Tf 170.586 706.129 Td [(owned)-250(\227)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_owned\050x,)-525(desc_a\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 373.603 499.989 cm +1 0 0 1 120.408 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 499.79 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F75 9.9626 Tf 123.397 545.696 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 488.034 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 309.258 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 487.835 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F145 9.9626 Tf 312.397 497.875 Td [(desc)]TJ ET q -1 0 0 1 373.603 476.079 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 333.945 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 475.88 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F145 9.9626 Tf 337.084 497.875 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 1.015 0 0 1 172.283 456.032 Tm [(A)-246(logical)-246(mask)-246(which)-246(is)-246(tr)8(ue)-246(if)]TJ/F78 9.9626 Tf 1 0 0 1 310.596 456.032 Tm [(x)]TJ/F84 9.9626 Tf 1.015 0 0 1 318.289 456.032 Tm [(is)-246(owned)-246(by)-246(the)-246(curr)18(ent)-246(pr)18(o-)]TJ 1 0 0 1 124.802 444.077 Tm [(cess)-250(Scope:)]TJ/F75 9.9626 Tf 51.566 0 Td [(local)]TJ/F84 9.9626 Tf -51.875 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ/F75 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + 0.997 0 0 1 124.493 366.368 Tm [(This)-252(r)18(outine)-251(r)18(eturns)-252(a)]TJ/F145 9.9626 Tf 1 0 0 1 221.898 366.368 Tm [(.true.)]TJ/F84 9.9626 Tf 0.997 0 0 1 255.779 366.368 Tm [(value)-252(for)-251(an)-252(index)-252(that)-251(is)-252(strictly)-251(owned)-252(by)]TJ 1 0 0 1 124.802 354.413 Tm 
[(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +0 g 0 G + 139.477 -263.975 Td [(101)]TJ +0 g 0 G +ET + +endstream +endobj +1689 0 obj +<< +/Length 5095 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(6.20)-1000(psb)]TJ ET q -1 0 0 1 373.603 464.124 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 376.592 463.925 Td [(scatter)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(owned)]TJ ET q -1 0 0 1 230.392 460.139 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 245.712 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 278.277 431.76 Td [(T)92(able)-250(20:)-310(Data)-250(types)]TJ +/F75 11.9552 Tf 249.299 706.129 Td [(index)-250(\227)]TJ 0 g 0 G 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -112.628 -28.004 Td [(call)]TJ -0 g 0 G - [-525(psb_scatter\050glob_x,)-525(loc_x,)-525(desc_a,)-525(info,)-525(root,)-525(mold\051)]TJ +/F145 9.9626 Tf -98.594 -18.964 Td [(call)-525(psb_owned_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -24.806 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.813 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -22.813 Td [(glob)]TJ -ET -q -1 0 0 1 171.786 333.523 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.774 333.324 Td [(x)]TJ + 0 -19.925 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(that)-250(must)-250(be)-250(scatter)18(ed)-250(into)-250(local)-250(pieces.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 
9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)111(.)]TJ +/F84 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F84 9.9626 Tf 38.734 0 Td [(.)]TJ -70.534 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -22.814 Td [(desc)]TJ +/F75 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 171.218 262.89 cm +1 0 0 1 171.218 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 262.69 Td [(a)]TJ +/F75 9.9626 Tf 174.207 545.696 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F145 9.9626 Tf 168.138 0 Td 
[(psb)]TJ ET q -1 0 0 1 360.068 215.069 cm +1 0 0 1 360.068 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 214.87 Td [(desc)]TJ +/F145 9.9626 Tf 363.206 497.875 Td [(desc)]TJ ET q -1 0 0 1 384.755 215.069 cm +1 0 0 1 384.755 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 214.87 Td [(type)]TJ +/F145 9.9626 Tf 387.893 497.875 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F75 9.9626 Tf 253.795 0 Td [(global)]TJ/F84 9.9626 Tf -250.218 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.983 0 0 1 175.611 442.084 Tm [(Speci\002ed)-254(as:)-316(a)-254(character)-254(variable)]TJ/F145 9.9626 Tf 1 0 0 1 319.381 442.084 Tm [(I)]TJ/F84 9.9626 Tf 0.983 0 0 1 324.612 442.084 Tm [(gnor)18(e,)]TJ/F145 9.9626 Tf 1 0 0 1 354.422 442.084 Tm [(W)]TJ/F84 9.9626 Tf 0.983 0 0 1 359.653 442.084 Tm [(arning)-254(or)]TJ/F145 9.9626 Tf 1 0 0 1 402.308 442.084 Tm [(A)]TJ/F84 9.9626 Tf 0.983 0 0 1 407.539 442.084 Tm [(bort,)-254(default)]TJ/F145 9.9626 Tf 1 0 0 1 463.572 442.084 Tm [(I)]TJ/F84 9.9626 Tf 0.983 0 0 1 468.802 442.084 Tm [(gnor)18(e.)]TJ +0 g 0 G +/F75 9.9626 Tf 1 0 0 1 150.705 420.166 Tm [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -22.813 Td [(root)]TJ + 0 -19.925 Td [(y)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 160.837 400.241 Tm [(A)-251(logical)-251(mask)-251(which)-251(is)-251(tr)8(ue)-251(for)-251(all)-251(corr)18(e)1(sponding)-251(entries)-251(of)]TJ/F78 9.9626 Tf 1 0 0 1 421.895 400.241 Tm [(x)]TJ/F84 9.9626 Tf 0.98 0 0 1 429.552 400.241 Tm [(that)-251(ar)18(e)-251(owned)]TJ 1 0 0 1 175.611 388.286 Tm 
[(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F75 9.9626 Tf 131.028 0 Td [(local)]TJ/F84 9.9626 Tf -131.336 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.107 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.917 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 23.252 0 Td [(The)-218(pr)18(ocess)-218(that)-218(holds)-219(t)1(he)-219(global)-218(copy)111(.)-299(If)]TJ/F52 9.9626 Tf 179.982 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F85 10.3811 Tf 19.922 0 Td [(=)]TJ/F83 10.3811 Tf 11.086 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-218(all)-218(the)-218(pr)18(ocesses)-219(have)]TJ -217.53 -11.956 Td [(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-258(as:)-327(an)-258(integer)-259(variable)]TJ/F83 10.3811 Tf 142.917 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)]TJ/F83 10.3811 Tf 8.027 0 Td [(\024)]TJ/F52 9.9626 Tf 11.139 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F83 10.3811 Tf 20.077 0 Td [(\024)]TJ/F52 9.9626 Tf 11.239 0 Td 
[(n)-80(p)]TJ/F83 10.3811 Tf 13.534 0 Td [(\000)]TJ/F54 9.9626 Tf 10.162 0 Td [(1,)-260(default)]TJ/F59 9.9626 Tf 43.952 0 Td [(psb_root_)]TJ/F54 9.9626 Tf 47.073 0 Td [(,)]TJ -316.314 -11.955 Td [(i.e.)-310(pr)18(ocess)-250(0.)]TJ +/F84 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ 0 g 0 G - 141.968 -29.888 Td [(68)]TJ + 1.02 0 0 1 175.303 242.831 Tm [(This)-383(r)17(outine)-383(r)17(eturns)-383(a)]TJ/F145 9.9626 Tf 1 0 0 1 280.321 242.831 Tm [(.true.)]TJ/F84 9.9626 Tf 1.02 0 0 1 315.6 242.831 Tm [(value)-383(for)-384(those)-383(indices)-384(that)-383(ar)17(e)-383(strictly)]TJ 1 0 0 1 175.611 230.876 Tm [(owned)-250(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +0 g 0 G + 139.477 -140.438 Td [(102)]TJ 0 g 0 G ET endstream endobj -1342 0 obj +1695 0 obj << -/Length 3984 +/Length 3275 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(mold)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(6.21)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(is)]TJ +ET +q +1 0 0 1 166.999 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 170.586 706.129 Td [(local)-250(\227)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_local\050x,)-525(desc_a\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 
-11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F59 9.9626 Tf 199.086 0 Td [(psb)]TJ + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 340.207 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.408 545.895 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 343.345 658.308 Td [(T)]TJ +/F75 9.9626 Tf 123.397 545.696 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 349.203 658.507 cm +1 0 0 1 309.258 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 352.341 658.308 Td [(base)]TJ +/F145 9.9626 Tf 312.397 497.875 Td [(desc)]TJ ET q -1 0 0 1 373.89 658.507 cm +1 0 0 1 333.945 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 377.028 658.308 Td [(vect)]TJ +/F145 9.9626 Tf 337.084 497.875 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 
9.9626 Tf 0.998 0 0 1 172.283 456.032 Tm [(A)-251(logical)-251(mask)-251(which)-251(is)-251(tr)8(ue)-250(if)]TJ/F78 9.9626 Tf 1 0 0 1 308.626 456.032 Tm [(x)]TJ/F84 9.9626 Tf 0.998 0 0 1 316.326 456.032 Tm [(is)-251(local)-251(to)-251(the)-251(curr)18(ent)-250(pr)18(ocess)]TJ 1 0 0 1 124.802 444.077 Tm [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ/F75 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 124.493 366.368 Tm [(This)-244(r)19(outine)-244(r)18(eturns)-244(a)]TJ/F145 9.9626 Tf 1 0 0 1 219.923 366.368 Tm [(.true.)]TJ/F84 9.9626 Tf 0.98 0 0 1 253.685 366.368 Tm [(value)-244(for)-244(an)-243(index)-244(that)-244(is)-244(local)-243(to)-244(the)-244(curr)18(e)1(nt)]TJ 1 0 0 1 124.503 354.413 Tm [(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices)]TJ +0 g 0 G + 139.776 -263.975 Td [(103)]TJ +0 g 0 G +ET + +endstream +endobj +1702 0 obj +<< +/Length 5087 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(6.22)-1000(psb)]TJ ET q -1 0 0 1 398.577 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 401.716 658.308 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)-250(loc)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(local)]TJ ET q -1 0 0 1 234.988 646.552 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 234.415 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 237.976 646.353 Td [(x)-250(is)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 50.53 0 Td [(psb)]TJ +/F75 11.9552 Tf 238.001 706.129 Td [(index)-250(\227)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -87.296 -18.964 Td 
[(call)-525(psb_local_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in,)-250(inout)]TJ/F84 9.9626 Tf 38.734 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -31.88 Td [(desc)]TJ ET q -1 0 0 1 304.825 646.552 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.218 545.895 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 307.963 646.353 Td [(T)]TJ +/F75 9.9626 Tf 174.207 545.696 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 313.821 646.552 cm +1 0 0 1 360.068 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 316.959 646.353 Td [(vect)]TJ +/F145 9.9626 Tf 363.206 497.875 Td [(desc)]TJ ET q -1 0 0 1 338.508 646.552 cm +1 0 0 1 384.755 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 341.646 646.353 Td [(type)]TJ +/F145 9.9626 Tf 387.893 497.875 Td 
[(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -262.673 -19.925 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ 0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F75 9.9626 Tf 253.795 0 Td [(global)]TJ/F84 9.9626 Tf -250.218 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.983 0 0 1 175.611 442.084 Tm [(Speci\002ed)-254(as:)-316(a)-254(character)-254(variable)]TJ/F145 9.9626 Tf 1 0 0 1 319.381 442.084 Tm [(I)]TJ/F84 9.9626 Tf 0.983 0 0 1 324.612 442.084 Tm [(gnor)18(e,)]TJ/F145 9.9626 Tf 1 0 0 1 354.422 442.084 Tm [(W)]TJ/F84 9.9626 Tf 0.983 0 0 1 359.653 442.084 Tm [(arning)-254(or)]TJ/F145 9.9626 Tf 1 0 0 1 402.308 442.084 Tm [(A)]TJ/F84 9.9626 Tf 0.983 0 0 1 407.539 442.084 Tm [(bort,)-254(default)]TJ/F145 9.9626 Tf 1 0 0 1 463.572 442.084 Tm [(I)]TJ/F84 9.9626 Tf 0.983 0 0 1 468.802 442.084 Tm [(gnor)18(e.)]TJ +0 g 0 G +/F75 9.9626 Tf 1 0 0 1 150.705 420.166 Tm [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(y)]TJ +0 g 0 G +/F84 9.9626 Tf 1.011 0 0 1 160.837 400.241 Tm [(A)-247(logical)-248(mask)-247(which)-247(is)-248(tr)8(ue)-247(for)-247(all)-248(corr)18(esponding)-247(entries)-247(of)]TJ/F78 9.9626 Tf 1 0 0 1 429.743 400.241 Tm [(x)]TJ/F84 9.9626 Tf 1.011 0 0 1 437.439 400.241 Tm [(that)-247(ar)17(e)-247(local)]TJ 1 0 0 1 175.611 388.286 Tm [(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F75 9.9626 Tf 128.666 0 Td [(local)]TJ/F84 9.9626 Tf -128.974 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td 
[(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.925 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.917 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ +0 g 0 G + 1.009 0 0 1 175.303 242.831 Tm [(This)-247(r)18(outine)-247(r)18(eturns)-247(a)]TJ/F145 9.9626 Tf 1 0 0 1 273.698 242.831 Tm [(.true.)]TJ/F84 9.9626 Tf 1.009 0 0 1 307.563 242.831 Tm [(value)-247(for)-247(those)-247(indices)-247(that)-247(ar)18(e)-247(local)-247(to)-247(the)]TJ 1 0 0 1 175.611 230.876 Tm [(curr)18(ent)-250(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices.)]TJ +0 g 0 G + 139.477 -140.438 Td [(104)]TJ 0 g 0 G - 0 -19.926 Td [(loc)]TJ ET -q -1 0 0 1 113.773 606.702 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 116.762 606.502 Td [(x)]TJ + +endstream +endobj +1708 0 obj +<< +/Length 3771 +>> +stream +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ +BT +/F75 11.9552 Tf 99.895 706.129 Td [(6.23)-1000(psb)]TJ ET q -1 0 0 1 321.043 606.702 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F52 9.9626 Tf 324.326 606.502 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -204.729 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 
Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-214(as:)-292(a)-215(rank)-214(one)-214(or)-214(two)-215(ALLOCA)74(T)74(ABLE)-214(array)-214(or)-214(an)-215(object)-214(of)-214(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 0 -11.955 Td [(psb)]TJ +/F75 11.9552 Tf 156.993 706.129 Td [(get)]TJ ET q -1 0 0 1 141.121 546.926 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 174.316 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 144.259 546.727 Td [(T)]TJ +/F75 11.9552 Tf 177.903 706.129 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -78.008 -18.964 Td [(call)-525(psb_get_boundary\050bndel,)-525(desc,)-525(info\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ +0 g 0 G +/F84 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 150.117 546.926 cm +1 0 0 1 309.258 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 153.255 546.727 Td [(vect)]TJ +/F145 9.9626 Tf 312.397 577.576 Td [(desc)]TJ ET q -1 0 0 1 174.804 546.926 cm +1 0 0 1 333.945 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 177.942 546.727 Td [(type)]TJ +/F145 9.9626 Tf 337.084 577.576 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 
23.412 0 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(20)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(.)]TJ +/F75 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(bndel)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 131.128 535.733 Tm [(The)-245(list)-245(of)-245(boundary)-245(elements)-245(on)-245(the)-245(calling)-245(pr)18(ocess,)-247(in)-245(local)-245(numbering.)]TJ 1 0 0 1 124.802 523.778 Tm [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 0.994 0 0 1 124.802 487.912 Tm [(Speci\002ed)-253(as:)-314(a)-252(rank)-253(one)-252(array)-253(with)-253(the)-252(ALLOCA)74(T)75(ABLE)-253(attribute,)-253(of)-252(type)]TJ 1 0 0 1 124.802 475.957 Tm [(integer)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -31.88 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -101.459 -19.926 Td [(info)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td 
[(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F84 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - 141.968 -388.543 Td [(69)]TJ + 0.988 0 0 1 124.802 354.413 Tm [(If)-253(ther)18(e)-252(ar)18(e)-253(no)-253(boundary)-253(elements)-253(\050i)1(.e.,)-253(if)-253(the)-253(local)-253(part)-253(of)-253(t)1(he)-253(connectivity)]TJ 0.98 0 0 1 124.802 342.458 Tm [(graph)-236(is)-236(self-contained\051)-236(the)-236(output)-236(vector)-236(is)-236(set)-236(to)-236(the)-236(\223not)-236(allocated\224)-236(state.)]TJ +0 g 0 G + 1 0 0 1 112.349 322.532 Tm [(2.)]TJ +0 g 0 G + 0.985 0 0 1 124.802 322.532 Tm [(Otherwise)-253(the)-253(size)-253(of)]TJ/F145 9.9626 Tf 1 0 0 1 218.697 322.532 Tm [(bndel)]TJ/F84 9.9626 Tf 0.985 0 0 1 247.333 322.532 Tm [(will)-253(be)-253(exactly)-253(equal)-253(to)-253(the)-253(number)-253(of)-254(bound-)]TJ 1 0 0 1 124.802 310.577 Tm [(ary)-250(elements.)]TJ +0 g 0 G + 139.477 -220.139 Td [(105)]TJ 0 g 0 G ET endstream endobj -1346 0 obj +1715 0 obj << -/Length 6319 +/Length 3593 >> stream 0 g 0 G 0 g 0 G BT -/F51 14.3462 Tf 150.705 706.042 Td [(6)-1000(Data)-250(management)-250(routines)]TJ/F51 11.9552 Tf 0 -24.694 Td [(6.1)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(6.24)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 207.803 706.129 Td [(get)]TJ ET q -1 0 0 1 198.238 681.547 cm +1 0 0 1 225.126 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 681.348 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ +/F75 11.9552 Tf 228.712 706.129 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,parts=parts\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vg=vg,[mg=mg,flag=flag]\051)]TJ 0 -11.955 
Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vl=vl,[nl=nl,globalcheck=.false.,lidx=lidx]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,nl=nl\051)]TJ 0 -11.956 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,repl=.true.\051)]TJ/F54 9.9626 Tf 14.944 -19.771 Td [(This)-377(subr)18(outine)-378(initializes)-377(the)-378(communication)-377(descriptor)-378(associated)-377(with)]TJ -14.944 -11.956 Td [(an)-271(index)-271(space.)-373(One)-272(o)1(f)-272(the)-271(optional)-271(ar)18(guments)]TJ/F59 9.9626 Tf 209.77 0 Td [(parts)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)]TJ/F59 9.9626 Tf 5.244 0 Td [(vg)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)]TJ/F59 9.9626 Tf 5.244 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)]TJ/F59 9.9626 Tf 5.244 0 Td [(nl)]TJ/F54 9.9626 Tf 13.161 0 Td [(or)]TJ/F59 9.9626 Tf 12.076 0 Td [(repl)]TJ/F54 9.9626 Tf 23.622 0 Td [(must)]TJ -321.435 -11.955 Td [(be)-250(speci\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(speci\002c)-250(initialization)-250(strategy)111(.)]TJ +/F145 9.9626 Tf -78.007 -18.964 Td [(call)-525(psb_get_overlap\050ovrel,)-525(desc,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -18.208 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 0 -19.067 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.067 Td [(icontxt)]TJ + 0 -19.925 Td [(desc)]TJ +0 g 0 G +/F84 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 387.893 577.576 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.066 Td [(vg)]TJ +/F75 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 16.06 0 Td [(Data)-250(allocation:)-310(each)-250(index)]TJ/F52 9.9626 Tf 121.707 0 Td [(i)]TJ/F83 10.3811 Tf 5.856 0 Td [(2)-290(f)]TJ/F54 9.9626 Tf 15.245 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F83 10.3811 Tf 13.449 0 Td [(g)]TJ/F54 9.9626 Tf 7.806 0 Td [(is)-250(allocated)-250(to)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 98.454 0 Td [(v)-47(g)]TJ/F85 10.3811 Tf 10.68 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)]TJ -295.759 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.067 Td 
[(\003ag)]TJ + 0 -19.925 Td [(ovrel)]TJ 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(Speci\002es)-250(whether)-250(entries)-250(in)]TJ/F52 9.9626 Tf 123.401 0 Td [(v)-47(g)]TJ/F54 9.9626 Tf 13.046 0 Td [(ar)18(e)-250(zer)18(o-)-250(or)-250(one-based.)]TJ -133.129 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0,)-167(1,)-250(default)-250(0.)]TJ +/F84 9.9626 Tf 27.925 0 Td [(The)-250(list)-250(of)-250(overlap)-250(elements)-250(on)-250(the)-250(calling)-250(pr)18(ocess,)-250(in)-250(local)-250(numbering.)]TJ -3.018 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ 0.994 0 0 1 175.611 487.912 Tm [(Speci\002ed)-253(as:)-314(a)-252(rank)-253(one)-252(array)-253(with)-253(the)-252(ALLOCA)74(T)75(ABLE)-253(attribute,)-253(of)-252(type)]TJ 1 0 0 1 175.611 475.957 Tm [(integer)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.067 Td [(mg)]TJ +/F75 9.9626 Tf -24.906 -31.88 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 19.377 0 Td [(the)-250(\050global\051)-250(number)-250(of)-250(r)18(ows)-250(of)-250(the)-250(pr)18(oblem.)]TJ 5.53 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-262(as:)-335(an)-263(integer)-262(value.)-348(It)-262(is)-262(r)18(equir)18(ed)-263(if)]TJ/F59 9.9626 Tf 203.091 0 Td [(parts)]TJ/F54 9.9626 Tf 28.766 0 Td [(or)]TJ/F59 9.9626 Tf 11.99 0 Td [(repl)]TJ/F54 9.9626 Tf 23.536 0 Td [(is)-262(speci\002ed,)]TJ -267.383 -11.955 Td [(it)-250(is)-250(optional)-250(if)]TJ/F59 9.9626 Tf 66.141 0 Td [(vg)]TJ/F54 9.9626 Tf 12.951 0 Td [(is)-250(speci\002ed.)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -103.999 -19.067 Td [(parts)]TJ +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(the)-250(subr)18(outine)-250(that)-250(de\002nes)-250(the)-250(partitioning)-250(scheme.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(subr)18(outine.)]TJ + 1.02 0 0 1 175.611 354.413 Tm [(If)-254(ther)18(e)-254(ar)18(e)-254(no)-254(overlap)-254(elements)-253(the)-254(output)-254(vector)-254(is)-253(set)-254(to)-254(the)-254(\223not)-253(allo-)]TJ 1 0 0 1 175.611 342.458 Tm [(cated\224)-250(state.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.067 Td [(vl)]TJ + -12.453 -19.926 Td [(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Data)-293(allocation:)-395(the)-293(set)-292(of)-293(global)-293(i)1(ndices)]TJ/F52 9.9626 Tf 181.166 0 Td [(v)-25(l)]TJ/F85 10.3811 Tf 8.548 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td 
[(1)-369(:)]TJ/F52 9.9626 Tf 14.955 0 Td [(n)-25(l)]TJ/F85 10.3811 Tf 9.105 0 Td [(\051)]TJ/F54 9.9626 Tf 7.065 0 Td [(belonging)-293(to)-292(the)-293(calling)]TJ -213.919 -11.955 Td [(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ + 0.98 0 0 1 175.611 322.532 Tm [(Otherwise)-243(the)-242(size)-243(of)]TJ/F145 9.9626 Tf 1 0 0 1 268.625 322.532 Tm [(ovrel)]TJ/F84 9.9626 Tf 0.98 0 0 1 297.147 322.532 Tm [(will)-243(be)-242(exactly)-243(equal)-243(to)-243(the)-242(number)-243(of)-243(overlap)]TJ 1 0 0 1 175.611 310.577 Tm [(elements.)]TJ 0 g 0 G - 141.967 -29.887 Td [(70)]TJ + 139.477 -220.139 Td [(106)]TJ 0 g 0 G ET endstream endobj -1233 0 obj +1612 0 obj << /Type /ObjStm /N 100 -/First 996 -/Length 12673 ->> -stream -280 0 1231 57 1227 115 1235 248 1225 396 1226 540 1237 687 1234 746 1244 840 1238 1015 -1239 1157 1240 1302 1241 1449 1242 1593 1246 1740 284 1798 1247 1855 1243 1913 1250 2046 1248 2185 -1252 2333 288 2392 1249 2450 1258 2531 1253 2688 1254 2832 1255 2979 1260 3126 292 3184 1261 3241 -1257 3299 1265 3433 1269 3581 1270 3708 1271 3751 1272 3958 1273 4196 1274 4472 1256 4708 1263 4855 -1267 5002 1268 5061 1264 5120 1278 5256 1280 5374 1277 5432 1286 5500 1282 5657 1283 5801 1284 5946 -1288 6093 296 6152 1289 6210 1285 6269 1295 6402 1290 6559 1292 6705 1293 6851 1297 6996 1298 7054 -1299 7112 1300 7170 1294 7228 1303 7335 1305 7453 1302 7512 1307 7580 1310 7698 1311 7825 1312 7868 -1313 8075 1314 8313 1315 8589 1309 8825 1301 8883 1306 8941 1322 9038 1318 9195 1319 9336 1320 9483 -1324 9630 300 9689 1325 9747 1321 9806 1327 9939 1329 10057 1326 10115 1334 10209 1331 10348 1336 10495 -304 10554 1337 
10612 1333 10671 1341 10804 1332 10961 1338 11104 1339 11247 1343 11394 1340 11452 1345 11559 -% 280 0 obj -<< -/D [1228 0 R /XYZ 99.895 716.092 null] ->> -% 1231 0 obj -<< -/D [1228 0 R /XYZ 99.895 560.161 null] ->> -% 1227 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1235 0 obj -<< -/Type /Page -/Contents 1236 0 R -/Resources 1234 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1225 0 R 1226 0 R ] ->> -% 1225 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [314.141 702.323 390.195 714.383] -/A << /S /GoTo /D (vdata) >> +/First 971 +/Length 10528 >> -% 1226 0 obj +stream +1611 0 352 58 1608 115 1616 210 1613 358 1614 503 1618 650 356 709 1619 767 1615 826 +1623 921 1620 1069 1621 1216 1625 1359 360 1417 1622 1474 1628 1582 1630 1700 1631 1759 1627 1817 +1635 1912 1632 2060 1633 2206 1637 2350 364 2408 1634 2465 1639 2587 1641 2705 1642 2764 1643 2823 +1638 2882 1648 2963 1644 3120 1645 3265 1646 3408 1650 3551 368 3609 1651 3666 1652 3724 1647 3782 +1656 3890 1653 4038 1654 4180 1658 4323 372 4382 1655 4440 1660 4535 1662 4653 376 4711 1659 4768 +1666 4876 1664 5015 1668 5162 380 5221 1665 5279 1670 5387 1672 5505 1673 5563 1674 5621 1669 5679 +1677 5761 1675 5900 1679 6047 384 6106 1676 6164 1682 6272 1680 6411 1684 6558 388 6616 1685 6673 +1681 6731 1688 6839 1686 6978 1690 7125 392 7184 1691 7242 1687 7301 1694 7409 1692 7548 1696 7695 +396 7753 1697 7810 1693 7868 1701 7976 1699 8115 1703 8262 400 8321 1704 8379 1700 8438 1707 8546 +1705 8685 1709 8830 405 8888 1710 8945 1711 9003 1706 9061 1714 9156 1712 9295 1716 9440 409 9499 +% 1611 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [293.677 690.368 305.632 702.428] -/A << /S /GoTo /D (table.14) >> +/D [1609 0 R /XYZ 98.895 753.953 null] >> -% 1237 0 obj +% 352 0 obj << -/D [1235 0 R /XYZ 149.705 753.953 null] +/D [1609 0 R /XYZ 99.895 716.092 null] >> 
-% 1234 0 obj +% 1608 0 obj << -/Font << /F54 586 0 R /F59 812 0 R /F51 584 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1244 0 obj +% 1616 0 obj << /Type /Page -/Contents 1245 0 R -/Resources 1243 0 R +/Contents 1617 0 R +/Resources 1615 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1238 0 R 1239 0 R 1240 0 R 1241 0 R 1242 0 R ] +/Parent 1577 0 R +/Annots [ 1613 0 R 1614 0 R ] >> -% 1238 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 430.55 339.385 442.61] -/A << /S /GoTo /D (vdata) >> ->> -% 1239 0 obj +% 1613 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 418.595 231.53 430.655] -/A << /S /GoTo /D (table.2) >> +/Rect [342.753 559.823 420.271 571.882] +/A << /S /GoTo /D (spdata) >> >> -% 1240 0 obj +% 1614 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 352.283 323.106 364.342] +/Rect [342.753 492.077 409.811 504.136] /A << /S /GoTo /D (descdata) >> >> -% 1241 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 201.166 339.385 213.226] -/A << /S /GoTo /D (vdata) >> ->> -% 1242 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [242.868 189.211 254.823 201.271] -/A << /S /GoTo /D (table.16) >> ->> -% 1246 0 obj +% 1618 0 obj << -/D [1244 0 R /XYZ 98.895 753.953 null] +/D [1616 0 R /XYZ 149.705 753.953 null] >> -% 284 0 obj +% 356 0 obj << -/D [1244 0 R /XYZ 99.895 716.092 null] +/D [1616 0 R /XYZ 150.705 716.092 null] >> -% 1247 0 obj +% 1619 0 obj << -/D [1244 0 R /XYZ 99.895 566.828 null] +/D [1616 0 R /XYZ 150.705 312.355 null] >> -% 1243 0 obj +% 1615 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1250 0 obj +% 1623 0 obj << /Type /Page -/Contents 1251 0 R -/Resources 1249 0 R +/Contents 1624 0 R 
+/Resources 1622 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1248 0 R ] +/Parent 1626 0 R +/Annots [ 1620 0 R 1621 0 R ] >> -% 1248 0 obj +% 1620 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.029 655.624 385.003 667.684] -/A << /S /GoTo /D (section.6) >> ->> -% 1252 0 obj -<< -/D [1250 0 R /XYZ 149.705 753.953 null] ->> -% 288 0 obj -<< -/D [1250 0 R /XYZ 150.705 716.092 null] ->> -% 1249 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1258 0 obj -<< -/Type /Page -/Contents 1259 0 R -/Resources 1257 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1253 0 R 1254 0 R 1255 0 R ] +/Rect [260.133 577.893 327.191 589.953] +/A << /S /GoTo /D (descdata) >> >> -% 1253 0 obj +% 1621 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 344.818 444.603 356.877] +/Rect [210.44 128.475 286.494 140.535] /A << /S /GoTo /D (vdata) >> >> -% 1254 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 332.863 338.608 344.922] -/A << /S /GoTo /D (table.17) >> ->> -% 1255 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 264.733 359.001 276.793] -/A << /S /GoTo /D (descdata) >> ->> -% 1260 0 obj -<< -/D [1258 0 R /XYZ 98.895 753.953 null] ->> -% 292 0 obj +% 1625 0 obj << -/D [1258 0 R /XYZ 99.895 716.092 null] +/D [1623 0 R /XYZ 98.895 753.953 null] >> -% 1261 0 obj +% 360 0 obj << -/D [1258 0 R /XYZ 99.895 513.636 null] +/D [1623 0 R /XYZ 99.895 716.092 null] >> -% 1257 0 obj +% 1622 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1265 0 obj +% 1628 0 obj << /Type /Page -/Contents 1266 0 R -/Resources 1264 0 R +/Contents 1629 0 R +/Resources 1627 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1256 0 R 1263 0 R ] ->> -% 1269 0 obj 
-<< -/Producer (GPL Ghostscript 9.22) -/CreationDate (D:20180323100645Z00'00') -/ModDate (D:20180323100645Z00'00') ->> -% 1270 0 obj -<< -/Type /ExtGState -/OPM 1 ->> -% 1271 0 obj -<< -/BaseFont /XYUGDR+Times-Roman -/FontDescriptor 1273 0 R -/Type /Font -/FirstChar 48 -/LastChar 57 -/Widths [ 500 500 500 500 500 500 500 500 500 500] -/Encoding /WinAnsiEncoding -/Subtype /Type1 ->> -% 1272 0 obj -<< -/BaseFont /XISTAL+Times-Bold -/FontDescriptor 1274 0 R -/Type /Font -/FirstChar 48 -/LastChar 80 -/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] -/Encoding /WinAnsiEncoding -/Subtype /Type1 ->> -% 1273 0 obj -<< -/Type /FontDescriptor -/FontName /XYUGDR+Times-Roman -/FontBBox [ 0 -14 476 688] -/Flags 65568 -/Ascent 688 -/CapHeight 688 -/Descent -14 -/ItalicAngle 0 -/StemV 71 -/MissingWidth 250 -/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) -/FontFile3 1275 0 R +/Parent 1626 0 R >> -% 1274 0 obj +% 1630 0 obj << -/Type /FontDescriptor -/FontName /XISTAL+Times-Bold -/FontBBox [ 0 -13 600 688] -/Flags 65568 -/Ascent 688 -/CapHeight 676 -/Descent -13 -/ItalicAngle 0 -/StemV 90 -/MissingWidth 250 -/CharSet (/P/one/zero) -/FontFile3 1276 0 R +/D [1628 0 R /XYZ 149.705 753.953 null] >> -% 1256 0 obj +% 1631 0 obj +<< +/D [1628 0 R /XYZ 150.705 632.19 null] +>> +% 1627 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1635 0 obj +<< +/Type /Page +/Contents 1636 0 R +/Resources 1634 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1626 0 R +/Annots [ 1632 0 R 1633 0 R ] +>> +% 1632 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [231.023 625.272 242.978 634.682] -/A << /S /GoTo /D (table.17) >> +/Rect [291.943 362.621 359.001 374.68] +/A << /S /GoTo /D (descdata) >> >> -% 1263 0 obj +% 1633 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [458.157 276.439 465.131 290.202] -/A << /S /GoTo /D (figure.3) >> +/Rect [367.353 207.637 443.407 
219.697] +/A << /S /GoTo /D (vdata) >> >> -% 1267 0 obj +% 1637 0 obj << -/D [1265 0 R /XYZ 149.705 753.953 null] +/D [1635 0 R /XYZ 98.895 753.953 null] >> -% 1268 0 obj +% 364 0 obj << -/D [1265 0 R /XYZ 150.705 326.444 null] +/D [1635 0 R /XYZ 99.895 716.092 null] >> -% 1264 0 obj +% 1634 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R >> -/XObject << /Im4 1262 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R >> /ProcSet [ /PDF /Text ] >> -% 1278 0 obj +% 1639 0 obj << /Type /Page -/Contents 1279 0 R -/Resources 1277 0 R +/Contents 1640 0 R +/Resources 1638 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R +/Parent 1626 0 R >> -% 1280 0 obj +% 1641 0 obj << -/D [1278 0 R /XYZ 98.895 753.953 null] +/D [1639 0 R /XYZ 149.705 753.953 null] >> -% 1277 0 obj +% 1642 0 obj +<< +/D [1639 0 R /XYZ 150.705 701.929 null] +>> +% 1643 0 obj << -/Font << /F54 586 0 R >> +/D [1639 0 R /XYZ 150.705 680.684 null] +>> +% 1638 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1286 0 obj +% 1648 0 obj << /Type /Page -/Contents 1287 0 R -/Resources 1285 0 R +/Contents 1649 0 R +/Resources 1647 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R -/Annots [ 1282 0 R 1283 0 R 1284 0 R ] +/Parent 1626 0 R +/Annots [ 1644 0 R 1645 0 R 1646 0 R ] >> -% 1282 0 obj +% 1644 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 345.485 495.412 357.545] -/A << /S /GoTo /D (vdata) >> +/Rect [260.133 573.77 327.191 585.83] +/A << /S /GoTo /D (descdata) >> >> -% 1283 0 obj +% 1645 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [377.462 333.53 389.417 345.59] -/A << /S /GoTo /D (table.18) >> +/Rect [271.52 482.114 347.574 494.174] +/A << /S /GoTo /D (vdata) >> >> -% 1284 0 obj +% 1646 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 266.146 409.811 278.205] -/A << /S /GoTo /D (descdata) >> +/Rect [157.09 380.495 233.145 
392.555] +/A << /S /GoTo /D (vdata) >> >> -% 1288 0 obj +% 1650 0 obj << -/D [1286 0 R /XYZ 149.705 753.953 null] +/D [1648 0 R /XYZ 98.895 753.953 null] >> -% 296 0 obj +% 368 0 obj << -/D [1286 0 R /XYZ 150.705 716.092 null] +/D [1648 0 R /XYZ 99.895 716.092 null] >> -% 1289 0 obj +% 1651 0 obj << -/D [1286 0 R /XYZ 150.705 510.975 null] +/D [1648 0 R /XYZ 99.895 278.482 null] >> -% 1285 0 obj +% 1652 0 obj +<< +/D [1648 0 R /XYZ 99.895 244.007 null] +>> +% 1647 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R /F85 814 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1295 0 obj +% 1656 0 obj << /Type /Page -/Contents 1296 0 R -/Resources 1294 0 R +/Contents 1657 0 R +/Resources 1655 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R -/Annots [ 1290 0 R 1292 0 R 1293 0 R ] +/Parent 1626 0 R +/Annots [ 1653 0 R 1654 0 R ] >> -% 1290 0 obj +% 1653 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [202.52 554.876 214.475 566.936] -/A << /S /GoTo /D (table.18) >> +/Rect [207.9 561.815 283.954 573.875] +/A << /S /GoTo /D (vdata) >> >> -% 1292 0 obj +% 1654 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [407.408 325.46 414.381 339.127] -/A << /S /GoTo /D (figure.4) >> +/Rect [310.942 482.114 378 494.174] +/A << /S /GoTo /D (descdata) >> >> -% 1293 0 obj +% 1658 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [309.226 301.825 316.2 313.885] -/A << /S /GoTo /D (figure.3) >> +/D [1656 0 R /XYZ 149.705 753.953 null] >> -% 1297 0 obj +% 372 0 obj << -/D [1295 0 R /XYZ 98.895 753.953 null] +/D [1656 0 R /XYZ 150.705 716.092 null] >> -% 1298 0 obj +% 1655 0 obj << -/D [1295 0 R /XYZ 99.895 464.818 null] +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1299 0 obj +% 1660 0 obj << -/D [1295 0 R /XYZ 99.895 430.343 null] +/Type /Page +/Contents 1661 0 R +/Resources 1659 0 R 
+/MediaBox [0 0 595.276 841.89] +/Parent 1663 0 R >> -% 1300 0 obj +% 1662 0 obj +<< +/D [1660 0 R /XYZ 98.895 753.953 null] +>> +% 376 0 obj << -/D [1295 0 R /XYZ 99.895 386.508 null] +/D [1660 0 R /XYZ 99.895 716.092 null] >> -% 1294 0 obj +% 1659 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1303 0 obj +% 1666 0 obj << /Type /Page -/Contents 1304 0 R -/Resources 1302 0 R +/Contents 1667 0 R +/Resources 1665 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R +/Parent 1663 0 R +/Annots [ 1664 0 R ] >> -% 1305 0 obj +% 1664 0 obj << -/D [1303 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 483.894 409.811 495.954] +/A << /S /GoTo /D (descdata) >> >> -% 1302 0 obj +% 1668 0 obj +<< +/D [1666 0 R /XYZ 149.705 753.953 null] +>> +% 380 0 obj << -/Font << /F54 586 0 R >> +/D [1666 0 R /XYZ 150.705 716.092 null] +>> +% 1665 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1307 0 obj +% 1670 0 obj << /Type /Page -/Contents 1308 0 R -/Resources 1306 0 R +/Contents 1671 0 R +/Resources 1669 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R +/Parent 1663 0 R >> -% 1310 0 obj +% 1672 0 obj << -/Producer (GPL Ghostscript 9.22) -/CreationDate (D:20180323100658Z00'00') -/ModDate (D:20180323100658Z00'00') +/D [1670 0 R /XYZ 98.895 753.953 null] >> -% 1311 0 obj +% 1673 0 obj << -/Type /ExtGState -/OPM 1 +/D [1670 0 R /XYZ 99.895 716.092 null] >> -% 1312 0 obj +% 1674 0 obj << -/BaseFont /XYUGDR+Times-Roman -/FontDescriptor 1314 0 R -/Type /Font -/FirstChar 48 -/LastChar 57 -/Widths [ 500 500 500 500 500 500 500 500 500 500] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/D [1670 0 R /XYZ 99.895 687.379 null] >> -% 1313 0 obj +% 1669 0 obj << -/BaseFont /XISTAL+Times-Bold -/FontDescriptor 1315 0 R -/Type /Font -/FirstChar 48 -/LastChar 80 
-/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/Font << /F84 687 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1314 0 obj +% 1677 0 obj << -/Type /FontDescriptor -/FontName /XYUGDR+Times-Roman -/FontBBox [ 0 -14 476 688] -/Flags 65568 -/Ascent 688 -/CapHeight 688 -/Descent -14 -/ItalicAngle 0 -/StemV 71 -/MissingWidth 250 -/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) -/FontFile3 1316 0 R +/Type /Page +/Contents 1678 0 R +/Resources 1676 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1663 0 R +/Annots [ 1675 0 R ] >> -% 1315 0 obj +% 1675 0 obj << -/Type /FontDescriptor -/FontName /XISTAL+Times-Bold -/FontBBox [ 0 -13 600 688] -/Flags 65568 -/Ascent 688 -/CapHeight 676 -/Descent -13 -/ItalicAngle 0 -/StemV 90 -/MissingWidth 250 -/CharSet (/P/one/zero) -/FontFile3 1317 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 482.114 409.811 494.174] +/A << /S /GoTo /D (descdata) >> >> -% 1309 0 obj +% 1679 0 obj << -/D [1307 0 R /XYZ 98.895 753.953 null] +/D [1677 0 R /XYZ 149.705 753.953 null] >> -% 1301 0 obj +% 384 0 obj << -/D [1307 0 R /XYZ 99.895 282.918 null] +/D [1677 0 R /XYZ 150.705 716.092 null] >> -% 1306 0 obj +% 1676 0 obj << -/Font << /F54 586 0 R >> -/XObject << /Im5 1291 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1322 0 obj +% 1682 0 obj << /Type /Page -/Contents 1323 0 R -/Resources 1321 0 R +/Contents 1683 0 R +/Resources 1681 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R -/Annots [ 1318 0 R 1319 0 R 1320 0 R ] +/Parent 1663 0 R +/Annots [ 1680 0 R ] >> -% 1318 0 obj +% 1680 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 298.8 495.412 310.86] -/A << /S /GoTo /D (vdata) >> +/Rect [291.943 494.069 359.001 506.129] +/A << /S /GoTo /D (descdata) >> >> -% 1319 0 obj +% 1684 0 obj +<< +/D [1682 0 R /XYZ 98.895 753.953 null] +>> +% 
388 0 obj +<< +/D [1682 0 R /XYZ 99.895 716.092 null] +>> +% 1685 0 obj +<< +/D [1682 0 R /XYZ 99.895 382.093 null] +>> +% 1681 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1688 0 obj +<< +/Type /Page +/Contents 1689 0 R +/Resources 1687 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1663 0 R +/Annots [ 1686 0 R ] +>> +% 1686 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [255.331 289.495 267.287 298.905] -/A << /S /GoTo /D (table.19) >> +/Rect [342.753 494.069 409.811 506.129] +/A << /S /GoTo /D (descdata) >> >> -% 1320 0 obj +% 1690 0 obj +<< +/D [1688 0 R /XYZ 149.705 753.953 null] +>> +% 392 0 obj +<< +/D [1688 0 R /XYZ 150.705 716.092 null] +>> +% 1691 0 obj +<< +/D [1688 0 R /XYZ 150.705 258.556 null] +>> +% 1687 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1694 0 obj +<< +/Type /Page +/Contents 1695 0 R +/Resources 1693 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1698 0 R +/Annots [ 1692 0 R ] +>> +% 1692 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 218.115 409.811 230.175] +/Rect [291.943 494.069 359.001 506.129] /A << /S /GoTo /D (descdata) >> >> -% 1324 0 obj +% 1696 0 obj << -/D [1322 0 R /XYZ 149.705 753.953 null] +/D [1694 0 R /XYZ 98.895 753.953 null] >> -% 300 0 obj +% 396 0 obj << -/D [1322 0 R /XYZ 150.705 716.092 null] +/D [1694 0 R /XYZ 99.895 716.092 null] >> -% 1325 0 obj +% 1697 0 obj << -/D [1322 0 R /XYZ 150.705 460.417 null] +/D [1694 0 R /XYZ 99.895 382.093 null] >> -% 1321 0 obj +% 1693 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1327 0 obj +% 1701 0 obj << /Type /Page -/Contents 1328 0 R -/Resources 1326 0 R +/Contents 1702 0 R +/Resources 1700 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R +/Parent 
1698 0 R +/Annots [ 1699 0 R ] >> -% 1329 0 obj +% 1699 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 494.069 409.811 506.129] +/A << /S /GoTo /D (descdata) >> +>> +% 1703 0 obj << -/D [1327 0 R /XYZ 98.895 753.953 null] +/D [1701 0 R /XYZ 149.705 753.953 null] >> -% 1326 0 obj +% 400 0 obj +<< +/D [1701 0 R /XYZ 150.705 716.092 null] +>> +% 1704 0 obj +<< +/D [1701 0 R /XYZ 150.705 258.556 null] +>> +% 1700 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1334 0 obj +% 1707 0 obj << /Type /Page -/Contents 1335 0 R -/Resources 1333 0 R +/Contents 1708 0 R +/Resources 1706 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1331 0 R ] +/Parent 1698 0 R +/Annots [ 1705 0 R ] >> -% 1331 0 obj +% 1705 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 211.064 409.811 223.124] +/Rect [291.943 573.77 359.001 585.83] /A << /S /GoTo /D (descdata) >> >> -% 1336 0 obj +% 1709 0 obj << -/D [1334 0 R /XYZ 149.705 753.953 null] +/D [1707 0 R /XYZ 98.895 753.953 null] >> -% 304 0 obj +% 405 0 obj << -/D [1334 0 R /XYZ 150.705 716.092 null] +/D [1707 0 R /XYZ 99.895 716.092 null] >> -% 1337 0 obj +% 1710 0 obj +<< +/D [1707 0 R /XYZ 99.895 370.138 null] +>> +% 1711 0 obj << -/D [1334 0 R /XYZ 150.705 449.977 null] +/D [1707 0 R /XYZ 99.895 335.663 null] >> -% 1333 0 obj +% 1706 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1341 0 obj +% 1714 0 obj << /Type /Page -/Contents 1342 0 R -/Resources 1340 0 R +/Contents 1715 0 R +/Resources 1713 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1332 0 R 1338 0 R 1339 0 R ] +/Parent 1698 0 R +/Annots [ 1712 0 R ] >> -% 1332 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect 
[287.51 642.547 363.564 654.607] -/A << /S /GoTo /D (vdata) >> ->> -% 1338 0 obj +% 1712 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [123.806 542.921 199.86 554.981] -/A << /S /GoTo /D (vdata) >> +/Rect [342.753 573.77 409.811 585.83] +/A << /S /GoTo /D (descdata) >> >> -% 1339 0 obj +% 1716 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [421.516 542.921 433.471 554.981] -/A << /S /GoTo /D (table.20) >> +/D [1714 0 R /XYZ 149.705 753.953 null] >> -% 1343 0 obj +% 409 0 obj << -/D [1341 0 R /XYZ 98.895 753.953 null] +/D [1714 0 R /XYZ 150.705 716.092 null] >> -% 1340 0 obj + +endstream +endobj +1723 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R >> -/ProcSet [ /PDF /Text ] +/Length 5529 >> -% 1345 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(6.25)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(sp)]TJ +ET +q +1 0 0 1 170.323 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 173.91 706.129 Td [(getrow)-250(\227)-250(Extract)-250(row\050s\051)-250(from)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -74.015 -19.204 Td [(call)-525(psb_sp_getrow\050row,)-525(a,)-525(nz,)-525(ia,)-525(ja,)-525(val,)-525(info,)-525(&)]TJ 73.225 -11.955 Td [(&)-525(append,)-525(nzin,)-525(lrw\051)]TJ +0 g 0 G +/F75 9.9626 Tf -73.225 -22.29 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -20.42 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.421 Td [(row)]TJ +0 g 0 G +/F84 9.9626 Tf 22.386 0 Td [(The)-250(\050\002rst\051)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 2.521 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf -23.691 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td 
[(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F148 10.3811 Tf 104.323 0 Td [(>)]TJ/F84 9.9626 Tf 10.961 0 Td [(0.)]TJ +0 g 0 G +/F75 9.9626 Tf -140.191 -20.42 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(fr)18(om)-250(which)-250(to)-250(get)-250(r)18(ows.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf -23.691 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 495.976 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 495.777 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 495.976 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 347.544 495.777 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.571 -20.421 Td [(append)]TJ +0 g 0 G +/F84 9.9626 Tf 38.795 0 Td [(Whether)-250(to)-250(append)-250(or)-250(overwrite)-250(existing)-250(output.)]TJ -13.888 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf -23.691 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value)-250(default:)-310(false)-250(\050overwrite\051.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.421 Td [(nzin)]TJ +0 g 0 G +/F84 9.9626 Tf 25.455 0 Td [(Input)-250(size)-250(to)-250(be)-250(appended)-250(to.)]TJ -0.548 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf -23.691 -11.956 Td 
[(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 359.294 Tm [(Speci\002ed)-267(as:)-347(an)-267(integer)]TJ/F148 10.3811 Tf 1 0 0 1 232.103 359.294 Tm [(>)]TJ/F84 9.9626 Tf 1.02 0 0 1 243.473 359.294 Tm [(0.)-369(When)-267(append)-267(is)-267(tr)8(ue,)-272(speci\002es)-267(how)-267(many)]TJ 1 0 0 1 124.802 347.339 Tm [(entries)-250(in)-250(the)-250(output)-250(vectors)-250(ar)18(e)-250(alr)18(eady)-250(\002lled.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.421 Td [(lrw)]TJ +0 g 0 G +/F84 9.9626 Tf 20.165 0 Td [(The)-250(last)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 4.742 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf -28.652 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf -23.691 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F148 10.3811 Tf 104.323 0 Td [(>)]TJ/F84 9.9626 Tf 10.961 0 Td [(0,)-250(default:)]TJ/F78 9.9626 Tf 46.879 0 Td [(r)-17(o)-35(w)]TJ/F84 9.9626 Tf 16.134 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -203.204 -22.29 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.42 Td [(nz)]TJ +0 g 0 G +/F84 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(elements)-250(r)18(eturned)-250(by)-250(this)-250(call.)]TJ 8.857 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.421 Td [(ia)]TJ +0 g 0 G +/F84 9.9626 Tf 13.28 0 Td [(the)-250(r)18(ow)-250(indices.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.343 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td 
[(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F145 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F84 9.9626 Tf 60.024 0 Td [(attribute.)]TJ +0 g 0 G + -89.661 -29.888 Td [(107)]TJ +0 g 0 G +ET + +endstream +endobj +1727 0 obj << -/Type /Page -/Contents 1346 0 R -/Resources 1344 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R +/Length 3789 >> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F75 9.9626 Tf 150.705 706.129 Td [(ja)]TJ +0 g 0 G +/F84 9.9626 Tf 13.28 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F145 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F84 9.9626 Tf 60.025 0 Td [(attribute.)]TJ +0 g 0 G +/F75 9.9626 Tf -254.045 -19.925 Td [(val)]TJ +0 g 0 G +/F84 9.9626 Tf 18.819 0 Td [(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -50.231 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.983 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(array)-250(with)-250(the)]TJ/F145 9.9626 Tf 148.76 0 Td [(ALLOCATABLE)]TJ/F84 9.9626 Tf 60.025 0 Td [(attribute.)]TJ +0 g 0 G +/F75 9.9626 Tf -233.692 -19.925 Td 
[(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.107 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -21.918 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + 1.02 0 0 1 175.303 480.973 Tm [(The)-263(outp)1(ut)]TJ/F78 9.9626 Tf 1 0 0 1 228.31 480.973 Tm [(n)-25(z)]TJ/F84 9.9626 Tf 1.02 0 0 1 241.315 480.973 Tm [(is)-263(always)-262(the)-263(size)-262(of)-263(the)-262(output)-263(generated)-263(b)1(y)-263(the)-263(curr)18(ent)]TJ 1.016 0 0 1 175.611 469.018 Tm [(call;)-246(thus,)-246(if)]TJ/F145 9.9626 Tf 1 0 0 1 229.489 469.018 Tm [(append=.true.)]TJ/F84 9.9626 Tf 1.016 0 0 1 297.484 469.018 Tm [(,)-246(the)-246(total)-246(output)-246(size)-246(will)-246(be)]TJ/F78 9.9626 Tf 1 0 0 1 427.023 469.018 Tm [(n)-25(z)-18(i)-32(n)]TJ/F192 10.3811 Tf 21.204 0 Td [(+)]TJ/F78 9.9626 Tf 10.255 0 Td [(n)-25(z)]TJ/F84 9.9626 Tf 1.016 0 0 1 468.818 469.018 Tm [(,)-246(with)]TJ 1.017 0 0 1 175.611 457.063 Tm [(the)-247(newly)-246(extracted)-247(coef)18(\002cients)-247(stor)18(ed)-247(in)-246(entries)]TJ/F145 9.9626 Tf 1 0 0 1 393.234 457.063 Tm [(nzin+1:nzin+nz)]TJ/F84 9.9626 Tf 1.017 0 0 1 468.958 457.063 Tm [(of)-247(the)]TJ 1 0 0 1 175.611 445.108 Tm [(array)-250(ar)18(guments;)]TJ +0 g 0 G + -12.453 -19.926 Td [(2.)]TJ +0 g 0 G + [-450(When)]TJ/F145 9.9626 Tf 40.777 0 Td [(append=.true.)]TJ/F84 9.9626 Tf 70.485 0 Td [(the)-250(output)-250(arrays)-250(ar)18(e)-250(r)18(eallocated)-250(as)-250(necessary;)]TJ +0 g 0 G + -111.262 -19.925 Td [(3.)]TJ +0 g 0 G + 0.98 0 0 1 175.303 405.257 Tm 
[(The)-204(r)19(ow)-204(and)-204(column)-204(indices)-203(ar)18(e)-204(r)19(eturned)-204(in)-204(the)-203(local)-204(numbering)-204(scheme;)-221(if)]TJ 0.98 0 0 1 175.611 393.302 Tm [(the)-194(global)-194(numbering)-194(is)-193(desir)18(ed,)-206(the)-194(user)-194(may)-194(employ)-194(the)]TJ/F145 9.9626 Tf 1 0 0 1 421.701 393.302 Tm [(psb_loc_to_glob)]TJ/F84 9.9626 Tf -246.09 -11.955 Td [(r)18(outine)-250(on)-250(the)-250(output.)]TJ +0 g 0 G + 139.477 -290.909 Td [(108)]TJ +0 g 0 G +ET endstream endobj -1352 0 obj +1737 0 obj << -/Length 6337 +/Length 4035 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(nl)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(6.26)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ/F84 9.9626 Tf -57.406 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(memory)-250(occupation)-250(of)-250(a)-250(PSBLAS)-250(object.)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(Data)-305(allocation:)-421(in)-305(a)-305(generalized)-305(block-r)18(ow)-305(distribution)-306(the)-305(number)-305(of)-305(in-)]TJ 10.521 -11.955 Td [(dices)-250(belonging)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(May)-250(be)-250(speci\002ed)-250(together)-250(with)]TJ/F59 9.9626 Tf 272.943 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -308.311 -20.135 Td [(repl)]TJ +/F145 9.9626 Tf 0.308 -21.918 Td [(isz)-525(=)-525(psb_sizeof\050a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050desc_a\051)]TJ 0 -11.955 Td 
[(isz)-525(=)-525(psb_sizeof\050prec\051)]TJ 0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(Data)-288(allocation:)-385(build)-288(a)-288(r)18(eplicated)-287(index)-288(space)-288(\050i.e.)-423(all)-288(pr)18(ocesses)-287(own)-288(all)]TJ 1.664 -11.955 Td [(indices\051.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F59 9.9626 Tf 132.133 0 Td [(.true.)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -157.04 -20.135 Td [(globalcheck)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 59.766 0 Td [(Data)-250(allocation:)-310(do)-250(global)-250(checks)-250(on)-250(the)-250(local)-250(index)-250(lists)]TJ/F59 9.9626 Tf 247.788 0 Td [(vl)]TJ/F54 9.9626 Tf -282.647 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value,)-250(default:)]TJ/F59 9.9626 Tf 162.678 0 Td [(.false.)]TJ +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -187.585 -20.135 Td [(lidx)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(Data)-308(allocation:)-425(the)-307(set)-308(of)-307(local)-308(indices)]TJ/F52 9.9626 Tf 175.731 0 Td [(l)-48(i)-32(d)-42(x)]TJ/F85 10.3811 Tf 17.065 0 Td [(\050)]TJ/F54 9.9626 Tf 4.15 0 Td [(1)-397(:)]TJ/F52 9.9626 Tf 15.505 0 Td [(n)-25(l)]TJ/F85 10.3811 Tf 9.105 0 Td [(\051)]TJ/F54 9.9626 Tf 7.214 0 Td [(to)-308(be)-307(assigned)-308(to)-307(the)]TJ -226.548 
-11.955 Td [(global)-250(indices)]TJ/F52 9.9626 Tf 63.476 0 Td [(v)-25(l)]TJ/F54 9.9626 Tf 8.423 0 Td [(.)]TJ -71.899 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ + 0 -19.925 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.127 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf 9.574 0 Td [(A)-250(sparse)-250(matrix)]TJ/F78 9.9626 Tf 72.971 0 Td [(A)]TJ/F84 9.9626 Tf 7.318 0 Td [(.)]TJ -64.956 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 531.947 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 531.748 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 531.947 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 347.544 531.748 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 0 -20.135 Td [(desc)]TJ +/F75 9.9626 Tf -268.571 -19.925 Td [(desc)]TJ ET q -1 0 0 1 120.408 376.512 cm +1 0 0 1 120.408 512.022 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 376.313 Td [(a)]TJ +/F75 9.9626 Tf 123.397 511.823 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 
0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F84 9.9626 Tf 9.963 0 Td [(Communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 328.692 cm +1 0 0 1 309.258 464.201 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 328.492 Td [(desc)]TJ +/F145 9.9626 Tf 312.397 464.002 Td [(desc)]TJ ET q -1 0 0 1 333.945 328.692 cm +1 0 0 1 333.945 464.201 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 328.492 Td [(type)]TJ +/F145 9.9626 Tf 337.084 464.002 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.135 Td [(info)]TJ +/F75 9.9626 Tf -258.11 -19.925 Td [(prec)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -22.128 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -20.082 Td [(1.)]TJ +/F84 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.183 -11.956 
Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.511 0 Td [(psb)]TJ +ET +q +1 0 0 1 337.631 408.41 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 340.77 408.211 Td [(Tprec)]TJ +ET +q +1 0 0 1 367.549 408.41 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 370.687 408.211 Td [(type)]TJ 0 g 0 G - [-500(One)-309(of)-310(the)-309(optional)-310(ar)18(guments)]TJ/F59 9.9626 Tf 152.661 0 Td [(parts)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)]TJ/F59 9.9626 Tf 5.723 0 Td [(vg)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf 5.723 0 Td [(vl)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf 5.723 0 Td [(nl)]TJ/F54 9.9626 Tf 13.544 0 Td [(or)]TJ/F59 9.9626 Tf 12.458 0 Td [(repl)]TJ/F54 9.9626 Tf 24.005 0 Td [(must)-310(be)-309(speci-)]TJ -254.456 -11.956 Td [(\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(initialization)-250(strategy)-250(as)-250(follows:)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.135 Td [(parts)]TJ +/F75 9.9626 Tf -291.714 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(In)-242(this)-242(case)-242(we)-243(have)-242(a)-242(subr)18(outine)-242(specifying)-242(the)-242(mapping)-242(between)]TJ -5.748 -11.955 Td [(global)-312(indices)-312(and)-311(pr)18(ocess/local)-312(index)-312(pairs.)-496(If)-311(this)-312(optional)-312(ar)18(gu-)]TJ 0 -11.955 Td [(ment)-230(is)-230(speci\002ed,)-234(then)-230(it)-230(is)-230(mandatory)-230(to)-230(specify)-230(the)-230(ar)18(gument)]TJ/F59 9.9626 Tf 274.929 0 Td [(mg)]TJ/F54 9.9626 Tf 12.752 0 Td [(as)]TJ -287.681 -11.955 Td [(well.)-310(The)-250(subr)18(outine)-250(must)-250(conform)-250(to)-250(the)-250(following)-250(interface:)]TJ 0 g 0 G + 0 -19.925 Td 
[(Function)-250(value)]TJ 0 g 0 G -/F59 9.9626 Tf 10.46 -18.09 Td [(interface)]TJ 15.691 -11.955 Td [(subroutine)-525(psb_parts\050glob_index,mg,np,pv,nv\051)]TJ +/F84 9.9626 Tf 1.02 0 0 1 172.363 368.361 Tm [(The)-266(memory)-267(occupati)1(on)-267(of)-266(the)-266(object)-267(speci\002ed)-266(in)-266(the)-266(calling)]TJ 1 0 0 1 124.802 356.405 Tm [(sequence,)-250(in)-250(bytes.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.432 -11.955 Td [(Returned)-250(as:)-310(an)]TJ/F145 9.9626 Tf 71.82 0 Td [(integer\050psb_long_int_k_\051)]TJ/F84 9.9626 Tf 128.019 0 Td [(number)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 93.899 -29.888 Td [(71)]TJ + -60.362 -242.057 Td [(109)]TJ 0 g 0 G ET endstream endobj -1358 0 obj +1742 0 obj << -/Length 9985 +/Length 6065 >> stream 0 g 0 G 0 g 0 G BT -/F59 9.9626 Tf 234.142 706.129 Td [(integer,)-525(intent)-525(\050in\051)-1050(::)-525(glob_index,np,mg)]TJ 0 -11.955 Td [(integer,)-525(intent)-525(\050out\051)-525(::)-525(nv,)-525(pv\050*\051)]TJ -10.461 -11.955 Td [(end)-525(subroutine)-525(psb_parts)]TJ -15.691 -11.956 Td [(end)-525(interface)]TJ/F54 9.9626 Tf -10.461 -17.586 Td [(The)-250(input)-250(ar)18(guments)-250(ar)18(e:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -15.594 Td [(glob)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(6.27)-1000(Sorting)-250(utilities)-250(\227)]TJ -0.371 -20.164 Td [(psb)]TJ ET q -1 0 0 1 218.61 637.283 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 170.969 686.164 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 174.555 685.965 Td [(msort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Merge-sort)-250(algorithm)]TJ -24.221 -12.574 Td [(psb)]TJ +ET +q +1 0 0 1 170.969 673.59 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 174.555 673.391 Td [(qsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Quicksort)-250(algorithm)]TJ -24.221 -12.575 Td [(psb)]TJ +ET +q +1 0 0 1 170.969 661.016 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 221.599 637.083 Td 
[(index)]TJ +/F75 11.9552 Tf 174.555 660.816 Td [(hsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Heapsort)-250(algorithm)]TJ +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(The)-250(global)-250(index)-250(to)-250(be)-250(mapped;)]TJ +/F145 9.9626 Tf -23.85 -22.402 Td [(call)-525(psb_msort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_qsort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_hsort\050x,ix,dir,flag\051)]TJ/F84 9.9626 Tf 1.001 0 0 1 165.649 592.721 Tm [(These)-249(serial)-248(r)18(outines)-249(sort)-248(a)-249(sequence)]TJ/F78 9.9626 Tf 1 0 0 1 328.652 592.721 Tm [(X)]TJ/F84 9.9626 Tf 1.001 0 0 1 338.668 592.721 Tm [(into)-249(ascending)-248(or)-249(descending)-248(or)18(der)74(.)]TJ 0.998 0 0 1 150.396 580.766 Tm [(The)-251(ar)18(gument)-250(meaning)-251(is)-251(identical)-251(for)-250(the)-251(thr)18(ee)-251(calls;)-250(the)-251(only)-251(dif)18(fer)18(ence)-251(is)-250(the)]TJ 1 0 0 1 150.705 568.811 Tm [(algorithm)-250(used)-250(to)-250(accomplish)-250(the)-250(task)-250(\050see)-250(Usage)-250(Notes)-250(below\051.)]TJ 0 g 0 G -/F51 9.9626 Tf -54.506 -13.774 Td [(np)]TJ +/F75 9.9626 Tf 0 -21.783 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 17.156 0 Td [(The)-250(number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(mapping;)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -17.156 -13.774 Td [(mg)]TJ +/F75 9.9626 Tf -29.439 -22.402 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 19.377 0 Td [(The)-250(total)-250(number)-250(of)-250(global)-250(r)18(ows)-250(in)-250(the)-250(mapping;)]TJ -19.377 -15.594 Td [(The)-250(output)-250(ar)18(guments)-250(ar)18(e:)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -15.594 Td [(nv)]TJ + 0 -22.402 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 16.608 0 Td [(The)-250(number)-250(of)-250(entries)-250(in)]TJ/F59 9.9626 Tf 111.052 0 Td [(pv)]TJ/F54 9.9626 Tf 10.461 0 Td [(;)]TJ +/F84 9.9626 Tf 9.654 0 Td [(The)-250(sequence)-250(to)-250(be)-250(sorted.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 
Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -138.121 -13.774 Td [(pv)]TJ +/F75 9.9626 Tf -24.906 -22.402 Td [(ix)]TJ 0 g 0 G -/F54 9.9626 Tf 16.608 0 Td [(A)-393(vector)-394(containing)-393(the)-394(indices)-393(of)-394(the)-394(pr)18(ocesses)-393(to)-394(which)-393(the)]TJ 2.022 -11.955 Td [(global)-357(index)-357(should)-356(be)-357(assigend;)-410(each)-357(entry)-357(must)-357(satisfy)-357(0)]TJ/F83 10.3811 Tf 270.063 0 Td [(\024)]TJ/F52 9.9626 Tf -269.39 -11.956 Td [(p)-25(v)]TJ/F85 10.3811 Tf 10.461 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F61 10.3811 Tf 8.665 0 Td [(<)]TJ/F52 9.9626 Tf 12.71 0 Td [(n)-80(p)]TJ/F54 9.9626 Tf 11.442 0 Td [(;)-382(if)]TJ/F52 9.9626 Tf 16.006 0 Td [(n)-25(v)]TJ/F61 10.3811 Tf 15.409 0 Td [(>)]TJ/F54 9.9626 Tf 12.586 0 Td [(1)-338(we)-338(have)-338(an)-338(index)-338(assigned)-338(to)-338(multiple)]TJ -95.244 -11.955 Td [(pr)18(ocesses,)-250(i.e.)-310(we)-250(have)-250(an)-250(overlap)-250(among)-250(the)-250(subdomains.)]TJ +/F84 9.9626 Tf 12.891 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.707 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -61.878 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(\050at)-250(least\051)-250(the)-250(same)-250(size)-250(as)]TJ/F78 9.9626 Tf 254.19 0 Td [(X)]TJ/F84 9.9626 Tf 7.536 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -40.548 -15.594 Td [(vg)]TJ +/F75 9.9626 Tf -286.632 -22.402 Td [(dir)]TJ 0 g 0 G -/F54 9.9626 Tf 16.06 0 Td [(In)-330(this)-330(case)-330(the)-330(association)-330(between)-331(an)-330(index)-330(and)-330(a)-330(pr)18(ocess)-330(is)-330(spec-)]TJ 5.858 -11.955 Td [(i\002ed)-371(via)-372(an)-371(integer)-372(vector)]TJ/F59 9.9626 Tf 120.986 0 Td [(vg\0501:mg\051)]TJ/F54 
9.9626 Tf 41.842 0 Td [(;)-432(each)-372(index)]TJ/F52 9.9626 Tf 58.923 0 Td [(i)]TJ/F83 10.3811 Tf 8.096 0 Td [(2)-506(f)]TJ/F54 9.9626 Tf 17.485 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F83 10.3811 Tf 13.449 0 Td [(g)]TJ/F54 9.9626 Tf 9.016 0 Td [(is)]TJ -289.764 -11.955 Td [(assigned)-381(to)-381(pr)18(ocess)]TJ/F52 9.9626 Tf 91.547 0 Td [(v)-47(g)]TJ/F85 10.3811 Tf 10.68 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)-703(The)-380(vector)]TJ/F59 9.9626 Tf 61.203 0 Td [(vg)]TJ/F54 9.9626 Tf 14.255 0 Td [(must)-381(be)-381(identical)-381(on)-380(all)]TJ -189.127 -11.955 Td [(calling)-354(pr)18(ocesses;)-406(its)-355(entri)1(es)-355(may)-354(have)-354(the)-354(ranges)]TJ/F85 10.3811 Tf 226.209 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.888 0 Td [(\000)]TJ/F54 9.9626 Tf 10.515 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 7.678 0 Td [(or)]TJ/F85 10.3811 Tf -287.387 -11.955 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F85 10.3811 Tf 11.566 0 Td [(\051)]TJ/F54 9.9626 Tf 6.984 0 Td [(accor)18(ding)-284(to)-285(the)-284(value)-285(of)]TJ/F59 9.9626 Tf 113.068 0 Td [(flag)]TJ/F54 9.9626 Tf 20.922 0 Td [(.)-413(The)-285(size)]TJ/F52 9.9626 Tf 45.955 0 Td [(m)-47(g)]TJ/F54 9.9626 Tf 16.159 0 Td [(may)-285(be)-284(spec-)]TJ -238.895 -11.955 Td [(i\002ed)-349(via)-349(the)-349(optional)-349(ar)18(gument)]TJ/F59 9.9626 Tf 144.092 0 Td [(mg)]TJ/F54 9.9626 Tf 10.46 0 Td [(;)-399(the)-349(default)-349(is)-349(to)-349(use)-349(the)-349(entir)18(e)]TJ -154.552 -11.956 Td [(vector)]TJ/F59 9.9626 Tf 29.937 0 Td [(vg)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)-250(thus)-250(having)]TJ/F59 9.9626 Tf 59.885 0 Td [(mg=size\050vg\051)]TJ/F54 9.9626 Tf 57.534 0 Td [(.)]TJ +/F84 9.9626 Tf 17.952 0 Td 
[(The)-250(desir)18(ed)-250(or)18(dering.)]TJ 6.646 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -61.878 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value:)]TJ 0 g 0 G -/F51 9.9626 Tf -179.735 -15.593 Td [(vl)]TJ +/F75 9.9626 Tf 0 -22.402 Td [(Integer)-250(and)-250(real)-250(data:)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(In)-383(this)-382(case)-383(we)-383(ar)18(e)-382(specifying)-383(the)-383(list)-382(of)-383(indices)]TJ/F59 9.9626 Tf 220.787 0 Td [(vl\0501:nl\051)]TJ/F54 9.9626 Tf 45.655 0 Td [(assigned)]TJ -258.362 -11.955 Td [(to)-401(the)-400(curr)18(ent)-401(pr)18(ocess;)-476(thus,)-438(the)-400(global)-401(pr)18(oblem)-400(size)]TJ/F52 9.9626 Tf 243.82 0 Td [(m)-47(g)]TJ/F54 9.9626 Tf 17.316 0 Td [(is)-401(given)]TJ -261.136 -11.956 Td [(by)-435(the)-435(range)-435(of)-435(the)-435(aggr)18(egate)-435(of)-435(the)-435(individual)-435(vectors)]TJ/F59 9.9626 Tf 259.368 0 Td [(vl)]TJ/F54 9.9626 Tf 14.794 0 Td [(spec-)]TJ -274.162 -11.955 Td [(i\002ed)-429(in)-429(the)-429(calling)-428(pr)18(ocesses.)-847(The)-429(size)-429(may)-429(be)-428(speci\002ed)-429(via)-429(the)]TJ 0 -11.955 Td [(optional)-438(ar)18(gument)]TJ/F59 9.9626 Tf 88.319 0 Td [(nl)]TJ/F54 9.9626 Tf 10.46 0 Td [(;)-532(the)-438(default)-438(is)-438(to)-438(use)-438(the)-438(entir)18(e)-438(vector)]TJ/F59 9.9626 Tf 185.156 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)]TJ -294.396 -11.955 Td [(thus)-364(having)]TJ/F59 9.9626 Tf 57.178 0 Td [(nl=size\050vl\051)]TJ/F54 9.9626 Tf 57.534 0 Td [(.)-652(If)]TJ/F59 9.9626 Tf 19.294 0 Td [(globalcheck=.true.)]TJ/F54 9.9626 Tf 97.774 0 Td [(the)-364(subr)18(outine)]TJ -231.78 -11.955 Td [(will)-403(check)-403(how)-404(many)-403(times)-403(each)-403(entry)-403(in)-404(the)-403(global)-403(index)-403(space)]TJ/F85 10.3811 Tf 0.125 -11.955 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F85 10.3811 Tf 13.449 0 Td [(\051)]TJ/F54 
9.9626 Tf 6.245 0 Td [(is)-210(speci\002ed)-211(in)-210(the)-210(input)-210(lists)]TJ/F59 9.9626 Tf 122.836 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)-218(thus)-211(allowin)1(g)-211(for)-210(the)-210(pr)18(es-)]TJ -177.232 -11.956 Td [(ence)-302(of)-302(overlap)-302(in)-302(the)-302(input,)-315(and)-302(checki)1(ng)-302(for)-302(\223orphan\224)-302(indices.)-466(If)]TJ/F59 9.9626 Tf 0 -11.955 Td [(globalcheck=.false.)]TJ/F54 9.9626 Tf 99.377 0 Td [(,)-437(the)-400(subr)18(outine)-400(will)-400(not)-400(check)-400(for)-400(overlap,)]TJ -99.377 -11.955 Td [(and)-255(may)-255(be)-255(signi\002cantly)-255(faster)74(,)-257(but)-255(the)-255(user)-255(is)-255(implicitly)-255(guarantee-)]TJ 0 -11.955 Td [(ing)-250(that)-250(ther)18(e)-250(ar)18(e)-250(neither)-250(orphan)-250(nor)-250(overlap)-250(indices.)]TJ +/F145 9.9626 Tf 101.28 0 Td [(psb_sort_up_)]TJ/F84 9.9626 Tf 1.02 0 0 1 339.655 363.286 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 345.778 363.286 Tm [(psb_sort_down_)]TJ/F84 9.9626 Tf 1.02 0 0 1 419.003 363.286 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 425.126 363.286 Tm [(psb_asort_up_)]TJ/F84 9.9626 Tf 1.02 0 0 1 493.121 363.286 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 197.529 351.331 Tm [(psb_asort_down_)]TJ/F84 9.9626 Tf 78.455 0 Td [(;)-250(default)]TJ/F145 9.9626 Tf 38.784 0 Td [(psb_sort_up_)]TJ/F84 9.9626 Tf 62.765 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -21.918 -15.594 Td [(lidx)]TJ +/F75 9.9626 Tf -201.922 -17.178 Td [(Complex)-250(data:)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(The)-377(optional)-376(ar)18(gument)]TJ/F59 9.9626 Tf 107.528 0 Td [(lidx)]TJ/F54 9.9626 Tf 24.674 0 Td [(is)-377(available)-376(for)-377(those)-377(cases)-377(in)-376(which)]TJ -132.969 -11.955 Td [(the)-361(user)-361(has)-361(alr)18(eady)-361(established)-361(a)-361(global-to-local)-361(mapping;)-416(if)-361(it)-361(is)]TJ 0 -11.955 Td [(speci\002ed,)-253(each)-253(index)-253(in)]TJ/F59 9.9626 Tf 105.128 0 Td [(vl\050i\051)]TJ/F54 9.9626 Tf 28.669 0 Td 
[(will)-253(be)-252(mapped)-253(to)-253(the)-252(corr)18(esponding)]TJ -133.797 -11.955 Td [(local)-317(index)]TJ/F59 9.9626 Tf 51.649 0 Td [(lidx\050i\051)]TJ/F54 9.9626 Tf 36.612 0 Td [(.)-510(When)-317(specifying)-316(the)-317(ar)18(gument)]TJ/F59 9.9626 Tf 148.638 0 Td [(lidx)]TJ/F54 9.9626 Tf 24.076 0 Td [(the)-317(user)]TJ -260.975 -11.956 Td [(would)-329(also)-330(likely)-329(employ)]TJ/F59 9.9626 Tf 117.394 0 Td [(lidx)]TJ/F54 9.9626 Tf 24.203 0 Td [(in)-329(calls)-330(to)]TJ/F59 9.9626 Tf 46.656 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf 50.355 0 Td [(and)]TJ/F59 9.9626 Tf 20.148 0 Td [(local)]TJ/F54 9.9626 Tf 29.433 0 Td [(in)]TJ -288.189 -11.955 Td [(calls)-250(to)]TJ/F59 9.9626 Tf 33.095 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.564 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ/F54 9.9626 Tf 47.073 0 Td [(;)-250(see)-250(also)-250(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-250(2.3.1)]TJ +/F145 9.9626 Tf 70.287 0 Td [(psb_lsort_up_)]TJ/F84 9.9626 Tf 0.98 0 0 1 313.892 334.153 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 318.345 334.153 Tm [(psb_lsort_down_)]TJ/F84 9.9626 Tf 0.98 0 0 1 396.8 334.153 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 401.254 334.153 Tm [(psb_asort_up_)]TJ/F84 9.9626 Tf 0.98 0 0 1 469.248 334.153 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 473.701 334.153 Tm [(psb_asort_down_)]TJ/F84 9.9626 Tf 0.98 0 0 1 552.157 334.153 Tm [(;)]TJ 1 0 0 1 197.529 322.197 Tm [(default)]TJ/F145 9.9626 Tf 33.803 0 Td [(psb_lsort_up_)]TJ/F84 9.9626 Tf 67.994 0 Td [(.)]TJ 0 g 0 G - [(.)]TJ +/F75 9.9626 Tf -148.621 -22.402 Td [(\003ag)]TJ 0 g 0 G -/F51 9.9626 Tf -171.008 -15.593 Td [(nl)]TJ +/F84 9.9626 Tf 21.091 0 Td [(Whether)-250(to)-250(keep)-250(the)-250(original)-250(values)-250(in)]TJ/F78 9.9626 Tf 171.519 0 Td [(I)-81(X)]TJ/F84 9.9626 Tf 11.661 0 Td [(.)]TJ -179.673 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ 0.98 0 0 1 175.611 275.885 Tm [(Speci\002ed)-194(as:)-286(an)-193(integer)-194(value)]TJ/F145 9.9626 Tf 1 0 0 1 301.54 
275.885 Tm [(psb_sort_ovw_idx_)]TJ/F84 9.9626 Tf 0.98 0 0 1 392.349 275.885 Tm [(or)]TJ/F145 9.9626 Tf 1 0 0 1 403.429 275.885 Tm [(psb_sort_keep_idx_)]TJ/F84 9.9626 Tf 0.98 0 0 1 497.575 275.885 Tm [(;)]TJ 1 0 0 1 175.611 263.93 Tm [(default)]TJ/F145 9.9626 Tf 33.803 0 Td [(psb_sort_ovw_idx_)]TJ/F84 9.9626 Tf 88.916 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(If)-411(this)-411(ar)18(gument)-411(is)-411(speci\002ed)-411(alone)-411(\050i.e.)-793(without)]TJ/F59 9.9626 Tf 223.432 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(\051)-411(the)-411(r)18(esult)-411(is)-411(a)]TJ -226.361 -11.956 Td [(generalized)-280(r)18(ow-block)-280(distribution)-280(in)-280(which)-281(each)-280(pr)18(ocess)]TJ/F52 9.9626 Tf 257.148 0 Td [(I)]TJ/F54 9.9626 Tf 6.642 0 Td [(gets)-280(as-)]TJ -263.79 -11.955 Td [(signed)-250(a)-250(consecutive)-250(chunk)-250(of)]TJ/F52 9.9626 Tf 135.186 0 Td [(N)]TJ/F52 7.5716 Tf 7.851 -1.808 Td [(I)]TJ/F85 10.3811 Tf 6.316 1.808 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-25(l)]TJ/F54 9.9626 Tf 11.472 0 Td [(global)-250(indices.)]TJ +/F75 9.9626 Tf -147.625 -24.395 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -193.829 -15.593 Td [(repl)]TJ 0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(This)-418(ar)18(guments)-417(speci\002es)-418(to)-417(r)18(eplicate)-418(all)-417(indices)-418(on)-418(all)-417(pr)18(ocesses.)]TJ -1.325 -11.956 Td [(This)-366(is)-367(a)-366(special)-366(purpose)-366(data)-367(allocation)-366(that)-366(is)-366(useful)-367(in)-366(the)-366(con-)]TJ 0 -11.955 Td [(str)8(uction)-250(of)-250(some)-250(multilevel)-250(pr)18(econditioners.)]TJ + 0 -22.402 Td [(x)]TJ 0 g 0 G - -34.371 -19.579 Td [(2.)]TJ +/F84 9.9626 Tf 9.654 0 Td [(The)-250(sequence)-250(of)-250(values,)-250(in)-250(the)-250(chosen)-250(or)18(dering.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td 
[(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ +/F75 9.9626 Tf -24.906 -22.402 Td [(ix)]TJ 0 g 0 G - 154.421 -29.888 Td [(72)]TJ +/F84 9.9626 Tf 12.891 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.707 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F84 9.9626 Tf 0.996 0 0 1 175.223 146.91 Tm [(An)-251(integer)-250(array)-251(of)-251(rank)-250(1,)-251(whose)-251(entries)-250(ar)18(e)-251(moved)-251(to)-250(the)-251(same)-251(position)]TJ 1 0 0 1 175.611 134.955 Tm [(as)-250(the)-250(corr)18(esponding)-250(entries)-250(in)]TJ/F78 9.9626 Tf 138.215 0 Td [(x)]TJ/F84 9.9626 Tf 5.206 0 Td [(.)]TJ +0 g 0 G + -3.944 -44.517 Td [(110)]TJ 0 g 0 G ET endstream endobj -1364 0 obj +1746 0 obj << -/Length 2555 +/Length 8202 >> stream 0 g 0 G 0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + 1.02 0 0 1 124.802 686.204 Tm [(For)-318(integer)-317(or)-318(r)18(eal)-318(data)-318(the)-318(sorting)-317(can)-318(be)-318(performed)-317(in)-318(the)-318(up/down)]TJ 1 0 0 1 124.802 674.248 Tm [(dir)18(ection,)-250(on)-250(the)-250(natural)-250(or)-250(absolute)-250(values;)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 124.802 654.323 Tm [(For)-287(complex)-288(data)-287(the)-287(sorting)-288(can)-287(be)-287(done)-288(in)-287(a)-287(lexicographic)-288(or)18(der)-287(\050i.e.:)]TJ 1.007 0 0 1 124.802 642.368 Tm [(sort)-247(on)-248(the)-247(r)18(eal)-248(part)-247(with)-248(tie)1(s)-248(br)18(oken)-247(accor)17(ding)-247(to)-247(the)-248(imaginary)-247(part\051)-248(or)]TJ 1 0 0 1 124.802 630.413 Tm [(on)-250(the)-250(absolute)-250(values;)]TJ +0 g 0 G + -12.453 -19.925 Td [(3.)]TJ 0 g 0 G + 0.98 0 0 1 124.493 610.488 Tm 
[(The)-240(r)18(outines)-240(r)18(et)1(urn)-241(the)-240(items)-240(in)-240(the)-240(chosen)-240(or)18(dering;)-245(the)-240(output)-240(dif)18(fer)18(ence)]TJ 0.98 0 0 1 124.802 598.532 Tm [(is)-256(the)-256(handling)-256(of)-256(ties)-256(\050i.e.)-318(items)-256(with)-256(an)-256(equal)-256(value\051)-256(in)-256(the)-256(original)-256(input.)]TJ 0.98 0 0 1 124.304 586.577 Tm [(W)56(ith)-225(the)-226(mer)18(ge-so)1(rt)-226(algorithm)-226(t)1(ies)-226(ar)18(e)-225(pr)18(eserved)-225(in)-226(the)-225(same)-226(r)19(elative)-226(or)18(der)]TJ 1.02 0 0 1 124.802 574.622 Tm [(as)-352(they)-351(had)-352(in)-352(the)-351(original)-352(sequence,)-378(while)-352(this)-351(is)-352(not)-352(guaranteed)-351(for)]TJ 1 0 0 1 124.523 562.667 Tm [(quicksort)-250(or)-250(heapsort;)]TJ +0 g 0 G + -12.174 -19.925 Td [(4.)]TJ +0 g 0 G + 0.98 0 0 1 124.802 542.742 Tm [(If)]TJ/F78 9.9626 Tf 1 0 0 1 135.146 542.742 Tm [(f)-160(l)-70(a)-47(g)]TJ/F192 10.3811 Tf 20.72 0 Td [(=)]TJ/F78 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ +ET +q +1 0 0 1 182.189 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 185.303 542.742 Td [(s)-25(o)-25(r)-35(t)]TJ +ET +q +1 0 0 1 202.364 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 205.477 542.742 Td [(o)-35(v)-25(w)]TJ +ET +q +1 0 0 1 223.394 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F54 9.9626 Tf 112.349 706.129 Td [(3.)]TJ +/F78 9.9626 Tf 226.438 542.742 Td [(i)-32(d)-42(x)]TJ +ET +q +1 0 0 1 240.729 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 0.98 0 0 1 246.101 542.742 Tm [(then)-244(the)-244(entries)-244(in)]TJ/F78 9.9626 Tf 1 0 0 1 326.032 542.742 Tm [(i)-49(x)]TJ/F192 10.3811 Tf 8.587 0 Td [(\050)]TJ/F84 9.9626 Tf 0.98 0 0 1 338.769 542.742 Tm [(1)]TJ 1 0 0 1 346.542 542.742 Tm [(:)]TJ/F78 9.9626 Tf 5.507 0 Td [(n)]TJ/F192 10.3811 Tf 5.788 0 Td [(\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 364.37 542.742 Tm [(wher)18(e)]TJ/F78 9.9626 Tf 1 0 0 1 393.733 542.742 Tm [(n)]TJ/F84 
9.9626 Tf 0.98 0 0 1 401.779 542.742 Tm [(is)-244(the)-244(size)]TJ 0.984 0 0 1 124.802 530.786 Tm [(of)]TJ/F78 9.9626 Tf 1 0 0 1 136.206 530.786 Tm [(x)]TJ/F84 9.9626 Tf 0.984 0 0 1 143.904 530.786 Tm [(ar)18(e)-254(initialized)-254(to)]TJ/F78 9.9626 Tf 1 0 0 1 216.929 530.786 Tm [(i)-49(x)]TJ/F192 10.3811 Tf 8.588 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F190 10.3811 Tf 7.046 0 Td [(\040)]TJ/F78 9.9626 Tf 13.332 0 Td [(i)]TJ/F84 9.9626 Tf 0.984 0 0 1 256.15 530.786 Tm [(;)-254(thus,)-255(upon)-254(r)18(eturn)-254(fr)18(om)-254(the)-254(subr)18(outine,)-254(for)]TJ 1.02 0 0 1 124.802 518.831 Tm [(each)-260(index)]TJ/F78 9.9626 Tf 1 0 0 1 175.703 518.831 Tm [(i)]TJ/F84 9.9626 Tf 1.02 0 0 1 181.308 518.831 Tm [(we)-260(have)-260(in)]TJ/F78 9.9626 Tf 1 0 0 1 233.102 518.831 Tm [(i)-49(x)]TJ/F192 10.3811 Tf 8.588 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 255.772 518.831 Tm [(the)-260(position)-260(that)-260(the)-259(item)]TJ/F78 9.9626 Tf 1 0 0 1 371.842 518.831 Tm [(x)]TJ/F192 10.3811 Tf 5.33 0 Td [(\050)]TJ/F78 9.9626 Tf 4.204 0 Td [(i)]TJ/F192 10.3811 Tf 3.088 0 Td [(\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 391.255 518.831 Tm [(occupied)-260(in)]TJ 1 0 0 1 124.802 506.876 Tm [(the)-250(original)-250(data)-250(sequence;)]TJ 0 g 0 G - [-500(Calling)-260(the)-260(r)18(outine)-260(with)]TJ/F59 9.9626 Tf 120.886 0 Td [(vg)]TJ/F54 9.9626 Tf 13.052 0 Td [(or)]TJ/F59 9.9626 Tf 11.965 0 Td [(parts)]TJ/F54 9.9626 Tf 28.742 0 Td [(implies)-260(that)-260(every)-260(pr)18(ocess)-260(will)-260(scan)]TJ -162.192 -11.955 Td [(the)-250(entir)18(e)-250(index)-250(space)-250(to)-250(\002gur)18(e)-250(out)-250(the)-250(local)-250(indices.)]TJ + -12.453 -19.925 Td [(5.)]TJ 0 g 0 G - -12.453 -19.926 Td [(4.)]TJ + 1.02 0 0 1 124.802 486.951 Tm [(If)]TJ/F78 9.9626 Tf 1 0 0 1 136.291 486.951 Tm [(f)-160(l)-70(a)-47(g)]TJ/F192 10.3811 Tf 22.147 0 Td [(=)]TJ/F78 9.9626 Tf 13.061 0 Td 
[(p)-25(s)-25(b)]TJ +ET +q +1 0 0 1 186.188 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 189.302 486.951 Td [(s)-25(o)-25(r)-35(t)]TJ +ET +q +1 0 0 1 206.362 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 209.476 486.951 Td [(k)-30(e)-25(e)-80(p)]TJ +ET +q +1 0 0 1 228.698 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F78 9.9626 Tf 231.742 486.951 Td [(i)-32(d)-42(x)]TJ +ET +q +1 0 0 1 246.033 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 1.02 0 0 1 252.283 486.951 Tm [(the)-321(r)18(outine)-321(will)-321(assume)-321(that)-321(the)-321(entries)-321(in)]TJ/F78 9.9626 Tf 1 0 0 1 124.857 474.996 Tm [(i)-49(x)]TJ/F192 10.3811 Tf 8.587 0 Td [(\050)]TJ/F84 9.9626 Tf 4.274 0 Td [(:)]TJ/F192 10.3811 Tf 2.74 0 Td [(\051)]TJ/F84 9.9626 Tf 6.64 0 Td [(have)-250(alr)18(eady)-250(been)-250(initialized)-250(by)-250(the)-250(user;)]TJ 0 g 0 G - [-500(Overlapped)-250(indices)-250(ar)18(e)-250(possible)-250(with)-250(both)]TJ/F59 9.9626 Tf 201.093 0 Td [(parts)]TJ/F54 9.9626 Tf 28.643 0 Td [(and)]TJ/F59 9.9626 Tf 19.357 0 Td [(vl)]TJ/F54 9.9626 Tf 12.951 0 Td [(invocations.)]TJ + -34.749 -19.926 Td [(6.)]TJ 0 g 0 G - -262.044 -19.925 Td [(5.)]TJ + 1.008 0 0 1 124.493 455.07 Tm [(The)-249(thr)18(ee)-249(sorting)-248(algorithms)-249(have)-249(a)-248(similar)]TJ/F78 9.9626 Tf 1 0 0 1 317.915 455.07 Tm [(O)]TJ/F192 10.3811 Tf 8 0 Td [(\050)]TJ/F78 9.9626 Tf 4.273 0 Td [(n)]TJ/F84 9.9626 Tf 7.325 0 Td [(log)]TJ/F78 9.9626 Tf 15.662 0 Td [(n)]TJ/F192 10.3811 Tf 5.789 0 Td [(\051)]TJ/F84 9.9626 Tf 1.008 0 0 1 365.611 455.07 Tm [(expected)-249(r)8(unning)]TJ 1.008 0 0 1 124.802 443.115 Tm [(time;)-248(in)-248(the)-248(average)-248(case)-248(quicksort)-248(will)-247(be)-248(the)-248(fastest)-248(and)-248(mer)18(ge-sort)-248(the)]TJ 1 0 0 1 124.802 431.16 Tm [(slowest.)-310(However)-250(note)-250(that:)]TJ 0 g 0 G - [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F59 9.9626 Tf 175.161 
0 Td [(vl)]TJ/F54 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F59 9.9626 Tf 86.235 0 Td [(globalcheck=.true.)]TJ/F54 9.9626 Tf 94.146 0 Td [(,)]TJ -355.443 -11.955 Td [(it)-280(will)-281(perform)-280(a)-280(scan)-281(of)-280(the)-281(index)-280(space)-280(to)-281(sear)18(ch)-280(for)-280(overlap)-281(or)-280(orphan)]TJ 0 -11.955 Td [(indices.)]TJ + 5.32 -19.925 Td [(\050a\051)]TJ 0 g 0 G - -12.453 -19.925 Td [(6.)]TJ + 0.98 0 0 1 146.411 411.235 Tm [(The)-241(worst)-240(case)-241(r)8(unning)-241(time)-240(for)-241(quicksort)-241(is)]TJ/F78 9.9626 Tf 1 0 0 1 338.306 411.235 Tm [(O)]TJ/F192 10.3811 Tf 8 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(n)]TJ/F84 7.5716 Tf 5.664 3.616 Td [(2)]TJ/F192 10.3811 Tf 4.408 -3.616 Td [(\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 364.802 411.235 Tm [(;)-245(the)-241(algorithm)-241(im-)]TJ 1.007 0 0 1 146.421 399.28 Tm [(plemented)-246(her)18(e)-246(follows)-246(the)-246(well-known)-246(median-of-thr)18(ee)-246(heuristics,)]TJ 1 0 0 1 146.72 387.324 Tm [(but)-250(the)-250(worst)-250(case)-250(may)-250(still)-250(apply;)]TJ 0 g 0 G - [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F59 9.9626 Tf 175.161 0 Td [(vl)]TJ/F54 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F59 9.9626 Tf 86.235 0 Td [(globalcheck=.false.)]TJ/F54 9.9626 Tf 99.377 0 Td [(,)]TJ -360.674 -11.956 Td [(no)-338(index)-337(space)-338(scan)-337(will)-338(take)-337(place.)-573(Thus)-337(it)-338(is)-338(the)-337(r)18(esponsibility)-338(of)-337(the)]TJ 0 -11.955 Td [(user)-328(to)-328(make)-328(sur)18(e)-328(that)-328(the)-328(indices)-328(speci\002ed)-328(in)]TJ/F59 9.9626 Tf 209.973 0 Td [(vl)]TJ/F54 9.9626 Tf 13.729 0 Td [(have)-328(neither)-328(orphans)]TJ -223.702 -11.955 Td [(nor)-250(overlaps;)-250(if)-250(this)-250(assumption)-250(fails,)-250(r)18(esults)-250(will)-250(be)-250(unpr)18(edictable.)]TJ + -17.126 -15.94 Td [(\050b\051)]TJ 0 g 0 G - -12.453 -19.925 Td [(7.)]TJ + 0.98 0 0 1 146.411 371.384 Tm 
[(The)-194(worst)-194(case)-194(r)9(unning)-194(time)-194(for)-194(mer)18(ge-sort)-193(and)-194(heap-sort)-194(is)]TJ/F78 9.9626 Tf 1 0 0 1 402.204 371.384 Tm [(O)]TJ/F192 10.3811 Tf 8 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(n)]TJ/F84 9.9626 Tf 7.324 0 Td [(log)]TJ/F78 9.9626 Tf 15.663 0 Td [(n)]TJ/F192 10.3811 Tf 5.788 0 Td [(\051)]TJ/F84 9.9626 Tf -296.533 -11.955 Td [(as)-250(the)-250(average)-250(case;)]TJ +0 g 0 G + -16.04 -15.94 Td [(\050c\051)]TJ 0 g 0 G - [-500(Orphan)-417(and)-416(overlap)-417(indices)-416(ar)18(e)-417(impossible)-416(by)-417(constr)8(uction)-417(when)-416(the)]TJ 12.453 -11.955 Td [(subr)18(outine)-250(is)-250(invoked)-250(with)]TJ/F59 9.9626 Tf 121.164 0 Td [(nl)]TJ/F54 9.9626 Tf 12.951 0 Td [(\050alone\051,)-250(or)]TJ/F59 9.9626 Tf 47.372 0 Td [(vg)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)]TJ + 1.005 0 0 1 146.411 343.489 Tm [(The)-249(mer)18(ge-sort)-249(algorithm)-249(is)-249(implemented)-249(to)-250(take)-249(advantage)-249(of)-249(sub-)]TJ 1.02 0 0 1 146.72 331.534 Tm [(sequences)-260(that)-261(may)-261(be)-260(alr)18(eady)-261(in)-260(the)-261(desir)18(ed)-261(or)18(dering)-261(prior)-260(to)-261(the)]TJ 1.02 0 0 1 146.72 319.578 Tm [(subr)18(outine)-321(call;)-358(this)-321(situation)-320(is)-321(r)18(elatively)-321(common)-320(when)-321(dealing)]TJ 0.98 0 0 1 146.301 307.623 Tm [(with)-239(gr)18(oups)-240(of)-239(indices)-239(of)-240(sparse)-239(matrix)-240(entries,)-242(thus)-240(mer)19(ge-sort)-240(is)-239(the)]TJ 1.02 0 0 1 146.421 295.668 Tm [(pr)18(eferr)17(ed)-268(choice)-268(when)-268(a)-268(sorting)-268(is)-268(needed)-268(by)-269(other)-268(r)18(outines)-268(in)-268(the)]TJ 1 0 0 1 146.72 283.713 Tm [(library)111(.)]TJ 0 g 0 G - -49.98 -452.304 Td [(73)]TJ + 117.559 -193.275 Td [(111)]TJ 0 g 0 G ET endstream endobj -1375 0 obj +1759 0 obj +<< +/Length 172 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 14.3462 Tf 150.705 705.784 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ +0 g 0 G +/F84 9.9626 Tf 164.383 -615.346 Td 
[(112)]TJ +0 g 0 G +ET + +endstream +endobj +1763 0 obj << -/Length 7006 +/Length 6837 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.2)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(7.1)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(routine)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdins\050nz,)-525(ia,)-525(ja,)-525(desc_a,)-525(info)-525([,ila,jla]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdins\050nz,ja,desc,info[,jla,mask,lidx]\051)]TJ/F54 9.9626 Tf 14.944 -20.366 Td [(This)-336(subr)18(outine)-335(examines)-336(the)-336(edges)-335(of)-336(the)-336(graph)-335(associated)-336(with)-335(the)-336(dis-)]TJ -14.944 -11.955 Td [(cr)18(etization)-260(mesh)-261(\050and)-260(isomorphic)-260(to)-261(the)-260(sparsity)-260(pattern)-261(of)-260(a)-260(linear)-261(system)-260(co-)]TJ 0 -11.955 Td [(ef)18(\002cient)-238(matrix\051,)-241(storing)-238(them)-239(as)-238(necessary)-238(into)-239(the)-238(communication)-238(descriptor)74(.)]TJ 0 -11.955 Td [(In)-259(the)-260(\002rst)-259(form)-260(the)-259(edges)-259(ar)18(e)-260(speci\002ed)-259(as)-260(pairs)-259(of)-260(indices)]TJ/F52 9.9626 Tf 255.974 0 Td [(i)-47(a)]TJ/F85 10.3811 Tf 7.91 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)]TJ/F52 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.841 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-264(the)-260(start)1(-)]TJ -299.233 -11.956 Td [(ing)-299(index)]TJ/F52 9.9626 Tf 44.948 0 Td [(i)-47(a)]TJ/F85 10.3811 Tf 7.91 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 7.13 0 Td 
[(should)-299(belong)-299(to)-299(the)-299(curr)18(ent)-299(pr)18(ocess.)-458(In)-299(the)-299(second)-299(form)-299(only)]TJ -67.28 -11.955 Td [(the)-250(r)18(emote)-250(indices)]TJ/F52 9.9626 Tf 83.65 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.841 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 6.64 0 Td [(ar)18(e)-250(speci\002ed.)]TJ -0 g 0 G -/F51 9.9626 Tf -105.423 -20.366 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.304 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.305 Td [(nz)]TJ -0 g 0 G -/F54 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(points)-250(being)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.305 Td [(ia)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(environment)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -19.304 Td [(ja)]TJ +/F145 9.9626 Tf -51.121 -18.964 
Td [(call)-525(psb_init\050ctxt,)-525(np,)-525(basectxt,)-525(ids,)-525(extcomm\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 114.839 669.261 Tm [(This)-354(subr)18(outine)-353(initializes)-354(the)-354(PSBLAS)-353(parallel)-354(envir)18(onment,)-381(de\002ning)-353(a)]TJ 1 0 0 1 99.616 657.306 Tm [(virtual)-250(parallel)-250(machine.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F75 9.9626 Tf 0.279 -16.715 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -19.304 Td [(mask)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.33 0 Td [(Mask)-247(entries)-248(in)]TJ/F59 9.9626 Tf 69.91 0 Td [(ja)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)-248(they)-247(ar)18(e)-248(inserted)-247(only)-248(when)-247(the)-247(corr)18(esponding)]TJ/F59 9.9626 Tf 213.089 0 Td [(mask)]TJ/F54 9.9626 Tf -297.883 -11.956 Td [(entries)-250(ar)18(e)]TJ/F59 9.9626 Tf 48.139 0 Td [(.true.)]TJ/F54 9.9626 Tf -48.139 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 164.297 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(,)-250(default)]TJ/F59 9.9626 
Tf 38.784 0 Td [(.true.)]TJ/F54 9.9626 Tf 31.382 0 Td [(.)]TJ +/F75 9.9626 Tf -29.828 -18.32 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -269.706 -19.305 Td [(lidx)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(User)-250(de\002ned)-250(local)-250(indices)-250(for)]TJ/F59 9.9626 Tf 131.117 0 Td [(ja)]TJ/F54 9.9626 Tf 10.46 0 Td [(.)]TJ -139.355 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ + 0 -18.319 Td [(np)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -20.366 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(all)-250(available)-250(pr)18(ocesses.)]TJ 0 g 0 G +/F75 9.9626 Tf -24.907 -18.32 Td [(basectxt)]TJ 0 g 0 G - 0 -19.305 Td [(desc)]TJ +/F84 9.9626 Tf 1.018 0 0 1 141.39 537.811 Tm [(the)-246(i)1(nitial)-246(PSBLAS)-246(communication)-245(context.)-305(The)-245(new)-246(context)-245(will)-246(be)]TJ 1 0 0 1 124.802 525.856 Tm [(de\002ned)-250(fr)18(om)-250(the)-250(pr)18(ocesses)-250(participating)-250(in)-250(the)-250(initial)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td 
[(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(MPI)]TJ +ET +q +1 0 0 1 336.765 478.235 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 339.753 478.035 Td [(COMM)]TJ ET q -1 0 0 1 171.218 168.346 cm +1 0 0 1 374.095 478.235 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 168.146 Td [(a)]TJ +/F84 9.9626 Tf 377.083 478.035 Td [(WORLD.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(updated)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F75 9.9626 Tf -277.188 -18.32 Td [(ids)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 118.705 459.715 Tm [(Identities)-374(of)-374(the)-374(pr)18(ocesses)-374(to)-374(use)-374(for)-374(t)1(he)-374(new)-374(context;)-439(the)-373(ar)17(gument)-374(is)]TJ 1.02 0 0 1 124.802 447.76 Tm [(ignor)18(ed)-287(when)]TJ/F145 9.9626 Tf 1 0 0 1 190.771 447.76 Tm [(np)]TJ/F84 9.9626 Tf 1.02 0 0 1 204.14 447.76 Tm [(is)-286(not)-286(speci\002ed.)-428(This)-286(allows)-286(the)-286(pr)17(ocesses)-286(in)-286(the)-286(new)]TJ 1 0 0 1 124.802 435.805 Tm [(envir)18(onment)-250(to)-250(be)-250(in)-250(an)-250(or)18(der)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(original)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ 
-60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)-560(Default:)-310(use)-250(the)-250(indices)]TJ/F192 10.3811 Tf 240.57 0 Td [(\050)]TJ/F84 9.9626 Tf 4.149 0 Td [(0)-179(.)-192(.)-192(.)]TJ/F78 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.504 0 Td [(\000)]TJ/F84 9.9626 Tf 10.131 0 Td [(1)]TJ/F192 10.3811 Tf 5.106 0 Td [(\051)]TJ/F84 9.9626 Tf 4.149 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -322.483 -18.319 Td [(extcomm)]TJ +0 g 0 G +/F84 9.9626 Tf 1.02 0 0 1 145.833 369.665 Tm [(an)-301(alternative)-301(initial)-301(MPI)-301(communicator)72(.)-472(The)-301(new)-301(context)-301(will)-301(be)]TJ 1 0 0 1 124.802 357.709 Tm [(de\002ned)-250(fr)18(om)-250(the)-250(pr)18(ocesses)-250(participating)-250(in)-250(the)-250(initial)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(MPI)]TJ ET q -1 0 0 1 360.068 120.525 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 336.765 310.088 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 363.206 120.326 Td [(desc)]TJ +/F84 9.9626 Tf 339.753 309.889 Td [(COMM)]TJ ET q -1 0 0 1 384.755 120.525 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 374.095 310.088 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 387.893 120.326 Td [(type)]TJ +/F84 9.9626 Tf 377.083 309.889 Td [(WORLD.)]TJ +0 g 0 G +/F75 9.9626 Tf -277.188 -17.904 Td [(On)-250(Return)]TJ +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + 
0 -18.32 Td [(ctxt)]TJ 0 g 0 G - -91.236 -29.888 Td [(74)]TJ +/F84 9.9626 Tf 1.01 0 0 1 120.916 273.665 Tm [(the)-246(communication)-245(context)-246(identifying)-246(the)-245(virtual)-246(parallel)-245(machine,)-247(t)1(ype)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 261.71 Tm [(psb_ctxt_type)]TJ/F84 9.9626 Tf 1.004 0 0 1 192.796 261.71 Tm [(.)-310(Note)-249(that)-250(this)-249(is)-249(always)-250(a)-249(duplicate)-249(of)]TJ/F145 9.9626 Tf 1 0 0 1 367.227 261.71 Tm [(basectxt)]TJ/F84 9.9626 Tf 1.004 0 0 1 409.07 261.71 Tm [(,)-249(so)-250(that)]TJ 0.985 0 0 1 124.802 249.754 Tm [(library)-255(communications)-256(ar)19(e)-256(completely)-255(separated)-256(fr)19(om)-256(other)-255(communica-)]TJ 1 0 0 1 124.802 237.799 Tm [(tion)-250(operations.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F75 11.9552 Tf -24.907 -17.904 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.454 -16.714 Td [(1.)]TJ +0 g 0 G + [-461(A)-250(call)-250(to)-250(this)-250(r)18(outine)-250(must)-250(pr)18(ecede)-250(any)-250(other)-250(PSBLAS)-250(call.)]TJ +0 g 0 G + 0 -18.32 Td [(2.)]TJ +0 g 0 G + 0.98 0 0 1 124.802 137.04 Tm [(It)-236(is)-237(an)-236(err)18(or)-236(to)-236(specify)-236(a)-237(value)-236(for)]TJ/F78 9.9626 Tf 1 0 0 1 271.283 137.04 Tm [(n)-80(p)]TJ/F84 9.9626 Tf 0.98 0 0 1 285.033 137.04 Tm [(gr)18(eater)-236(than)-236(the)-236(number)-237(of)-236(pr)18(ocesses)]TJ 1 0 0 1 124.802 125.085 Tm [(available)-250(in)-250(the)-250(underlying)-250(base)-250(parallel)-250(envir)18(onment.)]TJ +0 g 0 G + 139.477 -34.647 Td [(113)]TJ 0 g 0 G ET endstream endobj -1380 0 obj +1769 0 obj << -/Length 3007 +/Length 4738 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT 
-/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(7.2)]TJ 0.996 0 0 1 177.604 706.129 Tm [(psb)]TJ +ET +q +1 0 0 1 198.159 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 0.996 0 0 1 201.745 706.129 Tm [(info)-250(\227)-249(Return)-250(information)-250(about)-249(PSBLAS)-250(parallel)-250(en-)]TJ 1 0 0 1 177.269 692.181 Tm [(vironment)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(ila)]TJ +/F145 9.9626 Tf -26.564 -18.964 Td [(call)-525(psb_info\050ctxt,)-525(iam,)-525(np\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 165.649 651.3 Tm [(This)-224(subr)19(outine)-224(r)19(eturns)-224(information)-223(about)-224(the)-223(PSBLAS)-224(parallel)-224(envi)1(r)18(onment,)]TJ 1 0 0 1 150.705 639.344 Tm [(de\002ning)-250(a)-250(virtual)-250(parallel)-250(machine.)]TJ 0 g 0 G -/F54 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F75 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf 
-207.221 -19.925 Td [(jla)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ/F51 11.9552 Tf -207.221 -21.918 Td [(Notes)]TJ +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(This)-250(r)18(outine)-250(may)-250(only)-250(be)-250(called)-250(if)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state;)]TJ + 0 -19.925 Td [(ctxt)]TJ 0 g 0 G - 0 -19.925 Td [(2.)]TJ +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.885 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - [-500(This)-370(r)18(outine)-370(automatically)-370(ignor)18(es)-370(edges)-370(that)-370(do)-370(not)-370(insist)-370(on)-370(the)-370(cur)18(-)]TJ 12.453 -11.955 Td [(r)18(ent)-288(pr)18(ocess,)-298(i.e.)-424(edges)-288(for)-288(which)-288(neither)-288(the)-288(starting)-288(nor)-288(the)-288(end)-288(vertex)]TJ 0 -11.955 Td 
[(belong)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ +/F75 9.9626 Tf -24.906 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - -12.453 -19.926 Td [(3.)]TJ 0 g 0 G - [-500(The)-322(second)-323(form)-322(of)-323(this)-322(r)18(outine)-323(will)-322(be)-322(useful)-323(when)-322(dealing)-323(with)-322(user)18(-)]TJ 12.453 -11.955 Td [(speci\002ed)-250(index)-250(mappings;)-250(see)-250(also)]TJ -0 0 1 rg 0 0 1 RG - [-250(2.3.1)]TJ + 0 -19.925 Td [(iam)]TJ 0 g 0 G - [(.)]TJ +/F84 9.9626 Tf 22.137 0 Td [(Identi\002er)-250(of)-250(curr)18(ent)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 2.769 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ/F190 10.3811 Tf 134.191 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1)]TJ/F190 10.3811 Tf 7.873 0 Td [(\024)]TJ/F78 9.9626 Tf 11.016 0 Td [(i)-47(a)-25(m)]TJ/F190 10.3811 Tf 18.678 0 Td [(\024)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.504 0 Td [(\000)]TJ/F84 9.9626 Tf 10.131 0 Td [(1)]TJ +0 g 0 G +/F75 9.9626 Tf -239.579 -19.925 Td [(np)]TJ +0 g 0 G +/F84 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.071 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F75 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F84 
9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + 1.02 0 0 1 175.611 332.495 Tm [(For)-337(pr)18(ocesses)-337(in)-337(the)-337(virtual)-337(parallel)-337(machine)-336(the)-337(identi\002er)-337(will)-337(satisfy)]TJ 1 0 0 1 175.611 320.54 Tm [(0)]TJ/F190 10.3811 Tf 7.874 0 Td [(\024)]TJ/F78 9.9626 Tf 11.016 0 Td [(i)-47(a)-25(m)]TJ/F190 10.3811 Tf 18.678 0 Td [(\024)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.503 0 Td [(\000)]TJ/F84 9.9626 Tf 10.131 0 Td [(1;)]TJ +0 g 0 G + -84.741 -19.925 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 175.611 300.615 Tm [(If)-304(the)-303(user)-304(has)-304(r)18(equested)-304(on)]TJ/F145 9.9626 Tf 1 0 0 1 305.201 300.615 Tm [(psb_init)]TJ/F84 9.9626 Tf 1.02 0 0 1 350.13 300.615 Tm [(a)-304(number)-303(of)-304(pr)18(ocesses)-304(less)-304(than)]TJ 1.02 0 0 1 175.611 288.659 Tm [(the)-253(total)-254(available)-253(in)-253(the)-254(parallel)-253(execution)-253(envir)17(onment,)-255(the)-253(r)17(emaining)]TJ 0.992 0 0 1 175.313 276.704 Tm [(pr)18(ocesses)-251(will)-251(have)-251(on)-252(r)19(eturn)]TJ/F78 9.9626 Tf 1 0 0 1 305.592 276.704 Tm [(i)-47(a)-25(m)]TJ/F192 10.3811 Tf 18.677 0 Td [(=)]TJ/F190 10.3811 Tf 11.086 0 Td [(\000)]TJ/F84 9.9626 Tf 0.992 0 0 1 343.549 276.704 Tm [(1;)-252(the)-251(only)-251(call)-251(involving)]TJ/F145 9.9626 Tf 1 0 0 1 453.875 276.704 Tm [(ctxt)]TJ/F84 9.9626 Tf 0.992 0 0 1 477.279 276.704 Tm [(that)]TJ 1 0 0 1 175.611 264.749 Tm [(any)-250(such)-250(pr)18(ocess)-250(may)-250(execute)-250(is)-250(to)]TJ/F145 9.9626 Tf 155.296 0 Td [(psb_exit)]TJ/F84 9.9626 Tf 41.843 0 Td [(.)]TJ 0 g 0 G - 141.968 -314.819 Td [(75)]TJ + -57.662 -174.311 Td [(114)]TJ 0 g 0 G ET endstream endobj -1389 0 obj +1775 0 obj << -/Length 5969 +/Length 4507 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.3)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(7.3)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 
706.129 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(routine)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(exit)-250(\227)-250(Exit)-250(from)-250(PSBLAS)-250(parallel)-250(environment)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdasb\050desc_a,)-525(info)-525([,)-525(mold]\051)]TJ +/F145 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_exit\050ctxt\051)]TJ 0 -11.955 Td [(call)-525(psb_exit\050ctxt,close\051)]TJ/F84 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(exits)-250(fr)18(om)-250(the)-250(PSBLAS)-250(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 625.596 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 625.397 Td [(a)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 577.576 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 577.576 Td [(type)]TJ +/F84 9.9626 Tf 21.021 0 Td 
[(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.926 Td [(close)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 127.063 525.77 Tm [(Whether)-235(to)-235(close)-236(all)-235(data)-235(str)8(uctur)19(es)-236(r)19(elated)-235(to)-236(the)-235(virtual)-235(parallel)-235(machine,)]TJ 1 0 0 1 124.802 513.815 Tm [(besides)-250(those)-250(associated)-250(with)-250(ctxt.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(variable,)-250(default)-250(value:)-310(tr)8(ue.)]TJ/F75 11.9552 Tf -24.907 -19.925 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 124.493 426.144 Tm [(This)-218(r)19(outine)-218(may)-217(be)-218(called)-217(even)-218(if)-218(a)-217(pr)18(evious)-217(call)-218(to)]TJ/F145 9.9626 Tf 1 0 0 1 345.207 426.144 Tm [(psb_info)]TJ/F84 9.9626 Tf 0.98 0 0 1 389.174 426.144 Tm [(has)-218(r)19(eturned)]TJ 1.02 0 0 1 124.384 414.189 Tm [(with)]TJ/F78 9.9626 Tf 1 0 0 1 147.897 414.189 Tm [(i)-47(a)-25(m)]TJ/F192 10.3811 Tf 19.251 0 Td [(=)]TJ/F190 10.3811 Tf 11.66 0 Td [(\000)]TJ/F84 9.9626 Tf 1.02 0 0 1 187.002 414.189 Tm 
[(1;)-291(indeed,)-283(it)-275(it)-276(is)-276(the)-275(only)-276(r)18(outine)-276(that)-275(may)-276(be)-275(called)-276(with)]TJ 1 0 0 1 124.802 402.234 Tm [(ar)18(gument)]TJ/F145 9.9626 Tf 45.389 0 Td [(ctxt)]TJ/F84 9.9626 Tf 23.412 0 Td [(in)-250(this)-250(situation.)]TJ +0 g 0 G + -81.254 -19.926 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 124.413 382.308 Tm [(A)-249(call)-249(to)-248(this)-249(r)18(outine)-249(with)]TJ/F145 9.9626 Tf 1 0 0 1 241.568 382.308 Tm [(close=.true.)]TJ/F84 9.9626 Tf 1.02 0 0 1 306.86 382.308 Tm [(implies)-249(a)-248(call)-249(to)]TJ/F145 9.9626 Tf 1 0 0 1 379.547 382.308 Tm [(MPI_Finalize)]TJ/F84 9.9626 Tf 1.02 0 0 1 442.311 382.308 Tm [(,)]TJ 1 0 0 1 124.802 370.353 Tm [(after)-250(which)-250(no)-250(parallel)-250(r)18(outine)-250(may)-250(be)-250(called.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + -12.453 -19.925 Td [(3.)]TJ +0 g 0 G + 1.015 0 0 1 124.802 350.428 Tm [(If)-245(the)-246(user)-245(whishes)-246(to)-245(use)-246(multiple)-245(communication)-246(contexts)-245(in)-246(the)-245(same)]TJ 1.02 0 0 1 124.503 338.473 Tm [(pr)18(ogram,)-354(or)-332(to)-332(enter)-332(and)-332(exit)-332(multiple)-332(times)-332(into)-332(the)-332(parallel)-333(en)1(vir)17(on-)]TJ 1.02 0 0 1 124.802 326.518 Tm [(ment,)-356(this)-334(r)18(outine)-334(may)-334(be)-334(called)-334(to)-333(selectively)-334(close)-334(the)-334(contexts)-334(with)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 314.562 Tm [(close=.false.)]TJ/F84 9.9626 Tf 0.98 0 0 1 192.796 314.562 Tm [(,)-206(while)-194(on)-194(the)-194(last)-194(call)-193(it)-194(should)-194(be)-194(called)-194(with)]TJ/F145 9.9626 Tf 1 0 0 1 386.196 314.562 Tm [(close=.true.)]TJ/F84 9.9626 Tf -261.394 -11.955 Td [(to)-250(shutdown)-250(in)-250(a)-250(clean)-250(way)-250(the)-250(entir)18(e)-250(parallel)-250(envir)18(onment.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(mold)]TJ + 139.477 -212.169 Td [(115)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td 
[(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(index)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F59 9.9626 Tf 221.926 0 Td [(psb)]TJ ET -q -1 0 0 1 413.855 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1783 0 obj +<< +/Length 2607 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 416.994 509.83 Td [(T)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(7.4)-1000(psb)]TJ ET q -1 0 0 1 422.851 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 425.99 509.83 Td [(base)]TJ +/F75 11.9552 Tf 201.825 706.129 Td [(get)]TJ ET q -1 0 0 1 447.539 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 219.148 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 450.677 509.83 Td [(vect)]TJ +/F75 11.9552 Tf 222.735 706.129 Td [(mpi)]TJ ET q -1 0 0 1 472.226 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 245.365 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 475.364 509.83 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F75 11.9552 Tf 248.952 706.129 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ 0 g 0 G -/F51 9.9626 Tf -345.58 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G +/F145 9.9626 Tf -98.247 -18.964 Td [(icomm)-525(=)-525(psb_get_mpi_comm\050ctxt\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 165.649 665.247 Tm [(This)-345(funct)1(ion)-345(r)18(eturns)-345(the)-344(MPI)-345(communicator)-344(associated)-345(with)-344(a)-345(PSBLAS)]TJ 1 0 0 1 150.705 653.292 Tm [(context)]TJ 
0 g 0 G - 0 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 468.186 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 467.987 Td [(a)]TJ +/F75 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 420.366 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 420.166 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 420.366 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 420.166 Td [(type)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(info)]TJ + 0 -19.926 Td [(ctxt)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.885 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td 
[(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.917 Td [(Notes)]TJ +/F75 9.9626 Tf -24.906 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(assembled)-250(state.)]TJ -12.453 -19.925 Td [(This)-274(call)-275(will)-274(set)-275(up)-274(all)-275(the)-274(necessary)-275(information)-274(for)-275(the)-274(halo)-275(data)-274(exchanges.)]TJ 0 -11.955 Td [(In)-337(doing)-337(so,)-358(the)-337(library)-337(will)-337(need)-336(to)-337(identify)-337(the)-337(set)-337(of)-337(pr)18(ocesse)1(s)-337(owning)-337(the)]TJ 0 -11.955 Td [(halo)-381(indices)-381(thr)18(ough)-380(the)-381(use)-381(of)-381(the)]TJ/F59 9.9626 Tf 163.76 0 Td [(desc%fnd_owner\050\051)]TJ/F54 9.9626 Tf 87.479 0 Td [(method;)-446(the)-381(owning)]TJ -251.239 -11.956 Td [(pr)18(ocesses)-273(ar)18(e)-273(the)-273(topological)-272(neighbours)-273(of)-273(the)-273(calling)-273(pr)18(ocess.)-379(If)-272(the)-273(user)-273(has)]TJ 0 -11.955 Td [(some)-248(backgr)18(ound)-248(information)-248(on)-248(the)-248(pr)18(ocesses)-248(that)-248(ar)18(e)-248(neighbours)-248(of)-248(the)-248(cur)18(-)]TJ 0 -11.955 Td [(r)18(ent)-274(one,)-281(it)-274(is)-275(possible)-274(to)-274(specify)-275(explicitly)-274(the)-274(list)-275(of)-274(adjacent)-274(pr)18(ocesses)-275(with)-274(a)]TJ 0 -11.955 Td [(call)-327(to)]TJ/F59 9.9626 Tf 30.401 0 Td [(desc%set_p_adjcncy\050list\051)]TJ/F54 9.9626 Tf 125.529 0 Td [(;)-365(this)-327(will)-327(speed)-327(u)1(p)-327(the)-327(subsequent)-327(call)-327(to)]TJ/F59 9.9626 Tf -155.93 -11.955 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)]TJ + 0 -19.926 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 
9.9626 Tf 1.02 0 0 1 223.173 503.852 Tm [(The)-300(MPI)-299(communicator)-300(associated)-300(with)-300(the)-300(PSBLAS)-299(virtual)]TJ 1 0 0 1 175.313 491.897 Tm [(parallel)-250(machine.)]TJ 0.298 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ/F75 11.9552 Tf 0.999 0 0 1 150.705 422.159 Tm [(Notes)]TJ/F84 9.9626 Tf 0.999 0 0 1 185.028 422.159 Tm [(The)-249(subr)18(outine)-250(version)]TJ/F145 9.9626 Tf 1 0 0 1 289.412 422.159 Tm [(psb_get_mpicomm)]TJ/F84 9.9626 Tf 0.999 0 0 1 370.349 422.159 Tm [(is)-249(still)-250(available)-249(but)-250(is)-249(depr)18(e-)]TJ 1 0 0 1 150.705 410.204 Tm [(cated.)]TJ 0 g 0 G - 119.801 -116.528 Td [(76)]TJ + 164.383 -319.766 Td [(116)]TJ 0 g 0 G ET endstream endobj -1396 0 obj +1787 0 obj << -/Length 3168 +/Length 3383 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.4)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(7.5)-1000(psb)]TJ ET q 1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_cdcpy\050desc_in,)-525(desc_out,)-525(info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 625.596 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 625.397 Td [(in)]TJ -0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -12.981 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td 
[(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(get)]TJ ET q -1 0 0 1 309.258 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 168.338 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 312.397 577.576 Td [(desc)]TJ +/F75 11.9552 Tf 171.925 706.129 Td [(mpi)]TJ ET q -1 0 0 1 333.945 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 194.556 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 337.084 577.576 Td [(type)]TJ +/F75 11.9552 Tf 198.143 706.129 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F145 9.9626 Tf -98.248 -18.964 Td [(rank)-525(=)-525(psb_get_mpi_rank\050ctxt,)-525(id\051)]TJ/F84 9.9626 Tf 14.944 -21.918 Td [(This)-250(function)-250(r)18(eturns)-250(the)-250(MPI)-250(rank)-250(of)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F78 9.9626 Tf 257.337 0 Td [(i)-32(d)]TJ 0 g 0 G +/F75 9.9626 Tf -272.281 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 535.932 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 535.733 Td [(out)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 19.925 0 Td [(the)-250(communication)-250(descriptor)-250(copy)111(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td 
[(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 488.112 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 487.912 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 488.112 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 487.912 Td [(type)]TJ +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(info)]TJ +/F75 9.9626 Tf -24.907 -19.926 Td [(id)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F84 9.9626 Tf 14.386 0 Td [(Identi\002er)-250(of)-250(a)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 
39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(0)]TJ/F190 10.3811 Tf 141.939 0 Td [(\024)]TJ/F78 9.9626 Tf 11.016 0 Td [(i)-32(d)]TJ/F190 10.3811 Tf 11.086 0 Td [(\024)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.504 0 Td [(\000)]TJ/F84 9.9626 Tf 10.131 0 Td [(1)]TJ 0 g 0 G - 141.968 -329.728 Td [(77)]TJ +/F75 9.9626 Tf -223.669 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(Funciton)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 72.468 0 Td [(The)-250(MPI)-250(rank)-250(associated)-250(with)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F78 9.9626 Tf 224.292 0 Td [(i)-32(d)]TJ/F84 9.9626 Tf 8.194 0 Td [(.)]TJ -280.047 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ/F75 11.9552 Tf 0.98 0 0 1 99.895 378.323 Tm [(Notes)]TJ/F84 9.9626 Tf 0.98 0 0 1 133.429 378.323 Tm [(The)-235(subr)18(outine)-235(version)]TJ/F145 9.9626 Tf 1 0 0 1 235.414 378.323 Tm [(psb_get_rank)]TJ/F84 9.9626 Tf 0.98 0 0 1 300.476 378.323 Tm [(is)-235(still)-236(available)-235(but)-236(is)-235(depr)18(ecated.)]TJ +0 g 0 G + 1 0 0 1 264.279 90.438 Tm [(117)]TJ 0 g 0 G ET endstream endobj -1401 0 obj +1791 0 obj << -/Length 2167 +/Length 1171 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.5)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(7.6)-1000(psb)]TJ ET q 1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(cdfree)-250(\227)-250(Frees)-250(a)-250(communication)-250(descriptor)]TJ +/F75 11.9552 Tf 201.825 706.129 Td [(wtime)-250(\227)-250(W)74(all)-250(clock)-250(timing)]TJ 0 g 0 G 0 g 0 G 
-/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdfree\050desc_a,)-525(info\051)]TJ +/F145 9.9626 Tf -51.12 -18.964 Td [(time)-525(=)-525(psb_wtime\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 165.649 665.247 Tm [(This)-374(function)-374(r)17(eturns)-374(a)-374(wall)-374(clock)-374(timer)72(.)-691(The)-374(r)18(esolution)-375(of)-374(the)-374(timer)-374(is)]TJ 1 0 0 1 150.705 653.292 Tm [(dependent)-250(on)-250(the)-250(underlying)-250(parallel)-250(envir)18(onment)-250(implementation.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Exit)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 625.596 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 625.397 Td [(a)]TJ + 0 -19.926 Td [(Function)-250(value)]TJ +0 g 0 G +/F84 9.9626 Tf 72.776 0 Td [(the)-250(elapsed)-250(time)-250(in)-250(seconds.)]TJ -47.87 -11.955 Td [(Returned)-250(as:)-310(a)]TJ/F145 9.9626 Tf 66.022 0 Td [(real\050psb_dpk_\051)]TJ/F84 9.9626 Tf 75.716 0 Td [(variable.)]TJ +0 g 0 G + -2.261 -491.123 Td [(118)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)-250(to)-250(be)-250(fr)18(eed.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ ET -q -1 0 0 1 360.068 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1795 0 obj +<< 
+/Length 1466 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(7.7)]TJ 0.998 0 0 1 126.795 706.129 Tm [(psb)]TJ ET q -1 0 0 1 384.755 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 147.389 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 387.893 577.576 Td [(type)]TJ +/F75 11.9552 Tf 0.998 0 0 1 150.976 706.129 Tm [(barrier)-250(\227)-251(Sinchronizati)1(on)-251(point)-250(parallel)-250(environment)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F145 9.9626 Tf 1 0 0 1 99.895 687.165 Tm [(call)-525(psb_barrier\050ctxt\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 114.839 665.247 Tm [(This)-293(subr)18(outine)-293(acts)-293(as)-293(an)-293(explicit)-293(synchr)18(onization)-293(point)-293(for)-293(the)-293(PSBLAS)]TJ 1 0 0 1 99.596 653.292 Tm [(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G +/F75 9.9626 Tf 0.299 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -19.925 Td [(info)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 141.968 -397.474 Td [(78)]TJ +0 g 0 G + 0 -19.926 Td [(ctxt)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td 
[(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G + 139.477 -455.258 Td [(119)]TJ 0 g 0 G ET endstream endobj -1407 0 obj +1799 0 obj << -/Length 5710 +/Length 1287 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.6)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(7.8)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(cdbldext)-190(\227)-190(Build)-190(an)-190(extended)-190(communication)-190(descrip-)]TJ -24.221 -13.948 Td [(tor)]TJ +/F75 11.9552 Tf 201.825 706.129 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -26.9 -19.693 Td [(call)-525(psb_cdbldext\050a,desc_a,nl,desc_out,)-525(info,)-525(extype\051)]TJ/F54 9.9626 Tf 14.944 -23.422 Td [(This)-379(subr)18(outine)-379(builds)-379(an)-379(extended)-379(communication)-379(descriptor)74(,)-411(based)-379(on)]TJ -14.944 -11.955 Td [(the)-428(input)-428(descriptor)]TJ/F59 9.9626 Tf 95.499 0 Td [(desc_a)]TJ/F54 9.9626 Tf 35.646 0 Td [(and)-428(on)-428(the)-428(stencil)-428(speci\002ed)-428(thr)18(ough)-428(the)-427(input)]TJ -131.145 -11.955 Td [(sparse)-250(matrix)]TJ/F59 9.9626 Tf 62.107 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +/F145 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_abort\050ctxt\051)]TJ/F84 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(aborts)-250(computation)-250(on)-250(the)-250(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G -/F51 9.9626 Tf -67.337 -21.054 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.429 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.439 -19.925 Td 
[(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -21.43 Td [(a)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)-250(Scope:)]TJ/F51 9.9626 Tf 100.691 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -107.326 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(type.)]TJ +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.885 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.429 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 504.147 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 503.948 Td [(a)]TJ + 139.477 -467.213 Td [(120)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ ET -q -1 0 0 1 309.258 456.326 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1803 0 obj +<< +/Length 6065 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 
9.9626 Tf 312.397 456.127 Td [(Tspmat)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(7.9)-1000(psb)]TJ ET q -1 0 0 1 344.406 456.326 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 347.544 456.127 Td [(type)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(bcast)-250(\227)-250(Broadcast)-250(data)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -21.43 Td [(nl)]TJ +/F145 9.9626 Tf -51.121 -19.198 Td [(call)-525(psb_bcast\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F84 9.9626 Tf 0.992 0 0 1 114.839 664.53 Tm [(This)-251(subr)18(outine)-251(implements)-251(a)-251(br)18(oadcast)-250(operation)-251(based)-251(on)-251(the)-251(underlying)]TJ 1 0 0 1 99.895 652.575 Tm [(communication)-250(library)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(the)-250(number)-250(of)-250(additional)-250(layers)-250(desir)18(ed.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F52 9.9626 Tf 131.102 0 Td [(n)-25(l)]TJ/F83 10.3811 Tf 11.873 0 Td [(\025)]TJ/F54 9.9626 Tf 10.961 0 Td [(0.)]TJ +/F75 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -178.843 -21.43 Td [(extype)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 34.869 0 Td [(the)-250(kind)-250(of)-250(estension)-250(r)18(equir)18(ed.)]TJ -9.962 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 40.677 0 Td [(.)]TJ -64.677 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td 
[(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-345(as:)-501(an)-345(integer)-346(value)]TJ/F59 9.9626 Tf 136.676 0 Td [(psb_ovt_xhal_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ/F59 9.9626 Tf 6.169 0 Td [(psb_ovt_asov_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)-369(default:)]TJ/F59 9.9626 Tf -278.833 -11.955 Td [(psb_ovt_xhal_)]TJ +/F75 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -23.422 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -20.408 Td [(ctxt)]TJ 0 g 0 G - 0 -21.43 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 261.018 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 260.819 Td [(out)]TJ +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 19.925 0 Td [(the)-250(extended)-250(communication)-250(descriptor)74(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 213.198 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 212.998 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 213.198 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 212.998 Td [(type)]TJ +/F75 9.9626 Tf 
-24.907 -20.409 Td [(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 19.368 0 Td [(On)-250(the)-250(r)18(oot)-250(pr)18(ocess,)-250(the)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 475.42 Tm [(Speci\002ed)-228(as:)-303(an)-228(integer)76(,)-234(r)19(eal)-228(or)-228(complex)-228(variable,)-234(which)-228(may)-228(be)-228(a)-228(scalar)76(,)-234(or)]TJ 0.98 0 0 1 124.802 463.465 Tm [(a)-235(rank)-235(1)-235(or)-235(2)-235(array)113(,)-239(or)-235(a)-235(character)-235(or)-235(logical)-235(variable,)-239(which)-235(may)-235(be)-236(a)-235(scalar)]TJ 1 0 0 1 124.802 451.509 Tm [(or)-250(rank)-250(1)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.429 Td [(info)]TJ +/F75 9.9626 Tf -24.907 -20.408 Td [(root)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -23.422 Td [(Notes)]TJ +/F84 9.9626 Tf 23.253 0 Td [(Root)-250(pr)18(ocess)-250(holding)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 1.654 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td 
[(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F148 10.3811 Tf 138.85 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F148 10.3811 Tf 19.923 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.504 0 Td [(\000)]TJ/F84 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(0)]TJ 0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(79)]TJ +/F75 9.9626 Tf -246.025 -20.409 Td [(mode)]TJ 0 g 0 G -ET - -endstream -endobj -1412 0 obj -<< -/Length 1484 ->> -stream +/F84 9.9626 Tf 0.983 0 0 1 129.843 362.871 Tm [(Whether)-256(the)-255(call)-256(is)-256(started)-256(in)-255(non-blocking)-256(mode)-256(and)-256(completed)-255(later)75(,)-256(or)]TJ 1 0 0 1 124.802 350.916 Tm [(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 303.096 Tm [(Speci\002ed)-285(as:)-383(an)-285(integer)-285(value.)-423(The)-284(action)-285(to)-285(be)-285(taken)-284(is)-285(determined)-285(by)]TJ 1.02 0 0 1 124.802 291.14 Tm [(its)-329(bit)-328(\002elds,)-350(which)-329(can)-328(be)-329(set)-329(with)-328(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 323.986 291.14 Tm [(OR)]TJ/F84 9.9626 Tf 1.02 0 0 1 334.446 291.14 Tm [(.)-329(Basic)-328(action)-329(values)-329(ar)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 279.185 Tm [(psb_collective_start_)]TJ/F84 9.9626 Tf 0.98 0 0 1 234.639 279.185 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 239.567 279.185 Tm [(psb_collective_end_)]TJ/F84 9.9626 Tf 0.98 0 0 1 338.943 279.185 Tm 
[(.)-316(Default:)-316(both)-255(\002elds)-254(ar)18(e)]TJ 1 0 0 1 124.802 267.23 Tm [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G +/F75 9.9626 Tf -24.907 -32.364 Td [(request)]TJ 0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.439 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 163.158 706.129 Td [(1.)]TJ +/F75 9.9626 Tf -57.485 -22.401 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(Specifying)]TJ/F59 9.9626 Tf 61.745 0 Td [(psb_ovt_xhal_)]TJ/F54 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F59 9.9626 Tf 32.282 0 Td [(extype)]TJ/F54 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 Td [(a)-400(descriptor)-400(for)-400(a)-400(domain)-400(partition)-400(in)-400(which)-400(the)-400(additional)-400(layers)-400(ar)18(e)]TJ 0 -11.955 Td [(fetched)-222(as)-221(part)-222(of)-221(an)-222(\050extended\051)-221(halo;)-232(however)-221(the)-222(index-to-pr)18(ocess)-221(map-)]TJ 0 -11.956 Td [(ping)-250(is)-250(identical)-250(to)-250(that)-250(of)-250(the)-250(base)-250(descriptor;)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ + 0 -20.409 Td [(dat)]TJ 0 g 0 G - [-500(Specifying)]TJ/F59 9.9626 Tf 61.745 0 Td [(psb_ovt_asov_)]TJ/F54 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F59 9.9626 Tf 32.282 0 Td [(extype)]TJ/F54 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 
Td [(a)-330(descriptor)-331(with)-330(an)-330(overlapped)-331(decomposition:)-470(the)-331(additional)-330(layer)-330(is)]TJ 0 -11.955 Td [(aggr)18(egated)-326(to)-326(the)-326(local)-326(subdomain)-326(\050and)-326(thus)-326(is)-325(an)-326(overlap\051,)-345(and)-326(a)-326(new)]TJ 0 -11.955 Td [(halo)-250(extending)-250(beyond)-250(the)-250(last)-250(additional)-250(layer)-250(is)-250(formed.)]TJ +/F84 9.9626 Tf 19.368 0 Td [(On)-250(all)-250(pr)18(ocesses)-250(other)-250(than)-250(r)18(oot,)-250(the)-250(br)18(oadcasted)-250(data.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ 0 g 0 G - 141.968 -524.035 Td [(80)]TJ + 73.405 -29.888 Td [(121)]TJ 0 g 0 G ET endstream endobj -1420 0 obj +1808 0 obj << -/Length 5699 +/Length 5230 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.7)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G +/F84 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 1.013 0 0 1 175.611 694.174 Tm [(Speci\002ed)-247(as:)-308(an)-247(integer)73(,)-248(r)18(eal)-248(or)-247(complex)-247(variable,)-248(which)-248(may)-247(be)-247(a)-248(scalar)73(,)]TJ 0.98 0 0 1 175.611 682.219 Tm [(or)-245(a)-245(rank)-245(1)-245(or)-245(2)-245(array)114(,)-247(or)-245(a)-245(character)-245(or)-245(logical)-245(scalar)76(.)-558(T)92(ype,)-247(kind,)-247(rank)-245(and)]TJ 1 0 0 1 175.611 670.263 Tm [(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F59 9.9626 Tf -51.121 -19.277 Td [(call)-525(psb_spall\050a,)-525(desc_a,)-525(info)-525([,)-525(nnz,)-525(dupl,)-525(bldmode]\051)]TJ +/F75 9.9626 Tf -24.906 -19.925 Td [(request)]TJ 0 g 0 
G -/F51 9.9626 Tf 0 -22.403 Td [(T)90(ype:)]TJ +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.44 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ/F75 11.9552 Tf -57.484 -21.917 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.571 Td [(On)-250(Entry)]TJ + 0.98 0 0 1 175.303 560.674 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 193.994 560.674 Tm [(dat)]TJ/F84 9.9626 Tf 0.98 0 0 1 212.032 560.674 Tm [(ar)18(gument)-240(is)-240(both)-241(input)-240(and)-241(output,)-243(and)-240(its)-241(value)-240(may)-240(be)-241(changed)]TJ 1 0 0 1 175.611 548.719 Tm [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G - 0 -20.572 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 623.505 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 623.306 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 575.684 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 575.485 Td [(desc)]TJ + 1.02 0 0 1 175.303 528.794 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 195.057 528.794 Tm [(mode)]TJ/F84 9.9626 Tf 1.02 0 0 1 218.722 528.794 Tm [(ar)18(gument)-270(can)-270(be)-270(built)-270(with)-270(the)-270(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 395.907 528.794 Tm [(IOR\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 424.803 528.794 Tm [(operator;)-282(in)-270(the)]TJ 0.988 0 0 1 175.611 516.839 Tm [(following)-254(example,)-255(the)-254(ar)18(gument)-254(is)-255(for)18(cing)-254(immediate)-254(completion,)-255(hence)]TJ 1 0 0 1 175.611 504.884 Tm [(the)]TJ/F145 9.9626 Tf 16.309 0 Td [(request)]TJ/F84 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 333.945 575.684 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 175.611 465.033 cm +0 0 318.804 27.895 re f Q -BT -/F59 9.9626 Tf 337.084 575.485 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -20.572 Td [(nnz)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(An)-230(estimate)-230(of)-230(the)-230(number)-230(of)-231(nonzer)18(oes)-230(in)-230(the)-230(local)-230(part)-230(of)-230(the)-230(assembled)]TJ 2.77 -11.955 Td [(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 
0.13 RG +BT +/F233 8.9664 Tf 188.015 482.268 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.572 Td [(dupl)]TJ + [-525(psb_bcast\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 26.561 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F59 9.9626 Tf 164.941 0 Td [(psb_dupl_ovwrt_)]TJ/F54 9.9626 Tf 78.455 0 Td [(,)]TJ/F59 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F54 9.9626 Tf 67.994 0 Td [(.)]TJ + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -92.901 -20.572 Td [(bldmode)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 45.938 0 Td [(Whether)-372(to)-372(kee)1(p)-372(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F59 9.9626 Tf 128.287 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 94.146 0 Td [(.)]TJ -331.543 -11.955 Td [(Default:)]TJ/F59 9.9626 Tf 38.515 0 Td 
[(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G -/F51 9.9626 Tf -168.029 -22.402 Td [(On)-250(Return)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G 0 g 0 G - 0 -20.572 Td [(a)]TJ +/F84 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 231.892 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 231.692 Td [(Tspmat)]TJ + 1.02 0 0 1 175.113 435.145 Tm [(When)-250(splitting)-250(the)-250(operation)-250(in)-250(two)-250(c)1(alls,)-252(the)]TJ/F145 9.9626 Tf 1 0 0 1 379.88 435.145 Tm [(dat)]TJ/F84 9.9626 Tf 1.02 0 0 1 398.111 435.145 Tm [(ar)18(gument)]TJ/F78 9.9626 Tf 1.02 0 0 1 444.411 435.145 Tm [(must)-250(not)]TJ/F84 9.9626 Tf 1.02 0 0 1 483.929 435.145 Tm [(be)]TJ 1 0 0 1 175.611 423.19 Tm [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 344.406 231.892 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 175.611 350.463 cm +0 0 318.804 60.772 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 347.544 231.692 Td [(type)]TJ +/F233 8.9664 Tf 188.015 400.575 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-525(psb_bcast\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf 
-268.571 -20.571 Td [(info)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -22.564 Td [(Notes)]TJ + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -20.41 Td [(1.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ + [(bcast_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 154.421 -29.888 Td [(81)]TJ + -23.536 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1425 0 obj -<< -/Length 1141 ->> -stream +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 37.658 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F233 8.9664 Tf -37.658 -10.959 Td [(call)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 163.158 706.129 Td [(2.)]TJ + [-525(psb_bcast\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(The)-250(descriptor)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(assembled)-250(state.)]TJ + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(3.)]TJ + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 
[-500(Pr)18(oviding)-219(a)-219(good)-219(estimate)-218(for)-219(the)-219(number)-219(of)-219(nonzer)18(oes)]TJ/F52 9.9626 Tf 255.761 0 Td [(n)-25(n)-25(z)]TJ/F54 9.9626 Tf 18.305 0 Td [(in)-219(the)-219(assem-)]TJ -261.613 -11.956 Td [(bled)-295(matri)1(x)-295(may)-294(substantially)-295(impr)18(ove)-294(performance)-295(in)-294(the)-295(matrix)-294(build)]TJ 0 -11.955 Td [(phase,)-370(as)-346(it)-346(will)-345(r)18(educe)-346(or)-346(eliminate)-346(the)-346(need)-346(for)-345(\050potentially)-346(multiple\051)]TJ 0 -11.955 Td [(data)-250(r)18(eallocations;)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - -12.453 -19.925 Td [(4.)]TJ + [(bcast_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-500(Using)]TJ/F59 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 97.28 0 Td [(is)-315(likel)1(y)-315(to)-315(cause)-314(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ 0 g 0 G - 141.968 -528.02 Td [(82)]TJ +/F84 9.9626 Tf 103.537 -266.301 Td [(122)]TJ 0 g 0 G ET endstream endobj -1433 0 obj +1816 0 obj << -/Length 5375 +/Length 6196 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.8)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(7.10)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(spins)-233(\227)-233(Insert)-233(a)-233(set)-233(of)-234(coef)18(\002cients)-233(into)-233(a)-233(sparse)-233(matrix)]TJ +/F75 11.9552 Tf 156.993 706.129 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.121 -20.373 Td [(call)-525(psb_spins\050nz,)-525(ia,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ 0 -11.956 Td [(call)-525(psb_spins\050nr,)-525(irw,)-525(irp,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ +/F145 9.9626 Tf -57.098 -19.198 Td 
[(call)-525(psb_sum\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F84 9.9626 Tf 0.991 0 0 1 114.839 664.53 Tm [(This)-253(subr)18(outine)-253(implements)-254(a)-253(sum)-253(r)18(eduction)-253(operation)-254(based)-253(on)-253(the)-254(under)19(-)]TJ 1 0 0 1 99.895 652.575 Tm [(lying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -24.099 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.835 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -22.834 Td [(nz)]TJ + 0 -20.408 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.834 Td [(nr)]TJ +/F75 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 14.944 0 Td [(the)-250(number)-250(of)-250(r)18(ows)-250(to)-250(be)-250(inserted.)]TJ 9.963 
-11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +/F84 9.9626 Tf 19.059 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(sum.)]TJ 5.848 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 475.42 Tm [(Speci\002ed)-228(as:)-303(an)-228(integer)76(,)-234(r)19(eal)-228(or)-228(complex)-228(variable,)-234(which)-228(may)-228(be)-228(a)-228(scalar)76(,)-234(or)]TJ 0.991 0 0 1 124.802 463.465 Tm [(a)-252(rank)-253(1)-252(or)-252(2)-252(array)112(.)-565(T)90(ype,)-252(kind,)-252(rank)-252(and)-253(size)-252(must)-252(agr)18(ee)-252(on)-252(all)-253(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.834 Td [(irw)]TJ +/F75 9.9626 Tf 1 0 0 1 99.895 443.056 Tm [(root)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(the)-250(\002rst)-250(r)18(ow)-250(to)-250(be)-250(inserted.)]TJ 4.434 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +/F84 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(to)-250(hold)-250(the)-250(\002nal)-250(sum,)-250(or)]TJ/F190 10.3811 Tf 143.744 0 Td [(\000)]TJ/F84 9.9626 Tf 8.195 0 Td 
[(1)-250(to)-250(make)-250(it)-250(available)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -150.285 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F190 10.3811 Tf 131.102 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1)]TJ/F148 10.3811 Tf 7.873 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F148 10.3811 Tf 19.923 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.503 0 Td [(\000)]TJ/F84 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(-1.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.834 Td [(ia)]TJ +/F75 9.9626 Tf -254.344 -20.408 Td [(mode)]TJ 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(the)-250(r)18(ow)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F52 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F84 9.9626 Tf 0.983 0 0 1 129.843 374.827 Tm [(Whether)-256(the)-255(call)-256(is)-256(started)-256(in)-255(non-blocking)-256(mode)-256(and)-256(completed)-255(later)75(,)-256(or)]TJ 1 0 0 1 124.802 362.871 Tm [(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td 
[(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 315.051 Tm [(Speci\002ed)-285(as:)-383(an)-285(integer)-285(value.)-423(The)-284(action)-285(to)-285(be)-285(taken)-284(is)-285(determined)-285(by)]TJ 1.02 0 0 1 124.802 303.096 Tm [(its)-329(bit)-328(\002elds,)-350(which)-329(can)-328(be)-329(set)-329(with)-328(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 323.986 303.096 Tm [(OR)]TJ/F84 9.9626 Tf 1.02 0 0 1 334.446 303.096 Tm [(.)-329(Basic)-328(action)-329(values)-329(ar)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 291.14 Tm [(psb_collective_start_)]TJ/F84 9.9626 Tf 0.98 0 0 1 234.639 291.14 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 239.567 291.14 Tm [(psb_collective_end_)]TJ/F84 9.9626 Tf 0.98 0 0 1 338.943 291.14 Tm [(.)-316(Default:)-316(both)-255(\002elds)-254(ar)18(e)]TJ 1 0 0 1 124.802 279.185 Tm [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G -/F51 9.9626 Tf -196.043 -22.834 Td [(irp)]TJ +/F75 9.9626 Tf -24.907 -32.363 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(the)-250(r)18(ow)-250(pointers)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.645 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F52 9.9626 Tf 160.8 0 Td [(n)-15(r)]TJ/F85 10.3811 Tf 11.85 0 Td [(+)]TJ/F54 9.9626 Tf 10.131 0 Td [(1.)]TJ +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.439 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td 
[(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ 0 g 0 G -/F51 9.9626 Tf -207.688 -22.835 Td [(ja)]TJ +/F75 9.9626 Tf -57.485 -22.401 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F52 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -196.043 -22.835 Td [(val)]TJ + 0 -20.409 Td [(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 18.82 0 Td [(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-315(as:)-439(an)-314(array)-315(of)-315(size)]TJ/F52 9.9626 Tf 131.853 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.337 0 Td [(.)-504(Must)-314(be)-315(of)-315(the)-314(same)-315(type)-315(and)-314(kind)-315(of)]TJ -142.19 -11.956 Td 
[(the)-250(coef)18(\002cients)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 157.901 0 Td [(a)]TJ/F54 9.9626 Tf 4.548 0 Td [(.)]TJ +/F84 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(sum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0 g 0 G - -20.481 -29.887 Td [(83)]TJ + 83.328 -29.888 Td [(123)]TJ 0 g 0 G ET endstream endobj -1439 0 obj +1820 0 obj << -/Length 6861 +/Length 5025 >> stream 0 g 0 G 0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 706.129 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ -ET -q -1 0 0 1 328.257 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 331.395 658.308 Td [(desc)]TJ -ET -q -1 0 0 1 352.944 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 356.083 658.308 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 1.013 0 0 1 175.611 706.129 Tm 
[(Speci\002ed)-247(as:)-308(an)-247(integer)73(,)-248(r)18(eal)-248(or)-247(complex)-247(variable,)-248(which)-248(may)-247(be)-247(a)-248(scalar)73(,)]TJ 1 0 0 1 175.611 694.174 Tm [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ -0.308 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -226.299 -33.398 Td [(local)]TJ +/F75 9.9626 Tf -24.598 -19.926 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(Whether)-207(the)-207(entries)-207(in)-207(the)-208(indices)-207(vectors)]TJ/F59 9.9626 Tf 181.487 0 Td [(ia)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf 4.64 0 Td [(ja)]TJ/F54 9.9626 Tf 12.524 0 Td [(ar)18(e)-207(alr)18(eady)-207(in)-207(local)-208(num-)]TJ -210.765 -11.956 Td [(bering.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F59 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F54 9.9626 Tf 36.613 0 Td [(.)]TJ +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.44 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ/F75 11.9552 Tf -57.484 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -224.197 -23.056 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf 12.453 -19.926 Td 
[(1.)]TJ 0 g 0 G + 0.98 0 0 1 175.303 572.629 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 193.994 572.629 Tm [(dat)]TJ/F84 9.9626 Tf 0.98 0 0 1 212.032 572.629 Tm [(ar)18(gument)-240(is)-240(both)-241(input)-240(and)-241(output,)-243(and)-240(its)-241(value)-240(may)-240(be)-241(changed)]TJ 1 0 0 1 175.611 560.674 Tm [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G - 0 -21.444 Td [(a)]TJ + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(into)-250(which)-250(coef)18(\002cients)-250(will)-250(be)-250(inserted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 484.968 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 484.768 Td [(Tspmat)]TJ + 1.02 0 0 1 175.303 540.749 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 195.057 540.749 Tm [(mode)]TJ/F84 9.9626 Tf 1.02 0 0 1 218.722 540.749 Tm [(ar)18(gument)-270(can)-270(be)-270(built)-270(with)-270(the)-270(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 395.907 540.749 Tm [(IOR\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 424.803 540.749 Tm [(operator;)-282(in)-270(the)]TJ 0.988 0 0 1 175.611 528.794 Tm [(following)-254(example,)-255(the)-254(ar)18(gument)-254(is)-255(for)18(cing)-254(immediate)-254(completion,)-255(hence)]TJ 1 0 0 1 175.611 516.839 Tm [(the)]TJ/F145 9.9626 Tf 16.309 0 Td [(request)]TJ/F84 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 395.216 484.968 cm -[]0 d 0 J 
0.398 w 0 0 m 3.138 0 l S +1 0 0 1 175.611 476.988 cm +0 0 318.804 27.895 re f Q -BT -/F59 9.9626 Tf 398.354 484.768 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -268.57 -21.443 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 463.524 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 463.325 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ -ET -q -1 0 0 1 328.257 415.704 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 331.395 415.504 Td [(desc)]TJ -ET -q -1 0 0 1 352.944 415.704 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 356.083 415.504 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -226.299 -33.398 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -23.436 Td [(Notes)]TJ +/F233 8.9664 Tf 188.015 494.224 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -21.064 Td [(1.)]TJ + 
[-525(psb_sum\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(On)-312(entry)-312(to)-312(this)-312(r)18(out)1(ine)-312(the)-312(descriptor)-312(may)-312(be)-312(in)-312(either)-312(the)-311(build)-312(or)-312(as-)]TJ 12.453 -11.955 Td [(sembled)-250(state.)]TJ + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -21.443 Td [(2.)]TJ -0 g 0 G - [-500(On)-314(entry)-315(to)-314(this)-315(r)18(ou)1(tine)-315(the)-314(sparse)-315(matrix)-314(may)-314(be)-315(in)-314(either)-314(the)-315(build)-314(or)]TJ 12.453 -11.955 Td [(update)-250(state.)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - -12.453 -21.444 Td [(3.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G - [-500(If)-263(the)-263(descriptor)-263(is)-262(in)-263(the)-263(build)-263(state,)-266(then)-263(the)-263(sparse)-263(matrix)-262(must)-263(also)-263(be)]TJ 12.453 -11.955 Td [(in)-212(the)-212(build)-213(state;)-224(the)-213(action)-212(of)-212(the)-212(r)18(outine)-212(is)-213(to)-212(\050implicitly\051)-212(call)]TJ/F59 9.9626 Tf 271.732 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf -271.732 -11.955 Td [(to)-259(add)-259(entries)-259(to)-259(the)-259(sparsity)-259(pattern;)-263(each)-259(sparse)-259(matrix)-259(entry)-259(implicitly)]TJ 0 -11.955 Td [(de\002nes)-288(a)-288(graph)-288(edge,)-297(that)-288(is)-288(passed)-288(to)-288(the)-288(descriptor)-288(r)18(outine)-288(for)-288(the)-288(ap-)]TJ 0 -11.955 Td [(pr)18(opriate)-250(pr)18(ocessing;)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - -12.453 -21.444 Td [(4.)]TJ 0 g 0 G - [-500(The)-250(input)-250(data)-250(can)-250(be)-250(passed)-250(in)-250(either)-250(COO)-250(or)-250(CSR)-250(formats;)]TJ +/F84 9.9626 Tf -48.393 -36.165 Td [(3.)]TJ 0 g 0 G - 0 -21.443 Td [(5.)]TJ + 1.02 0 0 1 175.113 447.1 Tm [(When)-250(splitting)-250(the)-250(operation)-250(in)-250(two)-250(c)1(alls,)-252(the)]TJ/F145 9.9626 Tf 
1 0 0 1 379.88 447.1 Tm [(dat)]TJ/F84 9.9626 Tf 1.02 0 0 1 398.111 447.1 Tm [(ar)18(gument)]TJ/F78 9.9626 Tf 1.02 0 0 1 444.411 447.1 Tm [(must)-250(not)]TJ/F84 9.9626 Tf 1.02 0 0 1 483.929 447.1 Tm [(be)]TJ 1 0 0 1 175.611 435.145 Tm [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 175.611 362.418 cm +0 0 318.804 60.772 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-500(In)-307(COO)-307(format)-307(the)-306(coef)18(\002cients)-307(to)-307(be)-307(inserted)-307(ar)18(e)-307(r)18(epr)18(esented)-306(by)-307(the)-307(or)18(-)]TJ 12.453 -11.955 Td [(der)18(ed)-194(triples)]TJ/F52 9.9626 Tf 57.352 0 Td [(i)-47(a)]TJ/F85 10.3811 Tf 7.911 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)]TJ/F52 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.84 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F85 10.3811 Tf 13.37 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)-205(for)]TJ/F52 9.9626 Tf 19.208 0 Td [(i)]TJ/F85 10.3811 Tf 5.856 0 Td [(=)]TJ/F54 9.9626 Tf 10.961 0 Td [(1,)-179(.)-192(.)-191(.)-180(,)]TJ/F52 9.9626 Tf 26.608 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.337 0 Td [(;)-212(these)-194(triples)-194(ar)18(e)-193(arbitrary;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -60.701 -29.888 Td [(84)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 188.015 412.53 Td [(call)]TJ 0 g 0 G -ET - -endstream -endobj -1448 0 obj -<< -/Length 4535 ->> -stream + [-525(psb_sum\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 112.349 706.129 Td 
[(6.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(In)-272(CSR)-271(format)-272(the)-271(coef)18(\002cients)-272(to)-271(be)-272(inserted)-272(for)-271(each)-272(input)-271(r)18(ow)]TJ/F52 9.9626 Tf 294.598 0 Td [(i)]TJ/F85 10.3811 Tf 6.254 0 Td [(=)]TJ/F54 9.9626 Tf 11.36 0 Td [(1,)]TJ/F52 9.9626 Tf 9.257 0 Td [(n)-15(r)]TJ/F54 9.9626 Tf -309.016 -11.955 Td [(ar)18(e)-311(r)18(epr)18(esented)-312(by)-311(the)-311(or)18(der)18(ed)-312(triples)]TJ/F85 10.3811 Tf 171.689 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 5.251 0 Td [(+)]TJ/F52 9.9626 Tf 10.413 0 Td [(i)-22(r)-35(w)]TJ/F83 10.3811 Tf 16.818 0 Td [(\000)]TJ/F54 9.9626 Tf 10.358 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)]TJ/F52 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.841 0 Td [(\050)]TJ/F52 9.9626 Tf 4.622 0 Td [(j)]TJ/F85 10.3811 Tf 3.019 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F85 10.3811 Tf 13.37 0 Td [(\050)]TJ/F52 9.9626 Tf 4.622 0 Td [(j)]TJ/F85 10.3811 Tf 3.019 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)-327(for)]TJ/F52 9.9626 Tf 22.013 0 Td [(j)]TJ/F85 10.3811 Tf 6.917 0 Td [(=)]TJ/F52 9.9626 Tf -310.555 -11.955 Td [(i)-22(r)-90(p)]TJ/F85 10.3811 Tf 12.991 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)-179(.)-192(.)-191(.)-180(,)]TJ/F52 9.9626 Tf 21.557 0 Td [(i)-22(r)-90(p)]TJ/F85 10.3811 Tf 12.991 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 5.301 0 Td [(+)]TJ/F54 9.9626 Tf 10.407 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F83 10.3811 Tf 6.486 0 Td [(\000)]TJ/F54 9.9626 Tf 10.407 0 Td [(1;)-362(these)-325(triples)-324(should)-325(belong)-325(to)-324(the)-325(curr)18(ent)-325(pr)18(o-)]TJ -100.947 -11.956 Td [(cess,)-276(i.e.)]TJ/F52 9.9626 Tf 39.307 0 Td [(i)]TJ/F85 10.3811 Tf 5.103 0 Td [(+)]TJ/F52 9.9626 Tf 10.263 0 Td [(i)-22(r)-35(w)]TJ/F83 
10.3811 Tf 16.669 0 Td [(\000)]TJ/F54 9.9626 Tf 10.209 0 Td [(1)-271(should)-271(be)-271(one)-271(of)-271(the)-271(local)-270(indices,)-277(but)-271(ar)18(e)-270(otherwise)]TJ -81.551 -11.955 Td [(arbitrary;)]TJ + [(sum_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.925 Td [(7.)]TJ + -23.536 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(Ther)18(e)-315(is)-314(no)-315(r)18(equir)18(ement)-314(that)-315(a)-315(given)-314(r)18(ow)-315(must)-315(be)-314(passed)-315(in)-315(its)-314(entir)18(ety)]TJ 12.453 -11.955 Td [(to)-298(a)-299(single)-298(call)-298(to)-299(thi)1(s)-299(r)18(outine:)-406(the)-299(buildup)-298(of)-298(a)-299(r)18(ow)-298(may)-298(be)-299(split)-298(into)-298(as)]TJ 0 -11.955 Td [(many)-250(calls)-250(as)-250(desir)18(ed)-250(\050even)-250(in)-250(the)-250(CSR)-250(format\051;)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 37.658 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G - -12.453 -19.926 Td [(8.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(Coef)18(\002cients)-288(fr)18(om)-289(dif)18(fer)18(ent)-288(r)18(ows)-288(may)-289(also)-288(be)-288(mixed)-289(up)-288(fr)18(eely)-288(in)-289(a)-288(single)]TJ 12.453 -11.955 Td [(call,)-250(accor)18(ding)-250(to)-250(the)-250(application)-250(needs;)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F233 8.9664 Tf -37.658 -10.959 Td [(call)]TJ 0 g 0 G - -12.453 -19.925 Td [(9.)]TJ + [-525(psb_sum\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(Coef)18(\002cients)-190(fr)18(om)-190(matrix)-190(r)18(ows)-190(not)-190(owned)-190(by)-190(the)-190(calling)-190(pr)18(ocess)-190(ar)18(e)-190(tr)18(eated)]TJ 12.453 -11.955 Td [(accor)18(ding)-254(to)-254(the)-253(value)-254(of)]TJ/F59 9.9626 Tf 111.539 0 Td [(bldmode)]TJ/F54 9.9626 Tf 39.141 0 Td [(speci\002ed)-254(at)-253(allocation)-254(time;)-256(if)]TJ/F59 9.9626 Tf 131.512 0 Td [(bldmode)]TJ/F54 9.9626 Tf -282.192 -11.956 Td 
[(was)-300(chosen)-300(as)]TJ/F59 9.9626 Tf 66.146 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 97.136 0 Td [(the)-300(library)-300(will)-300(keep)-300(track)-301(of)-300(them,)]TJ -163.282 -11.955 Td [(otherwise)-250(they)-250(ar)18(e)-250(silently)-250(ignor)18(ed;)]TJ + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -17.435 -19.925 Td [(10.)]TJ + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(If)-295(the)-294(descriptor)-295(is)-295(i)1(n)-295(the)-295(assembled)-294(state,)-306(then)-295(any)-294(entries)-295(in)-295(the)-294(sparse)]TJ 17.435 -11.955 Td [(matrix)-284(that)-284(would)-284(generate)-284(additional)-284(communication)-284(r)18(equir)18(ements)-284(ar)18(e)]TJ 0 -11.955 Td [(ignor)18(ed;)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - -17.435 -19.926 Td [(11.)]TJ + [(sum_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-500(If)-268(the)-268(matrix)-268(is)-268(in)-268(the)-268(update)-268(state,)-273(any)-268(entries)-268(in)-268(positions)-268(that)-268(wer)18(e)-268(not)]TJ 17.435 -11.955 Td [(pr)18(esent)-250(in)-250(the)-250(original)-250(matrix)-250(ar)18(e)-250(ignor)18(ed.)]TJ 0 g 0 G - 141.968 -360.647 Td [(85)]TJ +/F84 9.9626 Tf 103.537 -278.256 Td [(124)]TJ 0 g 0 G ET endstream endobj -1348 0 obj +1719 0 obj << /Type /ObjStm /N 100 -/First 977 -/Length 10176 ->> -stream -1347 0 308 59 312 117 1344 174 1351 307 1349 446 1353 593 1354 651 1350 709 1357 829 -1355 968 1359 1126 1361 1185 1356 1244 1363 1391 1365 1509 1366 1567 1367 1625 1368 1683 1369 1741 -1370 1799 1362 1857 1374 1938 1372 2077 1376 2222 316 2281 1373 2339 1379 2459 1377 2598 1381 2756 -1382 2814 1383 2872 1384 2930 1378 2988 1388 3082 1385 3230 1386 3375 1390 3521 320 3580 1391 3638 -1387 3697 1395 3791 1392 3939 1393 4084 1397 4231 324 4289 1394 4346 1400 4440 1398 4579 1402 4724 -328 4783 1399 4841 1406 4935 1403 5083 1404 5228 1408 5375 332 5433 1405 5490 1411 
5610 1413 5728 -1414 5787 1415 5846 1410 5905 1419 5986 1416 6134 1417 6281 1421 6426 336 6484 1422 6541 1418 6599 -1424 6693 1426 6811 1427 6870 1428 6929 1429 6988 1423 7047 1432 7141 1434 7259 340 7317 1431 7374 -1438 7494 1430 7651 1435 7794 1436 7939 1440 8082 1441 8141 1442 8199 1443 8258 1444 8317 1445 8376 -1437 8435 1447 8555 1449 8673 1450 8731 1451 8789 1452 8847 1453 8905 1454 8963 1455 9021 1446 9079 -% 1347 0 obj -<< -/D [1345 0 R /XYZ 149.705 753.953 null] +/First 971 +/Length 9417 >> -% 308 0 obj +stream +1717 0 1718 59 1713 118 1722 213 1720 352 1724 497 413 555 1721 612 1726 735 1728 853 +1729 912 1730 971 1731 1030 1725 1089 1736 1211 1732 1368 1733 1513 1734 1660 1738 1807 417 1865 +1735 1922 1741 2030 1743 2148 421 2207 1740 2265 1745 2373 1747 2491 1748 2549 1749 2607 1750 2665 +1751 2723 1752 2781 1753 2839 1754 2897 1755 2955 1756 3013 1744 3071 1758 3193 1760 3311 425 3370 +1757 3428 1762 3509 1764 3627 429 3685 1765 3742 1766 3800 1761 3858 1768 3994 1770 4112 433 4171 +1771 4229 1772 4287 1767 4344 1774 4480 1776 4598 437 4656 1777 4713 1778 4771 1779 4829 1773 4887 +1782 5023 1784 5141 441 5200 1781 5258 1786 5353 1788 5471 445 5529 1785 5586 1790 5708 1792 5826 +449 5885 1789 5943 1794 6038 1796 6156 453 6214 1793 6271 1798 6366 1800 6484 457 6543 1797 6601 +1802 6696 1804 6814 461 6872 1801 6929 1807 7080 1809 7198 1810 7257 1811 7316 1812 7375 1806 7434 +1815 7572 1817 7690 465 7748 1814 7805 1819 7956 1821 8074 1822 8133 1823 8192 1824 8250 1818 8308 +% 1717 0 obj << -/D [1345 0 R /XYZ 150.705 716.092 null] +/D [1714 0 R /XYZ 150.705 370.138 null] >> -% 312 0 obj +% 1718 0 obj << -/D [1345 0 R /XYZ 150.705 691.48 null] +/D [1714 0 R /XYZ 150.705 338.313 null] >> -% 1344 0 obj +% 1713 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1351 0 obj +% 1722 0 obj << /Type /Page -/Contents 1352 0 R -/Resources 
1350 0 R +/Contents 1723 0 R +/Resources 1721 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1349 0 R ] +/Parent 1698 0 R +/Annots [ 1720 0 R ] >> -% 1349 0 obj +% 1720 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 324.687 359.001 336.746] -/A << /S /GoTo /D (descdata) >> +/Rect [291.943 491.971 369.462 504.031] +/A << /S /GoTo /D (spdata) >> >> -% 1353 0 obj +% 1724 0 obj << -/D [1351 0 R /XYZ 98.895 753.953 null] +/D [1722 0 R /XYZ 98.895 753.953 null] >> -% 1354 0 obj +% 413 0 obj << -/D [1351 0 R /XYZ 99.895 234.157 null] +/D [1722 0 R /XYZ 99.895 716.092 null] >> -% 1350 0 obj +% 1721 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F148 1490 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1357 0 obj +% 1726 0 obj << /Type /Page -/Contents 1358 0 R -/Resources 1356 0 R +/Contents 1727 0 R +/Resources 1725 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1355 0 R ] +/Parent 1698 0 R >> -% 1355 0 obj +% 1728 0 obj +<< +/D [1726 0 R /XYZ 149.705 753.953 null] +>> +% 1729 0 obj +<< +/D [1726 0 R /XYZ 150.705 496.698 null] +>> +% 1730 0 obj +<< +/D [1726 0 R /XYZ 150.705 438.313 null] +>> +% 1731 0 obj +<< +/D [1726 0 R /XYZ 150.705 418.388 null] +>> +% 1725 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R /F192 942 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1736 0 obj +<< +/Type /Page +/Contents 1737 0 R +/Resources 1735 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1739 0 R +/Annots [ 1732 0 R 1733 0 R 1734 0 R ] +>> +% 1732 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [405.298 215.702 427.216 227.166] -/A << /S /GoTo /D (subsubsection.2.3.1) >> +/Rect [291.943 527.942 369.462 540.002] +/A << /S /GoTo /D (spdata) >> >> -% 1359 0 obj +% 1733 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 460.196 359.001 472.256] +/A << /S 
/GoTo /D (descdata) >> +>> +% 1734 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 404.405 392.605 416.465] +/A << /S /GoTo /D (precdata) >> +>> +% 1738 0 obj << -/D [1357 0 R /XYZ 149.705 753.953 null] +/D [1736 0 R /XYZ 98.895 753.953 null] >> -% 1361 0 obj +% 417 0 obj << -/D [1357 0 R /XYZ 150.705 133.283 null] +/D [1736 0 R /XYZ 99.895 716.092 null] >> -% 1356 0 obj +% 1735 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1741 0 obj << -/Font << /F59 812 0 R /F54 586 0 R /F51 584 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R /F61 1360 0 R >> +/Type /Page +/Contents 1742 0 R +/Resources 1740 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1739 0 R +>> +% 1743 0 obj +<< +/D [1741 0 R /XYZ 149.705 753.953 null] +>> +% 421 0 obj +<< +/D [1741 0 R /XYZ 150.705 716.092 null] +>> +% 1740 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1363 0 obj +% 1745 0 obj +<< +/Type /Page +/Contents 1746 0 R +/Resources 1744 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1739 0 R +>> +% 1747 0 obj +<< +/D [1745 0 R /XYZ 98.895 753.953 null] +>> +% 1748 0 obj +<< +/D [1745 0 R /XYZ 99.895 701.929 null] +>> +% 1749 0 obj +<< +/D [1745 0 R /XYZ 99.895 668.729 null] +>> +% 1750 0 obj << -/Type /Page -/Contents 1364 0 R -/Resources 1362 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R +/D [1745 0 R /XYZ 99.895 624.894 null] >> -% 1365 0 obj +% 1751 0 obj << -/D [1363 0 R /XYZ 98.895 753.953 null] +/D [1745 0 R /XYZ 99.895 555.872 null] >> -% 1366 0 obj +% 1752 0 obj << -/D [1363 0 R /XYZ 99.895 716.092 null] +/D [1745 0 R /XYZ 99.895 500.082 null] >> -% 1367 0 obj +% 1753 0 obj << -/D [1363 0 R /XYZ 99.895 687.379 null] +/D [1745 0 R /XYZ 99.895 468.201 null] >> -% 1368 0 obj +% 1754 0 obj << -/D [1363 0 R /XYZ 99.895 667.454 null] +/D [1745 0 R /XYZ 99.895 425.023 null] >> -% 1369 0 obj +% 1755 0 obj << -/D [1363 0 R /XYZ 99.895 
626.268 null] +/D [1745 0 R /XYZ 99.895 382.522 null] >> -% 1370 0 obj +% 1756 0 obj << -/D [1363 0 R /XYZ 99.895 567.828 null] +/D [1745 0 R /XYZ 99.895 354.627 null] >> -% 1362 0 obj +% 1744 0 obj << -/Font << /F54 586 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R >> /ProcSet [ /PDF /Text ] >> -% 1374 0 obj +% 1758 0 obj << /Type /Page -/Contents 1375 0 R -/Resources 1373 0 R +/Contents 1759 0 R +/Resources 1757 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1372 0 R ] ->> -% 1372 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 116.52 409.811 128.58] -/A << /S /GoTo /D (descdata) >> +/Parent 1739 0 R >> -% 1376 0 obj +% 1760 0 obj << -/D [1374 0 R /XYZ 149.705 753.953 null] +/D [1758 0 R /XYZ 149.705 753.953 null] >> -% 316 0 obj +% 425 0 obj << -/D [1374 0 R /XYZ 150.705 716.092 null] +/D [1758 0 R /XYZ 150.705 716.092 null] >> -% 1373 0 obj +% 1757 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1379 0 obj +% 1762 0 obj << /Type /Page -/Contents 1380 0 R -/Resources 1378 0 R +/Contents 1763 0 R +/Resources 1761 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1377 0 R ] ->> -% 1377 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [278.165 401.451 300.083 413.511] -/A << /S /GoTo /D (subsubsection.2.3.1) >> +/Parent 1739 0 R >> -% 1381 0 obj +% 1764 0 obj << -/D [1379 0 R /XYZ 98.895 753.953 null] +/D [1762 0 R /XYZ 98.895 753.953 null] >> -% 1382 0 obj +% 429 0 obj << -/D [1379 0 R /XYZ 99.895 496.698 null] +/D [1762 0 R /XYZ 99.895 716.092 null] >> -% 1383 0 obj +% 1765 0 obj << -/D [1379 0 R /XYZ 99.895 474.179 null] +/D [1762 0 R /XYZ 99.895 170.282 null] >> -% 1384 0 obj +% 1766 0 obj << -/D [1379 0 R /XYZ 99.895 430.343 null] +/D [1762 0 R /XYZ 99.895 149.368 null] >> -% 1378 0 obj +% 1761 0 obj << -/Font 
<< /F51 584 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F192 942 0 R /F78 686 0 R /F190 941 0 R >> /ProcSet [ /PDF /Text ] >> -% 1388 0 obj +% 1768 0 obj << /Type /Page -/Contents 1389 0 R -/Resources 1387 0 R +/Contents 1769 0 R +/Resources 1767 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1385 0 R 1386 0 R ] ->> -% 1385 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 409.811 585.83] -/A << /S /GoTo /D (descdata) >> +/Parent 1739 0 R >> -% 1386 0 obj +% 1770 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 416.361 409.811 428.42] -/A << /S /GoTo /D (descdata) >> +/D [1768 0 R /XYZ 149.705 753.953 null] >> -% 1390 0 obj +% 433 0 obj << -/D [1388 0 R /XYZ 149.705 753.953 null] +/D [1768 0 R /XYZ 150.705 716.092 null] >> -% 320 0 obj +% 1771 0 obj << -/D [1388 0 R /XYZ 150.705 716.092 null] +/D [1768 0 R /XYZ 150.705 348.22 null] >> -% 1391 0 obj +% 1772 0 obj << -/D [1388 0 R /XYZ 150.705 326.302 null] +/D [1768 0 R /XYZ 150.705 313.8 null] >> -% 1387 0 obj +% 1767 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F190 941 0 R /F78 686 0 R /F192 942 0 R >> /ProcSet [ /PDF /Text ] >> -% 1395 0 obj +% 1774 0 obj << /Type /Page -/Contents 1396 0 R -/Resources 1394 0 R +/Contents 1775 0 R +/Resources 1773 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1392 0 R 1393 0 R ] +/Parent 1780 0 R >> -% 1392 0 obj +% 1776 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 573.77 359.001 585.83] -/A << /S /GoTo /D (descdata) >> +/D [1774 0 R /XYZ 98.895 753.953 null] >> -% 1393 0 obj +% 437 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 484.107 359.001 496.166] -/A << /S /GoTo /D (descdata) >> +/D [1774 0 R /XYZ 99.895 716.092 null] +>> +% 1777 0 obj +<< +/D [1774 0 R /XYZ 99.895 441.869 null] >> 
-% 1397 0 obj +% 1778 0 obj << -/D [1395 0 R /XYZ 98.895 753.953 null] +/D [1774 0 R /XYZ 99.895 395.439 null] >> -% 324 0 obj +% 1779 0 obj << -/D [1395 0 R /XYZ 99.895 716.092 null] +/D [1774 0 R /XYZ 99.895 363.559 null] >> -% 1394 0 obj +% 1773 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R >> /ProcSet [ /PDF /Text ] >> -% 1400 0 obj +% 1782 0 obj << /Type /Page -/Contents 1401 0 R -/Resources 1399 0 R +/Contents 1783 0 R +/Resources 1781 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1398 0 R ] ->> -% 1398 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 409.811 585.83] -/A << /S /GoTo /D (descdata) >> +/Parent 1780 0 R >> -% 1402 0 obj +% 1784 0 obj << -/D [1400 0 R /XYZ 149.705 753.953 null] +/D [1782 0 R /XYZ 149.705 753.953 null] >> -% 328 0 obj +% 441 0 obj << -/D [1400 0 R /XYZ 150.705 716.092 null] +/D [1782 0 R /XYZ 150.705 716.092 null] >> -% 1399 0 obj +% 1781 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1406 0 obj +% 1786 0 obj << /Type /Page -/Contents 1407 0 R -/Resources 1405 0 R +/Contents 1787 0 R +/Resources 1785 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R -/Annots [ 1403 0 R 1404 0 R ] ->> -% 1403 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 452.321 369.462 464.381] -/A << /S /GoTo /D (spdata) >> ->> -% 1404 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 209.193 359.001 221.252] -/A << /S /GoTo /D (descdata) >> +/Parent 1780 0 R >> -% 1408 0 obj +% 1788 0 obj << -/D [1406 0 R /XYZ 98.895 753.953 null] +/D [1786 0 R /XYZ 98.895 753.953 null] >> -% 332 0 obj +% 445 0 obj << -/D [1406 0 R /XYZ 99.895 716.092 null] +/D [1786 0 R /XYZ 99.895 716.092 null] >> -% 1405 0 obj +% 1785 0 obj << -/Font << /F51 
584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R >> /ProcSet [ /PDF /Text ] >> -% 1411 0 obj +% 1790 0 obj << /Type /Page -/Contents 1412 0 R -/Resources 1410 0 R +/Contents 1791 0 R +/Resources 1789 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R ->> -% 1413 0 obj -<< -/D [1411 0 R /XYZ 149.705 753.953 null] +/Parent 1780 0 R >> -% 1414 0 obj +% 1792 0 obj << -/D [1411 0 R /XYZ 150.705 716.092 null] +/D [1790 0 R /XYZ 149.705 753.953 null] >> -% 1415 0 obj +% 449 0 obj << -/D [1411 0 R /XYZ 150.705 663.469 null] +/D [1790 0 R /XYZ 150.705 716.092 null] >> -% 1410 0 obj +% 1789 0 obj << -/Font << /F54 586 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1419 0 obj +% 1794 0 obj << /Type /Page -/Contents 1420 0 R -/Resources 1418 0 R +/Contents 1795 0 R +/Resources 1793 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R -/Annots [ 1416 0 R 1417 0 R ] ->> -% 1416 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 571.679 359.001 583.739] -/A << /S /GoTo /D (descdata) >> ->> -% 1417 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 227.887 369.462 239.946] -/A << /S /GoTo /D (spdata) >> ->> -% 1421 0 obj -<< -/D [1419 0 R /XYZ 98.895 753.953 null] +/Parent 1780 0 R >> -% 336 0 obj +% 1796 0 obj << -/D [1419 0 R /XYZ 99.895 716.092 null] +/D [1794 0 R /XYZ 98.895 753.953 null] >> -% 1422 0 obj +% 453 0 obj << -/D [1419 0 R /XYZ 99.895 136.374 null] +/D [1794 0 R /XYZ 99.895 716.092 null] >> -% 1418 0 obj +% 1793 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1424 0 obj +% 1798 0 obj << /Type /Page -/Contents 1425 0 R -/Resources 1423 0 R +/Contents 1799 0 R +/Resources 1797 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R ->> -% 1426 0 obj -<< -/D [1424 
0 R /XYZ 149.705 753.953 null] +/Parent 1780 0 R >> -% 1427 0 obj -<< -/D [1424 0 R /XYZ 150.705 716.092 null] ->> -% 1428 0 obj +% 1800 0 obj << -/D [1424 0 R /XYZ 150.705 699.334 null] +/D [1798 0 R /XYZ 149.705 753.953 null] >> -% 1429 0 obj +% 457 0 obj << -/D [1424 0 R /XYZ 150.705 644.819 null] +/D [1798 0 R /XYZ 150.705 716.092 null] >> -% 1423 0 obj +% 1797 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1432 0 obj +% 1802 0 obj << /Type /Page -/Contents 1433 0 R -/Resources 1431 0 R +/Contents 1803 0 R +/Resources 1801 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R +/Parent 1805 0 R >> -% 1434 0 obj +% 1804 0 obj << -/D [1432 0 R /XYZ 98.895 753.953 null] +/D [1802 0 R /XYZ 98.895 753.953 null] >> -% 340 0 obj +% 461 0 obj << -/D [1432 0 R /XYZ 99.895 716.092 null] +/D [1802 0 R /XYZ 99.895 716.092 null] >> -% 1431 0 obj +% 1801 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F148 1490 0 R /F192 942 0 R /F78 686 0 R /F190 941 0 R >> /ProcSet [ /PDF /Text ] >> -% 1438 0 obj +% 1807 0 obj << /Type /Page -/Contents 1439 0 R -/Resources 1437 0 R +/Contents 1808 0 R +/Resources 1806 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R -/Annots [ 1430 0 R 1435 0 R 1436 0 R ] ->> -% 1430 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 654.503 378 666.562] -/A << /S /GoTo /D (descdata) >> +/Parent 1805 0 R >> -% 1435 0 obj +% 1809 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 480.963 420.271 493.022] -/A << /S /GoTo /D (spdata) >> +/D [1807 0 R /XYZ 149.705 753.953 null] >> -% 1436 0 obj +% 1810 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 411.699 378 423.758] -/A << /S /GoTo /D (descdata) >> +/D [1807 0 R /XYZ 150.705 576.399 null] >> -% 1440 0 obj +% 1811 0 obj 
<< -/D [1438 0 R /XYZ 149.705 753.953 null] +/D [1807 0 R /XYZ 150.705 541.925 null] >> -% 1441 0 obj +% 1812 0 obj << -/D [1438 0 R /XYZ 150.705 306.27 null] +/D [1807 0 R /XYZ 150.705 451.085 null] >> -% 1442 0 obj +% 1806 0 obj << -/D [1438 0 R /XYZ 150.705 272.927 null] +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F233 1044 0 R /F78 686 0 R /F279 1813 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1443 0 obj +% 1815 0 obj << -/D [1438 0 R /XYZ 150.705 236.878 null] +/Type /Page +/Contents 1816 0 R +/Resources 1814 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1805 0 R >> -% 1444 0 obj +% 1817 0 obj << -/D [1438 0 R /XYZ 150.705 167.614 null] +/D [1815 0 R /XYZ 98.895 753.953 null] >> -% 1445 0 obj +% 465 0 obj << -/D [1438 0 R /XYZ 150.705 146.171 null] +/D [1815 0 R /XYZ 99.895 716.092 null] >> -% 1437 0 obj +% 1814 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F190 941 0 R /F148 1490 0 R /F192 942 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1447 0 obj +% 1819 0 obj << /Type /Page -/Contents 1448 0 R -/Resources 1446 0 R +/Contents 1820 0 R +/Resources 1818 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R +/Parent 1805 0 R >> -% 1449 0 obj +% 1821 0 obj << -/D [1447 0 R /XYZ 98.895 753.953 null] +/D [1819 0 R /XYZ 149.705 753.953 null] >> -% 1450 0 obj +% 1822 0 obj << -/D [1447 0 R /XYZ 99.895 716.092 null] +/D [1819 0 R /XYZ 150.705 588.355 null] >> -% 1451 0 obj +% 1823 0 obj << -/D [1447 0 R /XYZ 99.895 651.514 null] +/D [1819 0 R /XYZ 150.705 553.88 null] >> -% 1452 0 obj +% 1824 0 obj << -/D [1447 0 R /XYZ 99.895 607.678 null] +/D [1819 0 R /XYZ 150.705 463.04 null] >> -% 1453 0 obj +% 1818 0 obj +<< +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F233 1044 0 R /F78 686 0 R /F279 1813 0 R >> +/ProcSet [ /PDF /Text ] +>> + +endstream +endobj +1827 0 obj +<< +/Length 6060 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td 
[(7.11)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_max\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 114.839 664.53 Tm [(This)-253(subr)18(outine)-253(implements)-253(a)-253(maximum)-253(valuer)19(eduction)-254(oper)1(ation)-254(based)-253(on)]TJ 1 0 0 1 99.895 652.575 Tm [(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.408 Td [(ctxt)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.059 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.848 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.983 0 0 1 124.802 475.42 Tm 
[(Speci\002ed)-255(as:)-317(an)-255(integer)-255(or)-255(r)19(eal)-255(variable,)-256(w)1(hich)-255(may)-255(be)-255(a)-256(s)1(calar)75(,)-255(or)-255(a)-255(rank)-255(1)]TJ 1 0 0 1 124.802 463.465 Tm [(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.409 Td [(root)]TJ +0 g 0 G +/F84 9.9626 Tf 1.009 0 0 1 123.148 443.056 Tm [(Pr)18(ocess)-247(to)-247(hold)-247(the)-247(\002nal)-247(maximum,)-247(or)]TJ/F190 10.3811 Tf 1 0 0 1 294.6 443.056 Tm [(\000)]TJ/F84 9.9626 Tf 1.009 0 0 1 302.794 443.056 Tm [(1)-247(to)-247(make)-247(it)-247(available)-247(on)-246(a)-1(l)1(l)-247(pr)17(o-)]TJ 1 0 0 1 124.802 431.101 Tm [(cesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F190 10.3811 Tf 131.102 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1)]TJ/F148 10.3811 Tf 7.873 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F148 10.3811 Tf 19.923 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.503 0 Td [(\000)]TJ/F84 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F75 9.9626 Tf -254.344 -32.364 Td [(mode)]TJ +0 g 0 G +/F84 9.9626 Tf 0.983 0 0 1 129.843 350.916 Tm [(Whether)-256(the)-255(call)-256(is)-256(started)-256(in)-255(non-blocking)-256(mode)-256(and)-256(completed)-255(later)75(,)-256(or)]TJ 1 0 0 1 124.802 338.961 Tm [(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 
Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 291.14 Tm [(Speci\002ed)-285(as:)-383(an)-285(integer)-285(value.)-423(The)-284(action)-285(to)-285(be)-285(taken)-284(is)-285(determined)-285(by)]TJ 1.02 0 0 1 124.802 279.185 Tm [(its)-329(bit)-328(\002elds,)-350(which)-329(can)-328(be)-329(set)-329(with)-328(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 323.986 279.185 Tm [(OR)]TJ/F84 9.9626 Tf 1.02 0 0 1 334.446 279.185 Tm [(.)-329(Basic)-328(action)-329(values)-329(ar)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 267.23 Tm [(psb_collective_start_)]TJ/F84 9.9626 Tf 0.98 0 0 1 234.639 267.23 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 239.567 267.23 Tm [(psb_collective_end_)]TJ/F84 9.9626 Tf 0.98 0 0 1 338.943 267.23 Tm [(.)-316(Default:)-316(both)-255(\002elds)-254(ar)18(e)]TJ 1 0 0 1 124.802 255.275 Tm [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.439 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F75 9.9626 Tf -57.485 -22.402 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.408 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.368 0 Td 
[(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ +0 g 0 G + 79.263 -29.888 Td [(125)]TJ +0 g 0 G +ET + +endstream +endobj +1832 0 obj << -/D [1447 0 R /XYZ 99.895 575.798 null] +/Length 5209 >> -% 1454 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F84 9.9626 Tf 175.303 706.129 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ 0.983 0 0 1 175.611 682.219 Tm [(Speci\002ed)-255(as:)-317(an)-255(integer)-255(or)-255(r)19(eal)-255(variable,)-256(which)-255(may)-255(be)-255(a)-255(scalar)76(,)-255(or)-255(a)-255(rank)-255(1)]TJ 1 0 0 1 175.611 670.263 Tm [(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.925 Td [(request)]TJ +0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.44 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ/F75 11.9552 Tf -57.484 -21.917 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 175.303 560.674 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 193.994 560.674 Tm [(dat)]TJ/F84 9.9626 Tf 0.98 0 0 1 212.032 560.674 Tm 
[(ar)18(gument)-240(is)-240(both)-241(input)-240(and)-241(output,)-243(and)-240(its)-241(value)-240(may)-240(be)-241(changed)]TJ 1 0 0 1 175.611 548.719 Tm [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 175.303 528.794 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 195.057 528.794 Tm [(mode)]TJ/F84 9.9626 Tf 1.02 0 0 1 218.722 528.794 Tm [(ar)18(gument)-270(can)-270(be)-270(built)-270(with)-270(the)-270(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 395.907 528.794 Tm [(IOR\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 424.803 528.794 Tm [(operator;)-282(in)-270(the)]TJ 0.988 0 0 1 175.611 516.839 Tm [(following)-254(example,)-255(the)-254(ar)18(gument)-254(is)-255(for)18(cing)-254(immediate)-254(completion,)-255(hence)]TJ 1 0 0 1 175.611 504.884 Tm [(the)]TJ/F145 9.9626 Tf 16.309 0 Td [(request)]TJ/F84 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 175.611 465.033 cm +0 0 318.804 27.895 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 188.015 482.268 Td [(call)]TJ +0 g 0 G + [-525(psb_max\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ +0 g 0 G + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F84 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ +0 g 0 G + 1.02 0 0 1 175.113 435.145 Tm [(When)-250(splitting)-250(the)-250(operation)-250(in)-250(two)-250(c)1(alls,)-252(the)]TJ/F145 9.9626 Tf 1 0 0 1 379.88 435.145 Tm [(dat)]TJ/F84 9.9626 Tf 1.02 0 0 1 
398.111 435.145 Tm [(ar)18(gument)]TJ/F78 9.9626 Tf 1.02 0 0 1 444.411 435.145 Tm [(must)-250(not)]TJ/F84 9.9626 Tf 1.02 0 0 1 483.929 435.145 Tm [(be)]TJ 1 0 0 1 175.611 423.19 Tm [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 175.611 350.463 cm +0 0 318.804 60.772 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 188.015 400.575 Td [(call)]TJ +0 g 0 G + [-525(psb_max\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(max_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + -23.536 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 37.658 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F233 8.9664 Tf -37.658 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_max\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(max_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 103.537 -266.301 Td [(126)]TJ +0 g 0 G +ET + +endstream +endobj +1839 0 obj << -/D [1447 0 R /XYZ 99.895 520.007 null] +/Length 6144 >> -% 1455 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(7.12)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l 
S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_min\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 114.839 664.53 Tm [(This)-247(subr)18(outine)-247(implements)-246(a)-247(minimum)-247(value)-246(r)17(eduction)-246(operation)-247(based)]TJ 1 0 0 1 99.895 652.575 Tm [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.408 Td [(ctxt)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.059 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.848 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.983 0 0 1 124.802 475.42 Tm [(Speci\002ed)-255(as:)-317(an)-255(integer)-255(or)-255(r)19(eal)-255(variable,)-256(w)1(hich)-255(may)-255(be)-255(a)-256(s)1(calar)75(,)-255(or)-255(a)-255(rank)-255(1)]TJ 1 0 0 1 124.802 463.465 Tm 
[(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.409 Td [(root)]TJ +0 g 0 G +/F84 9.9626 Tf 0.986 0 0 1 123.148 443.056 Tm [(Pr)18(ocess)-253(to)-254(hold)-253(the)-253(\002nal)-254(value,)-253(or)]TJ/F190 10.3811 Tf 1 0 0 1 270.3 443.056 Tm [(\000)]TJ/F84 9.9626 Tf 0.986 0 0 1 278.495 443.056 Tm [(1)-253(to)-254(make)-253(it)-254(available)-253(on)-253(all)-254(pr)19(ocesses.)]TJ 1 0 0 1 124.802 431.101 Tm [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F190 10.3811 Tf 131.102 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1)]TJ/F148 10.3811 Tf 7.873 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F148 10.3811 Tf 19.923 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.503 0 Td [(\000)]TJ/F84 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F75 9.9626 Tf -254.344 -32.364 Td [(mode)]TJ +0 g 0 G +/F84 9.9626 Tf 0.983 0 0 1 129.843 362.871 Tm [(Whether)-256(the)-255(call)-256(is)-256(started)-256(in)-255(non-blocking)-256(mode)-256(and)-256(completed)-255(later)75(,)-256(or)]TJ 1 0 0 1 124.802 350.916 Tm [(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 303.096 Tm 
[(Speci\002ed)-285(as:)-383(an)-285(integer)-285(value.)-423(The)-284(action)-285(to)-285(be)-285(taken)-284(is)-285(determined)-285(by)]TJ 1.02 0 0 1 124.802 291.14 Tm [(its)-329(bit)-328(\002elds,)-350(which)-329(can)-328(be)-329(set)-329(with)-328(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 323.986 291.14 Tm [(OR)]TJ/F84 9.9626 Tf 1.02 0 0 1 334.446 291.14 Tm [(.)-329(Basic)-328(action)-329(values)-329(ar)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 279.185 Tm [(psb_collective_start_)]TJ/F84 9.9626 Tf 0.98 0 0 1 234.639 279.185 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 239.567 279.185 Tm [(psb_collective_end_)]TJ/F84 9.9626 Tf 0.98 0 0 1 338.943 279.185 Tm [(.)-316(Default:)-316(both)-255(\002elds)-254(ar)18(e)]TJ 1 0 0 1 124.802 267.23 Tm [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.439 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F75 9.9626 Tf -57.485 -22.401 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.409 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 
9.9626 Tf 39.293 0 Td [(.)]TJ +0 g 0 G + 73.405 -29.888 Td [(127)]TJ +0 g 0 G +ET + +endstream +endobj +1844 0 obj << -/D [1447 0 R /XYZ 99.895 476.171 null] +/Length 5122 >> -% 1446 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F84 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 0.983 0 0 1 175.611 694.174 Tm [(Speci\002ed)-255(as:)-317(an)-255(integer)-255(or)-255(r)19(eal)-255(variable,)-256(which)-255(may)-255(be)-255(a)-255(scalar)76(,)-255(or)-255(a)-255(rank)-255(1)]TJ 1 0 0 1 175.611 682.219 Tm [(or)-250(2)-250(array)111(.)]TJ -0.308 -11.956 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.598 -19.925 Td [(request)]TJ +0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.44 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ/F75 11.9552 Tf -57.484 -21.917 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 175.303 560.674 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 193.994 560.674 Tm [(dat)]TJ/F84 9.9626 Tf 0.98 0 0 1 212.032 560.674 Tm [(ar)18(gument)-240(is)-240(both)-241(input)-240(and)-241(output,)-243(and)-240(its)-241(value)-240(may)-240(be)-241(changed)]TJ 1 0 0 1 175.611 548.719 Tm [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G 
+ -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 175.303 528.794 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 195.057 528.794 Tm [(mode)]TJ/F84 9.9626 Tf 1.02 0 0 1 218.722 528.794 Tm [(ar)18(gument)-270(can)-270(be)-270(built)-270(with)-270(the)-270(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 395.907 528.794 Tm [(IOR\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 424.803 528.794 Tm [(operator;)-282(in)-270(the)]TJ 0.988 0 0 1 175.611 516.839 Tm [(following)-254(example,)-255(the)-254(ar)18(gument)-254(is)-255(for)18(cing)-254(immediate)-254(completion,)-255(hence)]TJ 1 0 0 1 175.611 504.884 Tm [(the)]TJ/F145 9.9626 Tf 16.309 0 Td [(request)]TJ/F84 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 175.611 465.033 cm +0 0 318.804 27.895 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 188.015 482.268 Td [(call)]TJ +0 g 0 G + [-525(psb_min\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ +0 g 0 G + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F84 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ +0 g 0 G + 1.02 0 0 1 175.113 435.145 Tm [(When)-250(splitting)-250(the)-250(operation)-250(in)-250(two)-250(c)1(alls,)-252(the)]TJ/F145 9.9626 Tf 1 0 0 1 379.88 435.145 Tm [(dat)]TJ/F84 9.9626 Tf 1.02 0 0 1 398.111 435.145 Tm [(ar)18(gument)]TJ/F78 9.9626 Tf 1.02 0 0 1 444.411 435.145 Tm [(must)-250(not)]TJ/F84 9.9626 Tf 1.02 0 0 1 483.929 435.145 Tm [(be)]TJ 1 0 0 1 175.611 423.19 Tm [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET 
+q +1 0 0 1 175.611 350.463 cm +0 0 318.804 60.772 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 188.015 400.575 Td [(call)]TJ +0 g 0 G + [-525(psb_min\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(min_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + -23.536 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 37.658 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F233 8.9664 Tf -37.658 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_min\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(min_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 103.537 -266.301 Td [(128)]TJ +0 g 0 G +ET + +endstream +endobj +1851 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Length 6205 >> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(7.13)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -57.098 -19.198 Td 
[(call)-525(psb_amx\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 114.839 664.53 Tm [(This)-250(subr)19(outine)-250(implements)-250(a)-249(maximum)-250(absolute)-250(value)-249(r)18(eduction)-250(operation)]TJ 1 0 0 1 99.895 652.575 Tm [(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.408 Td [(ctxt)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.059 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.848 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 475.42 Tm [(Speci\002ed)-228(as:)-303(an)-228(integer)76(,)-234(r)19(eal)-228(or)-228(complex)-228(variable,)-234(which)-228(may)-228(be)-228(a)-228(scalar)76(,)-234(or)]TJ 0.991 0 0 1 124.802 463.465 Tm 
[(a)-252(rank)-253(1)-252(or)-252(2)-252(array)112(.)-565(T)90(ype,)-252(kind,)-252(rank)-252(and)-253(size)-252(must)-252(agr)18(ee)-252(on)-252(all)-253(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf 1 0 0 1 99.895 443.056 Tm [(root)]TJ +0 g 0 G +/F84 9.9626 Tf 0.986 0 0 1 123.148 443.056 Tm [(Pr)18(ocess)-253(to)-254(hold)-253(the)-253(\002nal)-254(value,)-253(or)]TJ/F190 10.3811 Tf 1 0 0 1 270.3 443.056 Tm [(\000)]TJ/F84 9.9626 Tf 0.986 0 0 1 278.495 443.056 Tm [(1)-253(to)-254(make)-253(it)-254(available)-253(on)-253(all)-254(pr)19(ocesses.)]TJ 1 0 0 1 124.802 431.101 Tm [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F190 10.3811 Tf 131.102 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1)]TJ/F148 10.3811 Tf 7.873 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F148 10.3811 Tf 19.923 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.503 0 Td [(\000)]TJ/F84 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F75 9.9626 Tf -254.344 -32.364 Td [(mode)]TJ +0 g 0 G +/F84 9.9626 Tf 0.983 0 0 1 129.843 362.871 Tm [(Whether)-256(the)-255(call)-256(is)-256(started)-256(in)-255(non-blocking)-256(mode)-256(and)-256(completed)-255(later)75(,)-256(or)]TJ 1 0 0 1 124.802 350.916 Tm [(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 
0 1 124.802 303.096 Tm [(Speci\002ed)-285(as:)-383(an)-285(integer)-285(value.)-423(The)-284(action)-285(to)-285(be)-285(taken)-284(is)-285(determined)-285(by)]TJ 1.02 0 0 1 124.802 291.14 Tm [(its)-329(bit)-328(\002elds,)-350(which)-329(can)-328(be)-329(set)-329(with)-328(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 323.986 291.14 Tm [(OR)]TJ/F84 9.9626 Tf 1.02 0 0 1 334.446 291.14 Tm [(.)-329(Basic)-328(action)-329(values)-329(ar)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 279.185 Tm [(psb_collective_start_)]TJ/F84 9.9626 Tf 0.98 0 0 1 234.639 279.185 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 239.567 279.185 Tm [(psb_collective_end_)]TJ/F84 9.9626 Tf 0.98 0 0 1 338.943 279.185 Tm [(.)-316(Default:)-316(both)-255(\002elds)-254(ar)18(e)]TJ 1 0 0 1 124.802 267.23 Tm [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.439 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F75 9.9626 Tf -57.485 -22.401 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.409 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td 
[(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ +0 g 0 G + 73.405 -29.888 Td [(129)]TJ +0 g 0 G +ET endstream endobj -1462 0 obj +1855 0 obj << -/Length 6789 +/Length 5076 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.9)-1000(psb)]TJ +/F84 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 0.98 0 0 1 175.611 694.174 Tm [(Speci\002ed)-228(as:)-303(an)-228(integer)76(,)-234(r)19(eal)-228(or)-228(complex)-228(variable,)-234(which)-228(may)-228(be)-228(a)-228(scalar)76(,)-234(or)]TJ 0.991 0 0 1 175.611 682.219 Tm [(a)-252(rank)-253(1)-252(or)-252(2)-252(array)112(.)-565(T)90(ype,)-252(kind,)-252(rank)-252(and)-253(size)-252(must)-252(agr)18(ee)-252(on)-253(all)-252(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf 1 0 0 1 150.705 662.293 Tm [(request)]TJ +0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.44 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ/F75 11.9552 Tf -57.484 -21.918 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 175.303 572.629 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 193.994 572.629 Tm [(dat)]TJ/F84 9.9626 Tf 0.98 0 0 1 212.032 572.629 Tm [(ar)18(gument)-240(is)-240(both)-241(input)-240(and)-241(output,)-243(and)-240(its)-241(value)-240(may)-240(be)-241(changed)]TJ 1 0 0 1 175.611 560.674 Tm 
[(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 175.303 540.749 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 195.057 540.749 Tm [(mode)]TJ/F84 9.9626 Tf 1.02 0 0 1 218.722 540.749 Tm [(ar)18(gument)-270(can)-270(be)-270(built)-270(with)-270(the)-270(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 395.907 540.749 Tm [(IOR\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 424.803 540.749 Tm [(operator;)-282(in)-270(the)]TJ 0.988 0 0 1 175.611 528.794 Tm [(following)-254(example,)-255(the)-254(ar)18(gument)-254(is)-255(for)18(cing)-254(immediate)-254(completion,)-255(hence)]TJ 1 0 0 1 175.611 516.839 Tm [(the)]TJ/F145 9.9626 Tf 16.309 0 Td [(request)]TJ/F84 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 175.611 476.988 cm +0 0 318.804 27.895 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F51 11.9552 Tf 201.825 706.129 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(routine)]TJ +/F233 8.9664 Tf 188.015 494.224 Td [(call)]TJ 0 g 0 G + [-525(psb_amx\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -51.12 -19.204 Td [(call)-525(psb_spasb\050a,)-525(desc_a,)-525(info)-525([,)-525(afmt,)-525(upd,)-1050(mold]\051)]TJ + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -22.289 Td [(T)90(ype:)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.421 Td [(On)-250(Entry)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 
rg 0.95 0.95 0.95 RG 0 g 0 G 0 g 0 G - 0 -20.421 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 623.994 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 623.794 Td [(a)]TJ +/F84 9.9626 Tf -48.393 -36.165 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F54 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 576.173 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 575.974 Td [(desc)]TJ + 1.02 0 0 1 175.113 447.1 Tm [(When)-250(splitting)-250(the)-250(operation)-250(in)-250(two)-250(c)1(alls,)-252(the)]TJ/F145 9.9626 Tf 1 0 0 1 379.88 447.1 Tm [(dat)]TJ/F84 9.9626 Tf 1.02 0 0 1 398.111 447.1 Tm [(ar)18(gument)]TJ/F78 9.9626 Tf 1.02 0 0 1 444.411 447.1 Tm [(must)-250(not)]TJ/F84 9.9626 Tf 1.02 0 0 1 483.929 447.1 Tm [(be)]TJ 1 0 0 1 175.611 435.145 Tm [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 384.755 576.173 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 175.611 362.418 cm +0 0 318.804 60.772 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 387.893 575.974 Td [(type)]TJ +/F233 8.9664 Tf 178.6 412.53 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-525(psb_amx\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.421 Td [(afmt)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 26.012 0 Td 
[(the)-250(storage)-250(format)-250(for)-250(the)-250(sparse)-250(matrix.)]TJ -1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(array)-250(of)-250(characters.)-310(Defalt:)-310('CSR'.)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -20.42 Td [(upd)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(Pr)18(ovide)-250(for)-250(updates)-250(to)-250(the)-250(matrix)-250(coef)18(\002cients.)]TJ 1.663 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integer)74(,)-250(possible)-250(values:)]TJ/F59 9.9626 Tf 165.219 0 Td [(psb_upd_srch_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ/F59 9.9626 Tf 4.981 0 Td [(psb_upd_perm_)]TJ + [(amx_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -263.1 -20.421 Td [(mold)]TJ + -14.122 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 37.658 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F233 8.9664 Tf -37.658 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_amx\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td 
[(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amx_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 103.537 -278.256 Td [(130)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F59 9.9626 Tf 201.393 0 Td [(psb)]TJ -ET -q -1 0 0 1 393.323 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 396.461 371.249 Td [(T)]TJ -ET -q -1 0 0 1 402.319 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 405.457 371.249 Td [(base)]TJ ET -q -1 0 0 1 427.006 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1862 0 obj +<< +/Length 6206 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 430.144 371.249 Td [(sparse)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(7.14)-1000(psb)]TJ ET q -1 0 0 1 462.154 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 465.292 371.249 Td [(mat)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ +/F75 11.9552 Tf 156.993 706.129 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ 0 g 0 G -/F51 9.9626 Tf -330.278 -22.289 Td [(On)-250(Return)]TJ 0 g 0 G +/F145 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_amn\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F84 9.9626 Tf 0.983 0 0 1 114.839 664.53 Tm 
[(This)-255(subr)19(outine)-255(implements)-255(a)-254(minimum)-255(absolute)-255(value)-255(r)19(eduction)-255(operation)]TJ 1 0 0 1 99.895 652.575 Tm [(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G - 0 -20.421 Td [(a)]TJ +/F75 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.408 Td [(ctxt)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.059 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.848 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 475.42 Tm [(Speci\002ed)-228(as:)-303(an)-228(integer)76(,)-234(r)19(eal)-228(or)-228(complex)-228(variable,)-234(which)-228(may)-228(be)-228(a)-228(scalar)76(,)-234(or)]TJ 0.991 0 0 1 124.802 463.465 Tm [(a)-252(rank)-253(1)-252(or)-252(2)-252(array)112(.)-565(T)90(ype,)-252(kind,)-252(rank)-252(and)-253(size)-252(must)-252(agr)18(ee)-252(on)-252(all)-253(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf 1 0 0 1 99.895 443.056 Tm [(root)]TJ +0 g 0 G +/F84 9.9626 Tf 0.986 0 0 
1 123.148 443.056 Tm [(Pr)18(ocess)-253(to)-254(hold)-253(the)-253(\002nal)-254(value,)-253(or)]TJ/F190 10.3811 Tf 1 0 0 1 270.3 443.056 Tm [(\000)]TJ/F84 9.9626 Tf 0.986 0 0 1 278.495 443.056 Tm [(1)-253(to)-254(make)-253(it)-254(available)-253(on)-253(all)-254(pr)19(ocesses.)]TJ 1 0 0 1 124.802 431.101 Tm [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F190 10.3811 Tf 131.102 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1)]TJ/F148 10.3811 Tf 7.873 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F148 10.3811 Tf 19.923 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.503 0 Td [(\000)]TJ/F84 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F75 9.9626 Tf -254.344 -32.364 Td [(mode)]TJ +0 g 0 G +/F84 9.9626 Tf 0.983 0 0 1 129.843 362.871 Tm [(Whether)-256(the)-255(call)-256(is)-256(started)-256(in)-255(non-blocking)-256(mode)-256(and)-256(completed)-255(later)75(,)-256(or)]TJ 1 0 0 1 124.802 350.916 Tm [(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 303.096 Tm [(Speci\002ed)-285(as:)-383(an)-285(integer)-285(value.)-423(The)-284(action)-285(to)-285(be)-285(taken)-284(is)-285(determined)-285(by)]TJ 1.02 0 0 1 124.802 291.14 Tm 
[(its)-329(bit)-328(\002elds,)-350(which)-329(can)-328(be)-329(set)-329(with)-328(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 323.986 291.14 Tm [(OR)]TJ/F84 9.9626 Tf 1.02 0 0 1 334.446 291.14 Tm [(.)-329(Basic)-328(action)-329(values)-329(ar)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 279.185 Tm [(psb_collective_start_)]TJ/F84 9.9626 Tf 0.98 0 0 1 234.639 279.185 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 239.567 279.185 Tm [(psb_collective_end_)]TJ/F84 9.9626 Tf 0.98 0 0 1 338.943 279.185 Tm [(.)-316(Default:)-316(both)-255(\002elds)-254(ar)18(e)]TJ 1 0 0 1 124.802 267.23 Tm [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.439 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F75 9.9626 Tf -57.485 -22.401 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.409 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ +0 g 0 G + 73.405 -29.888 Td [(131)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 
Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ ET -q -1 0 0 1 360.068 280.918 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1866 0 obj +<< +/Length 5140 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 363.206 280.719 Td [(Tspmat)]TJ +/F84 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 1.013 0 0 1 175.611 694.174 Tm [(Speci\002ed)-247(as:)-308(an)-247(integer)73(,)-248(r)18(eal)-248(or)-247(complex)-247(variable,)-248(which)-248(may)-247(be)-247(a)-248(scalar)73(,)]TJ 1 0 0 1 175.611 682.219 Tm [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ -0.308 -11.956 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.598 -19.925 Td [(request)]TJ +0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.44 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ/F75 11.9552 Tf -57.484 -21.917 Td [(Notes)]TJ +0 g 0 G +/F84 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ +0 g 0 G + 0.98 0 0 1 
175.303 560.674 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 193.994 560.674 Tm [(dat)]TJ/F84 9.9626 Tf 0.98 0 0 1 212.032 560.674 Tm [(ar)18(gument)-240(is)-240(both)-241(input)-240(and)-241(output,)-243(and)-240(its)-241(value)-240(may)-240(be)-241(changed)]TJ 1 0 0 1 175.611 548.719 Tm [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + 1.02 0 0 1 175.303 528.794 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 195.057 528.794 Tm [(mode)]TJ/F84 9.9626 Tf 1.02 0 0 1 218.722 528.794 Tm [(ar)18(gument)-270(can)-270(be)-270(built)-270(with)-270(the)-270(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 395.907 528.794 Tm [(IOR\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 424.803 528.794 Tm [(operator;)-282(in)-270(the)]TJ 0.988 0 0 1 175.611 516.839 Tm [(following)-254(example,)-255(the)-254(ar)18(gument)-254(is)-255(for)18(cing)-254(immediate)-254(completion,)-255(hence)]TJ 1 0 0 1 175.611 504.884 Tm [(the)]TJ/F145 9.9626 Tf 16.309 0 Td [(request)]TJ/F84 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 395.216 280.918 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 175.611 465.033 cm +0 0 318.804 27.895 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 398.354 280.719 Td [(type)]TJ +/F233 8.9664 Tf 188.015 482.268 Td [(call)]TJ +0 g 0 G + [-525(psb_amn\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ +0 g 0 G + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 20.921 0 
Td [(.)]TJ +/F84 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -20.421 Td [(desc)]TJ + 1.02 0 0 1 175.113 435.145 Tm [(When)-250(splitting)-250(the)-250(operation)-250(in)-250(two)-250(c)1(alls,)-252(the)]TJ/F145 9.9626 Tf 1 0 0 1 379.88 435.145 Tm [(dat)]TJ/F84 9.9626 Tf 1.02 0 0 1 398.111 435.145 Tm [(ar)18(gument)]TJ/F78 9.9626 Tf 1.02 0 0 1 444.411 435.145 Tm [(must)-250(not)]TJ/F84 9.9626 Tf 1.02 0 0 1 483.929 435.145 Tm [(be)]TJ 1 0 0 1 175.611 423.19 Tm [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 171.218 260.497 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 175.611 350.463 cm +0 0 318.804 60.772 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F51 9.9626 Tf 174.207 260.298 Td [(a)]TJ +/F233 8.9664 Tf 188.015 400.575 Td [(call)]TJ +0 g 0 G + [-525(psb_amn\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amn_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + -23.536 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 37.658 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F233 8.9664 Tf -37.658 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_amn\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 
[(=)]TJ +0 g 0 G + [(amn_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 103.537 -266.301 Td [(132)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F54 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.956 Td [(Speci\002ed)-290(as:)-389(a)-290(str)8(uctur)18(ed)-290(data)-289(of)-290(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 171.305 0 Td [(psb)]TJ ET -q -1 0 0 1 363.235 212.677 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1873 0 obj +<< +/Length 6113 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 366.373 212.477 Td [(desc)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(7.15)-1000(psb)]TJ ET q -1 0 0 1 387.922 212.677 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 391.06 212.477 Td [(type)]TJ +/F75 11.9552 Tf 156.993 706.129 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(reduction)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_nrm2\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F84 9.9626 Tf 1.014 0 0 1 114.839 664.53 Tm [(This)-246(subr)18(outine)-246(implements)-246(a)-246(2-norm)-246(value)-246(r)17(educti)1(on)-247(ope)1(ration)-247(b)1(ased)-247(on)]TJ 1 0 0 1 99.895 652.575 Tm [(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)-429(If)-290(the)-290(matrix)-289(was)]TJ -236.371 -11.955 Td [(allocated)-209(with)]TJ/F59 9.9626 Tf 64.153 0 Td [(bldmode=psb_matbld_remote_)]TJ/F54 9.9626 Tf 135.988 0 Td 
[(,)-217(then)-210(the)-209(descriptor)-209(will)-209(be)]TJ -200.141 -11.955 Td [(r)18(eassembled.)]TJ +/F75 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + 0 -20.408 Td [(ctxt)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.059 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.848 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 475.42 Tm [(Speci\002ed)-224(as:)-300(a)-224(r)18(eal)-224(variable,)-230(which)-223(may)-224(be)-224(a)-224(scalar)76(,)-230(or)-224(a)-224(rank)-224(1)-223(array)113(.)-530(Kind,)]TJ 1 0 0 1 124.802 463.465 Tm 
[(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -20.409 Td [(root)]TJ +0 g 0 G +/F84 9.9626 Tf 0.986 0 0 1 123.148 443.056 Tm [(Pr)18(ocess)-253(to)-254(hold)-253(the)-253(\002nal)-254(value,)-253(or)]TJ/F190 10.3811 Tf 1 0 0 1 270.3 443.056 Tm [(\000)]TJ/F84 9.9626 Tf 0.986 0 0 1 278.495 443.056 Tm [(1)-253(to)-254(make)-253(it)-254(available)-253(on)-253(all)-254(pr)19(ocesses.)]TJ 1 0 0 1 124.802 431.101 Tm [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F190 10.3811 Tf 131.102 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1)]TJ/F148 10.3811 Tf 7.873 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F148 10.3811 Tf 19.923 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.503 0 Td [(\000)]TJ/F84 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F75 9.9626 Tf -254.344 -32.364 Td [(mode)]TJ +0 g 0 G +/F84 9.9626 Tf 0.983 0 0 1 129.843 362.871 Tm [(Whether)-256(the)-255(call)-256(is)-256(started)-256(in)-255(non-blocking)-256(mode)-256(and)-256(completed)-255(later)75(,)-256(or)]TJ 1 0 0 1 124.802 350.916 Tm [(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.802 303.096 Tm 
[(Speci\002ed)-285(as:)-383(an)-285(integer)-285(value.)-423(The)-284(action)-285(to)-285(be)-285(taken)-284(is)-285(determined)-285(by)]TJ 1.02 0 0 1 124.802 291.14 Tm [(its)-329(bit)-328(\002elds,)-350(which)-329(can)-328(be)-329(set)-329(with)-328(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 323.986 291.14 Tm [(OR)]TJ/F84 9.9626 Tf 1.02 0 0 1 334.446 291.14 Tm [(.)-329(Basic)-328(action)-329(values)-329(ar)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 124.802 279.185 Tm [(psb_collective_start_)]TJ/F84 9.9626 Tf 0.98 0 0 1 234.639 279.185 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 239.567 279.185 Tm [(psb_collective_end_)]TJ/F84 9.9626 Tf 0.98 0 0 1 338.943 279.185 Tm [(.)-316(Default:)-316(both)-255(\002elds)-254(ar)18(e)]TJ 1 0 0 1 124.802 267.23 Tm [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G - 141.968 -29.888 Td [(86)]TJ +/F75 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +0 g 0 G +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.439 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F75 9.9626 Tf -57.485 -22.401 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.409 Td [(dat)]TJ +0 g 0 G +/F84 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(2-norm)-250(r)18(eduction.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 
27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ +0 g 0 G + 73.405 -29.888 Td [(133)]TJ 0 g 0 G ET endstream endobj -1467 0 obj +1878 0 obj << -/Length 3146 +/Length 6575 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ +/F84 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(variable,)-250(which)-250(may)-250(be)-250(a)-250(scalar)74(,)-250(or)-250(a)-250(rank)-250(1)-250(array)111(.)]TJ 0 -11.955 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F75 9.9626 Tf -24.906 -19.926 Td [(request)]TJ 0 g 0 G - [-500(On)-226(entry)-227(to)-226(this)-227(r)18(outine)-226(the)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F59 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ +/F84 9.9626 Tf 38.346 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.44 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F145 9.9626 Tf 9.166 0 Td [(mode)]TJ/F84 9.9626 Tf 23.412 0 Td [(speci\002es)-250(non-blocking)-250(action,)-250(then)-250(this)-250(variable)-250(must)-250(be)-250(pr)18(esent.)]TJ/F75 11.9552 Tf -57.484 -21.918 Td [(Notes)]TJ 0 g 0 G - -62.017 -19.925 Td [(2.)]TJ +/F84 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ 0 g 0 G - [-500(The)-250(sparse)-250(matrix)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(update)-250(state;)]TJ 
+ 1.02 0 0 1 175.303 572.629 Tm [(This)-295(r)17(educti)1(on)-296(is)-295(appr)18(opriate)-295(to)-296(compute)-295(the)-295(r)18(esults)-296(of)-295(multiple)-295(\050local\051)]TJ 1 0 0 1 175.611 560.674 Tm [(NRM2)-250(operations)-250(at)-250(the)-250(same)-250(time.)]TJ 0 g 0 G - 0 -19.925 Td [(3.)]TJ + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + 0.999 0 0 1 175.611 540.749 Tm [(Denoting)-251(by)]TJ/F78 9.9626 Tf 1 0 0 1 232.93 540.749 Tm [(d)-40(a)-25(t)]TJ/F78 7.5716 Tf 13.536 -1.96 Td [(i)]TJ/F84 9.9626 Tf 0.999 0 0 1 251.713 540.749 Tm [(the)-251(value)-251(of)-250(the)-251(variable)]TJ/F78 9.9626 Tf 1 0 0 1 360.51 540.749 Tm [(d)-40(a)-25(t)]TJ/F84 9.9626 Tf 0.999 0 0 1 376.501 540.749 Tm [(on)-251(pr)18(ocess)]TJ/F78 9.9626 Tf 1 0 0 1 425.568 540.749 Tm [(i)]TJ/F84 9.9626 Tf 0.999 0 0 1 428.532 540.749 Tm [(,)-251(the)-251(output)]TJ/F78 9.9626 Tf 1 0 0 1 482.247 540.749 Tm [(r)-17(e)-25(s)]TJ/F84 9.9626 Tf -306.636 -11.955 Td [(is)-250(equivalent)-250(to)-250(the)-250(computation)-250(of)]TJ/F78 9.9626 Tf 124.797 -25.467 Td [(r)-17(e)-25(s)]TJ/F192 10.3811 Tf 15.061 0 Td [(=)]TJ/F17 9.9626 Tf 11.086 10.922 Td [(r)]TJ +ET +q +1 0 0 1 336.642 514.443 cm +[]0 d 0 J 0.389 w 0 0 m 30.512 0 l S +Q +BT +/F243 13.9477 Tf 336.766 501.255 Td [(\345)]TJ/F78 7.5716 Tf 4.245 -8.765 Td [(i)]TJ/F78 9.9626 Tf 8.365 10.837 Td [(d)-40(a)-25(t)]TJ/F84 7.5716 Tf 13.494 3.472 Td [(2)]TJ/F78 7.5716 Tf 0.042 -7.026 Td [(i)]TJ/F84 9.9626 Tf 4.242 3.554 Td [(,)]TJ -191.961 -30.807 Td [(with)-250(car)18(e)-250(taken)-250(to)-250(avoid)-250(unnecessary)-250(over\003ow)92(.)]TJ +0 g 0 G + -12.035 -19.925 Td [(3.)]TJ 0 g 0 G - [-500(Duplicate)-421(entries)-422(ar)18(e)-421(detected)-421(and)-422(handled)-421(in)-421(both)-421(build)-422(and)-421(update)]TJ 12.453 -11.955 Td [(state,)-244(wit)1(h)-242(the)-242(exception)-242(of)-242(the)-242(err)18(or)-242(action)-242(that)-242(is)-242(only)-241(taken)-242(in)-242(the)-242(build)]TJ 0 -11.955 Td 
[(state,)-250(i.e.)-310(on)-250(the)-250(\002rst)-250(assembly;)]TJ + 0.98 0 0 1 175.303 452.595 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 193.994 452.595 Tm [(dat)]TJ/F84 9.9626 Tf 0.98 0 0 1 212.032 452.595 Tm [(ar)18(gument)-240(is)-240(both)-241(input)-240(and)-241(output,)-243(and)-240(its)-241(value)-240(may)-240(be)-241(changed)]TJ 1 0 0 1 175.611 440.64 Tm [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G -12.453 -19.926 Td [(4.)]TJ 0 g 0 G - [-500(If)-190(the)-190(update)-190(choice)-190(is)]TJ/F59 9.9626 Tf 108.372 0 Td [(psb_upd_perm_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)-202(then)-190(subsequent)-190(calls)-190(to)]TJ/F59 9.9626 Tf 109.946 0 Td [(psb_spins)]TJ/F54 9.9626 Tf -273.86 -11.955 Td [(to)-309(update)-309(the)-308(matrix)-309(must)-309(be)-309(arranged)-309(in)-308(such)-309(a)-309(way)-309(as)-309(to)-308(pr)18(oduce)-309(ex-)]TJ 0 -11.955 Td [(actly)-319(the)-320(same)-319(sequence)-320(of)-319(coef)18(\002cient)-319(values)-320(as)-319(encounter)18(ed)-319(at)-320(the)-319(\002rst)]TJ 0 -11.955 Td [(assembly;)]TJ + 1.02 0 0 1 175.303 420.714 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 195.057 420.714 Tm [(mode)]TJ/F84 9.9626 Tf 1.02 0 0 1 218.722 420.714 Tm [(ar)18(gument)-270(can)-270(be)-270(built)-270(with)-270(the)-270(bitwise)]TJ/F145 9.9626 Tf 1 0 0 1 395.907 420.714 Tm [(IOR\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 424.803 420.714 Tm [(operator;)-282(in)-270(the)]TJ 0.988 0 0 1 175.611 408.759 Tm [(following)-254(example,)-255(the)-254(ar)18(gument)-254(is)-255(for)18(cing)-254(immediate)-254(completion,)-255(hence)]TJ 1 0 0 1 175.611 396.804 Tm [(the)]TJ/F145 9.9626 Tf 16.309 0 Td [(request)]TJ/F84 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 175.611 356.953 cm +0 0 318.804 27.895 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - 
-12.453 -19.926 Td [(5.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(The)-250(output)-250(storage)-250(format)-250(need)-250(not)-250(be)-250(the)-250(same)-250(on)-250(all)-250(pr)18(ocesses;)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 188.015 374.189 Td [(call)]TJ 0 g 0 G - 0 -19.925 Td [(6.)]TJ + [-525(psb_nrm2\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(On)-249(exit)-249(fr)18(om)-250(this)-249(r)18(outine)-249(the)-249(matrix)-249(is)-250(in)-249(the)-249(assembled)-249(state,)-250(and)-249(thus)-249(is)]TJ 12.453 -11.955 Td [(suitable)-250(for)-250(the)-250(computational)-250(r)18(outines;)]TJ + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.925 Td [(7.)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(If)-431(the)]TJ/F59 9.9626 Tf 41.543 0 Td [(bldmode=psb_matbld_remote_)]TJ/F54 9.9626 Tf 140.288 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-272(the)-273(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.956 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-288(element)-288(applications,)-297(with)]TJ/F59 9.9626 Tf 145.88 0 Td [(dupl=psb_dupl_add_)]TJ/F54 9.9626 Tf 94.147 0 Td [(;)-307(it)-288(is)-287(necessary)-288(to)]TJ -240.027 -11.955 Td [(check)-236(for)-235(possible)-236(updates)-236(needed)-235(in)-236(the)-236(descriptor)74(,)-238(hence)-236(ther)18(e)-236(will)-235(be)-236(a)]TJ 0 -11.955 Td [(r)8(untime)-250(over)18(head.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G - 141.968 -332.752 Td [(87)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -ET - -endstream -endobj -1480 0 obj -<< 
-/Length 2987 ->> -stream 0 g 0 G +/F84 9.9626 Tf -48.393 -36.164 Td [(5.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.10)-1000(psb)]TJ + 1.02 0 0 1 175.113 327.066 Tm [(When)-250(splitting)-250(the)-250(operation)-250(in)-250(two)-250(c)1(alls,)-252(the)]TJ/F145 9.9626 Tf 1 0 0 1 379.88 327.066 Tm [(dat)]TJ/F84 9.9626 Tf 1.02 0 0 1 398.111 327.066 Tm [(ar)18(gument)]TJ/F78 9.9626 Tf 1.02 0 0 1 444.411 327.066 Tm [(must)-250(not)]TJ/F84 9.9626 Tf 1.02 0 0 1 483.929 327.066 Tm [(be)]TJ 1 0 0 1 175.611 315.11 Tm [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 175.611 242.383 cm +0 0 318.804 60.772 re f Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(spfree)-250(\227)-250(Frees)-250(a)-250(sparse)-250(matrix)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 178.6 292.495 Td [(call)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_spfree\050a,)-525(desc_a,)-525(info\051)]TJ + [-525(psb_nrm2\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(nrm2_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(a)]TJ + -14.122 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 
Tf -23.999 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 577.576 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 577.576 Td [(type)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 37.658 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 557.85 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 557.651 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F233 8.9664 Tf -37.658 -10.959 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 509.83 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 509.83 Td [(type)]TJ + [-525(psb_nrm2\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 
-258.11 -21.918 Td [(On)-250(Return)]TJ + 23.536 -10.958 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 0 -19.925 Td [(info)]TJ + [(nrm2_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -329.728 Td [(88)]TJ +/F84 9.9626 Tf 103.537 -158.222 Td [(134)]TJ 0 g 0 G ET endstream endobj -1486 0 obj +1888 0 obj << -/Length 3858 +/Length 5618 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.11)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(7.16)-1000(psb)]TJ ET q 1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(sprn)-254(\227)-255(Reinit)-254(sparse)-255(matrix)-254(structure)-254(for)-255(psblas)-254(rou-)]TJ -24.221 -13.948 Td [(tines.)]TJ -0 g 0 G +/F75 11.9552 Tf 156.993 706.129 Td [(snd)-250(\227)-250(Send)-250(data)]TJ 0 g 0 G -/F59 9.9626 Tf -32.877 -18.964 Td [(call)-525(psb_sprn\050a,)-525(decsc_a,)-525(info,)-525(clear\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.917 Td [(T)90(ype:)]TJ +/F145 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_snd\050ctxt,)-525(dat,)-525(dst,)-525(m\051)]TJ/F84 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(sends)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F75 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.926 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 29.828 0 Td 
[(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -19.925 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(r)18(einitialized.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 563.828 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 563.628 Td [(Tspmat)]TJ -ET -q -1 0 0 1 344.406 563.828 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 347.544 563.628 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 543.902 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 543.703 Td [(a)]TJ +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td 
[(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 496.082 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 495.882 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 496.082 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 495.882 Td [(type)]TJ +/F75 9.9626 Tf -24.907 -19.926 Td [(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 19.059 0 Td [(The)-250(data)-250(to)-250(be)-250(sent.)]TJ 5.848 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.013 0 0 1 124.802 489.905 Tm [(Speci\002ed)-247(as:)-308(an)-247(integer)73(,)-248(r)18(eal)-248(or)-247(complex)-247(variable,)-248(which)-248(may)-247(be)-247(a)-248(scalar)73(,)]TJ 0.984 0 0 1 124.802 477.95 Tm [(or)-254(a)-254(rank)-255(1)-254(or)-254(2)-254(array)113(,)-255(or)-254(a)-254(character)-254(or)-254(logical)-254(scalar)75(.)-570(T)92(ype,)-255(kind)-254(and)-254(rank)]TJ 0.985 0 0 1 124.802 465.994 Tm [(must)-253(agr)18(ee)-253(on)-254(sender)-253(and)-254(r)19(eceiver)-254(pr)19(ocess;)-254(if)]TJ/F78 9.9626 Tf 1 0 0 1 322.39 465.994 Tm [(m)]TJ/F84 9.9626 Tf 0.985 0 0 1 332.752 465.994 Tm [(is)-253(not)-254(speci\002ed,)-253(size)-254(must)]TJ 1 0 0 1 124.802 454.039 Tm [(agr)18(ee)-250(as)-250(well.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(clear)]TJ +/F75 9.9626 Tf -24.907 -19.925 Td [(dst)]TJ 0 g 0 G -/F54 9.9626 Tf 26.561 0 Td [(Choose)-250(whether)-250(to)-250(zer)18(o)-250(out)-250(matrix)-250(coef)18(\002cients)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 
Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)-310(tr)8(ue.)]TJ +/F84 9.9626 Tf 18.81 0 Td [(Destination)-250(pr)18(ocess.)]TJ 6.097 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F148 10.3811 Tf 138.85 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(d)-25(s)-25(t)]TJ/F148 10.3811 Tf 15.689 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.503 0 Td [(\000)]TJ/F84 9.9626 Tf 10.131 0 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.917 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -241.89 -31.88 Td [(m)]TJ 0 g 0 G +/F84 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F84 9.9626 Tf 40.946 0 Td [(.)]TJ -67.725 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F148 10.3811 Tf 138.85 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(m)]TJ/F148 10.3811 Tf 10.767 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F192 10.3811 Tf 15.94 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F84 9.9626 Tf 13.494 0 Td 
[(,)-167(1)]TJ/F192 10.3811 Tf 9.257 0 Td [(\051)]TJ/F84 9.9626 Tf 4.15 0 Td [(.)]TJ 1.02 0 0 1 124.304 294.637 Tm [(When)]TJ/F78 9.9626 Tf 1 0 0 1 153.769 294.637 Tm [(d)-40(a)-25(t)]TJ/F84 9.9626 Tf 1.02 0 0 1 169.747 294.637 Tm [(is)-244(a)-245(rank)-244(2)-245(array)109(,)-244(speci\002es)-245(the)-244(number)-244(of)-245(r)18(ows)-244(to)-245(be)-244(sent)-245(inde-)]TJ 0.98 0 0 1 124.503 282.682 Tm [(pendently)-236(of)-236(the)-236(leading)-236(dimension)]TJ/F78 9.9626 Tf 1 0 0 1 280.369 282.682 Tm [(s)-18(i)-32(z)-25(e)]TJ/F192 10.3811 Tf 15.94 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F84 9.9626 Tf 0.98 0 0 1 314.077 282.682 Tm [(,)-170(1)]TJ/F192 10.3811 Tf 1 0 0 1 323.185 282.682 Tm [(\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 327.334 282.682 Tm [(;)-242(must)-236(have)-236(the)-236(same)-236(value)]TJ 1 0 0 1 124.802 270.727 Tm [(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ 0 g 0 G - 0 -19.926 Td [(info)]TJ +/F75 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F75 11.9552 Tf 0 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F84 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(update)-250(state.)]TJ + 1.014 0 0 1 124.493 206.966 Tm [(This)-247(subr)18(outine)-248(impl)1(ies)-248(a)-247(synchr)18(onization,)-247(but)-247(only)-248(between)-247(the)-247(calling)]TJ 1 0 0 1 
124.503 195.011 Tm [(pr)18(ocess)-250(and)-250(the)-250(destination)-250(pr)18(ocess)]TJ/F78 9.9626 Tf 158.31 0 Td [(d)-25(s)-25(t)]TJ/F84 9.9626 Tf 12.797 0 Td [(.)]TJ 0 g 0 G - 154.421 -206.192 Td [(89)]TJ + -31.331 -104.573 Td [(135)]TJ 0 g 0 G ET endstream endobj -1493 0 obj +1893 0 obj << -/Length 6166 +/Length 5630 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.12)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(7.17)-1000(psb)]TJ ET q 1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geall\050x,)-525(desc_a,)-525(info[,)-525(dupl,)-525(bldmode,)-525(n,)-525(lb]\051)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(rcv)-250(\227)-250(Receive)-250(data)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.627 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F145 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_rcv\050ctxt,)-525(dat,)-525(src,)-525(m\051)]TJ/F84 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(r)18(eceives)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.01 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.009 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 629.719 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 629.519 Td [(a)]TJ +/F75 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 
136.328 0 Td [(psb)]TJ -ET -q -1 0 0 1 328.257 581.898 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 331.395 581.699 Td [(desc)]TJ -ET -q -1 0 0 1 352.944 581.898 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 356.083 581.699 Td [(type)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -226.299 -30.965 Td [(n)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(the)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F52 9.9626 Tf 295.578 0 Td [(x)]TJ/F54 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.956 Td [(rank-1)-250(array)111(.)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.009 Td [(lb)]TJ +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.071 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td 
[(The)-237(lower)-238(bound)-237(for)-238(the)-237(column)-238(index)-237(range)-237(of)-238(the)-237(dense)-238(matrix)-237(to)-238(be)-237(allo-)]TJ 10.52 -11.955 Td [(cated.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F52 9.9626 Tf 295.578 0 Td [(x)]TJ/F54 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.955 Td [(rank-1)-250(array)111(.)]TJ +/F75 9.9626 Tf -24.907 -19.926 Td [(src)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.009 Td [(dupl)]TJ +/F84 9.9626 Tf 17.703 0 Td [(Sour)18(ce)-250(pr)18(ocess.)]TJ 7.204 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.071 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F148 10.3811 Tf 138.85 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(s)-15(r)-17(c)]TJ/F148 10.3811 Tf 15.14 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F190 10.3811 Tf 13.504 0 Td [(\000)]TJ/F84 9.9626 Tf 10.131 0 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 
Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F59 9.9626 Tf 164.942 0 Td [(psb_dupl_ovwrt_)]TJ/F54 9.9626 Tf 78.455 0 Td [(,)]TJ/F59 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F54 9.9626 Tf 67.994 0 Td [(;)]TJ/F59 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F54 9.9626 Tf 70.485 0 Td [(has)-250(no)-250(ef)18(fect.)]TJ +/F75 9.9626 Tf -241.342 -31.881 Td [(m)]TJ 0 g 0 G -/F51 9.9626 Tf -95.391 -19.009 Td [(bldmode)]TJ +/F84 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.522 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F84 9.9626 Tf 40.946 0 Td [(.)]TJ -67.725 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F148 10.3811 Tf 138.85 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(m)]TJ/F148 10.3811 Tf 10.767 0 Td [(<)]TJ/F192 10.3811 Tf 8.319 0 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F192 10.3811 Tf 15.94 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F84 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F192 10.3811 Tf 9.257 0 Td [(\051)]TJ/F84 9.9626 Tf 4.149 0 Td [(.)]TJ 1.02 0 0 1 175.113 398.249 Tm [(When)]TJ/F78 9.9626 Tf 1 0 0 1 204.579 398.249 Tm [(d)-40(a)-25(t)]TJ/F84 9.9626 Tf 1.02 0 0 1 220.557 398.249 Tm [(is)-244(a)-245(rank)-244(2)-244(array)108(,)-244(speci\002es)-245(the)-244(number)-244(of)-245(r)18(ows)-244(to)-245(be)-244(sent)-244(inde-)]TJ 0.98 0 0 1 175.313 386.293 Tm [(pendently)-236(of)-236(the)-236(leading)-236(dimension)]TJ/F78 9.9626 Tf 1 0 0 1 331.178 386.293 Tm [(s)-18(i)-32(z)-25(e)]TJ/F192 10.3811 Tf 15.94 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F84 9.9626 Tf 0.98 0 0 1 364.887 386.293 Tm [(,)-170(1)]TJ/F192 10.3811 Tf 1 0 0 1 
373.994 386.293 Tm [(\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 378.144 386.293 Tm [(;)-242(must)-236(have)-236(the)-236(same)-236(value)]TJ 1 0 0 1 175.611 374.338 Tm [(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 45.937 0 Td [(Whether)-372(to)-372(keep)-371(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F59 9.9626 Tf 128.288 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 94.146 0 Td [(.)]TJ -331.544 -11.955 Td [(Default:)]TJ/F59 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(.)]TJ +/F75 9.9626 Tf -24.906 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -168.028 -19.627 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -19.925 Td [(dat)]TJ 0 g 0 G - 0 -19.009 Td [(x)]TJ +/F84 9.9626 Tf 19.058 0 Td [(The)-250(data)-250(to)-250(be)-250(r)18(eceived.)]TJ 5.848 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 1.013 0 0 1 175.611 284.674 Tm [(Speci\002ed)-247(as:)-308(an)-247(integer)73(,)-248(r)18(eal)-248(or)-247(complex)-247(variable,)-248(which)-248(may)-247(be)-247(a)-248(scalar)73(,)]TJ 0.984 0 0 1 175.611 272.719 Tm 
[(or)-254(a)-254(rank)-255(1)-254(or)-254(2)-254(array)113(,)-255(or)-254(a)-254(character)-254(or)-254(logical)-254(scalar)75(.)-570(T)92(ype,)-255(kind)-254(and)-254(rank)]TJ 0.985 0 0 1 175.611 260.764 Tm [(must)-253(agr)18(ee)-254(on)-253(sender)-253(and)-254(r)19(eceiver)-254(pr)19(ocess;)-254(if)]TJ/F78 9.9626 Tf 1 0 0 1 373.199 260.764 Tm [(m)]TJ/F84 9.9626 Tf 0.985 0 0 1 383.561 260.764 Tm [(is)-253(not)-254(speci\002ed,)-253(size)-254(must)]TJ 1 0 0 1 175.611 248.809 Tm [(agr)18(ee)-250(as)-250(well.)]TJ/F75 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-273(as:)-357(a)-273(rank)-274(one)-273(or)-274(two)-273(array)-273(with)-274(the)-273(ALLOCA)74(T)74(ABLE)-273(attribute)]TJ 0 -11.955 Td [(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 86.634 0 Td [(psb)]TJ -ET -q -1 0 0 1 278.564 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 281.702 132.281 Td [(T)]TJ -ET -q -1 0 0 1 287.56 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 290.699 132.281 Td [(vect)]TJ -ET -q -1 0 0 1 312.247 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 315.386 132.281 Td [(type)]TJ +/F84 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ + 1.014 0 0 1 175.303 206.966 Tm [(This)-247(subr)18(outine)-247(implies)-248(a)-247(synchr)18(onization,)-247(but)-247(only)-248(between)-247(the)-247(calling)]TJ 1 0 0 1 175.313 195.011 Tm 
[(pr)18(ocess)-250(and)-250(the)-250(sour)18(ce)-250(pr)18(ocess)]TJ/F78 9.9626 Tf 137.537 0 Td [(s)-15(r)-17(c)]TJ/F84 9.9626 Tf 12.249 0 Td [(.)]TJ 0 g 0 G - -18.728 -41.843 Td [(90)]TJ + -10.011 -104.573 Td [(136)]TJ 0 g 0 G ET endstream endobj -1497 0 obj +1900 0 obj << -/Length 925 +/Length 6870 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F75 14.3462 Tf 99.895 705.784 Td [(8)-1000(Error)-250(handling)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.587 683.082 Tm [(The)-317(PSBLAS)-317(library)-317(err)17(or)-317(handling)-317(policy)-317(has)-317(been)-317(completely)-318(r)18(ewritten)-317(in)]TJ 1.02 0 0 1 99.616 671.127 Tm [(version)-315(2.0.)-514(The)-314(idea)-315(behind)-315(the)-315(design)-315(of)-315(this)-315(new)-315(err)17(or)-314(handling)-315(strategy)]TJ 1.02 0 0 1 99.895 659.172 Tm [(is)-261(to)-260(keep)-261(err)17(or)-260(messages)-261(on)-261(a)-260(stack)-261(allowing)-261(the)-260(user)-261(to)-261(trace)-261(back)-260(up)-261(to)-261(the)]TJ 1.02 0 0 1 99.596 647.217 Tm [(point)-270(wher)17(e)-270(the)-271(\002rst)-270(err)17(or)-270(message)-270(has)-271(been)-270(generated.)-380(Every)-271(r)18(outine)-271(in)-270(the)]TJ 0.98 0 0 1 99.895 635.261 Tm [(PSBLAS-2.0)-245(library)-245(has,)-247(as)-245(last)-245(non-optional)-245(ar)18(gument,)-247(an)-245(integer)]TJ/F145 9.9626 Tf 1 0 0 1 384.203 635.261 Tm [(info)]TJ/F84 9.9626 Tf 0.98 0 0 1 407.517 635.261 Tm [(variable;)]TJ 1 0 0 1 99.477 623.306 Tm 
[(whenever)74(,)-251(inside)-250(the)-251(r)18(outine,)-250(an)-251(err)18(or)-250(is)-251(detected,)-250(this)-251(variable)-250(is)-251(set)-250(to)-251(a)-250(value)]TJ 1.02 0 0 1 99.895 611.351 Tm [(corr)18(esponding)-337(to)-336(a)-337(speci\002c)-336(err)17(or)-336(code.)-578(Then)-337(this)-336(err)17(or)-336(code)-337(is)-336(also)-337(pushed)]TJ 1.02 0 0 1 99.895 599.396 Tm [(on)-299(the)-299(err)17(or)-299(stack)-299(and)-299(then)-299(either)-299(contr)17(ol)-299(is)-299(r)18(eturned)-299(to)-300(the)-299(caller)-299(r)18(outine)-299(or)]TJ 1.005 0 0 1 99.895 587.441 Tm [(the)-249(execution)-249(is)-249(aborted,)-249(depending)-249(on)-250(the)-249(users)-249(choice.)-309(At)-249(the)-249(time)-249(when)-250(the)]TJ 0.98 0 0 1 99.895 575.486 Tm [(execution)-218(is)-218(aborted,)-226(an)-218(err)18(or)-218(message)-218(is)-218(printed)-218(on)-218(standar)18(d)-218(output)-218(with)-218(a)-218(level)]TJ 0.98 0 0 1 99.895 563.53 Tm [(of)-247(verbosity)-247(than)-246(can)-247(be)-247(chosen)-247(by)-247(the)-246(user)75(.)-313(If)-247(the)-247(execution)-247(is)-247(not)-246(aborted,)-249(then,)]TJ 0.98 0 0 1 99.895 551.575 Tm [(the)-256(caller)-256(r)18(outine)-256(checks)-256(the)-256(value)-256(r)18(eturned)-256(in)-256(the)]TJ/F145 9.9626 Tf 1 0 0 1 316.265 551.575 Tm [(info)]TJ/F84 9.9626 Tf 0.98 0 0 1 339.687 551.575 Tm [(variable)-256(and,)-256(if)-257(not)-256(zer)19(o,)]TJ 1.02 0 0 1 99.895 539.62 Tm [(an)-247(err)17(or)-247(condition)-247(is)-247(raised.)-311(This)-247(pr)18(ocess)-247(continues)-248(on)-247(all)-247(the)-247(levels)-248(o)1(f)-248(nested)]TJ 1 0 0 1 99.895 527.665 Tm [(calls)-250(until)-250(the)-250(level)-250(wher)18(e)-250(the)-250(user)-250(decides)-250(to)-250(abort)-250(the)-250(pr)18(ogram)-250(execution.)]TJ 1.018 0 0 1 114.839 515.71 Tm [(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-246(5)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ + [-246(shows)-245(the)-246(layout)-246(of)-245(a)-246(generic)]TJ/F145 9.9626 Tf 1 0 0 
1 286.762 515.71 Tm [(psb_foo)]TJ/F84 9.9626 Tf 1.018 0 0 1 325.866 515.71 Tm [(r)18(outine)-246(with)-246(r)18(espect)-246(to)-245(the)]TJ 1.004 0 0 1 99.895 503.755 Tm [(PSBLAS-2.0)-250(err)18(or)-250(handling)-250(policy)111(.)-311(It)-250(is)-250(possible)-250(to)-250(see)-249(how)91(,)-250(whenever)-250(an)-250(err)18(or)]TJ 1.02 0 0 1 99.895 491.799 Tm [(condition)-246(is)-247(det)1(ected,)-247(the)]TJ/F145 9.9626 Tf 1 0 0 1 214.838 491.799 Tm [(info)]TJ/F84 9.9626 Tf 1.02 0 0 1 238.262 491.799 Tm [(variable)-246(is)-246(set)-247(to)-246(the)-246(corr)17(esponding)-246(err)18(or)-246(code)]TJ 1.02 0 0 1 99.477 479.844 Tm [(which)-277(is,)-285(then,)-285(pushed)-277(on)-277(top)-277(of)-277(the)-277(stack)-277(by)-277(means)-277(of)-277(the)]TJ/F145 9.9626 Tf 1 0 0 1 365.652 479.844 Tm [(psb_errpush)]TJ/F84 9.9626 Tf 1.02 0 0 1 423.186 479.844 Tm [(.)-400(An)]TJ 0.992 0 0 1 99.895 467.889 Tm [(err)18(or)-251(condition)-251(may)-251(be)-251(dir)18(ectly)-251(detected)-251(inside)-251(a)-251(r)18(outine)-251(or)-252(i)1(ndir)18(ectly)-251(checking)]TJ 1.02 0 0 1 99.895 455.934 Tm [(the)-333(err)18(or)-333(code)-333(r)17(eturned)-333(r)18(eturned)-333(by)-333(a)-333(called)-333(r)18(outine.)-568(Whene)1(ver)-333(an)-333(err)17(or)-333(is)]TJ 0.98 0 0 1 99.895 443.979 Tm [(encounter)18(ed,)-240(after)-238(it)-237(has)-237(been)-237(pushed)-238(on)-237(stack,)-241(the)-237(pr)19(ogram)-238(execution)-237(skips)-237(to)-237(a)]TJ 0.98 0 0 1 99.596 432.024 Tm [(point)-252(wher)19(e)-252(the)-251(err)18(or)-251(condition)-252(is)-251(handled;)-253(the)-252(err)19(or)-252(condition)-251(is)-252(handled)-251(either)]TJ 1.015 0 0 1 99.895 420.068 Tm [(by)-246(r)18(eturning)-245(contr)17(ol)-245(to)-246(t)1(he)-246(caller)-245(r)17(outine)-245(or)-246(by)-245(calling)-246(the)]TJ/F145 9.9626 Tf 1 0 0 1 356.419 420.068 Tm [(psb\134_error)]TJ/F84 9.9626 Tf 1.015 0 0 1 411.205 420.068 Tm [(r)18(outine)]TJ 1.02 0 0 1 99.477 408.113 Tm 
[(which)-255(prints)-254(the)-255(content)-255(of)-255(the)-254(err)17(or)-254(stack)-255(and)-255(aborts)-255(the)-254(pr)17(ogram)-254(execution,)]TJ 0.98 0 0 1 99.895 396.158 Tm [(accor)18(ding)-242(to)-242(the)-242(choice)-241(made)-242(by)-242(the)-242(user)-242(with)]TJ/F145 9.9626 Tf 1 0 0 1 299.812 396.158 Tm [(psb_set_erraction)]TJ/F84 9.9626 Tf 0.98 0 0 1 388.728 396.158 Tm [(.)-312(The)-242(default)]TJ 1.02 0 0 1 99.895 384.203 Tm [(is)-314(to)-314(print)-314(the)-313(err)17(or)-314(and)-313(terminate)-314(the)-314(pr)18(ogram,)-331(but)-314(the)-314(user)-314(may)-314(choose)-314(to)]TJ 1 0 0 1 99.895 372.248 Tm [(handle)-250(the)-250(err)18(or)-250(explicitly)111(.)]TJ 0.98 0 0 1 114.839 360.293 Tm [(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-224(6)]TJ 0 g 0 G - [-500(Using)]TJ/F59 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 97.28 0 Td [(is)-314(likely)-315(to)-315(cau)1(se)-315(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ + [-225(r)18(e)1(ports)-225(a)-225(sample)-224(err)18(or)-225(message)-224(generated)-225(by)-224(the)-225(PSBLAS-2.0)-224(library)113(.)]TJ 1.02 0 0 1 99.587 348.337 Tm [(This)-253(err)18(or)-254(has)-253(been)-253(generated)-253(by)-253(the)-253(fact)-253(that)-253(the)-254(user)-253(has)-253(chosen)-253(the)-253(invalid)]TJ 0.98 0 0 1 98.401 336.382 Tm [(\223FOO\224)-255(storage)-254(format)-255(to)-254(r)18(epr)19(esent)-255(the)-254(sparse)-255(matrix.)-316(Fr)18(om)-254(this)-255(err)19(or)-255(message)-254(it)]TJ 0.98 0 0 1 99.895 324.427 Tm [(is)-217(possible)-218(to)-217(see)-217(that)-218(the)-217(err)18(or)-217(has)-217(been)-218(detected)-217(inside)-217(the)]TJ/F145 9.9626 Tf 1 0 0 1 352.938 324.427 Tm [(psb_cest)]TJ/F84 9.9626 Tf 0.98 0 0 1 396.903 324.427 Tm [(subr)18(outine)]TJ 1 0 0 1 99.895 312.472 Tm [(called)-250(by)]TJ/F145 9.9626 Tf 42.092 0 Td [(psb_spasb)]TJ/F84 9.9626 Tf 49.564 0 Td 
[(...)-310(by)-250(pr)18(ocess)-250(0)-250(\050i.e.)-310(the)-250(r)18(oot)-250(pr)18(ocess\051.)]TJ 0 g 0 G - 141.968 -514.072 Td [(91)]TJ + 72.728 -222.034 Td [(137)]TJ 0 g 0 G ET endstream endobj -1505 0 obj +1906 0 obj << -/Length 6336 +/Length 10395 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.13)-1000(psb)]TJ -ET +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 150.705 417.212 cm +0 0 343.711 292.902 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F51 11.9552 Tf 207.803 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(routine)]TJ +/F145 9.9626 Tf 153.694 698.757 Td [(subroutine)]TJ 0 g 0 G + [-525(psb_foo\050some)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geins\050m,)-525(irw,)-525(val,)-525(x,)-525(desc_a,)-525(info)-525([,local]\051)]TJ + [-525(args,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -20.57 Td [(T)90(ype:)]TJ + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 9.9626 Tf 15.691 -11.956 Td [(!...)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.386 Td [(On)-250(Entry)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 0 -11.955 Td [(if)]TJ 0 g 0 G - 0 -19.386 Td [(m)]TJ + [(\050error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows)-250(in)]TJ/F52 9.9626 Tf 86.569 0 Td [(v)-40(a)-25(l)]TJ/F54 9.9626 Tf 15.736 0 Td [(to)-250(be)-250(inserted.)]TJ -91.237 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 
-11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ + [-525(detected\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -19.386 Td [(irw)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Indices)-381(of)-382(the)-382(r)18(ows)-381(to)-382(be)-381(inserted.)-705(Speci\002cally)111(,)-414(r)18(ow)]TJ/F52 9.9626 Tf 239.84 0 Td [(i)]TJ/F54 9.9626 Tf 6.765 0 Td [(of)]TJ/F52 9.9626 Tf 12.683 0 Td [(v)-40(a)-25(l)]TJ/F54 9.9626 Tf 17.046 0 Td [(will)-381(be)-382(in-)]TJ -271.901 -11.955 Td [(serted)-344(into)-344(the)-344(local)-344(r)18(ow)-344(corr)18(esponding)-344(to)-344(the)-344(global)-344(r)18(ow)-344(index)]TJ/F52 9.9626 Tf 290.218 0 Td [(i)-22(r)-35(w)]TJ/F85 10.3811 Tf 14.654 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)]TJ -316.314 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -19.386 Td [(val)]TJ + 15.691 -11.955 Td [(info)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 18.819 0 Td [(the)-250(dense)-250(submatrix)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)-310(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ + [(errcode1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -19.386 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 414.446 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 414.247 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 366.626 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 366.426 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 366.626 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 366.426 Td [(type)]TJ + [-525(psb_errpush\050)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [(\015psb_foo\015)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -19.386 Td [(local)]TJ + [-525(errcode1\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(Whether)-240(the)-240(entries)-241(in)-240(the)-240(index)-240(vector)]TJ/F59 9.9626 Tf 173.162 0 Td [(irw)]TJ/F54 9.9626 Tf 15.692 0 Td [(,)-242(ar)18(e)-240(alr)18(eady)-241(i)1(n)-241(local)-240(number)18(-)]TJ -190.508 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 
9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F59 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F54 9.9626 Tf 36.613 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(goto)]TJ 0 g 0 G -/F51 9.9626 Tf -224.197 -20.57 Td [(On)-250(Return)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ 0 g 0 G - 0 -19.387 Td [(x)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(output)-250(dense)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(rank)-190(one)-190(or)-190(two)-190(array)-190(or)-190(an)-190(object)-190(of)-190(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 242.569 0 Td [(psb)]TJ -ET -q -1 0 0 1 434.498 211.642 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 437.636 211.443 Td [(T)]TJ -ET -q -1 0 0 1 443.494 211.642 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 446.633 211.443 Td [(vect)]TJ -ET -q -1 0 0 1 468.182 211.642 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 471.32 211.443 Td [(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(end)-525(if)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)]TJ -316.63 -11.955 Td [(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -31.342 Td [(info)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 9.9626 Tf 0 -11.956 Td [(!...)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 
27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -29.888 Td [(92)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 0 -11.955 Td [(call)]TJ 0 g 0 G -ET - -endstream -endobj -1509 0 obj -<< -/Length 539 ->> -stream + [-525(psb_bar\050some)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(args,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ 0 g 0 G - [-500(Dense)-250(vectors/matrices)-250(do)-250(not)-250(have)-250(an)-250(associated)-250(state;)]TJ + [(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(2.)]TJ + [-525(.ne.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(Duplicate)-326(entries)-326(ar)18(e)-325(either)-326(overwritten)-326(or)-326(added,)-345(ther)18(e)-325(is)-326(no)-326(pr)18(ovision)]TJ 12.453 -11.955 Td [(for)-250(raising)-250(an)-250(err)18(or)-250(condition.)]TJ + [-525(zero\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -563.885 Td [(93)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -ET - -endstream -endobj -1518 0 obj -<< -/Length 6120 ->> -stream +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG 0 g 0 G + 15.691 -11.955 Td [(info)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.14)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ + [(errcode2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 
0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-525(psb_errpush\050)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [(\015psb_foo\015)]TJ 0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geasb\050x,)-525(desc_a,)-525(info,)-525(mold\051)]TJ + [-525(errcode2\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.956 Td [(goto)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 625.596 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 625.397 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(end)-525(if)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ -ET -q -1 0 0 1 328.257 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 331.395 577.576 Td [(desc)]TJ -ET -q -1 0 0 1 352.944 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 356.083 577.576 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 9.9626 Tf 0 -11.955 Td [(!...)]TJ 0 g 0 G -/F51 9.9626 Tf -226.299 -31.88 Td [(mold)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 
RG +/F145 9.9626 Tf -15.691 -11.955 Td [(9999)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.876 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F59 9.9626 Tf 199.087 0 Td [(psb)]TJ -ET -q -1 0 0 1 391.016 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 394.155 497.875 Td [(T)]TJ -ET -q -1 0 0 1 400.012 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 403.151 497.875 Td [(base)]TJ -ET -q -1 0 0 1 424.7 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 427.838 497.875 Td [(vect)]TJ -ET -q -1 0 0 1 449.387 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 452.525 497.875 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)]TJ/F52 9.9626 Tf 97.12 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 42.899 0 Td [(psb)]TJ -ET -q -1 0 0 1 339.644 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 342.783 485.92 Td [(T)]TJ -ET -q -1 0 0 1 348.641 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 351.779 485.92 Td [(vect)]TJ -ET -q -1 0 0 1 373.328 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 376.466 485.92 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(continue)]TJ 0 g 0 G -/F51 9.9626 Tf -246.682 -21.918 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 
0.00 0.44 0.13 RG + 15.691 -11.955 Td [(if)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(x)]TJ + [-525(\050err_act)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-259(rank)-258(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 50.55 0 Td [(psb)]TJ -ET -q -1 0 0 1 242.48 384.5 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 245.618 384.301 Td [(T)]TJ -ET -q -1 0 0 1 251.476 384.5 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 254.614 384.301 Td [(vect)]TJ -ET -q -1 0 0 1 276.163 384.5 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 279.301 384.301 Td [(type)]TJ + [-525(.eq.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ + [-525(act_abort\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -149.518 -31.881 Td [(info)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td 
[(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 15.691 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ + [-525(psb_error\050icontxt\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(On)-227(entry)-226(to)-227(this)-226(r)18(outine)-227(th)1(e)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F59 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.956 Td [(return)]TJ 0 g 0 G - -62.017 -19.925 Td [(2.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(else)]TJ 0 g 0 G - [-500(If)-431(the)]TJ/F59 9.9626 Tf 41.544 0 Td [(bldmode=psb_matbld_remote_)]TJ/F54 9.9626 Tf 140.287 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-273(the)-272(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.955 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-250(element)-250(applications,)-250(with)]TJ/F59 9.9626 Tf 144.277 0 Td [(dupl=psb_dupl_add_)]TJ/F54 9.9626 Tf 94.146 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(return)]TJ 0 g 0 G - -96.455 -104.573 Td [(94)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(end)-525(if)]TJ 0 g 0 G -ET - -endstream -endobj -1526 0 obj -<< -/Length 3224 ->> -stream +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -23.91 Td [(end)-525(subroutine)]TJ +0 g 0 G + [-525(psb_foo)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.15)-1000(psb)]TJ +/F84 9.9626 Tf 1.018 0 0 1 
150.705 382.059 Tm [(Listing)-246(5:)-306(The)-247(layout)-246(of)-246(a)-247(generic)]TJ/F145 9.9626 Tf 1 0 0 1 299.955 382.059 Tm [(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 316.274 382.258 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(gefree)-250(\227)-250(Frees)-250(a)-250(dense)-250(matrix)]TJ +/F145 9.9626 Tf 319.412 382.059 Td [(foo)]TJ/F84 9.9626 Tf 1.018 0 0 1 337.602 382.059 Tm [(r)18(outine)-247(with)-246(r)18(espect)-247(to)-246(PSBLAS-2.0)]TJ 1 0 0 1 150.705 370.104 Tm [(err)18(or)-250(handling)-250(policy)111(.)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG +/F145 9.9626 Tf 0 -19.609 Td [(==========================================================)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gefree\050x,)-525(desc_a,)-525(info\051)]TJ + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(x)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-258(rank)-259(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ -0 0 1 
rg 0 0 1 RG -/F59 9.9626 Tf 50.55 0 Td [(psb)]TJ -ET -q -1 0 0 1 191.67 565.82 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 194.809 565.621 Td [(T)]TJ -ET -q -1 0 0 1 200.666 565.82 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 203.805 565.621 Td [(vect)]TJ -ET -q -1 0 0 1 225.354 565.82 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 228.492 565.621 Td [(type)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(4010)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G -/F51 9.9626 Tf -149.518 -31.881 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 533.94 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 533.74 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.327 0 Td [(psb)]TJ -ET -q -1 0 0 1 277.448 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 280.586 485.92 Td [(desc)]TJ -ET -q -1 0 0 1 302.135 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 305.273 485.92 Td [(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -226.3 -33.873 Td [(On)-250(Return)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(df_sample)]TJ 0 -11.955 Td [(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(info)]TJ + [-525(from)]TJ +0.73 
0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-525(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -293.863 Td [(95)]TJ + [-525(to)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1530 0 obj -<< -/Length 3218 ->> -stream + [-525(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.16)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(dist)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.955 Td [(==========================================================)]TJ +0 g 0 G + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gelp\050trans,)-525(iperm,)-525(x,)-525(info\051)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G - 0 -19.925 Td [(trans)]TJ +0.25 0.63 0.44 rg 0.25 
0.63 0.44 RG + [(4010)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(A)-250(character)-250(that)-250(speci\002es)-250(whether)-250(to)-250(permute)]TJ/F52 9.9626 Tf 203.748 0 Td [(A)]TJ/F54 9.9626 Tf 9.808 0 Td [(or)]TJ/F52 9.9626 Tf 12.488 0 Td [(A)]TJ/F52 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.616 Td [(.)]TJ -241.716 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(single)-250(character)-250(with)-250(value)-250('N')-250(for)]TJ/F52 9.9626 Tf 218.195 0 Td [(A)]TJ/F54 9.9626 Tf 9.808 0 Td [(or)-250('T')-250(for)]TJ/F52 9.9626 Tf 41.807 0 Td [(A)]TJ/F52 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.616 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G -/F51 9.9626 Tf -307.628 -31.88 Td [(iperm)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 32.099 0 Td [(An)-250(integer)-250(array)-250(containing)-250(permutation)-250(information.)]TJ -7.193 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(one-dimensional)-250(array)111(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(x)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(permuted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td 
[(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(or)-250(two)-250(dimensional)-250(array)111(.)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -33.873 Td [(On)-250(Return)]TJ + [-525(mat_distv)]TJ 0 -11.955 Td [(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(from)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(info)]TJ + [-525(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-525(to)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -226.117 Td [(96)]TJ + [-525(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1535 0 obj -<< -/Length 6238 ->> -stream +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(psb_spasb)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.956 Td [(==========================================================)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.17)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(glob)]TJ -ET -q -1 0 0 1 182.29 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 185.877 706.129 Td [(to)]TJ -ET -q -1 0 0 1 197.222 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 200.809 706.129 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 
0.73 0.73 RG 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G -/F59 9.9626 Tf -100.914 -18.964 Td [(call)-525(psb_glob_to_loc\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ 0 -11.955 Td [(call)-525(psb_glob_to_loc\050x,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.109 Td [(T)90(ype:)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.602 Td [(On)-250(Entry)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(4010)]TJ 0 g 0 G - 0 -19.601 Td [(x)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -31.557 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 535.72 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 535.52 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ 
-41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 487.899 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 487.7 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 487.899 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 487.7 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_spasb)]TJ 0 -11.955 Td [(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [-525(from)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -19.602 Td [(iact)]TJ + [-525(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ + [-525(to)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -316.929 -19.601 Td [(owned)]TJ + [-525(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 35.975 0 Td [(Spec\002es)-250(valid)-250(range)-250(of)-250(input)-250(Scope:)]TJ/F51 9.9626 Tf 159.54 0 Td [(global)]TJ/F54 9.9626 Tf -170.608 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 
Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(If)-320(tr)8(ue,)-337(then)-320(only)-320(indices)-320(strictly)-320(owned)-320(by)-320(the)-319(curr)18(ent)-320(pr)18(ocess)-320(ar)18(e)-320(con-)]TJ 0 -11.955 Td [(sider)18(ed)-250(valid,)-250(if)-250(false)-250(then)-250(halo)-250(indices)-250(ar)18(e)-250(also)-250(accepted.)-310(Default:)-310(false.)]TJ +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(psb_cest)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.109 Td [(On)-250(Return)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.955 Td [(==========================================================)]TJ 0 g 0 G + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.601 Td [(x)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(If)]TJ/F52 9.9626 Tf 9.727 0 Td [(y)]TJ/F54 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F52 9.9626 Tf 88.385 0 Td [(x)]TJ/F54 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.602 Td [(y)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(If)]TJ/F52 9.9626 Tf 9.521 0 Td [(y)]TJ/F54 9.9626 Tf 7.827 0 Td [(is)-273(pr)18(esent,)-279(then)]TJ/F52 9.9626 Tf 70.133 0 Td [(y)]TJ/F54 9.9626 Tf 7.827 0 Td [(is)-273(overwritten)-273(with)-273(the)-273(translated)-274(integer)-273(indices,)-279(and)]TJ/F52 9.9626 Tf 
-80.628 -11.955 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F51 9.9626 Tf 112.557 0 Td [(global)]TJ/F54 9.9626 Tf -120.547 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.602 Td [(info)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.108 Td [(Notes)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(136)]TJ 0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(97)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G -ET - -endstream -endobj -1540 0 obj -<< -/Length 672 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 163.158 706.129 Td [(1.)]TJ + [-525(psb_cest)]TJ 0 -11.955 Td [(Format)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(If)-272(an)-273(input)-272(index)-272(is)-273(out)-272(of)-273(range)1(,)-278(then)-273(the)-272(corr)18(esponding)-272(output)-273(index)-272(is)]TJ 12.453 -11.955 Td [(set)-250(to)-250(a)-250(negative)-250(number;)]TJ + [-525(FOO)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 
-12.453 -19.926 Td [(2.)]TJ + [-525(is)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(unknown)]TJ +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.956 Td [(==========================================================)]TJ +0 g 0 G + 0 -11.955 Td [(Aborting...)]TJ 0 g 0 G - [-500(The)-416(default)]TJ/F59 9.9626 Tf 68.74 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e)-416(means)-417(that)-416(the)-417(negative)-416(output)-416(is)-417(the)-416(only)-416(action)]TJ -61.517 -11.955 Td [(taken)-250(on)-250(an)-250(out-of-range)-250(input.)]TJ +/F84 9.9626 Tf 1.02 0 0 1 150.705 165.681 Tm [(Listing)-316(6:)-445(A)-316(sample)-316(PSBLAS-3.0)-316(err)18(or)-316(message.)-516(Pr)18(ocess)-316(0)-316(detected)-316(an)-316(err)18(or)]TJ 1 0 0 1 150.705 153.726 Tm [(condition)-250(inside)-250(the)-250(psb)]TJ +ET +q +1 0 0 1 257.025 153.925 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 260.013 153.726 Td [(cest)-250(subr)18(outine)]TJ 0 g 0 G - 141.968 -571.855 Td [(98)]TJ + 55.075 -63.288 Td [(138)]TJ 0 g 0 G ET endstream endobj -1547 0 obj +1910 0 obj << -/Length 5458 +/Length 3566 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.18)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(8.1)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(loc)]TJ +/F75 11.9552 Tf 151.016 706.129 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(error)-250(code)-250(onto)-250(the)-250(error)-250(stack)]TJ/F84 9.9626 Tf -49.379 -24.942 Td [(c)-175(a)-175(l)-174(l)-874(p)-98(s)-99(b)]TJ ET q -1 0 0 1 173.646 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 150.031 681.387 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 177.233 706.129 Td [(to)]TJ +/F84 9.9626 Tf 154.002 681.187 Td [(e)-99(r)-98(r)-99(p)-98(u)-99(s)-99(h)-232(\050)-266(e)-132(r)-132(r)]TJ ET q -1 0 0 1 188.578 706.328 cm 
-[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 220.033 681.387 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 192.165 706.129 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -92.27 -18.964 Td [(call)-525(psb_loc_to_glob\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact\051)]TJ 0 -11.955 Td [(call)-525(psb_loc_to_glob\050x,)-525(desc_a,)-525(info,)-525(iact\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -31.881 Td [(desc)]TJ +/F84 9.9626 Tf 224.341 681.187 Td [(c)-440(,)-825(r)]TJ ET q -1 0 0 1 120.408 533.94 cm +1 0 0 1 248.891 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 533.74 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F84 
9.9626 Tf 252.379 681.187 Td [(n)-50(a)-50(m)-50(e)-276(,)-929(i)]TJ ET q -1 0 0 1 309.258 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 297.751 681.387 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 312.397 485.92 Td [(desc)]TJ +/F84 9.9626 Tf 302.277 681.187 Td [(e)-154(r)-155(r)-483(,)-920(a)]TJ ET q -1 0 0 1 333.945 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 341.495 681.387 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 337.084 485.92 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -19.926 Td [(iact)]TJ +/F84 9.9626 Tf 345.93 681.187 Td [(e)-145(r)-145(r)-279(\051)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ 0 g 0 G -/F51 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G +/F75 9.9626 Tf -246.035 -27.895 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(If)]TJ/F52 9.9626 Tf 9.727 0 Td [(y)]TJ/F54 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F52 9.9626 Tf 88.385 0 Td [(x)]TJ/F54 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td 
[(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(If)]TJ/F52 9.9626 Tf 9.705 0 Td [(y)]TJ/F54 9.9626 Tf 8.011 0 Td [(is)-292(not)-291(pr)18(esent,)-302(then)]TJ/F52 9.9626 Tf 88.122 0 Td [(y)]TJ/F54 9.9626 Tf 8.011 0 Td [(is)-292(overwritten)-291(with)-292(the)-291(translated)-292(integer)-292(indice)1(s,)]TJ -99.463 -11.955 Td [(and)]TJ/F52 9.9626 Tf 19.651 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F51 9.9626 Tf 112.557 0 Td [(global)]TJ/F54 9.9626 Tf -139.904 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -114.535 Td [(99)]TJ 0 g 0 G + 0 -19.925 Td [(err)]TJ ET - -endstream -endobj -1552 0 obj -<< -/Length 3169 ->> -stream +q +1 0 0 1 113.225 613.641 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 
9.9626 Tf 116.214 613.442 Td [(c)]TJ 0 g 0 G +/F84 9.9626 Tf 9.405 0 Td [(the)-250(err)18(or)-250(code)]TJ -0.817 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.19)-1000(psb)]TJ +/F75 9.9626 Tf -24.907 -19.925 Td [(r)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 104.368 545.895 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(is)]TJ +/F75 9.9626 Tf 107.357 545.696 Td [(name)]TJ +0 g 0 G +/F84 9.9626 Tf 29.888 0 Td [(the)-250(soutine)-250(wher)18(e)-250(the)-250(err)18(or)-250(has)-250(been)-250(caught.)]TJ -12.443 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -31.881 Td [(i)]TJ ET q -1 0 0 1 217.809 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 103.811 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 221.396 706.129 Td [(owned)-250(\227)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_owned\050x,)-525(desc_a\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(x)]TJ +/F75 9.9626 Tf 106.799 465.994 Td [(err)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 
Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ +/F84 9.9626 Tf 17.714 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ 0.289 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ +/F75 9.9626 Tf -24.907 -31.881 Td [(a)]TJ ET q -1 0 0 1 171.218 545.895 cm +1 0 0 1 105.474 398.448 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 545.696 Td [(a)]TJ +/F75 9.9626 Tf 108.463 398.249 Td [(err)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F84 9.9626 Tf 17.714 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ -1.375 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ +0 g 0 G + 139.477 -271.945 Td [(139)]TJ +0 g 0 G +ET + +endstream +endobj +1917 0 obj +<< +/Length 1379 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(8.2)]TJ 0.984 0 
0 1 177.604 706.129 Tm [(psb)]TJ ET q -1 0 0 1 360.068 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 197.92 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 363.206 497.875 Td [(desc)]TJ +/F75 11.9552 Tf 0.984 0 0 1 201.506 706.129 Tm [(error)-255(\227)-255(Prints)-255(the)-254(error)-255(stack)-255(content)-255(and)-255(aborts)-255(execu-)]TJ 1 0 0 1 177.604 692.181 Tm [(tion)]TJ/F84 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-900(p)-126(s)-125(b)]TJ ET q -1 0 0 1 384.755 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.901 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 387.893 497.875 Td [(type)]TJ +/F84 9.9626 Tf 206.138 667.24 Td [(e)-125(r)-125(r)-126(o)-125(r)-259(\050)-279(i)-146(c)-146(o)-147(n)-146(t)-146(x)-146(t)-280(\051)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G +/F75 9.9626 Tf -55.433 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)-261(logical)-260(mask)-261(which)-261(is)-261(tr)8(ue)-261(if)]TJ/F52 9.9626 Tf 137.304 0 Td [(x)]TJ/F54 9.9626 Tf 7.803 0 Td [(is)-261(owned)-261(by)-260(the)-261(curr)18(ent)-261(pr)18(o-)]TJ -192.978 -11.955 Td [(cess)-250(Scope:)]TJ/F51 9.9626 Tf 51.567 0 Td [(local)]TJ/F54 9.9626 Tf -51.567 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ + 0 -19.925 Td [(icontxt)]TJ 0 g 0 G - [-500(This)-240(r)18(outine)-239(r)18(eturns)-240(a)]TJ/F59 9.9626 Tf 109.67 0 Td [(.true.)]TJ/F54 9.9626 Tf 33.769 0 Td 
[(value)-240(for)-239(an)-240(index)-239(that)-240(is)-239(strictly)-240(owned)-239(by)]TJ -130.986 -11.955 Td [(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +/F84 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ 0 g 0 G - 139.477 -263.975 Td [(100)]TJ + 139.476 -461.235 Td [(140)]TJ 0 g 0 G ET endstream endobj -1558 0 obj +1923 0 obj << -/Length 4795 +/Length 1583 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.20)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(8.3)]TJ 0.98 0 0 1 126.795 706.129 Tm [(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 147.031 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(owned)]TJ +/F75 11.9552 Tf 0.98 0 0 1 150.617 706.129 Tm [(set)]TJ ET q -1 0 0 1 194.903 706.328 cm +1 0 0 1 166.296 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 198.489 706.129 Td [(index)-250(\227)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -98.594 -18.964 Td [(call)-525(psb_owned_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td 
[(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ +/F75 11.9552 Tf 0.98 0 0 1 169.882 706.129 Tm [(errverbosity)-250(\227)-251(Sets)-250(the)-250(verbosity)-251(of)-250(error)-250(messages)]TJ/F84 9.9626 Tf 1 0 0 1 101.637 681.187 Tm [(c)-175(a)-175(l)-174(l)-921(p)-147(s)-146(b)]TJ ET q -1 0 0 1 120.408 545.895 cm +1 0 0 1 151.927 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 545.696 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 497.875 Td [(desc)]TJ +/F84 9.9626 Tf 156.371 681.187 Td [(s)-146(e)-146(t)]TJ ET q -1 0 0 1 333.945 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 173.581 681.387 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 337.084 497.875 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F84 9.9626 Tf 178.026 681.187 Td [(e)-146(r)-146(r)-146(v)-147(e)-146(r)-146(b)-146(o)-146(s)-146(i)-147(t)-146(y)-279(\050)-151(v)-151(\051)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 
Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ 0 g 0 G -/F51 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -78.131 -27.895 Td [(T)90(ype:)]TJ 0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 0 -19.925 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(A)-200(logical)-200(mask)-200(which)-201(is)-200(tr)8(ue)-200(for)-200(all)-200(corr)18(esponding)-200(entries)-200(of)]TJ/F52 9.9626 Tf 260.812 0 Td [(x)]TJ/F54 9.9626 Tf 7.2 0 Td [(that)-200(ar)18(e)-200(owned)]TJ -253.626 -11.955 Td [(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F51 9.9626 Tf 131.027 0 Td [(local)]TJ/F54 9.9626 Tf -131.027 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td 
[(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ + 0 -19.925 Td [(v)]TJ 0 g 0 G - [-500(This)-429(r)18(outine)-428(r)18(eturns)-429(a)]TJ/F59 9.9626 Tf 117.209 0 Td [(.true.)]TJ/F54 9.9626 Tf 35.654 0 Td [(value)-429(for)-428(those)-429(indices)-429(that)-429(ar)18(e)-428(strictly)]TJ -140.41 -11.955 Td [(owned)-250(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +/F84 9.9626 Tf 10.521 0 Td [(the)-250(verbosity)-250(level)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ 0 g 0 G - 139.477 -140.438 Td [(101)]TJ + 139.477 -475.183 Td [(141)]TJ 0 g 0 G ET endstream endobj -1564 0 obj +1929 0 obj << -/Length 3147 +/Length 2082 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.21)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(8.4)]TJ 0.988 0 0 1 177.604 706.129 Tm [(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 197.999 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(is)]TJ +/F75 11.9552 Tf 0.988 0 0 1 201.586 706.129 Tm [(set)]TJ ET q -1 0 0 1 217.809 706.328 cm +1 0 0 1 217.387 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 221.396 706.129 Td [(local)-250(\227)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_local\050x,)-525(desc_a\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(x)]TJ 
-0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ +/F75 11.9552 Tf 0.988 0 0 1 220.973 706.129 Tm [(erraction)-254(\227)-254(Set)-254(the)-254(type)-255(of)-254(action)-254(to)-254(be)-254(taken)-254(upon)]TJ 1 0 0 1 177.604 692.181 Tm [(error)-250(condition)]TJ/F84 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-926(p)-151(s)-151(b)]TJ ET q -1 0 0 1 171.218 545.895 cm +1 0 0 1 202.923 667.439 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 545.696 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F84 9.9626 Tf 207.415 667.24 Td [(s)-151(e)-151(t)]TJ ET q -1 0 0 1 360.068 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 224.765 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 363.206 497.875 Td [(desc)]TJ +/F84 9.9626 Tf 229.256 667.24 Td [(e)-151(r)-151(r)-151(a)-150(c)-151(t)-151(i)-151(o)-151(n)-284(\050)-296(e)-163(r)-162(r)]TJ ET q -1 0 0 1 384.755 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 307.912 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 
387.893 497.875 Td [(type)]TJ +/F84 9.9626 Tf 312.521 667.24 Td [(a)-163(c)-162(t)-296(\051)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G +/F75 9.9626 Tf -161.816 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(err)]TJ +ET +q +1 0 0 1 164.035 599.693 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 167.023 599.494 Td [(act)]TJ +0 g 0 G +/F84 9.9626 Tf 17.704 0 Td [(the)-250(type)-250(of)-250(action.)]TJ -9.116 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)-310(Possible)-250(values:)]TJ/F145 9.9626 Tf 179.117 0 Td [(psb_act_ret)]TJ/F84 9.9626 Tf 57.533 0 Td [(,)]TJ/F145 9.9626 Tf 4.982 0 Td [(psb_act_abort)]TJ/F84 9.9626 Tf 67.994 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)-244(logical)-244(mask)-243(which)-244(is)-244(tr)8(ue)-244(if)]TJ/F52 9.9626 Tf 136.118 0 Td [(x)]TJ/F54 9.9626 Tf 7.635 0 Td [(is)-244(local)-244(to)-243(the)-244(curr)18(ent)-244(pr)18(ocess)]TJ -191.623 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ + -170.149 -461.235 Td [(142)]TJ +0 g 0 G +ET + +endstream +endobj +1935 0 obj +<< +/Length 535 +>> +stream 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(This)-199(r)18(outine)-200(r)18(eturns)-199(a)]TJ/F59 
9.9626 Tf 108.069 0 Td [(.true.)]TJ/F54 9.9626 Tf 33.369 0 Td [(value)-199(for)-200(an)-199(index)-199(that)-200(is)-199(local)-200(to)-199(the)-199(curr)18(ent)]TJ -128.984 -11.955 Td [(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices)]TJ +BT +/F75 14.3462 Tf 99.895 705.784 Td [(9)-1000(Utilities)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.397 683.082 Tm [(W)90(e)-272(have)-272(some)-272(utilities)-272(available)-272(for)-272(input)-272(and)-272(output)-272(of)-272(sparse)-272(matrices;)-286(the)]TJ 1 0 0 1 99.895 671.127 Tm [(interfaces)-250(to)-250(these)-250(r)18(outines)-250(ar)18(e)-250(available)-250(in)-250(the)-250(module)]TJ/F145 9.9626 Tf 242.01 0 Td [(psb_util_mod)]TJ/F84 9.9626 Tf 62.764 0 Td [(.)]TJ 0 g 0 G - 139.476 -263.975 Td [(102)]TJ + -140.39 -580.689 Td [(143)]TJ 0 g 0 G ET endstream endobj -1464 0 obj +1829 0 obj << /Type /ObjStm /N 100 -/First 976 -/Length 10636 ->> -stream -1461 0 1457 157 1458 304 1459 449 1463 595 344 654 1460 712 1466 806 1468 924 1469 982 -1470 1040 1471 1098 1472 1156 1473 1214 1474 1272 1475 1330 1465 1388 1479 1482 1476 1630 1477 1773 -1481 1920 348 1979 1478 2037 1485 2131 1482 2279 1483 2424 1487 2571 352 2629 1488 2686 1484 2744 -1492 2838 1489 2986 1490 3129 1494 3273 356 3332 1491 3390 1496 3497 1498 3615 1499 3673 1495 3730 -1504 3824 1501 3972 1502 4118 1506 4262 360 4321 1503 4379 1508 4499 1510 4617 1511 4675 1512 4733 -1507 4791 1517 4872 1513 5029 1514 5170 1515 5313 1519 5457 364 5516 1520 5574 1521 5633 1516 5692 -1525 5799 1522 5947 1523 6090 1527 6237 368 6295 1524 6352 1529 6446 1531 6564 372 6623 1528 6681 -1534 6788 1532 6927 1536 7074 376 7132 1533 7189 1539 7296 1541 7414 1542 7473 1543 7532 1538 7591 -1546 7672 1544 7811 1548 7958 380 8016 1545 8073 1551 8180 1549 8319 1553 8466 384 8525 1554 8583 -1550 8642 1557 8749 1555 8888 1559 9035 388 9093 1560 9150 1556 9208 1563 9315 1561 9454 1565 9601 -% 1461 0 obj +/First 973 +/Length 9417 +>> +stream +1826 0 1828 118 469 176 1825 233 1831 384 1833 502 
1834 561 1835 620 1836 679 1830 738 +1838 876 1840 994 473 1052 1837 1109 1843 1260 1845 1378 1846 1437 1847 1496 1848 1555 1842 1614 +1850 1752 1852 1870 477 1928 1849 1985 1854 2136 1856 2254 1857 2313 1858 2372 1859 2430 1853 2488 +1861 2626 1863 2744 481 2802 1860 2859 1865 3010 1867 3128 1868 3187 1869 3246 1870 3305 1864 3364 +1872 3502 1874 3620 485 3678 1871 3735 1877 3886 1879 4004 1880 4063 1881 4122 1883 4180 1884 4239 +1885 4298 1876 4357 1887 4538 1889 4656 489 4714 1890 4771 1886 4829 1892 4980 1894 5098 493 5157 +1895 5215 1891 5274 1899 5425 1896 5573 1897 5721 1901 5869 497 5927 1898 5984 1905 6079 1907 6197 +1902 6256 1903 6315 1904 6374 1909 6471 1911 6589 501 6647 1912 6704 1913 6762 1908 6820 1916 6901 +1918 7019 505 7078 1919 7136 1920 7194 1915 7253 1922 7334 1924 7452 509 7510 1925 7567 1926 7625 +1921 7683 1928 7764 1930 7882 513 7941 1931 7999 1932 8057 1927 8116 1934 8211 1936 8329 517 8387 +% 1826 0 obj << /Type /Page -/Contents 1462 0 R -/Resources 1460 0 R +/Contents 1827 0 R +/Resources 1825 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1457 0 R 1458 0 R 1459 0 R ] +/Parent 1805 0 R >> -% 1457 0 obj +% 1828 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 572.168 409.811 584.228] -/A << /S /GoTo /D (descdata) >> +/D [1826 0 R /XYZ 98.895 753.953 null] >> -% 1458 0 obj +% 469 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 276.913 420.271 288.973] -/A << /S /GoTo /D (spdata) >> +/D [1826 0 R /XYZ 99.895 716.092 null] >> -% 1459 0 obj +% 1825 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [345.92 208.672 412.978 220.731] -/A << /S /GoTo /D (descdata) >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F190 941 0 R /F148 1490 0 R /F192 942 0 R /F78 686 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1463 0 obj +% 1831 0 obj +<< +/Type /Page +/Contents 1832 0 R +/Resources 1830 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1805 0 R 
+>> +% 1833 0 obj << -/D [1461 0 R /XYZ 149.705 753.953 null] +/D [1831 0 R /XYZ 149.705 753.953 null] >> -% 344 0 obj +% 1834 0 obj << -/D [1461 0 R /XYZ 150.705 716.092 null] +/D [1831 0 R /XYZ 150.705 576.399 null] >> -% 1460 0 obj +% 1835 0 obj +<< +/D [1831 0 R /XYZ 150.705 541.925 null] +>> +% 1836 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/D [1831 0 R /XYZ 150.705 451.085 null] +>> +% 1830 0 obj +<< +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F233 1044 0 R /F78 686 0 R /F279 1813 0 R >> /ProcSet [ /PDF /Text ] >> -% 1466 0 obj +% 1838 0 obj << /Type /Page -/Contents 1467 0 R -/Resources 1465 0 R +/Contents 1839 0 R +/Resources 1837 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R +/Parent 1841 0 R >> -% 1468 0 obj +% 1840 0 obj << -/D [1466 0 R /XYZ 98.895 753.953 null] +/D [1838 0 R /XYZ 98.895 753.953 null] >> -% 1469 0 obj +% 473 0 obj +<< +/D [1838 0 R /XYZ 99.895 716.092 null] +>> +% 1837 0 obj << -/D [1466 0 R /XYZ 99.895 701.929 null] +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F190 941 0 R /F148 1490 0 R /F192 942 0 R /F78 686 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1470 0 obj +% 1843 0 obj +<< +/Type /Page +/Contents 1844 0 R +/Resources 1842 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1841 0 R +>> +% 1845 0 obj +<< +/D [1843 0 R /XYZ 149.705 753.953 null] +>> +% 1846 0 obj +<< +/D [1843 0 R /XYZ 150.705 576.399 null] +>> +% 1847 0 obj << -/D [1466 0 R /XYZ 99.895 667.454 null] +/D [1843 0 R /XYZ 150.705 541.925 null] >> -% 1471 0 obj +% 1848 0 obj << -/D [1466 0 R /XYZ 99.895 647.529 null] +/D [1843 0 R /XYZ 150.705 451.085 null] >> -% 1472 0 obj +% 1842 0 obj << -/D [1466 0 R /XYZ 99.895 603.693 null] +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F233 1044 0 R /F78 686 0 R /F279 1813 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1473 0 obj +% 1850 0 obj << -/D [1466 0 R /XYZ 99.895 547.902 null] +/Type /Page +/Contents 1851 0 R +/Resources 1849 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1841 0 R >> -% 1474 0 obj 
+% 1852 0 obj << -/D [1466 0 R /XYZ 99.895 527.977 null] +/D [1850 0 R /XYZ 98.895 753.953 null] >> -% 1475 0 obj +% 477 0 obj << -/D [1466 0 R /XYZ 99.895 496.097 null] +/D [1850 0 R /XYZ 99.895 716.092 null] >> -% 1465 0 obj +% 1849 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F190 941 0 R /F148 1490 0 R /F192 942 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1479 0 obj +% 1854 0 obj << /Type /Page -/Contents 1480 0 R -/Resources 1478 0 R +/Contents 1855 0 R +/Resources 1853 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1476 0 R 1477 0 R ] +/Parent 1841 0 R >> -% 1476 0 obj +% 1856 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 420.271 585.83] -/A << /S /GoTo /D (spdata) >> +/D [1854 0 R /XYZ 149.705 753.953 null] >> -% 1477 0 obj +% 1857 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 506.024 409.811 518.084] -/A << /S /GoTo /D (descdata) >> +/D [1854 0 R /XYZ 150.705 588.355 null] >> -% 1481 0 obj +% 1858 0 obj << -/D [1479 0 R /XYZ 149.705 753.953 null] +/D [1854 0 R /XYZ 150.705 553.88 null] >> -% 348 0 obj +% 1859 0 obj << -/D [1479 0 R /XYZ 150.705 716.092 null] +/D [1854 0 R /XYZ 150.705 463.04 null] >> -% 1478 0 obj +% 1853 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F233 1044 0 R /F78 686 0 R /F279 1813 0 R >> /ProcSet [ /PDF /Text ] >> -% 1485 0 obj +% 1861 0 obj << /Type /Page -/Contents 1486 0 R -/Resources 1484 0 R +/Contents 1862 0 R +/Resources 1860 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1482 0 R 1483 0 R ] ->> -% 1482 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 559.823 369.462 571.882] -/A << /S /GoTo /D (spdata) >> ->> -% 1483 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 492.077 359.001 504.136] -/A << /S 
/GoTo /D (descdata) >> +/Parent 1841 0 R >> -% 1487 0 obj -<< -/D [1485 0 R /XYZ 98.895 753.953 null] ->> -% 352 0 obj +% 1863 0 obj << -/D [1485 0 R /XYZ 99.895 716.092 null] +/D [1861 0 R /XYZ 98.895 753.953 null] >> -% 1488 0 obj +% 481 0 obj << -/D [1485 0 R /XYZ 99.895 312.355 null] +/D [1861 0 R /XYZ 99.895 716.092 null] >> -% 1484 0 obj +% 1860 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F190 941 0 R /F148 1490 0 R /F192 942 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1492 0 obj +% 1865 0 obj << /Type /Page -/Contents 1493 0 R -/Resources 1491 0 R +/Contents 1866 0 R +/Resources 1864 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1489 0 R 1490 0 R ] +/Parent 1841 0 R >> -% 1489 0 obj +% 1867 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 577.893 378 589.953] -/A << /S /GoTo /D (descdata) >> +/D [1865 0 R /XYZ 149.705 753.953 null] >> -% 1490 0 obj +% 1868 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [261.249 128.475 337.303 140.535] -/A << /S /GoTo /D (vdata) >> +/D [1865 0 R /XYZ 150.705 576.399 null] >> -% 1494 0 obj +% 1869 0 obj << -/D [1492 0 R /XYZ 149.705 753.953 null] +/D [1865 0 R /XYZ 150.705 541.925 null] >> -% 356 0 obj +% 1870 0 obj << -/D [1492 0 R /XYZ 150.705 716.092 null] +/D [1865 0 R /XYZ 150.705 451.085 null] >> -% 1491 0 obj +% 1864 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F233 1044 0 R /F78 686 0 R /F279 1813 0 R >> /ProcSet [ /PDF /Text ] >> -% 1496 0 obj +% 1872 0 obj << /Type /Page -/Contents 1497 0 R -/Resources 1495 0 R +/Contents 1873 0 R +/Resources 1871 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R +/Parent 1875 0 R >> -% 1498 0 obj +% 1874 0 obj << -/D [1496 0 R /XYZ 98.895 753.953 null] +/D [1872 0 R /XYZ 98.895 753.953 null] >> -% 1499 0 obj +% 485 0 obj << -/D [1496 0 R /XYZ 
99.895 632.19 null] +/D [1872 0 R /XYZ 99.895 716.092 null] >> -% 1495 0 obj +% 1871 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F190 941 0 R /F148 1490 0 R /F192 942 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1504 0 obj +% 1877 0 obj << /Type /Page -/Contents 1505 0 R -/Resources 1503 0 R +/Contents 1878 0 R +/Resources 1876 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R -/Annots [ 1501 0 R 1502 0 R ] +/Parent 1875 0 R >> -% 1501 0 obj +% 1879 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 362.621 409.811 374.68] -/A << /S /GoTo /D (descdata) >> +/D [1877 0 R /XYZ 149.705 753.953 null] >> -% 1502 0 obj +% 1880 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [417.183 207.637 493.237 219.697] -/A << /S /GoTo /D (vdata) >> +/D [1877 0 R /XYZ 150.705 588.355 null] >> -% 1506 0 obj +% 1881 0 obj << -/D [1504 0 R /XYZ 149.705 753.953 null] +/D [1877 0 R /XYZ 150.705 553.88 null] >> -% 360 0 obj +% 1883 0 obj << -/D [1504 0 R /XYZ 150.705 716.092 null] +/D [1877 0 R /XYZ 150.705 465.726 null] >> -% 1503 0 obj +% 1884 0 obj +<< +/D [1877 0 R /XYZ 150.705 433.845 null] +>> +% 1885 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/D [1877 0 R /XYZ 150.705 343.006 null] +>> +% 1876 0 obj +<< +/Font << /F84 687 0 R /F75 685 0 R /F145 940 0 R /F78 686 0 R /F192 942 0 R /F17 1882 0 R /F243 1285 0 R /F233 1044 0 R /F279 1813 0 R >> /ProcSet [ /PDF /Text ] >> -% 1508 0 obj +% 1887 0 obj << /Type /Page -/Contents 1509 0 R -/Resources 1507 0 R +/Contents 1888 0 R +/Resources 1886 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R +/Parent 1875 0 R >> -% 1510 0 obj +% 1889 0 obj << -/D [1508 0 R /XYZ 98.895 753.953 null] +/D [1887 0 R /XYZ 98.895 753.953 null] >> -% 1511 0 obj +% 489 0 obj << -/D [1508 0 R /XYZ 99.895 701.929 null] +/D [1887 0 R /XYZ 99.895 716.092 null] >> -% 1512 0 obj +% 1890 0 
obj << -/D [1508 0 R /XYZ 99.895 680.684 null] +/D [1887 0 R /XYZ 99.895 222.691 null] >> -% 1507 0 obj +% 1886 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R /F148 1490 0 R /F192 942 0 R /F190 941 0 R >> /ProcSet [ /PDF /Text ] >> -% 1517 0 obj +% 1892 0 obj << /Type /Page -/Contents 1518 0 R -/Resources 1516 0 R +/Contents 1893 0 R +/Resources 1891 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R -/Annots [ 1513 0 R 1514 0 R 1515 0 R ] ->> -% 1513 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 573.77 378 585.83] -/A << /S /GoTo /D (descdata) >> ->> -% 1514 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [322.33 482.114 398.384 494.174] -/A << /S /GoTo /D (vdata) >> ->> -% 1515 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [225.165 380.495 301.219 392.555] -/A << /S /GoTo /D (vdata) >> ->> -% 1519 0 obj -<< -/D [1517 0 R /XYZ 149.705 753.953 null] +/Parent 1875 0 R >> -% 364 0 obj +% 1894 0 obj << -/D [1517 0 R /XYZ 150.705 716.092 null] +/D [1892 0 R /XYZ 149.705 753.953 null] >> -% 1520 0 obj +% 493 0 obj << -/D [1517 0 R /XYZ 150.705 278.482 null] +/D [1892 0 R /XYZ 150.705 716.092 null] >> -% 1521 0 obj +% 1895 0 obj << -/D [1517 0 R /XYZ 150.705 244.007 null] +/D [1892 0 R /XYZ 150.705 222.691 null] >> -% 1516 0 obj +% 1891 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F148 1490 0 R /F192 942 0 R /F78 686 0 R /F190 941 0 R >> /ProcSet [ /PDF /Text ] >> -% 1525 0 obj +% 1899 0 obj << /Type /Page -/Contents 1526 0 R -/Resources 1524 0 R +/Contents 1900 0 R +/Resources 1898 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R -/Annots [ 1522 0 R 1523 0 R ] +/Parent 1875 0 R +/Annots [ 1896 0 R 1897 0 R ] >> -% 1522 0 obj +% 1896 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [174.355 561.815 250.41 
573.875] -/A << /S /GoTo /D (vdata) >> +/Rect [145.364 511.904 152.427 523.964] +/A << /S /GoTo /D (listing.5) >> >> -% 1523 0 obj +% 1897 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [260.133 482.114 327.191 494.174] -/A << /S /GoTo /D (descdata) >> +/Rect [143.975 356.487 150.849 368.547] +/A << /S /GoTo /D (listing.6) >> >> -% 1527 0 obj +% 1901 0 obj << -/D [1525 0 R /XYZ 98.895 753.953 null] +/D [1899 0 R /XYZ 98.895 753.953 null] >> -% 368 0 obj +% 497 0 obj << -/D [1525 0 R /XYZ 99.895 716.092 null] +/D [1899 0 R /XYZ 99.895 716.092 null] >> -% 1524 0 obj +% 1898 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1529 0 obj +% 1905 0 obj << /Type /Page -/Contents 1530 0 R -/Resources 1528 0 R +/Contents 1906 0 R +/Resources 1904 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R +/Parent 1875 0 R >> -% 1531 0 obj +% 1907 0 obj << -/D [1529 0 R /XYZ 149.705 753.953 null] +/D [1905 0 R /XYZ 149.705 753.953 null] >> -% 372 0 obj +% 1902 0 obj << -/D [1529 0 R /XYZ 150.705 716.092 null] +/D [1905 0 R /XYZ 150.705 411.235 null] >> -% 1528 0 obj +% 1903 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/D [1905 0 R /XYZ 150.705 182.902 null] +>> +% 1904 0 obj +<< +/Font << /F145 940 0 R /F279 1813 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1534 0 obj +% 1909 0 obj << /Type /Page -/Contents 1535 0 R -/Resources 1533 0 R +/Contents 1910 0 R +/Resources 1908 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1532 0 R ] +/Parent 1914 0 R >> -% 1532 0 obj +% 1911 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 483.894 359.001 495.954] -/A << /S /GoTo /D (descdata) >> +/D [1909 0 R /XYZ 98.895 753.953 null] >> -% 1536 0 obj +% 501 0 obj << -/D [1534 0 R /XYZ 98.895 753.953 null] +/D [1909 0 R /XYZ 99.895 716.092 null] >> -% 376 0 obj +% 1912 0 obj << -/D [1534 0 R /XYZ 
99.895 716.092 null] +/D [1909 0 R /XYZ 99.895 690.058 null] >> -% 1533 0 obj +% 1913 0 obj +<< +/D [1909 0 R /XYZ 99.895 693.143 null] +>> +% 1908 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1539 0 obj +% 1916 0 obj << /Type /Page -/Contents 1540 0 R -/Resources 1538 0 R +/Contents 1917 0 R +/Resources 1915 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R +/Parent 1914 0 R >> -% 1541 0 obj +% 1918 0 obj << -/D [1539 0 R /XYZ 149.705 753.953 null] +/D [1916 0 R /XYZ 149.705 753.953 null] >> -% 1542 0 obj +% 505 0 obj +<< +/D [1916 0 R /XYZ 150.705 716.092 null] +>> +% 1919 0 obj << -/D [1539 0 R /XYZ 150.705 716.092 null] +/D [1916 0 R /XYZ 150.705 678.98 null] >> -% 1543 0 obj +% 1920 0 obj << -/D [1539 0 R /XYZ 150.705 687.379 null] +/D [1916 0 R /XYZ 150.705 679.195 null] >> -% 1538 0 obj +% 1915 0 obj << -/Font << /F54 586 0 R /F59 812 0 R >> +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1546 0 obj +% 1922 0 obj << /Type /Page -/Contents 1547 0 R -/Resources 1545 0 R +/Contents 1923 0 R +/Resources 1921 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1544 0 R ] +/Parent 1914 0 R >> -% 1544 0 obj +% 1924 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 482.114 359.001 494.174] -/A << /S /GoTo /D (descdata) >> +/D [1922 0 R /XYZ 98.895 753.953 null] >> -% 1548 0 obj +% 509 0 obj << -/D [1546 0 R /XYZ 98.895 753.953 null] +/D [1922 0 R /XYZ 99.895 716.092 null] >> -% 380 0 obj +% 1925 0 obj << -/D [1546 0 R /XYZ 99.895 716.092 null] +/D [1922 0 R /XYZ 99.895 689.963 null] >> -% 1545 0 obj +% 1926 0 obj +<< +/D [1922 0 R /XYZ 99.895 693.143 null] +>> +% 1921 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1551 0 obj +% 1928 0 obj << /Type /Page -/Contents 1552 0 R -/Resources 1550 0 R +/Contents 
1929 0 R +/Resources 1927 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1549 0 R ] +/Parent 1914 0 R >> -% 1549 0 obj +% 1930 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 494.069 409.811 506.129] -/A << /S /GoTo /D (descdata) >> +/D [1928 0 R /XYZ 149.705 753.953 null] >> -% 1553 0 obj +% 513 0 obj << -/D [1551 0 R /XYZ 149.705 753.953 null] +/D [1928 0 R /XYZ 150.705 716.092 null] >> -% 384 0 obj +% 1931 0 obj << -/D [1551 0 R /XYZ 150.705 716.092 null] +/D [1928 0 R /XYZ 150.705 678.98 null] >> -% 1554 0 obj +% 1932 0 obj << -/D [1551 0 R /XYZ 150.705 382.093 null] +/D [1928 0 R /XYZ 150.705 679.195 null] >> -% 1550 0 obj +% 1927 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1557 0 obj +% 1934 0 obj << /Type /Page -/Contents 1558 0 R -/Resources 1556 0 R +/Contents 1935 0 R +/Resources 1933 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1555 0 R ] +/Parent 1914 0 R >> -% 1555 0 obj +% 1936 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 494.069 359.001 506.129] -/A << /S /GoTo /D (descdata) >> +/D [1934 0 R /XYZ 98.895 753.953 null] >> -% 1559 0 obj +% 517 0 obj +<< +/D [1934 0 R /XYZ 99.895 716.092 null] +>> + +endstream +endobj +1941 0 obj +<< +/Length 4752 +>> +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(9.1)]TJ 0.984 0 0 1 180.598 706.129 Tm [(hb)]TJ +ET +q +1 0 0 1 195.691 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 0.984 0 0 1 199.278 706.129 Tm [(read)-255(\227)-254(Read)-255(a)-254(sparse)-255(matrix)-254(from)-255(a)-254(\002le)-255(in)-254(the)-255(Harwell\226)]TJ 1 0 0 1 177.604 692.181 Tm [(Boeing)-250(format)]TJ/F84 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-865(h)-90(b)]TJ +ET +q +1 0 0 1 195.188 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 
199.074 667.24 Td [(r)-90(e)-90(a)-90(d)-224(\050)-166(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-871(f)-97(i)-96(l)-96(e)-96(n)-96(a)-97(m)-96(e)-367(,)-791(b)-206(,)-919(m)-143(t)-144(i)-143(t)-143(l)-144(e)-277(\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -48.369 -27.896 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(\002lename)]TJ +0 g 0 G +/F84 9.9626 Tf 43.965 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ 1.02 0 0 1 175.611 575.584 Tm [(Speci\002ed)-313(as:)-440(a)-313(character)-314(variable)-313(containing)-313(a)-313(valid)-313(\002le)-313(name,)-331(or)]TJ/F145 9.9626 Tf 1 0 0 1 474.418 575.584 Tm [(-)]TJ/F84 9.9626 Tf 1.02 0 0 1 479.649 575.584 Tm [(,)-330(in)]TJ 1.003 0 0 1 175.193 563.628 Tm [(which)-250(case)-250(the)-250(default)-250(input)-250(unit)-250(5)-250(\050i.e.)-311(standar)18(d)-250(input)-250(in)-250(Unix)-250(jar)18(gon\051)-250(is)]TJ 1 0 0 1 175.611 551.673 Tm [(used.)-310(Default:)]TJ/F145 9.9626 Tf 65.185 0 Td [(-)]TJ/F84 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0 g 0 G +/F84 9.9626 Tf 26.799 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -61.878 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F145 9.9626 Tf 287.758 0 Td [(-)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -317.894 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td 
[(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.636 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 442.084 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 398.354 442.084 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.57 -19.925 Td [(b)]TJ +0 g 0 G +/F84 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.53 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F84 9.9626 Tf 0.995 0 0 1 175.223 398.249 Tm [(An)-251(array)-252(of)-251(type)-252(r)18(eal)-251(or)-252(complex,)-251(rank)-252(2)-251(and)-252(having)-251(the)-252(ALLOCA)75(T)74(ABLE)]TJ 1.02 0 0 1 175.611 386.293 Tm [(attribute;)-293(will)-277(be)-278(allocated)-277(and)-277(\002lled)-277(in)-277(if)-278(the)-277(input)-277(\002le)-277(contains)-278(a)-277(right)]TJ 1 0 0 1 175.611 374.338 Tm [(hand)-250(side,)-250(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.925 Td [(mtitle)]TJ +0 g 0 G +/F84 9.9626 Tf 32.089 0 Td [(Matrix)-250(title.)]TJ -7.491 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F84 9.9626 Tf 1.02 0 0 1 175.223 330.503 Tm [(A)-292(charachter)-292(variable)-292(of)-292(length)-292(72)-292(holding)-292(a)-293(copy)-292(of)-292(the)-292(matrix)-292(title)-292(as)]TJ 1 0 0 1 175.611 318.547 Tm [(speci\002ed)-250(by)-250(the)-250(Harwell-Boeing)-250(format)-250(and)-250(contained)-250(in)-250(the)-250(input)-250(\002le.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.925 Td [(iret)]TJ +0 g 0 G +/F84 9.9626 Tf 20.473 0 Td 
[(Err)18(or)-250(code.)]TJ 4.125 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -27.168 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.865 -184.274 Td [(144)]TJ +0 g 0 G +ET + +endstream +endobj +1948 0 obj << -/D [1557 0 R /XYZ 98.895 753.953 null] +/Length 5153 >> -% 388 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(9.2)]TJ 1.02 0 0 1 126.795 706.129 Tm [(hb)]TJ +ET +q +1 0 0 1 142.413 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 1.02 0 0 1 146 706.129 Tm [(write)-264(\227)-265(W)73(rite)-265(a)-264(sparse)-264(matrix)-265(to)-264(a)-264(\002le)-265(in)-264(the)-264(Harwell\226)]TJ 1 0 0 1 126.795 692.181 Tm [(Boeing)-250(format)]TJ/F84 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-884(h)-109(b)]TJ +ET +q +1 0 0 1 144.944 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 149.017 667.24 Td [(w)-109(r)-109(i)-109(t)-109(e)-242(\050)-167(a)-242(,)-926(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-129(t)-435(,)-871(f)-96(i)-96(l)-96(e)-97(n)-96(a)-96(m)-96(e)-368(,)-817(k)-41(e)-42(y)-259(,)-855(r)-79(h)-80(s)-335(,)-918(m)-144(t)-143(i)-144(t)-143(l)-143(e)-277(\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -49.122 -27.896 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.635 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(required)]TJ/F84 9.9626 Tf 39.292 0 Td [(.)]TJ -62.983 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 575.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT 
+/F145 9.9626 Tf 312.397 575.584 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 575.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 347.544 575.584 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.571 -19.926 Td [(b)]TJ +0 g 0 G +/F84 9.9626 Tf 11.069 0 Td [(Rigth)-250(hand)-250(side.)]TJ 13.529 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F84 9.9626 Tf 0.995 0 0 1 124.413 531.748 Tm [(An)-252(array)-251(of)-252(type)-251(r)18(eal)-251(or)-252(complex,)-251(rank)-252(1)-251(and)-252(having)-251(the)-252(ALLOCA)74(T)75(ABLE)]TJ 1.02 0 0 1 124.802 519.793 Tm [(attribute;)-293(will)-277(be)-278(allocated)-277(and)-277(\002lled)-277(in)-277(if)-278(the)-277(input)-277(\002le)-277(contains)-277(a)-278(right)]TJ 1 0 0 1 124.802 507.838 Tm [(hand)-250(side.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.926 Td [(\002lename)]TJ +0 g 0 G +/F84 9.9626 Tf 43.965 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ 1.02 0 0 1 124.802 464.002 Tm [(Speci\002ed)-313(as:)-440(a)-313(character)-314(variable)-313(containing)-313(a)-313(valid)-313(\002le)-313(name,)-331(or)]TJ/F145 9.9626 Tf 1 0 0 1 423.609 464.002 Tm [(-)]TJ/F84 9.9626 Tf 1.02 0 0 1 428.839 464.002 Tm [(,)-330(in)]TJ 0.999 0 0 1 124.384 452.047 Tm [(which)-249(case)-249(the)-249(default)-249(output)-250(unit)-249(6)-249(\050i.e.)-310(standar)18(d)-249(output)-249(in)-249(Unix)-249(jar)18(gon\051)]TJ 1 0 0 1 124.802 440.092 Tm [(is)-250(used.)-310(Default:)]TJ/F145 9.9626 Tf 74.799 0 Td [(-)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -104.936 -19.926 Td [(iunit)]TJ +0 g 0 G +/F84 9.9626 Tf 26.8 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -61.877 -11.955 Td 
[(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F145 9.9626 Tf 287.757 0 Td [(-)]TJ/F84 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -317.895 -19.925 Td [(key)]TJ +0 g 0 G +/F84 9.9626 Tf 21.589 0 Td [(Matrix)-250(key)111(.)]TJ 3.009 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F84 9.9626 Tf 1.02 0 0 1 124.413 352.42 Tm [(A)-245(charachter)-245(variable)-245(of)-245(length)-245(8)-245(holding)-245(the)-245(matrix)-245(key)-245(as)-245(speci\002ed)-245(by)]TJ 1 0 0 1 124.802 340.465 Tm [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.925 Td [(mtitle)]TJ +0 g 0 G +/F84 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.492 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F84 9.9626 Tf 0.998 0 0 1 124.413 296.63 Tm [(A)-251(charachter)-251(variable)-251(of)-251(length)-251(72)-251(holding)-251(the)-251(matrix)-251(title)-251(as)-251(sp)1(eci\002ed)-251(by)]TJ 1 0 0 1 124.802 284.674 Tm [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -21.917 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(iret)]TJ +0 g 0 G +/F84 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.125 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -27.168 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.866 -128.483 Td [(145)]TJ +0 g 0 G +ET + +endstream +endobj +1956 0 obj << -/D [1557 0 R /XYZ 99.895 716.092 null] +/Length 3684 >> -% 1560 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(9.3)]TJ 1.02 0 0 1 177.604 706.129 Tm [(mm)]TJ +ET +q +1 0 0 1 200.002 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 1.02 0 0 1 203.589 
706.129 Tm [(mat)]TJ +ET +q +1 0 0 1 225.305 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 1.02 0 0 1 228.891 706.129 Tm [(read)-355(\227)-356(Read)-355(a)-356(sparse)-355(matrix)-356(from)-355(a)-356(\002le)-355(in)-356(the)]TJ 1 0 0 1 177.604 692.181 Tm [(MatrixMarket)-250(format)]TJ/F84 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-810(m)-35(m)]TJ +ET +q +1 0 0 1 199.831 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 203.169 667.24 Td [(m)-35(a)-35(t)]TJ +ET +q +1 0 0 1 221.839 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 225.176 667.24 Td [(r)-35(e)-35(a)-35(d)-169(\050)-166(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-107(a)-106(m)-107(e)-241(\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -74.471 -27.896 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(\002lename)]TJ +0 g 0 G +/F84 9.9626 Tf 43.965 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ 1.02 0 0 1 175.611 575.584 Tm [(Speci\002ed)-313(as:)-440(a)-313(character)-314(variable)-313(containing)-313(a)-313(valid)-313(\002le)-313(name,)-331(or)]TJ/F145 9.9626 Tf 1 0 0 1 474.418 575.584 Tm [(-)]TJ/F84 9.9626 Tf 1.02 0 0 1 479.649 575.584 Tm [(,)-330(in)]TJ 1.003 0 0 1 175.193 563.628 Tm [(which)-250(case)-250(the)-250(default)-250(input)-250(unit)-250(5)-250(\050i.e.)-311(standar)18(d)-250(input)-250(in)-250(Unix)-250(jar)18(gon\051)-250(is)]TJ 1 0 0 1 175.611 551.673 Tm [(used.)-310(Default:)]TJ/F145 9.9626 Tf 65.185 0 Td [(-)]TJ/F84 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0 g 0 G +/F84 9.9626 Tf 26.799 0 Td 
[(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -61.878 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F145 9.9626 Tf 287.758 0 Td [(-)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -317.894 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.636 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.984 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 442.084 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 398.354 442.084 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.57 -19.925 Td [(iret)]TJ +0 g 0 G +/F84 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.125 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -27.168 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.865 -307.811 Td [(146)]TJ +0 g 0 G +ET + +endstream +endobj +1963 0 obj << -/D [1557 0 R /XYZ 99.895 258.556 null] +/Length 4361 >> -% 1556 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(9.4)]TJ 1.02 0 0 1 126.795 706.129 Tm [(mm)]TJ +ET +q +1 0 0 1 149.193 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 1.02 0 0 1 152.78 706.129 Tm [(array)]TJ +ET +q +1 0 0 1 181.958 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT 
+/F75 11.9552 Tf 1.02 0 0 1 185.545 706.129 Tm [(read)-377(\227)-378(Read)-377(a)-378(dense)-377(array)-378(from)-377(a)-378(\002le)-377(in)-377(the)]TJ 1 0 0 1 126.795 692.181 Tm [(MatrixMarket)-250(format)]TJ/F84 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-845(m)-71(m)]TJ +ET +q +1 0 0 1 150.074 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 153.763 667.24 Td [(a)-70(r)-70(r)-71(a)-70(y)]TJ +ET +q +1 0 0 1 181.23 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 184.919 667.24 Td [(r)-70(e)-70(a)-71(d)-203(\050)-150(b)-206(,)-926(i)-152(r)-151(e)-152(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-106(n)-107(a)-107(m)-107(e)-241(\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -85.024 -27.896 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(\002lename)]TJ +0 g 0 G +/F84 9.9626 Tf 43.965 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ 1.02 0 0 1 124.802 575.584 Tm [(Speci\002ed)-313(as:)-440(a)-313(character)-314(variable)-313(containing)-313(a)-313(valid)-313(\002le)-313(name,)-331(or)]TJ/F145 9.9626 Tf 1 0 0 1 423.609 575.584 Tm [(-)]TJ/F84 9.9626 Tf 1.02 0 0 1 428.839 575.584 Tm [(,)-330(in)]TJ 1.003 0 0 1 124.384 563.628 Tm [(which)-250(case)-250(the)-250(default)-250(input)-250(unit)-250(5)-250(\050i.e.)-311(standar)18(d)-250(input)-250(in)-250(Unix)-250(jar)18(gon\051)-250(is)]TJ 1 0 0 1 124.802 551.673 Tm [(used.)-310(Default:)]TJ/F145 9.9626 Tf 65.185 0 Td [(-)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0 g 0 G +/F84 9.9626 Tf 26.8 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td 
[(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -61.877 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F145 9.9626 Tf 287.757 0 Td [(-)]TJ/F84 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(b)]TJ +0 g 0 G +/F84 9.9626 Tf 11.069 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.529 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 1.02 0 0 1 124.413 442.084 Tm [(An)-368(array)-368(of)-367(type)-368(r)18(eal)-368(or)-368(complex,)-398(rank)-368(1)-368(or)-368(2)-367(and)-368(having)-368(the)-368(ALLO-)]TJ 1.005 0 0 1 124.802 430.129 Tm [(CA)74(T)73(ABLE)-248(at)1(tribute,)-248(or)-248(an)-248(object)-248(of)-248(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 302.957 430.129 Tm [(psb)]TJ +ET +q +1 0 0 1 319.275 430.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 322.414 430.129 Td [(T)]TJ +ET +q +1 0 0 1 328.272 430.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 331.41 430.129 Td [(vect)]TJ +ET +q +1 0 0 1 352.959 430.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 356.097 430.129 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 1.005 0 0 1 377.018 430.129 Tm [(,)-248(of)-248(type)-248(r)18(eal)-247(or)]TJ 1 0 0 1 124.802 418.174 Tm [(complex.)]TJ 1.019 0 0 1 124.304 406.219 Tm [(W)54(ill)-245(be)-245(allocated)-245(and)-245(\002lled)-245(in)-245(if)-245(the)-245(input)-244(\002le)-245(contains)-245(a)-245(right)-245(hand)-245(side,)]TJ 1 0 0 1 124.802 394.263 Tm [(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -31.88 Td [(iret)]TJ +0 g 0 G +/F84 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.125 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -27.168 -11.955 Td 
[(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.866 -248.035 Td [(147)]TJ +0 g 0 G +ET + +endstream +endobj +1970 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> -/ProcSet [ /PDF /Text ] +/Length 7544 >> -% 1563 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(9.5)]TJ 1.019 0 0 1 177.604 706.129 Tm [(mm)]TJ +ET +q +1 0 0 1 199.981 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 1.019 0 0 1 203.568 706.129 Tm [(mat)]TJ +ET +q +1 0 0 1 225.263 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 1.019 0 0 1 228.849 706.129 Tm [(write)-246(\227)-246(W)73(rite)-246(a)-246(sparse)-246(matrix)-246(to)-246(a)-246(\002le)-246(in)-246(the)-246(Ma-)]TJ 1 0 0 1 177.604 692.181 Tm [(trixMarket)-250(format)]TJ/F84 9.9626 Tf -25.158 -24.48 Td [(c)-175(a)-175(l)-174(l)-828(m)-52(m)]TJ +ET +q +1 0 0 1 200.348 667.901 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 203.858 667.701 Td [(m)-52(a)-53(t)]TJ +ET +q +1 0 0 1 223.046 667.901 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 226.556 667.701 Td [(w)-52(r)-53(i)-52(t)-52(e)-186(\050)-167(a)-242(,)-900(m)-126(t)-125(i)-126(t)-125(l)-126(e)-426(,)-926(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-130(t)-434(,)-882(f)-107(i)-106(l)-107(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -75.851 -26.279 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.464 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.464 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.635 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.983 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.137 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 578.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 578.584 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 578.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 398.354 578.584 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -268.57 -19.464 Td [(mtitle)]TJ +0 g 0 G +/F84 9.9626 Tf 32.089 0 Td [(Matrix)-250(title.)]TJ -7.491 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 0.98 0 0 1 175.223 535.21 Tm [(A)-201(charachter)-200(variable)-201(holding)-200(a)-201(descriptive)-200(title)-201(for)-201(the)-200(matrix)-201(to)-200(be)-201(written)]TJ 1 0 0 1 175.611 523.255 Tm [(to)-250(\002le.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.906 -19.464 Td [(\002lename)]TJ +0 g 0 G +/F84 9.9626 Tf 43.965 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ 1.02 0 0 1 175.611 479.881 Tm [(Speci\002ed)-313(as:)-440(a)-313(character)-314(variable)-313(containing)-313(a)-313(valid)-313(\002le)-313(name,)-331(or)]TJ/F145 9.9626 Tf 1 0 0 1 474.418 479.881 Tm [(-)]TJ/F84 9.9626 Tf 1.02 0 0 1 479.649 479.881 Tm [(,)-330(in)]TJ 0.999 0 0 1 175.193 467.926 Tm [(which)-249(case)-249(the)-249(default)-250(outp)1(ut)-250(unit)-249(6)-249(\050i.e.)-310(standar)18(d)-249(output)-249(in)-249(Unix)-249(jar)18(gon\051)]TJ 1 0 0 1 175.611 455.97 Tm [(is)-250(used.)-310(Default:)]TJ/F145 9.9626 Tf 74.799 0 Td [(-)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -104.935 -19.463 Td [(iunit)]TJ +0 g 0 G +/F84 9.9626 Tf 26.799 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(optional)]TJ/F84 9.9626 Tf 
38.187 0 Td [(.)]TJ -61.878 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F145 9.9626 Tf 287.758 0 Td [(-)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -317.894 -20.764 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.463 Td [(iret)]TJ +0 g 0 G +/F84 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.125 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -27.168 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -20.763 Td [(Notes)]TJ/F84 9.9626 Tf 1.016 0 0 1 165.649 315.74 Tm [(If)-246(this)-246(function)-247(is)-246(called)-246(on)-246(a)-246(matrix)]TJ 1 0 0 1 325.978 315.74 Tm [(a)]TJ +0 g 0 G +0 g 0 G + 1.016 0 0 1 333.452 315.74 Tm [(on)-246(a)-246(distributed)-247(communicator)-246(only)]TJ 1.02 0 0 1 150.705 303.784 Tm [(the)-273(local)-274(part)-273(is)-274(written)-273(in)-273(output.)-389(T)90(o)-273(get)-274(a)-273(single)-274(Matri)1(xMarket)-274(\002le)-273(with)-274(the)]TJ 0.994 0 0 1 150.286 291.829 Tm [(whole)-253(matrix)-252(when)-253(appr)19(opriate,)-253(e.g.)-315(for)-252(debugging)-253(purposes,)-253(one)-252(could)]TJ/F78 9.9626 Tf 0.994 0 0 1 469.114 291.829 Tm [(gather)]TJ/F84 9.9626 Tf 1.02 0 0 1 150.705 279.874 Tm [(the)-293(whole)-293(matrix)-293(on)-292(a)-293(single)-293(rank)-293(and)-293(then)-293(write)-293(it.)-447(Consider)-293(the)-293(following)]TJ 1 0 0 1 150.705 267.919 Tm [(example)-250(for)-250(a)]TJ/F78 9.9626 Tf 62.495 0 Td [(double)]TJ/F84 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(matrix)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 150.705 178.717 cm +0 0 343.711 82.69 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 153.694 250.747 Td [(type)]TJ +0 g 0 G + 
[(\050psb_ldspmat_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(aglobal)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(call)]TJ +0 g 0 G + [-525(psb_gather\050aglobal,a,desc_a,info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050iam)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(==)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_root_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 37.658 -10.959 Td [(call)]TJ +0 g 0 G + [-525(mm_mat_write\050aglobal,mtitle,info,filename\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -37.658 -10.959 Td [(end)-525(if)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_spfree\050aglobal,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +/F84 9.9626 Tf -3.298 -23.747 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F145 9.9626 Tf 129.513 0 Td [(C)]TJ/F84 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 150.705 137.797 cm +0 0 343.711 16.936 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +BT +/F233 8.9664 Tf 153.694 144.073 Td [(psb_i_t)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_c_)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(<)]TJ +0 g 0 G + [(s,d,c,z)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(>)]TJ +0 g 0 G + [(global_mat_write\050ah,cdh\051;)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 
0 G +/F84 9.9626 Tf -2.989 -23.747 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ +0 g 0 G + 164.383 -29.888 Td [(148)]TJ +0 g 0 G +ET + +endstream +endobj +1977 0 obj << -/Type /Page -/Contents 1564 0 R -/Resources 1562 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1561 0 R ] +/Length 7395 >> -% 1561 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F75 11.9552 Tf 99.895 706.129 Td [(9.6)]TJ 1.02 0 0 1 126.795 706.129 Tm [(mm)]TJ +ET +q +1 0 0 1 149.193 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 1.02 0 0 1 152.78 706.129 Tm [(array)]TJ +ET +q +1 0 0 1 181.958 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 1.02 0 0 1 185.545 706.129 Tm [(write)-324(\227)-323(W)72(rite)-324(a)-323(dense)-324(array)-324(from)-323(a)-324(\002le)-324(in)-323(the)]TJ 1 0 0 1 126.795 692.181 Tm [(MatrixMarket)-250(format)]TJ/F84 9.9626 Tf -25.158 -24.509 Td [(c)-175(a)-175(l)-174(l)-858(m)-83(m)]TJ +ET +q +1 0 0 1 150.452 667.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 154.267 667.672 Td [(a)-83(r)-83(r)-83(a)-82(y)]TJ +ET +q +1 0 0 1 182.365 667.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 186.18 667.672 Td [(w)-83(r)-83(i)-83(t)-82(e)-217(\050)-149(b)-206(,)-941(v)-165(t)-165(i)-165(t)-166(l)-165(e)-505(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-130(u)-129(n)-130(i)-129(t)-435(,)-881(f)-107(i)-107(l)-107(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -86.285 -26.38 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.44 -19.493 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.493 Td [(b)]TJ +0 g 0 G +/F84 9.9626 Tf 11.069 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.529 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 0.98 0 0 1 124.413 578.396 Tm 
[(An)-194(array)-194(of)-194(type)-194(r)19(eal)-194(or)-194(complex,)-206(rank)-194(1)-194(or)-194(2,)-206(or)-194(an)-193(object)-194(of)-194(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 397.211 578.396 Tm [(psb)]TJ +ET +q +1 0 0 1 413.53 578.595 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 416.668 578.396 Td [(T)]TJ +ET +q +1 0 0 1 422.526 578.595 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 425.664 578.396 Td [(vect)]TJ +ET +q +1 0 0 1 447.213 578.595 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 450.351 578.396 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 471.273 578.396 Tm [(,)]TJ 1 0 0 1 124.802 566.441 Tm [(of)-250(type)-250(r)18(eal)-250(or)-250(complex;)-250(its)-250(contents)-250(will)-250(be)-250(written)-250(to)-250(disk.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -31.448 Td [(\002lename)]TJ +0 g 0 G +/F84 9.9626 Tf 43.965 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written.)]TJ +0 g 0 G +/F75 9.9626 Tf -43.965 -31.448 Td [(vtitle)]TJ +0 g 0 G +/F84 9.9626 Tf 28.772 0 Td [(Matrix)-250(title.)]TJ -4.174 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 0.98 0 0 1 124.413 479.635 Tm [(A)-214(charachter)-213(variable)-214(holding)-213(a)-214(descriptive)-214(t)1(itle)-214(for)-214(the)-213(vector)-214(to)-213(be)-214(written)]TJ 1 0 0 1 124.802 467.68 Tm [(to)-250(\002le.)-310(T)90(ype:)]TJ/F75 9.9626 Tf 54.455 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ 1.02 0 0 1 124.802 455.725 Tm [(Speci\002ed)-313(as:)-440(a)-313(character)-314(variable)-313(containing)-313(a)-313(valid)-313(\002le)-313(name,)-331(or)]TJ/F145 9.9626 Tf 1 0 0 1 423.609 455.725 Tm [(-)]TJ/F84 9.9626 Tf 1.02 0 0 1 428.839 455.725 Tm [(,)-330(in)]TJ 1.003 0 0 1 124.384 443.77 Tm [(which)-250(case)-250(the)-250(default)-250(input)-250(unit)-250(5)-250(\050i.e.)-311(standar)18(d)-250(input)-250(in)-250(Unix)-250(jar)18(gon\051)-250(is)]TJ 1 0 0 1 124.802 
431.814 Tm [(used.)-310(Default:)]TJ/F145 9.9626 Tf 65.185 0 Td [(-)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -95.322 -19.492 Td [(iunit)]TJ +0 g 0 G +/F84 9.9626 Tf 26.8 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 24 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -61.877 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F145 9.9626 Tf 287.757 0 Td [(-)]TJ/F84 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -317.895 -20.836 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.492 Td [(iret)]TJ +0 g 0 G +/F84 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.125 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -27.168 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf -24.518 -20.836 Td [(Notes)]TJ/F84 9.9626 Tf 1.019 0 0 1 114.839 291.381 Tm [(If)-246(this)-246(function)-246(is)-246(called)-246(on)-246(a)-246(vector)]TJ 1 0 0 1 274.046 291.381 Tm [(v)]TJ +0 g 0 G +0 g 0 G + 1.019 0 0 1 282.172 291.381 Tm [(on)-246(a)-246(distributed)-246(communicator)-246(only)]TJ 1.02 0 0 1 99.895 279.426 Tm [(the)-273(local)-274(part)-273(is)-274(written)-273(in)-273(output.)-389(T)90(o)-273(get)-274(a)-273(single)-274(MatrixMarket)-273(\002le)-273(with)-274(the)]TJ 0.999 0 0 1 99.477 267.471 Tm [(whole)-251(vector)-251(when)-250(appr)18(opriate,)-251(e.g.)-312(for)-251(debugging)-251(purposes,)-251(one)-251(could)]TJ/F78 9.9626 Tf 0.999 0 0 1 418.178 267.471 Tm [(gather)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.895 255.516 Tm [(the)-303(whole)-304(vector)-303(on)-303(a)-303(single)-304(rank)-303(and)-303(then)-303(write)-304(it.)-478(Consider)-303(the)-304(following)]TJ 1 0 0 1 99.895 243.561 Tm [(example)-250(for)-250(a)]TJ/F78 9.9626 Tf 62.495 0 Td [(double)]TJ/F84 9.9626 Tf 28.692 0 Td 
[(pr)18(ecision)-250(vector)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 99.895 149.348 cm +0 0 343.711 82.69 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +BT +/F233 8.9664 Tf 102.884 221.378 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(vglobal\050:\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(call)]TJ +0 g 0 G + [-525(psb_gather\050vglobal,v,desc,info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050iam)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(==)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_root_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(mm_array_write\050vglobal,vtitle,info,filename\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.958 Td [(end)-525(if)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)-525(deallocate)]TJ +0 g 0 G + [(\050vglobal,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(stat)]TJ +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(info\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +/F84 9.9626 Tf -3.297 -23.777 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F145 9.9626 Tf 129.512 0 Td [(C)]TJ/F84 9.9626 Tf 5.231 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ +0 g 0 G + 29.949 -41.41 Td [(149)]TJ +0 
g 0 G +ET + +endstream +endobj +1983 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 494.069 409.811 506.129] -/A << /S /GoTo /D (descdata) >> +/Length 656 >> -% 1565 0 obj +stream +0 g 0 G +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +q +1 0 0 1 150.705 695.17 cm +0 0 343.711 16.936 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +BT +/F233 8.9664 Tf 153.694 701.446 Td [(psb_i_t)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_c_)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(<)]TJ +0 g 0 G + [(s,d,c,z)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(>)]TJ +0 g 0 G + [(global_vec_write\050vh,cdh\051;)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +/F84 9.9626 Tf -2.989 -24.209 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ +0 g 0 G + 164.383 -586.799 Td [(150)]TJ +0 g 0 G +ET + +endstream +endobj +1987 0 obj << -/D [1563 0 R /XYZ 149.705 753.953 null] +/Length 1399 >> +stream +0 g 0 G +0 g 0 G +BT +/F75 14.3462 Tf 99.895 705.784 Td [(10)-1000(Preconditioner)-250(routines)]TJ/F84 9.9626 Tf 0.98 0 0 1 99.587 683.082 Tm [(The)-255(base)-256(PSBLAS)-255(library)-256(contains)-255(the)-256(implementation)-255(of)-256(some)-255(simple)-256(pr)19(econdi-)]TJ 1 0 0 1 99.895 671.127 Tm [(tioning)-250(techniques:)]TJ +0 g 0 G + 13.888 -19.925 Td [(\225)]TJ +0 g 0 G + [-500(Diagonal)-250(Scaling)]TJ +0 g 0 G + 0 -19.926 Td [(\225)]TJ +0 g 0 G + [-500(Block)-250(Jacobi)-250(with)-250(ILU\0500\051)-250(factorization)]TJ +0 g 0 G + 0 -19.925 Td [(\225)]TJ +0 g 0 G + [-500(Block)-250(Jacobi)-250(with)-250(an)-250(appr)18(oximate)-250(inverse)]TJ 1.02 0 0 1 99.587 591.426 Tm [(The)-312(supporting)-312(data)-312(type)-312(and)-312(subr)18(outine)-312(interfaces)-312(ar)17(e)-312(de\002ned)-312(in)-312(the)-312(mod-)]TJ 1.011 0 0 1 99.895 579.471 Tm [(ule)]TJ/F145 9.9626 Tf 1 0 0 1 116.209 579.471 Tm [(psb_prec_mod)]TJ/F84 9.9626 Tf 1.011 0 0 1 178.973 579.471 Tm 
[(.)-306(The)-247(old)-247(interfaces)]TJ/F145 9.9626 Tf 1 0 0 1 266.312 579.471 Tm [(psb_precinit)]TJ/F84 9.9626 Tf 1.011 0 0 1 331.561 579.471 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 351.097 579.471 Tm [(psb_precbld)]TJ/F84 9.9626 Tf 1.011 0 0 1 411.115 579.471 Tm [(ar)18(e)-247(still)]TJ 1 0 0 1 99.895 567.515 Tm [(supported)-250(for)-250(backwar)18(d)-250(compatibility)]TJ +0 g 0 G + 164.384 -477.077 Td [(151)]TJ +0 g 0 G +ET endstream endobj -1571 0 obj +1995 0 obj << -/Length 4785 +/Length 5053 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.22)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(10.1)-1000(init)-250(\227)-250(Initialize)-250(a)-250(preconditioner)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf 0 -18.964 Td [(call)-525(prec%init\050icontxt,ptype,)-525(info\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(icontxt)]TJ +0 g 0 G +/F84 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.059 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 28.344 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -57.434 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 23.999 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -62.983 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.925 Td [(ptype)]TJ +0 g 0 G +/F84 9.9626 Tf 30.994 0 Td [(the)-250(type)-250(of)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F75 9.9626 Tf 151.121 0 Td [(global)]TJ/F84 9.9626 Tf -157.517 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(character)-250(string,)-250(see)-250(usage)-250(notes.)]TJ +0 g 0 G +/F75 9.9626 Tf -24.907 -19.925 Td [(On)-250(Exit)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(prec)]TJ +0 g 0 G +/F84 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F75 9.9626 Tf 31.431 0 Td [(local)]TJ/F84 9.9626 Tf -31.182 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.51 0 Td [(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 388.441 446.268 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(local)]TJ +/F145 9.9626 Tf 391.579 446.069 Td [(Tprec)]TJ ET q -1 0 0 1 183.605 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 418.358 446.268 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 187.192 706.129 Td [(index)-250(\227)]TJ +/F145 9.9626 Tf 421.497 446.069 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F59 9.9626 Tf -87.297 -18.964 Td [(call)-525(psb_local_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ +/F75 9.9626 Tf -291.713 -19.925 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -30.634 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F75 11.9552 Tf 0.998 0 0 1 150.705 368.361 Tm [(Notes)]TJ/F84 9.9626 Tf 0.998 0 0 1 185.005 368.361 Tm 
[(Legal)-251(inputs)-250(to)-251(this)-250(subr)18(outine)-251(ar)18(e)-250(interpr)18(eted)-251(depending)-250(on)-251(the)]TJ/F78 9.9626 Tf 1 0 0 1 470.611 368.361 Tm [(p)-25(t)-25(y)-80(p)-25(e)]TJ/F84 9.9626 Tf -319.906 -11.956 Td [(string)-250(as)-250(follows)]TJ +0 0 1 rg 0 0 1 RG +/F84 7.5716 Tf 72.358 3.617 Td [(4)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F84 9.9626 Tf 4.284 -3.617 Td [(:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -76.642 -19.925 Td [(NONE)]TJ 0 g 0 G +/F84 9.9626 Tf 35.965 0 Td [(No)-250(pr)18(econditioning,)-250(i.e.)-310(the)-250(pr)18(econditioner)-250(is)-250(just)-250(a)-250(copy)-250(operator)74(.)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ +/F75 9.9626 Tf -35.965 -19.925 Td [(DIAG)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +/F84 9.9626 Tf 1.02 0 0 1 183.91 316.555 Tm [(Diagonal)-318(scaling;)-354(each)-318(entry)-318(of)-318(the)-318(input)-318(vector)-317(is)-318(multiplied)-318(by)-318(the)]TJ 1.02 0 0 1 175.611 304.6 Tm [(r)18(ecipr)17(ocal)-378(of)-378(the)-377(sum)-378(of)-378(the)-378(absolute)-378(values)-378(of)-378(the)-378(coef)18(\002cients)-378(in)-378(the)]TJ 1 0 0 1 175.611 292.645 Tm [(corr)18(esponding)-250(r)18(ow)-250(of)-250(matrix)]TJ/F78 9.9626 Tf 129.947 0 Td [(A)]TJ/F84 9.9626 Tf 7.317 0 Td [(;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 545.696 Td [(a)]TJ +/F75 9.9626 Tf -162.17 -19.926 Td [(BJAC)]TJ 0 g 0 G 
-/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F84 9.9626 Tf 0.994 0 0 1 181.15 272.719 Tm [(Pr)18(econdition)-250(by)-251(a)-250(factorization)-251(or)-250(an)-250(appr)18(oximante)-251(inverse)-250(of)-251(the)-250(block-)]TJ 0.982 0 0 1 175.611 260.764 Tm [(diagonal)-255(of)-256(matrix)]TJ/F78 9.9626 Tf 1 0 0 1 258.736 260.764 Tm [(A)]TJ/F84 9.9626 Tf 0.982 0 0 1 266.054 260.764 Tm [(,)-256(wh)1(er)18(e)-256(b)1(lock)-256(boundaries)-255(ar)18(e)-255(determined)-255(by)-256(the)-255(data)]TJ 1.015 0 0 1 175.611 248.809 Tm [(allocation)-246(boundaries)-247(for)-246(each)-246(pr)18(ocess;)-247(r)18(equir)18(es)-247(no)-246(communication.)-305(See)]TJ 1 0 0 1 175.611 236.854 Tm [(also)-250(T)92(able-)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ + [(21)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G ET q -1 0 0 1 309.258 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.705 130.181 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S Q BT -/F59 9.9626 Tf 312.397 497.875 Td [(desc)]TJ +/F84 5.9776 Tf 161.564 123.219 Td [(4)]TJ/F84 7.9701 Tf 3.24 -2.893 Td [(The)-250(string)-250(is)-250(case-insensitive)]TJ +0 g 0 G +0 g 0 G +/F84 9.9626 Tf 150.284 -29.888 Td [(152)]TJ +0 g 0 G ET -q -1 0 0 1 333.945 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +2006 0 obj +<< +/Length 4359 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 337.084 497.875 Td [(type)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(10.2)-1000(Set)-250(\227)-250(set)-250(preconditioner)-250(parameters)]TJ +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td 
[(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 107 -18.964 Td [(call)]TJ +0 g 0 G + [-525(p%set\050what,val,info\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.587 665.247 Tm [(This)-384(method)-385(sets)-384(the)-385(parameters)-384(de\002ning)-385(the)-385(sub)1(domain)-385(solver)-385(whe)1(n)-385(the)]TJ 0.987 0 0 1 99.596 653.292 Tm [(pr)18(econditioner)-253(type)-253(is)]TJ/F145 9.9626 Tf 1 0 0 1 197.784 653.292 Tm [(BJAC)]TJ/F84 9.9626 Tf 0.987 0 0 1 218.705 653.292 Tm [(.)-253(Mor)18(e)-253(pr)19(ecisely)112(,)-253(the)-253(parameter)-253(identi\002ed)-252(by)]TJ/F145 9.9626 Tf 1 0 0 1 413.168 653.292 Tm [(what)]TJ/F84 9.9626 Tf 0.987 0 0 1 436.576 653.292 Tm [(is)]TJ 1 0 0 1 99.895 641.337 Tm [(assigned)-250(the)-250(value)-250(contained)-250(in)]TJ/F145 9.9626 Tf 141.229 0 Td [(val)]TJ/F84 9.9626 Tf 15.691 0 Td [(.)]TJ/F75 11.9552 Tf -157.386 -29.888 Td [(Arguments)]TJ/F145 9.9626 Tf 21.126 -15.534 Td [(what)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + [-2541(character)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ + [(\050)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(len)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=*)]TJ 0 g 0 G -/F51 9.9626 Tf -316.929 
-21.918 Td [(On)-250(Return)]TJ + [(\051)]TJ/F84 9.9626 Tf 129.918 0 Td [(.)]TJ 1.02 0 0 1 166.479 583.96 Tm [(The)-322(p)1(arameter)-322(to)-322(b)1(e)-322(set.)-533(It)-322(can)-321(be)-322(speci\002ed)-321(thr)17(ough)-321(its)-322(name;)-359(the)-322(string)-321(is)]TJ 1 0 0 1 166.788 572.005 Tm [(case-insensitive.)-310(See)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(21)]TJ 0 g 0 G + [(.)]TJ/F145 9.9626 Tf -46.233 -11.955 Td [(val)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(y)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + [-3066(integer)]TJ +0 g 0 G +/F78 9.9626 Tf 1.02 0 0 1 211.187 560.05 Tm [(or)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F145 9.9626 Tf 1 0 0 1 227.438 560.05 Tm [(character)]TJ +0 g 0 G + [(\050)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(len)]TJ +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=*)]TJ +0 g 0 G + [(\051)]TJ/F78 9.9626 Tf 1.02 0 0 1 318.911 560.05 Tm [(or)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F145 9.9626 Tf 1 0 0 1 335.162 560.05 Tm [(real)]TJ +0 g 0 G + [(\050psb_spk_\051)]TJ/F78 9.9626 Tf 1.02 0 0 1 416.174 560.05 Tm [(or)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F145 9.9626 Tf 1 0 0 1 432.425 560.05 Tm [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 505.65 560.05 Tm [(,)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 1 0 0 1 166.788 548.095 Tm [(intent)]TJ +0 g 0 G + [(\050in\051)]TJ/F84 9.9626 Tf 52.303 0 Td [(.)]TJ 1.02 0 0 1 166.479 536.14 Tm [(The)-390(value)-390(of)-390(the)-389(parameter)-390(to)-390(be)-390(set.)-738(The)-390(list)-390(of)-390(allowed)-390(values)-389(and)-390(the)]TJ 1.02 0 0 1 166.788 524.184 Tm [(corr)18(esponding)-365(data)-364(types)-365(is)-364(given)-365(in)-364(T)90(able)]TJ +0 0 1 rg 0 0 1 RG + [-364(21)]TJ +0 g 0 G + [(.)-663(When)-364(the)-365(valu)1(e)-365(is)-364(of)-365(type)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F145 9.9626 Tf 1 0 0 1 166.788 512.229 Tm [(character)]TJ +0 g 0 G + [(\050)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 
RG + [(len)]TJ +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=*)]TJ +0 g 0 G + [(\051)]TJ/F84 9.9626 Tf 83.685 0 Td [(,)-250(it)-250(is)-250(also)-250(tr)18(eated)-250(as)-250(case)-250(insensitive.)]TJ/F145 9.9626 Tf -129.656 -11.955 Td [(info)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + [-2514(integer)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(A)-270(logical)-270(mask)-270(which)-271(is)-270(tr)8(ue)-270(for)-270(all)-270(corr)18(esponding)-270(entries)-270(of)]TJ/F52 9.9626 Tf 268.484 0 Td [(x)]TJ/F54 9.9626 Tf 7.897 0 Td [(that)-270(ar)18(e)-270(local)]TJ -261.995 -11.955 Td [(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F51 9.9626 Tf 128.666 0 Td [(local)]TJ/F54 9.9626 Tf -128.666 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(intent)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ + [(\050out\051)]TJ/F84 9.9626 Tf 150.578 0 Td [(.)]TJ -104.607 -11.955 Td [(Err)18(or)-250(code.)-310(If)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)-310(See)-250(Section)]TJ +0 0 1 rg 0 0 1 RG + [-250(8)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ + [-250(for)-250(details.)]TJ 1.02 0 
0 1 99.507 476.478 Tm [(A)-253(number)-253(of)-253(subdomain)-253(solvers)-253(can)-253(be)-253(chosen)-253(with)-254(this)-253(method;)-257(a)-253(list)-253(of)-253(the)]TJ 0.98 0 0 1 99.596 464.523 Tm [(parameters)-247(that)-247(can)-247(be)-247(set,)-248(along)-247(with)-247(their)-247(allowed)-247(and)-247(default)-246(values,)-249(is)-247(given)]TJ 1 0 0 1 99.895 452.568 Tm [(in)-250(T)92(able-)]TJ +0 0 1 rg 0 0 1 RG + [(21)]TJ 0 g 0 G - [-500(This)-264(r)18(outine)-265(r)18(eturns)-264(a)]TJ/F59 9.9626 Tf 110.663 0 Td [(.true.)]TJ/F54 9.9626 Tf 34.017 0 Td [(value)-264(for)-265(those)-264(indices)-265(that)-264(ar)18(e)-265(local)-264(to)-265(the)]TJ -132.227 -11.955 Td [(curr)18(ent)-250(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices.)]TJ + [(.)]TJ 0 g 0 G - 139.477 -140.438 Td [(103)]TJ + 164.384 -362.13 Td [(153)]TJ 0 g 0 G ET endstream endobj -1578 0 obj +2010 0 obj << -/Length 3647 +/Length 13651 >> stream 0 g 0 G 0 g 0 G +0 g 0 G +1 0 0 1 322.56 716.092 cm +q +0 -1 1 0 0 0 cm +0 g 0 G +0 g 0 G +0 g 0 G +q +1 0 0 1 6.907 167.723 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S +Q +q +1 0 0 1 6.907 156.565 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S +Q +1 0 0 1 -322.56 -716.092 cm BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.23)-1000(psb)]TJ +/F233 8.9664 Tf 335.209 875.945 Td [(what)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 432.13 872.657 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(get)]TJ +/F84 7.1731 Tf 438.332 875.945 Td [(D)-62(A)11(T)12(A)-374(T)-62(Y)-62(P)-62(E)]TJ ET q -1 0 0 1 225.126 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 538.231 872.657 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S Q BT -/F51 11.9552 Tf 228.712 706.129 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -78.007 -18.964 Td [(call)-525(psb_get_boundary\050bndel,)-525(desc,)-525(info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 
-21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(desc)]TJ -0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F233 8.9664 Tf 543.974 875.945 Td [(val)]TJ ET q -1 0 0 1 360.068 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 623.888 872.657 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S Q BT -/F59 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +/F84 7.1731 Tf 630.089 875.945 Td [(D)-62(E)-62(F)12(A)-62(U)-62(L)13(T)]TJ ET q -1 0 0 1 384.755 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 709.544 872.657 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S Q BT -/F59 9.9626 Tf 387.893 577.576 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F84 7.1731 Tf 715.745 875.945 Td [(C)-62(O)-62(M)-61(M)-62(E)-61(N)-62(T)-62(S)]TJ +ET +q +1 0 0 1 911.42 872.657 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S +Q +q +1 0 0 1 329.467 872.458 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S +Q +q +1 0 0 1 329.467 795.546 cm +[]0 d 0 J 0.398 w 0 0 m 0 76.712 l S +Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG +BT +/F233 8.9664 Tf 335.444 864.587 Td [(\015SUB_SOLVE\015)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +ET +q +1 0 0 1 432.13 795.546 cm +[]0 d 0 J 0.398 w 0 0 m 0 76.712 l S +Q +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +BT +/F233 8.9664 Tf 438.108 864.587 Td [(character)]TJ 0 g 0 G + [(\050)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 
[(len)]TJ 0 g 0 G - 0 -19.925 Td [(bndel)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=*)]TJ 0 g 0 G -/F54 9.9626 Tf 31.541 0 Td [(The)-307(list)-307(of)-307(boundary)-307(elements)-307(on)-306(the)-307(calling)-307(pr)18(ocess,)-321(in)-307(local)-307(number)18(-)]TJ -6.635 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(a)-1(r)1(ray)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ + [(\051)]TJ +ET +q +1 0 0 1 538.231 795.546 cm +[]0 d 0 J 0.398 w 0 0 m 0 76.712 l S +Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG +BT +/F233 8.9664 Tf 544.209 864.587 Td [(\015ILU\015)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(info)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015ILUT\015)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015INVT\015)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015INVK\015)]TJ 0 g 0 G - [-500(If)-219(ther)18(e)-220(ar)18(e)-219(no)-220(boundary)-219(elements)-219(\050i.e.,)-226(if)-219(the)-220(local)-219(part)-219(of)-220(the)-219(connectivity)]TJ 12.453 -11.956 Td 
[(graph)-391(is)-392(self-contained\051)-391(the)-392(output)-391(vector)-391(is)-392(set)-391(to)-391(the)-392(\223not)-391(allocated\224)]TJ 0 -11.955 Td [(state.)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015AINV\015)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ +ET +q +1 0 0 1 623.888 795.546 cm +[]0 d 0 J 0.398 w 0 0 m 0 76.712 l S +Q +q +1 0 0 1 709.544 795.546 cm +[]0 d 0 J 0.398 w 0 0 m 0 76.712 l S +Q +BT +/F84 8.9664 Tf 1.02 0 0 1 715.243 864.587 Tm [(The)-987(local)-987(solver)-987(to)-987(be)-987(used)-988(wi)1(th)-988(the)]TJ 1.02 0 0 1 715.521 853.628 Tm [(smoother)-490(or)-490(one-level)-491(pr)18(econditioner)-490(ILU\050)]TJ/F78 8.9664 Tf 1 0 0 1 896.636 853.628 Tm [(p)]TJ/F84 8.9664 Tf 1.02 0 0 1 901.231 853.628 Tm [(\051,)]TJ 1.02 0 0 1 715.521 842.669 Tm [(ILU\050)]TJ/F78 8.9664 Tf 1 0 0 1 734.957 842.669 Tm [(p)]TJ/F84 8.9664 Tf 1.02 0 0 1 739.552 842.669 Tm [(,)]TJ/F78 8.9664 Tf 1 0 0 1 743.445 842.669 Tm [(t)]TJ/F84 8.9664 Tf 1.02 0 0 1 746.543 842.669 Tm [(\051,)-862(Appr)18(oximate)-738(Inverses)-738(INVK\050)]TJ/F78 8.9664 Tf 1 0 0 1 888.48 842.669 Tm [(p)]TJ/F84 8.9664 Tf 1.02 0 0 1 893.075 842.669 Tm [(,)]TJ/F78 8.9664 Tf 1 0 0 1 896.968 842.669 Tm [(q)]TJ/F84 8.9664 Tf 1.02 0 0 1 901.231 842.669 Tm [(\051,)]TJ 1.02 0 0 1 715.521 831.71 Tm [(INVT\050)]TJ/F78 8.9664 Tf 1 0 0 1 742.063 831.71 Tm [(p)]TJ/F84 6.9738 Tf 4.596 -1.783 Td [(1)]TJ/F84 8.9664 Tf 1.02 0 0 1 750.644 831.71 Tm [(,)]TJ/F78 8.9664 Tf 1 0 0 1 755.03 831.71 Tm [(p)]TJ/F84 8.9664 Tf 1.02 0 0 1 759.625 831.71 Tm [(2,)]TJ/F78 8.9664 Tf 1 0 0 1 768.091 831.71 Tm [(t)]TJ/F84 6.9738 Tf 3.097 -1.783 Td [(1)]TJ/F84 8.9664 Tf 1.02 0 0 1 775.173 831.71 Tm [(,)]TJ/F78 8.9664 Tf 1 0 0 1 779.066 831.71 Tm [(t)]TJ/F84 6.9738 Tf 3.098 -1.678 Td [(2)]TJ/F84 8.9664 Tf 1.02 0 0 1 786.149 831.71 Tm [(\051)-786(and)-787(AINV\050)]TJ/F78 8.9664 Tf 1 0 0 1 846.619 831.71 Tm [(t)]TJ/F84 8.9664 Tf 1.02 0 0 1 849.717 831.71 Tm [(\051;)-1057(note)-786(that)]TJ 1.02 0 0 1 715.521 
820.751 Tm [(appr)18(oximate)-623(inverses)-623(ar)18(e)-623(speci\002cally)-622(suited)]TJ 1.02 0 0 1 715.521 809.793 Tm [(for)-339(GPUs)-339(since)-339(they)-339(do)-340(not)-339(employ)-339(triangular)]TJ 1 0 0 1 715.521 798.834 Tm [(system)-250(solve)-250(kernels,)-250(see)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(2)]TJ 0 g 0 G - [-500(Otherwise)-206(the)-205(size)-206(of)]TJ/F59 9.9626 Tf 105.891 0 Td [(bndel)]TJ/F54 9.9626 Tf 28.201 0 Td [(will)-206(be)-205(exactly)-206(equal)-206(to)-206(the)-205(number)-206(of)-206(bound-)]TJ -121.639 -11.955 Td [(ary)-250(elements.)]TJ + [(].)]TJ +ET +q +1 0 0 1 911.42 795.546 cm +[]0 d 0 J 0.398 w 0 0 m 0 76.712 l S +Q +q +1 0 0 1 329.467 795.347 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S +Q +q +1 0 0 1 329.467 773.23 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG +BT +/F233 8.9664 Tf 335.444 787.476 Td [(\015SUB_FILLIN\015)]TJ 0 g 0 G - 139.477 -196.229 Td [(104)]TJ +ET +q +1 0 0 1 432.13 773.23 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +BT +/F233 8.9664 Tf 438.108 787.476 Td [(integer)]TJ 0 g 0 G ET - -endstream -endobj -1585 0 obj -<< -/Length 3458 ->> -stream +q +1 0 0 1 538.231 773.23 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +BT +/F84 8.9664 Tf 543.859 787.476 Td [(Any)-250(integer)]TJ 0.35 -10.959 Td [(number)]TJ/F190 9.343 Tf 33.691 0 Td [(\025)]TJ/F84 8.9664 Tf 9.865 0 Td [(0)]TJ +ET +q +1 0 0 1 623.888 773.23 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +BT +/F84 8.9664 Tf 629.865 787.476 Td [(0)]TJ +ET +q +1 0 0 1 709.544 773.23 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +BT +/F84 8.9664 Tf 0.98 0 0 1 715.521 787.476 Tm [(Fill-in)-207(level)]TJ/F78 8.9664 Tf 1 0 0 1 761.42 787.476 Tm [(p)]TJ/F84 8.9664 Tf 0.98 0 0 1 767.837 787.476 Tm [(of)-207(the)-208(incomplete)-207(LU)-207(factorizations.)]TJ +ET +q +1 0 0 1 911.42 773.23 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +q +1 0 0 1 329.467 773.03 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S +Q +q +1 
0 0 1 329.467 750.913 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG +BT +/F233 8.9664 Tf 335.444 765.16 Td [(\015SUB_ILUTHRS\015)]TJ 0 g 0 G +ET +q +1 0 0 1 432.13 750.913 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +BT +/F233 8.9664 Tf 438.108 765.16 Td [(real)]TJ 0 g 0 G + [(\050kind_parameter\051)]TJ +ET +q +1 0 0 1 538.231 750.913 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +BT +/F84 8.9664 Tf 1.02 0 0 1 543.859 765.16 Tm [(Any)-1148(r)18(eal)-1148(num-)]TJ 1 0 0 1 544.209 754.201 Tm [(ber)]TJ/F190 9.343 Tf 15.148 0 Td [(\025)]TJ/F84 8.9664 Tf 9.866 0 Td [(0)]TJ +ET +q +1 0 0 1 623.888 750.913 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +BT +/F84 8.9664 Tf 629.865 765.16 Td [(0)]TJ +ET +q +1 0 0 1 709.544 750.913 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.24)-1000(psb)]TJ +/F84 8.9664 Tf 715.521 765.16 Td [(Dr)18(op)-250(tolerance)]TJ/F78 8.9664 Tf 61.442 0 Td [(t)]TJ/F84 8.9664 Tf 5.339 0 Td [(in)-250(the)-250(ILU\050)]TJ/F78 8.9664 Tf 43.814 0 Td [(p)]TJ/F84 8.9664 Tf 4.595 0 Td [(,)]TJ/F78 8.9664 Tf 3.848 0 Td [(t)]TJ/F84 8.9664 Tf 3.098 0 Td [(\051)-250(factorization.)]TJ ET q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 911.42 750.913 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +q +1 0 0 1 329.467 750.714 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S +Q +q +1 0 0 1 329.467 739.556 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG BT -/F51 11.9552 Tf 156.993 706.129 Td [(get)]TJ +/F233 8.9664 Tf 335.444 742.844 Td [(\015ILU_ALG\015)]TJ +0 g 0 G ET q -1 0 0 1 174.316 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 432.13 739.556 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S Q +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG BT -/F51 11.9552 Tf 177.903 706.129 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -78.008 -18.964 Td 
[(call)-525(psb_get_overlap\050ovrel,)-525(desc,)-525(info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F233 8.9664 Tf 438.108 742.844 Td [(character)]TJ 0 g 0 G + [(\050)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(len)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=*)]TJ 0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ + [(\051)]TJ ET q -1 0 0 1 309.258 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 538.231 739.556 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG BT -/F59 9.9626 Tf 312.397 577.576 Td [(desc)]TJ +/F233 8.9664 Tf 544.209 742.844 Td [(\015MILU\015)]TJ +0 g 0 G ET q -1 0 0 1 333.945 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 623.888 739.556 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG BT -/F59 9.9626 Tf 337.084 577.576 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(ovrel)]TJ -0 g 0 G -/F54 9.9626 Tf 28.234 0 Td [(The)-250(list)-250(of)-250(overlap)-250(elements)-250(on)-250(the)-250(calling)-250(pr)18(ocess,)-250(in)-250(local)-250(numbering.)]TJ -3.327 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 
Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(array)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(If)-287(ther)18(e)-287(ar)18(e)-287(no)-287(overlap)-287(elements)-287(the)-287(output)-287(vector)-287(is)-287(set)-287(to)-287(the)-287(\223not)-287(allo-)]TJ 12.453 -11.955 Td [(cated\224)-250(state.)]TJ -0 g 0 G - -12.453 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(Otherwise)-194(the)-194(size)-195(of)]TJ/F59 9.9626 Tf 105.434 0 Td [(ovrel)]TJ/F54 9.9626 Tf 28.087 0 Td [(will)-194(be)-194(exactly)-195(equal)-194(to)-194(the)-194(number)-195(of)-194(overlap)]TJ -121.068 -11.955 Td [(elements.)]TJ -0 g 0 G - 139.477 -220.139 Td [(105)]TJ +/F233 8.9664 Tf 629.865 742.844 Td [(\015NONE\015)]TJ 0 g 0 G ET - -endstream -endobj -1592 0 obj -<< -/Length 5480 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 709.544 739.556 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.25)-1000(psb)]TJ +/F84 8.9664 Tf 715.521 742.844 Td [(ILU)-250(algorithmic)-250(variant)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 911.42 739.556 cm +[]0 d 0 J 0.398 w 0 0 m 0 10.959 l S +Q +q +1 0 
0 1 329.467 739.357 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S +Q +q +1 0 0 1 329.467 673.404 cm +[]0 d 0 J 0.398 w 0 0 m 0 65.753 l S Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG BT -/F51 11.9552 Tf 207.803 706.129 Td [(sp)]TJ +/F233 8.9664 Tf 335.444 731.486 Td [(\015ILUT_SCALE\015)]TJ +0 g 0 G ET q -1 0 0 1 221.133 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 432.13 673.404 cm +[]0 d 0 J 0.398 w 0 0 m 0 65.753 l S Q +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG BT -/F51 11.9552 Tf 224.719 706.129 Td [(getrow)-250(\227)-250(Extract)-250(row\050s\051)-250(from)-250(a)-250(sparse)-250(matrix)]TJ +/F233 8.9664 Tf 438.108 731.486 Td [(character)]TJ 0 g 0 G + [(\050)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(len)]TJ 0 g 0 G -/F59 9.9626 Tf -74.014 -19.204 Td [(call)-525(psb_sp_getrow\050row,)-525(a,)-525(nz,)-525(ia,)-525(ja,)-525(val,)-525(info,)-525(&)]TJ 73.225 -11.955 Td [(&)-525(append,)-525(nzin,)-525(lrw\051)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=*)]TJ 0 g 0 G -/F51 9.9626 Tf -73.225 -22.29 Td [(T)90(ype:)]TJ + [(\051)]TJ +ET +q +1 0 0 1 538.231 673.404 cm +[]0 d 0 J 0.398 w 0 0 m 0 65.753 l S +Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG +BT +/F233 8.9664 Tf 544.209 731.486 Td [(\015MAXVAL\015)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015DIAG\015)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.42 Td [(On)-250(Entry)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015ARSWUM\015)]TJ 0 g 0 G +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.958 Td [(\015ARCSUM\015)]TJ 0 g 0 G - 0 -20.421 Td [(row)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015ACLSUM\015)]TJ 0 g 0 G -/F54 9.9626 Tf 22.695 0 Td [(The)-250(\050\002rst\051)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 2.212 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F51 
9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F61 10.3811 Tf 104.322 0 Td [(>)]TJ/F54 9.9626 Tf 10.962 0 Td [(0.)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015NONE\015)]TJ 0 g 0 G -/F51 9.9626 Tf -140.191 -20.42 Td [(a)]TJ +ET +q +1 0 0 1 623.888 673.404 cm +[]0 d 0 J 0.398 w 0 0 m 0 65.753 l S +Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG +BT +/F233 8.9664 Tf 629.865 731.486 Td [(\015NONE\015)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(fr)18(om)-250(which)-250(to)-250(get)-250(r)18(ows.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 495.976 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 709.544 673.404 cm +[]0 d 0 J 0.398 w 0 0 m 0 65.753 l S Q BT -/F59 9.9626 Tf 363.206 495.777 Td [(Tspmat)]TJ +/F84 8.9664 Tf 715.521 731.486 Td [(ILU)-250(scaling)-250(strategy)]TJ ET q -1 0 0 1 395.216 495.976 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 911.42 673.404 cm +[]0 d 0 J 0.398 w 0 0 m 0 65.753 l S Q +q +1 0 0 1 329.467 673.205 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S +Q +q +1 0 0 1 329.467 651.088 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG BT -/F59 9.9626 Tf 398.354 495.777 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -268.57 -20.421 Td [(append)]TJ -0 g 0 G -/F54 9.9626 Tf 39.292 0 Td [(Whether)-250(to)-250(append)-250(or)-250(overwrite)-250(existing)-250(output.)]TJ -14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td 
[(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value)-250(default:)-310(false)-250(\050overwrite\051.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(nzin)]TJ -0 g 0 G -/F54 9.9626 Tf 25.454 0 Td [(Input)-250(size)-250(to)-250(be)-250(appended)-250(to.)]TJ -0.548 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-304(as:)-417(an)-303(integer)]TJ/F61 10.3811 Tf 106.988 0 Td [(>)]TJ/F54 9.9626 Tf 11.949 0 Td [(0.)-471(When)-303(append)-303(is)-304(tr)8(ue,)-317(speci\002es)-303(how)-304(many)]TJ -118.937 -11.955 Td [(entries)-250(in)-250(the)-250(output)-250(vectors)-250(ar)18(e)-250(alr)18(eady)-250(\002lled.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(lrw)]TJ -0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(The)-250(last)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F61 10.3811 Tf 104.323 0 Td [(>)]TJ/F54 9.9626 Tf 10.962 0 Td [(0,)-250(default:)]TJ/F52 9.9626 Tf 46.878 0 Td [(r)-17(o)-35(w)]TJ/F54 9.9626 Tf 16.134 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -203.203 -22.29 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.42 Td [(nz)]TJ -0 g 0 G -/F54 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(elements)-250(r)18(eturned)-250(by)-250(this)-250(call.)]TJ 8.856 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td 
[(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(ia)]TJ -0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(r)18(ow)-250(indices.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F59 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F54 9.9626 Tf 60.025 0 Td [(attribute.)]TJ -0 g 0 G - -89.662 -29.887 Td [(106)]TJ +/F233 8.9664 Tf 335.444 665.334 Td [(\015INV_FILLIN\015)]TJ 0 g 0 G ET - -endstream -endobj -1596 0 obj -<< -/Length 3529 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 432.13 651.088 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG BT -/F51 9.9626 Tf 99.895 706.129 Td [(ja)]TJ -0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F59 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F54 9.9626 Tf 60.024 0 Td [(attribute.)]TJ -0 g 0 G -/F51 9.9626 Tf -254.045 -19.925 Td 
[(val)]TJ -0 g 0 G -/F54 9.9626 Tf 18.82 0 Td [(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(array)-250(with)-250(the)]TJ/F59 9.9626 Tf 148.761 0 Td [(ALLOCATABLE)]TJ/F54 9.9626 Tf 60.024 0 Td [(attribute.)]TJ -0 g 0 G -/F51 9.9626 Tf -233.692 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-307(output)]TJ/F52 9.9626 Tf 65.308 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 13.394 0 Td [(is)-307(always)-307(the)-307(size)-307(of)-307(the)-307(output)-307(generated)-307(by)-306(the)-307(curr)18(ent)]TJ -66.249 -11.955 Td [(call;)-283(thus,)-277(if)]TJ/F59 9.9626 Tf 53.971 0 Td [(append=.true.)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)-278(the)-271(total)-272(output)-272(size)-272(will)-272(be)]TJ/F52 9.9626 Tf 129.372 0 Td [(n)-25(z)-18(i)-32(n)]TJ/F85 10.3811 Tf 21.286 0 Td [(+)]TJ/F52 9.9626 Tf 10.336 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.337 0 Td [(,)-277(with)]TJ -293.296 -11.955 Td [(the)-292(newly)-293(extracted)-292(coef)18(\002cients)-293(stor)18(ed)-292(in)-293(entries)]TJ/F59 9.9626 Tf 217.177 0 Td [(nzin+1:nzin+nz)]TJ/F54 9.9626 Tf 76.139 0 Td 
[(of)-292(the)]TJ -293.316 -11.955 Td [(array)-250(ar)18(guments;)]TJ -0 g 0 G - -12.453 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(When)]TJ/F59 9.9626 Tf 41.275 0 Td [(append=.true.)]TJ/F54 9.9626 Tf 70.485 0 Td [(the)-250(output)-250(arrays)-250(ar)18(e)-250(r)18(eallocated)-250(as)-250(necessary;)]TJ -0 g 0 G - -111.76 -19.925 Td [(3.)]TJ -0 g 0 G - [-500(The)-218(r)18(ow)-218(and)-219(column)-218(indices)-218(ar)18(e)-218(r)18(eturned)-218(in)-219(the)-218(local)-218(numbering)-218(scheme;)]TJ 12.453 -11.955 Td [(if)-190(the)-190(global)-190(numbering)-190(is)-190(desir)18(ed,)-202(the)-190(user)-190(may)-190(employ)-190(the)]TJ/F59 9.9626 Tf 258.836 0 Td [(psb_loc_to_glob)]TJ/F54 9.9626 Tf -258.836 -11.955 Td [(r)18(outine)-250(on)-250(the)-250(output.)]TJ -0 g 0 G - 139.477 -290.909 Td [(107)]TJ +/F233 8.9664 Tf 438.108 665.334 Td [(integer)]TJ 0 g 0 G ET - -endstream -endobj -1606 0 obj -<< -/Length 3995 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 538.231 651.088 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.26)-1000(psb)]TJ +/F84 8.9664 Tf 543.859 665.334 Td [(Any)-250(integer)]TJ 0.35 -10.959 Td [(number)]TJ/F190 9.343 Tf 33.691 0 Td [(\025)]TJ/F84 8.9664 Tf 9.865 0 Td [(0)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 623.888 651.088 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(memory)-250(occupation)-250(of)-250(a)-250(PSBLAS)-250(object.)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -21.918 Td [(isz)-525(=)-525(psb_sizeof\050a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050desc_a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050prec\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td 
[(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 72.97 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(.)]TJ -65.344 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ +/F84 8.9664 Tf 629.865 665.334 Td [(0)]TJ ET q -1 0 0 1 360.068 531.947 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 709.544 651.088 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S Q BT -/F59 9.9626 Tf 363.206 531.748 Td [(Tspmat)]TJ +/F84 8.9664 Tf 1.02 0 0 1 715.521 665.334 Tm [(Second)-285(\002ll-in)-284(level)]TJ/F78 8.9664 Tf 1 0 0 1 793.724 665.334 Tm [(q)]TJ/F84 8.9664 Tf 1.02 0 0 1 800.59 665.334 Tm [(of)-285(the)-284(INVK\050)]TJ/F78 8.9664 Tf 1 0 0 1 854.095 665.334 Tm [(p)]TJ/F84 8.9664 Tf 1.02 0 0 1 858.69 665.334 Tm [(,)]TJ/F78 8.9664 Tf 1 0 0 1 862.583 665.334 Tm [(q)]TJ/F84 8.9664 Tf 1.02 0 0 1 866.846 665.334 Tm [(\051)-285(appr)18(oxi-)]TJ 1 0 0 1 715.521 654.375 Tm [(mate)-250(inverse.)]TJ ET q -1 0 0 1 395.216 531.947 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 911.42 651.088 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +q +1 0 0 1 329.467 650.888 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S +Q +q +1 0 0 1 329.467 628.771 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG BT -/F59 9.9626 Tf 398.354 531.748 Td [(type)]TJ +/F233 8.9664 Tf 335.444 643.018 Td [(\015INV_ILUTHRS\015)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ ET q -1 0 0 1 171.218 512.022 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 432.13 628.771 cm +[]0 d 0 J 0.398 w 0 0 m 0 
21.918 l S Q +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG BT -/F51 9.9626 Tf 174.207 511.823 Td [(a)]TJ +/F233 8.9664 Tf 438.108 643.018 Td [(real)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(Communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ + [(\050kind_parameter\051)]TJ ET q -1 0 0 1 360.068 464.201 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 538.231 628.771 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S Q BT -/F59 9.9626 Tf 363.206 464.002 Td [(desc)]TJ +/F84 8.9664 Tf 1.02 0 0 1 543.859 643.018 Tm [(Any)-1148(r)18(eal)-1148(num-)]TJ 1 0 0 1 544.209 632.059 Tm [(ber)]TJ/F190 9.343 Tf 15.148 0 Td [(\025)]TJ/F84 8.9664 Tf 9.866 0 Td [(0)]TJ ET q -1 0 0 1 384.755 464.201 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 623.888 628.771 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S Q BT -/F59 9.9626 Tf 387.893 464.002 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ +/F84 8.9664 Tf 629.865 643.018 Td [(0)]TJ ET q -1 0 0 1 388.441 408.41 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 709.544 628.771 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S Q BT -/F59 
9.9626 Tf 391.579 408.211 Td [(prec)]TJ +/F84 8.9664 Tf 0.985 0 0 1 715.521 643.018 Tm [(Second)-255(dr)19(op)-255(tolerance)]TJ/F78 8.9664 Tf 1 0 0 1 805.081 643.018 Tm [(s)]TJ/F84 8.9664 Tf 0.985 0 0 1 810.931 643.018 Tm [(in)-255(the)-254(INVT\050)]TJ/F78 8.9664 Tf 1 0 0 1 860.549 643.018 Tm [(t)]TJ/F84 8.9664 Tf 0.985 0 0 1 863.647 643.018 Tm [(,)]TJ/F78 8.9664 Tf 1 0 0 1 867.461 643.018 Tm [(s)]TJ/F84 8.9664 Tf 0.985 0 0 1 871.061 643.018 Tm [(\051)-255(appr)19(ox-)]TJ 1 0 0 1 715.521 632.059 Tm [(imate)-250(inverse.)]TJ ET q -1 0 0 1 413.128 408.41 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 911.42 628.771 cm +[]0 d 0 J 0.398 w 0 0 m 0 21.918 l S +Q +q +1 0 0 1 329.467 628.572 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S Q +q +1 0 0 1 329.467 584.537 cm +[]0 d 0 J 0.398 w 0 0 m 0 43.836 l S +Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG BT -/F59 9.9626 Tf 416.266 408.211 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F233 8.9664 Tf 335.444 620.702 Td [(\015AINV_ALG\015)]TJ 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(On)-250(Return)]TJ +ET +q +1 0 0 1 432.13 584.537 cm +[]0 d 0 J 0.398 w 0 0 m 0 43.836 l S +Q +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +BT +/F233 8.9664 Tf 438.108 620.702 Td [(character)]TJ 0 g 0 G + [(\050)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(len)]TJ 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=*)]TJ 0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(The)-322(memory)-322(occupation)-322(of)-323(the)-322(object)-322(speci\002ed)-322(in)-322(the)-322(calling)]TJ -47.87 -11.956 Td [(sequence,)-250(in)-250(bytes.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Returned)-250(as:)-310(an)]TJ/F59 9.9626 Tf 71.82 0 Td [(integer\050psb_long_int_k_\051)]TJ/F54 9.9626 Tf 128.019 0 Td [(number)74(.)]TJ + [(\051)]TJ +ET +q +1 0 0 1 538.231 584.537 cm +[]0 d 0 J 0.398 w 0 0 m 0 43.836 l S +Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG +BT +/F233 8.9664 Tf 544.209 
620.702 Td [(\015LLK\015)]TJ 0 g 0 G - -60.362 -242.057 Td [(108)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015SYM-LLK\015)]TJ 0 g 0 G -ET - -endstream -endobj -1610 0 obj -<< -/Length 5626 ->> -stream +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015STAB-LLK\015)]TJ 0 g 0 G +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + 0 -10.959 Td [(\015MLK,LMX\015)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.27)-1000(Sorting)-250(utilities)-250(\227)]TJ 0 -20.164 Td [(psb)]TJ ET q -1 0 0 1 120.53 686.164 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 623.888 584.537 cm +[]0 d 0 J 0.398 w 0 0 m 0 43.836 l S Q +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG BT -/F51 11.9552 Tf 124.116 685.965 Td [(msort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Merge-sort)-250(algorithm)]TJ -24.221 -12.574 Td [(psb)]TJ +/F233 8.9664 Tf 629.865 620.702 Td [(\015LLK\015)]TJ +0 g 0 G ET q -1 0 0 1 120.53 673.59 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 709.544 584.537 cm +[]0 d 0 J 0.398 w 0 0 m 0 43.836 l S Q BT -/F51 11.9552 Tf 124.116 673.391 Td [(qsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Quicksort)-250(algorithm)]TJ -24.221 -12.575 Td [(psb)]TJ +/F84 8.9664 Tf 715.172 620.702 Td [(AINV)-250(algorithmic)-250(strategy)111(.)]TJ ET q -1 0 0 1 120.53 661.016 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 911.42 584.537 cm +[]0 d 0 J 0.398 w 0 0 m 0 43.836 l S +Q +q +1 0 0 1 329.467 584.338 cm +[]0 d 0 J 0.398 w 0 0 m 581.953 0 l S Q -BT -/F51 11.9552 Tf 124.116 660.816 Td [(hsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Heapsort)-250(algorithm)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -24.221 -22.402 Td [(call)-525(psb_msort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_qsort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_hsort\050x,ix,dir,flag\051)]TJ/F54 9.9626 Tf 14.944 -21.783 Td [(These)-236(serial)-235(r)18(outines)-236(sort)-236(a)-235(sequence)]TJ/F52 9.9626 Tf 162.066 0 Td [(X)]TJ/F54 9.9626 Tf 9.884 0 Td 
[(into)-236(ascending)-235(or)-236(descending)-236(or)18(der)74(.)]TJ -186.894 -11.955 Td [(The)-243(ar)18(gument)-243(meaning)-243(is)-242(identical)-243(for)-243(the)-243(thr)18(ee)-243(calls;)-245(the)-243(only)-243(dif)18(fer)18(ence)-242(is)-243(the)]TJ 0 -11.955 Td [(algorithm)-250(used)-250(to)-250(accomplish)-250(the)-250(task)-250(\050see)-250(Usage)-250(Notes)-250(below\051.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.783 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.402 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.402 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(to)-250(be)-250(sorted.)]TJ 14.944 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ -0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(\050at)-250(least\051)-250(the)-250(same)-250(size)-250(as)]TJ/F52 9.9626 Tf 254.189 0 Td [(X)]TJ/F54 9.9626 Tf 7.537 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -286.633 -22.402 Td [(dir)]TJ +BT +/F84 9.9626 Tf 470.482 555.96 Td [(T)92(able)-250(21:)-310(Parameters)-250(de\002ning)-250(the)-250(solver)-250(of)-250(the)-250(BJAC)-250(pr)18(econditioner)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(The)-250(desir)18(ed)-250(or)18(dering.)]TJ 6.645 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value:)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.402 Td [(Integer)-250(and)-250(real)-250(data:)]TJ +ET +1 0 0 1 322.56 
716.092 cm +Q 0 g 0 G -/F59 9.9626 Tf 101.28 0 Td [(psb_sort_up_)]TJ/F54 9.9626 Tf 62.764 0 Td [(,)]TJ/F59 9.9626 Tf 5.525 0 Td [(psb_sort_down_)]TJ/F54 9.9626 Tf 73.225 0 Td [(,)]TJ/F59 9.9626 Tf 5.525 0 Td [(psb_asort_up_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf -294.396 -11.955 Td [(psb_asort_down_)]TJ/F54 9.9626 Tf 78.455 0 Td [(;)-250(default)]TJ/F59 9.9626 Tf 38.784 0 Td [(psb_sort_up_)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -201.921 -17.178 Td [(Complex)-250(data:)]TJ +1 0 0 1 -322.56 -716.092 cm +BT +/F84 9.9626 Tf 315.088 90.438 Td [(154)]TJ 0 g 0 G -/F59 9.9626 Tf 70.286 0 Td [(psb_lsort_up_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_lsort_down_)]TJ/F54 9.9626 Tf 78.455 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_asort_up_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ/F59 9.9626 Tf 4.504 0 Td [(psb_asort_down_)]TJ/F54 9.9626 Tf 78.455 0 Td [(;)]TJ -354.777 -11.956 Td [(default)]TJ/F59 9.9626 Tf 33.803 0 Td [(psb_lsort_up_)]TJ/F54 9.9626 Tf 67.994 0 Td [(.)]TJ +ET + +endstream +endobj +2019 0 obj +<< +/Length 7660 +>> +stream 0 g 0 G -/F51 9.9626 Tf -148.622 -22.402 Td [(\003ag)]TJ 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(Whether)-250(to)-250(keep)-250(the)-250(original)-250(values)-250(in)]TJ/F52 9.9626 Tf 171.52 0 Td [(I)-81(X)]TJ/F54 9.9626 Tf 11.661 0 Td [(.)]TJ -179.863 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F59 9.9626 Tf 128.287 0 Td [(psb_sort_ovw_idx_)]TJ/F54 9.9626 Tf 90.809 0 Td [(or)]TJ/F59 9.9626 Tf 11.268 0 Td [(psb_sort_keep_idx_)]TJ/F54 9.9626 Tf 94.146 0 Td [(;)]TJ -324.51 -11.955 Td [(default)]TJ/F59 9.9626 Tf 33.803 0 Td [(psb_sort_ovw_idx_)]TJ/F54 9.9626 Tf 88.916 0 Td [(.)]TJ +BT +/F75 11.9552 Tf 99.895 706.129 Td [(10.3)-1000(build)-250(\227)-250(Builds)-250(a)-250(preconditioner)]TJ 0 g 0 G -/F51 9.9626 Tf -147.626 -24.395 Td [(On)-250(Return)]TJ 0 g 0 
G +/F145 9.9626 Tf 0 -20.364 Td [(call)-525(prec%build\050a,)-525(desc_a,)-525(info[,amold,vmold,imold]\051)]TJ 0 g 0 G - 0 -22.402 Td [(x)]TJ +/F75 9.9626 Tf 0 -24.086 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(of)-250(values,)-250(in)-250(the)-250(chosen)-250(or)18(dering.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ +/F75 9.9626 Tf -29.828 -22.815 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-238(integer)-237(array)-238(of)-237(rank)-238(1,)-240(whose)-238(entries)-237(ar)18(e)-238(moved)-237(to)-238(the)-238(same)-237(position)]TJ 0 -11.955 Td [(as)-250(the)-250(corr)18(esponding)-250(entries)-250(in)]TJ/F52 9.9626 Tf 138.215 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G - -3.943 -44.517 Td [(109)]TJ + 0 -22.816 Td [(a)]TJ 0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(system)-250(sparse)-250(matrix.)-310(Scope:)]TJ/F75 9.9626 Tf 146.23 0 Td [(local)]TJ/F84 9.9626 Tf -131.595 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(sparse)-250(matrix)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 190.872 0 Td [(psb)]TJ ET - -endstream -endobj -1615 0 obj -<< -/Length 7300 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 331.993 580.382 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 335.131 580.183 Td [(Tspmat)]TJ +ET +q +1 
0 0 1 367.141 580.382 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(Notes)]TJ +/F145 9.9626 Tf 370.279 580.183 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [-500(For)-370(integer)-370(or)-370(r)18(eal)-370(data)-370(the)-370(sorting)-370(can)-370(be)-370(performed)-370(in)-370(the)-370(up/down)]TJ 12.453 -11.956 Td [(dir)18(ection,)-250(on)-250(the)-250(natural)-250(or)-250(absolute)-250(values;)]TJ +/F75 9.9626 Tf -291.305 -22.815 Td [(prec)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ +/F84 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ 0.98 0 0 1 124.802 509.547 Tm [(Speci\002ed)-194(as:)-286(an)-193(alr)18(eady)-194(initialized)-194(pr)19(econdtioner)-194(data)-194(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 1 0 0 1 396.852 509.547 Tm [(psb)]TJ +ET +q +1 0 0 1 413.17 509.746 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 416.309 509.547 Td [(Tprec)]TJ +ET +q +1 0 0 1 443.088 509.746 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 446.226 509.547 Td [(type)]TJ 0 g 0 G - [-500(For)-329(complex)-330(data)-329(the)-330(sorting)-329(can)-330(be)-329(done)-329(in)-330(a)-329(lexicographic)-330(or)18(der)-329(\050i.e.:)]TJ 12.453 -11.955 Td [(sort)-263(on)-263(the)-263(r)18(eal)-263(part)-263(with)-263(ties)-263(br)18(oken)-263(accor)18(ding)-263(to)-263(the)-263(imaginary)-263(part\051)-263(or)]TJ 0 -11.955 Td [(on)-250(the)-250(absolute)-250(values;)]TJ 0 g 0 G - -12.453 -19.925 Td [(3.)]TJ +/F75 9.9626 Tf -346.331 -34.771 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 474.975 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 123.397 
474.776 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F75 9.9626 Tf 208.625 0 Td [(local)]TJ/F84 9.9626 Tf -217.492 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(descriptor)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 247.648 0 Td [(psb)]TJ +ET +q +1 0 0 1 388.769 439.11 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 391.907 438.911 Td [(desc)]TJ +ET +q +1 0 0 1 413.456 439.11 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 416.594 438.911 Td [(type)]TJ 0 g 0 G - [-500(The)-325(r)18(outines)-325(r)18(eturn)-325(the)-325(items)-324(in)-325(the)-325(chosen)-325(or)18(dering;)-362(the)-325(output)-325(dif)18(fer)18(-)]TJ 12.453 -11.956 Td [(ence)-244(is)-245(the)-244(handling)-244(of)-244(ties)-244(\050i.e.)-309(items)-244(with)-244(an)-244(equal)-245(v)1(alue\051)-245(in)-244(the)-244(original)]TJ 0 -11.955 Td [(input.)-316(W)55(ith)-252(the)-252(mer)18(ge-sort)-252(algorithm)-252(ties)-252(ar)18(e)-252(pr)18(eserved)-252(in)-252(the)-252(same)-252(r)18(ela-)]TJ 0 -11.955 Td [(tive)-278(or)18(der)-278(as)-278(they)-278(had)-278(in)-278(the)-278(original)-278(sequence,)-285(while)-278(this)-278(is)-278(not)-278(guaran-)]TJ 0 -11.955 Td [(teed)-250(for)-250(quicksort)-250(or)-250(heapsort;)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - -12.453 -19.925 Td [(4.)]TJ +/F75 9.9626 Tf -337.621 -22.816 Td [(amold)]TJ 0 g 0 G - [-500(If)]TJ/F52 9.9626 Tf 22.66 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F85 10.3811 Tf 20.72 0 Td [(=)]TJ/F52 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ +/F84 9.9626 Tf 33.455 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -8.548 
-11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F145 9.9626 Tf 201.393 0 Td [(psb)]TJ ET q -1 0 0 1 232.862 542.941 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 342.513 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 235.975 542.742 Td [(s)-25(o)-25(r)-35(t)]TJ +/F145 9.9626 Tf 345.652 368.275 Td [(T)]TJ ET q -1 0 0 1 253.036 542.941 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 351.51 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 256.149 542.742 Td [(o)-35(v)-25(w)]TJ +/F145 9.9626 Tf 354.648 368.275 Td [(base)]TJ ET q -1 0 0 1 274.067 542.941 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 376.197 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 277.11 542.742 Td [(i)-32(d)-42(x)]TJ +/F145 9.9626 Tf 379.335 368.275 Td [(sparse)]TJ ET q -1 0 0 1 291.402 542.941 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 411.345 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 296.503 542.742 Td [(then)-212(the)-212(entries)-212(in)]TJ/F52 9.9626 Tf 80.283 0 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.588 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-290(:)]TJ/F52 9.9626 Tf 13.381 0 Td [(n)]TJ/F85 10.3811 Tf 5.788 0 Td [(\051)]TJ/F54 9.9626 Tf 6.262 0 Td [(wher)18(e)]TJ/F52 9.9626 Tf 29.644 0 Td [(n)]TJ/F54 9.9626 Tf 7.776 0 Td [(is)-212(the)-212(size)]TJ -276.762 -11.956 Td [(of)]TJ/F52 9.9626 Tf 12.255 0 Td [(x)]TJ/F54 9.9626 Tf 8.411 0 Td [(ar)18(e)-322(initialized)-321(to)]TJ/F52 9.9626 Tf 76.228 0 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.588 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 
10.3811 Tf 8.364 0 Td [(\040)]TJ/F52 9.9626 Tf 14.651 0 Td [(i)]TJ/F54 9.9626 Tf 2.963 0 Td [(;)-358(thus,)-339(upon)-322(r)18(eturn)-321(fr)18(om)-322(the)-322(subr)18(outine,)]TJ -138.753 -11.955 Td [(for)-270(each)-271(index)]TJ/F52 9.9626 Tf 65.501 0 Td [(i)]TJ/F54 9.9626 Tf 5.657 0 Td [(we)-270(have)-271(in)]TJ/F52 9.9626 Tf 51.095 0 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.587 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 6.843 0 Td [(the)-270(position)-271(that)-270(the)-270(item)]TJ/F52 9.9626 Tf 114.324 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 6.844 0 Td [(occupied)]TJ -278.766 -11.955 Td [(in)-250(the)-250(original)-250(data)-250(sequence;)]TJ +/F145 9.9626 Tf 414.483 368.275 Td [(mat)]TJ/F84 9.9626 Tf 15.691 0 Td [(.)]TJ 0 g 0 G - -12.453 -19.925 Td [(5.)]TJ +/F75 9.9626 Tf -330.279 -22.816 Td [(vmold)]TJ 0 g 0 G - [-500(If)]TJ/F52 9.9626 Tf 24.08 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F85 10.3811 Tf 22.648 0 Td [(=)]TJ/F52 9.9626 Tf 13.563 0 Td [(p)-25(s)-25(b)]TJ +/F84 9.9626 Tf 34.013 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -9.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F145 9.9626 Tf 201.393 0 Td [(psb)]TJ ET q -1 0 0 1 238.138 487.15 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 342.513 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 241.252 486.951 Td [(s)-25(o)-25(r)-35(t)]TJ +/F145 9.9626 Tf 345.652 297.638 Td [(T)]TJ ET q -1 0 0 1 258.312 487.15 
cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 351.51 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 261.426 486.951 Td [(k)-30(e)-25(e)-80(p)]TJ +/F145 9.9626 Tf 354.648 297.638 Td [(base)]TJ ET q -1 0 0 1 280.648 487.15 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 376.197 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 283.692 486.951 Td [(i)-32(d)-42(x)]TJ +/F145 9.9626 Tf 379.335 297.638 Td [(vect)]TJ ET q -1 0 0 1 297.983 487.15 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 400.884 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 304.504 486.951 Td [(the)-355(r)18(ou)1(tine)-355(will)-354(assume)-355(that)-355(th)1(e)-355(entries)-355(i)1(n)]TJ/F52 9.9626 Tf -128.838 -11.955 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.588 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)]TJ/F85 10.3811 Tf 2.74 0 Td [(\051)]TJ/F54 9.9626 Tf 6.64 0 Td [(have)-250(alr)18(eady)-250(been)-250(initialized)-250(by)-250(the)-250(user;)]TJ -0 g 0 G - -34.75 -19.926 Td [(6.)]TJ -0 g 0 G - [-500(The)-270(thr)18(ee)-269(sorting)-270(algorithms)-269(have)-270(a)-269(similar)]TJ/F52 9.9626 Tf 205.79 0 Td [(O)]TJ/F85 10.3811 Tf 8 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)]TJ/F54 9.9626 Tf 7.324 0 Td [(log)]TJ/F52 9.9626 Tf 15.663 0 Td [(n)]TJ/F85 10.3811 Tf 5.788 0 Td [(\051)]TJ/F54 9.9626 Tf 6.835 0 Td [(expected)-270(r)8(unning)]TJ -241.221 -11.955 Td [(time;)-278(in)-268(the)-268(average)-269(case)-268(quicksort)-268(will)-269(be)-268(the)-268(fastest)-269(and)-268(mer)18(ge-sort)-268(the)]TJ 0 -11.955 Td [(slowest.)-310(However)-250(note)-250(that:)]TJ -0 g 0 G - 5.321 -19.925 Td [(\050a\051)]TJ -0 g 0 G - [-500(The)-336(worst)-336(case)-336(r)8(unning)-336(time)-336(for)-337(quicksort)-336(is)]TJ/F52 9.9626 Tf 220.017 0 Td [(O)]TJ/F85 10.3811 Tf 8 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)]TJ/F54 7.5716 Tf 5.664 3.616 Td [(2)]TJ/F85 10.3811 Tf 4.408 -3.616 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td 
[(;)-379(the)-336(algorithm)]TJ -229.916 -11.955 Td [(implemented)-293(her)18(e)-293(follows)-293(the)-292(well-known)-293(median-of-thr)18(ee)-293(heuris-)]TJ 0 -11.956 Td [(tics,)-250(but)-250(the)-250(worst)-250(case)-250(may)-250(still)-250(apply;)]TJ -0 g 0 G - -17.125 -15.94 Td [(\050b\051)]TJ -0 g 0 G - [-500(The)-190(worst)-190(case)-190(r)8(unning)-190(time)-190(for)-190(mer)18(ge-sort)-190(and)-190(heap-sort)-190(is)]TJ/F52 9.9626 Tf 277.76 0 Td [(O)]TJ/F85 10.3811 Tf 8 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)]TJ/F54 9.9626 Tf 7.324 0 Td [(log)]TJ/F52 9.9626 Tf 15.663 0 Td [(n)]TJ/F85 10.3811 Tf 5.788 0 Td [(\051)]TJ/F54 9.9626 Tf -301.684 -11.955 Td [(as)-250(the)-250(average)-250(case;)]TJ -0 g 0 G - -16.04 -15.94 Td [(\050c\051)]TJ -0 g 0 G - [-500(The)-244(mer)18(ge-sort)-244(algorithm)-243(is)-244(implemented)-244(to)-244(take)-244(advantage)-243(of)-244(sub-)]TJ 16.04 -11.955 Td [(sequences)-314(that)-313(may)-314(be)-314(alr)18(eady)-314(in)-313(the)-314(desir)18(ed)-314(or)18(dering)-314(prior)-313(to)-314(the)]TJ 0 -11.956 Td [(subr)18(outine)-390(call;)-459(this)-390(situation)-389(is)-390(r)18(elatively)-390(common)-389(when)-390(dealing)]TJ 0 -11.955 Td [(with)-335(gr)18(oups)-335(of)-335(indices)-335(of)-335(sparse)-336(matrix)-335(entries,)-356(thus)-335(mer)18(ge-sort)-335(is)]TJ 0 -11.955 Td [(the)-319(pr)18(eferr)18(ed)-318(choice)-319(when)-319(a)-318(sorting)-319(is)-319(needed)-318(by)-319(other)-319(r)18(outi)1(nes)-319(in)]TJ 0 -11.955 Td [(the)-250(library)111(.)]TJ +/F145 9.9626 Tf 404.022 297.638 Td [(type)]TJ/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 117.559 -193.275 Td [(110)]TJ +/F75 9.9626 Tf -325.049 -22.815 Td [(imold)]TJ 0 g 0 G +/F84 9.9626 Tf 31.791 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(integer)-250(vector)-250(storage.)]TJ -6.884 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 
9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 0.98 0 0 1 124.802 227.002 Tm [(Speci\002ed)-194(as:)-286(an)-193(object)-194(of)-194(a)-194(class)-194(derived)-194(fr)19(om)-194(\050integer\051)]TJ/F145 9.9626 Tf 1 0 0 1 356.282 227.002 Tm [(psb)]TJ ET - -endstream -endobj -1628 0 obj -<< -/Length 171 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 372.601 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 14.3462 Tf 99.895 705.784 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ -0 g 0 G -/F54 9.9626 Tf 164.384 -615.346 Td [(111)]TJ -0 g 0 G +/F145 9.9626 Tf 375.739 227.002 Td [(T)]TJ ET - -endstream -endobj -1632 0 obj -<< -/Length 5510 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 381.597 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.1)-1000(psb)]TJ +/F145 9.9626 Tf 384.735 227.002 Td [(base)]TJ ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 406.284 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(environment)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_init\050ctxt,)-525(np,)-525(basectxt,)-525(ids\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-214(subr)18(outine)-215(initializes)-214(the)-215(PSBLAS)-214(parallel)-215(envir)18(onment,)-221(de\002ning)-215(a)-214(vir)18(-)]TJ -14.944 -11.955 Td [(tual)-250(parallel)-250(machine.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G +/F145 9.9626 Tf 409.422 227.002 Td [(vect)]TJ +ET +q +1 0 0 1 430.971 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 434.11 227.002 Td [(type)]TJ/F84 9.9626 Tf 0.98 0 0 1 455.031 227.002 
Tm [(.)]TJ 0 g 0 G - 0 -19.926 Td [(np)]TJ +/F75 9.9626 Tf 1 0 0 1 99.895 202.917 Tm [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(all)-250(available)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(basectxt)]TJ + 0 -22.816 Td [(prec)]TJ 0 g 0 G -/F54 9.9626 Tf 41.494 0 Td [(the)-321(initial)-321(communication)-321(context.)-524(The)-321(new)-321(context)-321(will)-321(be)-321(de\002ned)]TJ -16.587 -11.955 Td [(fr)18(om)-250(the)-250(pr)18(ocesses)-250(participating)-250(in)-250(the)-250(initial)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(MPI)]TJ +/F84 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econdtioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 193.612 0 Td [(psb)]TJ 
ET q -1 0 0 1 387.574 466.194 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 334.732 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 390.563 465.994 Td [(COMM)]TJ +/F145 9.9626 Tf 337.871 132.281 Td [(Tprec)]TJ ET q -1 0 0 1 424.904 466.194 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 364.65 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 427.893 465.994 Td [(WORLD.)]TJ -0 g 0 G -/F51 9.9626 Tf -277.188 -19.925 Td [(ids)]TJ -0 g 0 G -/F54 9.9626 Tf 18.809 0 Td [(Identities)-306(of)-307(the)-306(pr)18(ocesses)-307(to)-306(use)-306(for)-307(the)-306(new)-306(context;)-335(the)-306(ar)18(gument)-307(is)-306(ig-)]TJ 6.097 -11.955 Td [(nor)18(ed)-388(when)]TJ/F59 9.9626 Tf 58.258 0 Td [(np)]TJ/F54 9.9626 Tf 14.324 0 Td [(is)-388(not)-388(speci\002ed.)-723(This)-388(allows)-388(the)-387(pr)18(ocesses)-388(in)-388(the)-388(new)]TJ -72.582 -11.955 Td [(envir)18(onment)-250(to)-250(be)-250(in)-250(an)-250(or)18(der)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(original)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)-560(Default:)-310(use)-250(the)-250(indices)]TJ/F85 10.3811 Tf 240.57 0 Td [(\050)]TJ/F54 9.9626 Tf 4.15 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.966 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -322.483 -21.918 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td 
[(the)-285(communication)-284(context)-285(identifying)-285(the)-284(virtual)-285(parallel)-285(machine,)-293(type)]TJ/F59 9.9626 Tf 3.885 -11.955 Td [(psb_ctxt_type)]TJ/F54 9.9626 Tf 67.995 0 Td [(.)-327(Note)-256(that)-256(this)-256(is)-256(always)-256(a)-255(duplicate)-256(of)]TJ/F59 9.9626 Tf 174.426 0 Td [(basectxt)]TJ/F54 9.9626 Tf 41.842 0 Td [(,)-257(so)-256(that)]TJ -284.263 -11.955 Td [(library)-296(communications)-297(ar)18(e)-296(completely)-297(separated)-296(fr)18(om)-297(other)-296(communi-)]TJ 0 -11.955 Td [(cation)-250(operations.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(A)-250(call)-250(to)-250(this)-250(r)18(outine)-250(must)-250(pr)18(ecede)-250(any)-250(other)-250(PSBLAS)-250(call.)]TJ -0 g 0 G - 0 -19.926 Td [(2.)]TJ +/F145 9.9626 Tf 367.788 132.281 Td [(type)]TJ 0 g 0 G - [-500(It)-194(is)-195(an)-194(err)18(or)-194(to)-194(specify)-195(a)-194(value)-194(for)]TJ/F52 9.9626 Tf 158.156 0 Td [(n)-80(p)]TJ/F54 9.9626 Tf 13.378 0 Td [(gr)18(eater)-194(than)-195(the)-194(number)-194(of)-194(pr)18(ocesses)]TJ -159.081 -11.955 Td [(available)-250(in)-250(the)-250(underlying)-250(base)-250(parallel)-250(envir)18(onment.)]TJ 0 g 0 G - 139.477 -84.647 Td [(112)]TJ +/F84 9.9626 Tf -103.509 -41.843 Td [(155)]TJ 0 g 0 G ET endstream endobj -1638 0 obj +2023 0 obj << -/Length 4457 +/Length 1133 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.2)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td 
[(info)-211(\227)-211(Return)-211(information)-210(about)-211(PSBLAS)-211(parallel)-211(en-)]TJ -24.221 -13.948 Td [(vironment)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -26.9 -18.964 Td [(call)-525(psb_info\050ctxt,)-525(iam,)-525(np\051)]TJ/F54 9.9626 Tf 14.944 -21.917 Td [(This)-397(subr)18(outine)-396(r)18(eturns)-397(information)-397(about)-396(the)-397(PSBLAS)-397(parallel)-396(envir)18(on-)]TJ -14.944 -11.956 Td [(ment,)-250(de\002ning)-250(a)-250(virtual)-250(parallel)-250(machine.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(iam)]TJ -0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Identi\002er)-250(of)-250(curr)18(ent)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 2.77 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ/F83 10.3811 Tf 134.19 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)]TJ/F83 
10.3811 Tf 7.873 0 Td [(\024)]TJ/F52 9.9626 Tf 11.017 0 Td [(i)-47(a)-25(m)]TJ/F83 10.3811 Tf 18.677 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1)]TJ -0 g 0 G -/F51 9.9626 Tf -239.579 -19.925 Td [(np)]TJ -0 g 0 G -/F54 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(For)-396(pr)18(ocesses)-396(in)-395(the)-396(virtual)-396(parallel)-396(machine)-396(the)-396(ident)1(i\002er)-396(will)-396(satisfy)]TJ 12.453 -11.955 Td [(0)]TJ/F83 10.3811 Tf 7.873 0 Td [(\024)]TJ/F52 9.9626 Tf 11.016 0 Td [(i)-47(a)-25(m)]TJ/F83 10.3811 Tf 18.678 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1;)]TJ -0 g 0 G - -84.741 -19.925 Td [(2.)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(info)]TJ 0 g 0 G - [-500(If)-349(the)-349(user)-350(has)-349(r)18(equested)-349(on)]TJ/F59 9.9626 Tf 142.217 0 Td [(psb_init)]TJ/F54 9.9626 Tf 45.321 0 Td [(a)-349(number)-349(of)-350(pr)18(ocesses)-349(less)-349(than)]TJ -175.085 -11.956 Td [(the)-321(total)-322(available)-321(in)-321(the)-322(parallel)-321(execution)-322(envir)18(onment,)-339(the)-321(r)18(emaining)]TJ 0 -11.955 Td [(pr)18(ocesses)-229(will)-228(have)-229(on)-229(r)18(eturn)]TJ/F52 9.9626 Tf 130.21 0 Td [(i)-47(a)-25(m)]TJ/F85 10.3811 Tf 18.678 0 Td [(=)]TJ/F83 10.3811 Tf 11.086 0 Td [(\000)]TJ/F54 9.9626 Tf 
8.194 0 Td [(1;)-236(the)-229(on)1(ly)-229(call)-229(involving)]TJ/F59 9.9626 Tf 110.162 0 Td [(ctxt)]TJ/F54 9.9626 Tf 23.2 0 Td [(that)]TJ -301.53 -11.955 Td [(any)-250(such)-250(pr)18(ocess)-250(may)-250(execute)-250(is)-250(to)]TJ/F59 9.9626 Tf 155.296 0 Td [(psb_exit)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 1.02 0 0 1 150.396 636.39 Tm [(The)]TJ/F145 9.9626 Tf 1 0 0 1 170.746 636.39 Tm [(amold)]TJ/F84 9.9626 Tf 1.02 0 0 1 196.898 636.39 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 202.989 636.39 Tm [(vmold)]TJ/F84 9.9626 Tf 1.02 0 0 1 232.48 636.39 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 253.023 636.39 Tm [(imold)]TJ/F84 9.9626 Tf 1.02 0 0 1 282.514 636.39 Tm [(ar)18(guments)-329(may)-328(be)-329(employed)-329(to)-328(interface)-329(with)]TJ 1 0 0 1 150.705 624.435 Tm [(special)-250(devices,)-250(such)-250(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ 0 g 0 G - -57.662 -174.311 Td [(113)]TJ + 164.383 -533.997 Td [(156)]TJ 0 g 0 G ET endstream endobj -1644 0 obj +2032 0 obj << -/Length 4180 +/Length 6132 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.3)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(exit)-250(\227)-250(Exit)-250(from)-250(PSBLAS)-250(parallel)-250(environment)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_exit\050ctxt\051)]TJ 0 -11.955 Td [(call)-525(psb_exit\050ctxt,close\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td 
[(This)-250(subr)18(outine)-250(exits)-250(fr)18(om)-250(the)-250(PSBLAS)-250(parallel)-250(virtual)-250(machine.)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G +/F75 11.9552 Tf 99.895 706.129 Td [(10.4)-1000(apply)-250(\227)-250(Preconditioner)-250(application)-250(routine)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(close)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(Whether)-369(to)-368(close)-369(all)-369(data)-368(str)8(uctur)18(es)-369(r)18(elated)-369(to)-368(the)-369(virtual)-369(parall)1(el)-369(ma-)]TJ -2.759 -11.955 Td [(chine,)-250(besides)-250(those)-250(associated)-250(with)-250(ctxt.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(variable,)-250(default)-250(value:)-310(tr)8(ue.)]TJ/F51 11.9552 Tf -24.907 -19.925 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F145 9.9626 Tf 0 -18.964 Td [(call)-525(prec%apply\050x,y,desc_a,info,trans,work\051)]TJ 0 -11.955 Td 
[(call)-525(prec%apply\050x,desc_a,info,trans\051)]TJ 0 g 0 G - [-500(This)-376(r)18(outine)-376(may)-377(be)-376(called)-376(even)-376(if)-377(a)-376(pr)18(evious)-376(call)-376(to)]TJ/F59 9.9626 Tf 255.069 0 Td [(psb_info)]TJ/F54 9.9626 Tf 45.591 0 Td [(has)-376(r)18(e-)]TJ -288.206 -11.955 Td [(turned)-251(with)]TJ/F52 9.9626 Tf 55.156 0 Td [(i)-47(a)-25(m)]TJ/F85 10.3811 Tf 18.696 0 Td [(=)]TJ/F83 10.3811 Tf 11.104 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1;)-251(indeed,)-252(it)-251(it)-251(i)1(s)-251(the)-251(only)-251(r)18(outine)-251(that)-251(may)-251(be)-251(called)]TJ -93.15 -11.955 Td [(with)-250(ar)18(gument)]TJ/F59 9.9626 Tf 68.133 0 Td [(ctxt)]TJ/F54 9.9626 Tf 23.412 0 Td [(in)-250(this)-250(situation.)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - -103.999 -19.926 Td [(2.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-500(A)-269(call)-269(to)-268(this)-269(r)18(outine)-269(with)]TJ/F59 9.9626 Tf 128.502 0 Td [(close=.true.)]TJ/F54 9.9626 Tf 65.442 0 Td [(implies)-269(a)-268(call)-269(to)]TJ/F59 9.9626 Tf 72.059 0 Td [(MPI_Finalize)]TJ/F54 9.9626 Tf 62.764 0 Td [(,)]TJ -316.313 -11.955 Td [(after)-250(which)-250(no)-250(parallel)-250(r)18(outine)-250(may)-250(be)-250(called.)]TJ +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - -12.454 -19.925 Td [(3.)]TJ 0 g 0 G - [-500(If)-289(the)-288(user)-288(whishes)-289(to)-288(use)-289(multiple)-288(communication)-289(contexts)-288(in)-289(the)-288(same)]TJ 12.454 -11.955 Td [(pr)18(ogram,)-401(or)-371(to)-371(enter)-371(and)-371(exit)-371(multiple)-371(times)-370(into)-371(the)-371(parallel)-371(envir)18(on-)]TJ 0 -11.956 Td [(ment,)-425(this)-389(r)18(outine)-390(may)-390(be)-390(called)-389(to)-390(selectively)-390(close)-390(the)-389(contexts)-390(with)]TJ/F59 9.9626 Tf 0 -11.955 Td [(close=.false.)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)-202(while)-190(on)-190(the)-190(last)-190(call)-190(it)-190(should)-190(be)-190(called)-190(with)]TJ/F59 9.9626 Tf 196.919 0 
Td [(close=.true.)]TJ/F54 9.9626 Tf -264.913 -11.955 Td [(to)-250(shutdown)-250(in)-250(a)-250(clean)-250(way)-250(the)-250(entir)18(e)-250(parallel)-250(envir)18(onment.)]TJ -0 g 0 G - 139.477 -212.169 Td [(114)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G +/F84 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F75 9.9626 Tf 117.836 0 Td [(local)]TJ/F84 9.9626 Tf -117.587 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.511 0 Td [(psb)]TJ ET - -endstream -endobj -1651 0 obj -<< -/Length 2476 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 337.631 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.4)-1000(psb)]TJ +/F145 9.9626 Tf 340.77 577.576 Td [(Tprec)]TJ ET q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 367.549 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(get)]TJ +/F145 9.9626 Tf 370.687 577.576 Td [(type)]TJ +0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -291.714 -19.925 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(sour)18(ce)-250(vector)74(.)-310(Scope:)]TJ/F75 9.9626 Tf 111.142 0 Td [(local)]TJ/F84 9.9626 Tf -96.507 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 218.688 0 Td [(psb)]TJ ET q -1 0 0 1 168.338 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 359.808 521.985 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT 
-/F51 11.9552 Tf 171.925 706.129 Td [(mpi)]TJ +/F145 9.9626 Tf 362.947 521.785 Td [(T)]TJ ET q -1 0 0 1 194.556 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 368.804 521.985 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 198.143 706.129 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -98.248 -18.964 Td [(icomm)-525(=)-525(psb_get_mpi_comm\050ctxt\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-417(func)1(tion)-417(r)18(eturns)-416(the)-417(MPI)-416(communicator)-417(associated)-416(with)-417(a)-416(PSBLAS)]TJ -14.944 -11.955 Td [(context)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-372(MPI)-371(communicator)-372(associated)-371(with)-372(the)-372(PSBLAS)-371(virtual)]TJ -47.87 -11.955 Td [(parallel)-250(machine.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td 
[(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F54 9.9626 Tf 34.165 0 Td [(The)-230(subr)18(outine)-230(version)]TJ/F59 9.9626 Tf 103.913 0 Td [(psb_get_mpicomm)]TJ/F54 9.9626 Tf 80.748 0 Td [(is)-230(still)-230(available)-230(but)-230(is)-230(depr)18(e-)]TJ -218.826 -11.955 Td [(cated.)]TJ -0 g 0 G - 164.384 -319.766 Td [(115)]TJ -0 g 0 G +/F145 9.9626 Tf 371.943 521.785 Td [(vect)]TJ ET - -endstream -endobj -1656 0 obj -<< -/Length 3337 ->> -stream +q +1 0 0 1 393.492 521.985 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 396.63 521.785 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.5)-1000(psb)]TJ +/F75 9.9626 Tf -317.656 -19.925 Td [(desc)]TJ ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 120.408 502.059 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(get)]TJ +/F75 9.9626 Tf 123.397 501.86 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F75 9.9626 Tf 208.625 0 Td [(local)]TJ/F84 9.9626 Tf -217.492 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 200.207 0 Td [(psb)]TJ ET q -1 0 0 1 219.148 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 341.328 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 222.735 706.129 Td [(mpi)]TJ +/F145 9.9626 Tf 344.466 465.994 Td [(desc)]TJ ET q -1 0 0 1 245.365 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 366.015 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 248.952 706.129 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -98.247 
-18.964 Td [(rank)-525(=)-525(psb_get_mpi_rank\050ctxt,)-525(id\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(function)-250(r)18(eturns)-250(the)-250(MPI)-250(rank)-250(of)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 257.337 0 Td [(i)-32(d)]TJ +/F145 9.9626 Tf 369.153 465.994 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -272.281 -19.925 Td [(T)90(ype:)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F75 9.9626 Tf -290.179 -19.925 Td [(trans)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 27.666 0 Td [(Scope:)]TJ -3.068 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)74(.)]TJ 0 g 0 G +/F75 9.9626 Tf -24.907 -19.926 Td [(work)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(id)]TJ -0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(Identi\002er)-250(of)-250(a)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 
9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(0)]TJ/F83 10.3811 Tf 141.938 0 Td [(\024)]TJ/F52 9.9626 Tf 11.017 0 Td [(i)-32(d)]TJ/F83 10.3811 Tf 11.086 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.503 0 Td [(\000)]TJ/F54 9.9626 Tf 10.132 0 Td [(1)]TJ +/F84 9.9626 Tf 28.782 0 Td [(an)-250(optional)-250(work)-250(space)-250(Scope:)]TJ/F75 9.9626 Tf 136.477 0 Td [(local)]TJ/F84 9.9626 Tf -140.661 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(double)-250(pr)18(ecision)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -223.669 -21.918 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(Funciton)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(MPI)-250(rank)-250(associated)-250(with)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 224.291 0 Td [(i)-32(d)]TJ/F54 9.9626 Tf 8.195 0 Td [(.)]TJ -280.356 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F54 9.9626 Tf 35.734 0 Td [(The)-388(subr)18(outine)-387(version)]TJ/F59 9.9626 Tf 108.62 0 Td [(psb_get_rank)]TJ/F54 9.9626 Tf 66.626 0 Td [(is)-388(still)-387(available)-388(but)-387(is)-388(depr)18(e-)]TJ -210.98 -11.955 Td [(cated.)]TJ -0 g 0 G - 164.384 -275.93 Td [(116)]TJ + 0 -19.925 Td [(y)]TJ 0 g 0 G +/F84 9.9626 Tf 10.521 0 Td [(the)-250(destination)-250(vector)74(.)-310(Scope:)]TJ/F75 9.9626 Tf 131.913 0 Td [(local)]TJ/F84 9.9626 Tf -117.836 -11.955 Td 
[(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 218.688 0 Td [(psb)]TJ ET - -endstream -endobj -1660 0 obj -<< -/Length 1155 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 359.808 276.904 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.6)-1000(psb)]TJ +/F145 9.9626 Tf 362.947 276.704 Td [(T)]TJ ET q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 368.804 276.904 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(wtime)-250(\227)-250(W)74(all)-250(clock)-250(timing)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.121 -18.964 Td [(time)-525(=)-525(psb_wtime\050\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-298(function)-298(r)18(eturns)-299(a)-298(wall)-298(clock)-298(timer)74(.)-455(The)-298(r)18(esolution)-298(of)-299(the)-298(timer)-298(is)-298(de-)]TJ -14.944 -11.955 Td [(pendent)-250(on)-250(the)-250(underlying)-250(parallel)-250(envir)18(onment)-250(implementation.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F145 9.9626 Tf 371.943 276.704 Td [(vect)]TJ +ET +q +1 0 0 1 393.492 276.904 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 396.63 276.704 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Exit)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G +/F75 9.9626 Tf -317.656 -19.925 Td [(info)]TJ 0 g 0 G - 0 -19.926 Td [(Function)-250(value)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td 
[(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F75 11.9552 Tf 0.988 0 0 1 99.895 187.041 Tm [(Notes)]TJ/F84 9.9626 Tf 0.988 0 0 1 133.867 187.041 Tm [(This)-252(method)-252(is)-252(almost)-252(always)-253(called)-252(by)-252(the)-252(iterative)-252(methods)-252(of)-252(Sec.)]TJ +0 0 1 rg 0 0 1 RG + [-252(11)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(the)-250(elapsed)-250(time)-250(in)-250(seconds.)]TJ -47.87 -11.955 Td [(Returned)-250(as:)-310(a)]TJ/F59 9.9626 Tf 66.022 0 Td [(real\050psb_dpk_\051)]TJ/F54 9.9626 Tf 75.715 0 Td [(variable.)]TJ + [(;)]TJ 1 0 0 1 99.895 175.085 Tm [(it)-250(is)-250(extr)18(emely)-250(unlikely)-250(to)-250(be)-250(needed)-250(dir)18(ectly)-250(by)-250(the)-250(application)-250(developer)74(.)]TJ 0 g 0 G - -2.26 -491.123 Td [(117)]TJ + 164.384 -84.647 Td [(157)]TJ 0 g 0 G ET endstream endobj -1664 0 obj +2038 0 obj << -/Length 1388 +/Length 3490 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.7)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(10.5)-1000(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(current)-250(preconditioner)]TJ +0 g 0 G +0 g 0 G +/F145 9.9626 Tf 0 -18.964 Td [(call)-525(prec%descr\050info\051)]TJ 0 -11.955 Td [(call)-525(prec%descr\050info,iout,)-525(root\051)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(prec)]TJ +0 g 0 G +/F84 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F75 9.9626 Tf 117.837 0 Td [(local)]TJ/F84 9.9626 Tf -117.587 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 
-11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.511 0 Td [(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 388.441 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(barrier)-240(\227)-240(Sinchronization)-239(point)-240(parallel)-240(environment)]TJ +/F145 9.9626 Tf 391.579 577.576 Td [(Tprec)]TJ +ET +q +1 0 0 1 418.358 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 421.497 577.576 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_barrier\050ctxt\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-358(subr)18(outine)-357(acts)-358(as)-358(an)-358(explicit)-357(synchr)18(onization)-358(point)-358(for)-357(the)-358(PSBLAS)]TJ -14.944 -11.955 Td [(parallel)-250(virtual)-250(machine.)]TJ +/F75 9.9626 Tf -291.713 -19.925 Td [(iout)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ +/F84 9.9626 Tf 23.243 0 Td [(output)-250(unit.)-310(Scope:)]TJ/F75 9.9626 Tf 87.39 0 Td [(local)]TJ/F84 9.9626 Tf -86.035 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(number)74(.)-310(Default:)-310(default)-250(output)-250(unit.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F75 9.9626 Tf -24.906 -19.925 Td [(root)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 23.252 0 Td [(Pr)18(ocess)-250(fr)18(om)-250(which)-250(to)-250(print)-250(Scope:)]TJ/F75 9.9626 Tf 155.834 0 Td [(local)]TJ/F84 9.9626 Tf -154.488 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td 
[(.)]TJ 1.02 0 0 1 175.611 465.994 Tm [(Speci\002ed)-342(as:)-498(an)-341(integer)-342(number)-342(between)-342(0)-342(and)]TJ/F78 9.9626 Tf 1 0 0 1 396.349 465.994 Tm [(n)-80(p)]TJ/F190 10.3811 Tf 13.868 0 Td [(\000)]TJ/F84 9.9626 Tf 1.02 0 0 1 420.713 465.994 Tm [(1,)-366(in)-342(which)-342(case)]TJ 1.02 0 0 1 175.611 454.039 Tm [(the)-264(speci\002ed)-265(pr)18(ocess)-265(will)-264(print)-264(the)-265(description,)-269(or)]TJ/F190 10.3811 Tf 1 0 0 1 401.395 454.039 Tm [(\000)]TJ/F84 9.9626 Tf 1.02 0 0 1 409.589 454.039 Tm [(1,)-269(in)-265(which)-264(case)-265(all)]TJ 1 0 0 1 175.313 442.084 Tm [(pr)18(ocesses)-250(will)-250(print.)-310(Default:)-310(0.)]TJ 0 g 0 G +/F75 9.9626 Tf -24.608 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ + 0 -19.925 Td [(info)]TJ +0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 139.477 -455.258 Td [(118)]TJ + 139.865 -263.975 Td [(158)]TJ 0 g 0 G ET endstream endobj -1668 0 obj +2042 0 obj << -/Length 1283 +/Length 973 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 
99.895 706.129 Td [(7.8)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(10.6)-1000(clone)-250(\227)-250(clone)-250(current)-250(preconditioner)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_abort\050ctxt\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(aborts)-250(computation)-250(on)-250(the)-250(parallel)-250(virtual)-250(machine.)]TJ +/F145 9.9626 Tf 0 -18.964 Td [(call)-1050(prec%clone\050precout,info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F84 9.9626 Tf 29.44 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -29.44 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ + 0 -19.925 Td [(prec)]TJ +0 g 0 G +/F84 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F75 9.9626 Tf -77.918 -33.873 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(precout)]TJ +0 g 0 G +/F84 9.9626 Tf 38.904 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +0 g 0 G +/F75 9.9626 Tf -38.904 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ 0 g 0 G - 139.477 -467.213 Td [(119)]TJ + 140.583 -449.28 Td [(159)]TJ 0 g 0 G ET endstream endobj -1567 0 obj +1937 0 obj << /Type /ObjStm /N 100 -/First 970 -/Length 9451 ->> -stream -392 0 1566 58 1562 117 1570 224 1568 363 1572 510 396 568 1573 625 1569 683 1577 790 -1575 929 1579 1074 400 1133 1580 1191 1581 1250 1576 1309 1584 1403 1582 1542 1586 1687 405 1745 -1587 1802 1588 1860 1583 1918 1591 2012 1589 2151 1593 2296 409 2355 1590 2413 1595 2534 1597 2652 -1598 2710 1599 2768 1600 2826 1594 2884 1605 3004 1601 3161 1602 3306 1603 3453 1607 3600 413 3659 -1604 3717 1609 3824 1611 3942 417 4000 1608 4057 1614 4164 1616 4282 1617 4341 1618 4400 1619 4459 -1620 4518 1621 4577 1622 4636 1623 4695 1624 4754 1625 4813 1613 4872 1627 4992 1629 5110 421 5168 -1626 5225 1631 5306 1633 5424 425 5483 1634 5541 1635 5600 1630 5659 1637 5792 1639 5910 429 5968 -1640 6025 1641 6082 1636 6138 1643 6271 1645 6389 433 6448 1646 6506 1647 6565 1648 6624 1642 6683 -1650 6816 1652 6934 437 6992 1649 7049 1655 7143 1657 7261 441 7320 1654 7378 1659 7498 1661 7616 -445 7674 1658 7731 1663 7825 1665 7943 449 8002 1662 8060 1667 8154 1669 8272 453 8330 1666 8387 -% 392 0 obj -<< -/D [1563 0 R /XYZ 150.705 716.092 null] ->> -% 1566 0 obj -<< -/D [1563 0 R /XYZ 150.705 382.093 null] +/First 973 +/Length 10967 >> -% 1562 0 obj +stream +1933 0 1940 95 1938 234 1942 379 521 438 1943 496 1944 555 1939 614 1947 709 1945 848 +1949 993 525 1051 1950 1108 1951 1166 1946 1224 1955 1319 1953 1458 1957 1603 529 1662 1958 1720 +1959 1778 1954 1837 1962 1932 1960 2071 1964 2215 533 2273 1965 2330 1966 2387 1961 2445 1969 2540 +1967 2679 1971 2824 537 2883 1972 2941 1973 3000 1968 3059 1976 3182 1974 3321 1978 3463 541 3521 +1979 3578 1980 3636 1975 3694 1982 3817 1984 3935 1981 3994 1986 4077 1988 4195 545 4253 1985 4310 +1994 4405 1990 4562 1991 4709 1992 4857 1996 5004 549 5063 
1997 5121 1993 5179 2005 5287 1999 5453 +2000 5600 2001 5747 2002 5895 2007 6042 553 6100 2004 6157 2009 6265 2003 6404 2011 6567 1998 6626 +2008 6686 2018 6796 2013 6962 2014 7107 2015 7254 2016 7401 2020 7548 557 7606 2017 7663 2022 7758 +2024 7876 2021 7935 2031 8030 2025 8205 2026 8350 2027 8493 2028 8640 2029 8784 2033 8933 561 8991 +2030 9048 2037 9143 2035 9282 2039 9427 565 9486 2036 9544 2041 9666 2043 9784 569 9842 2040 9899 +% 1933 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1570 0 obj +% 1940 0 obj << /Type /Page -/Contents 1571 0 R -/Resources 1569 0 R +/Contents 1941 0 R +/Resources 1939 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1568 0 R ] +/Parent 1914 0 R +/Annots [ 1938 0 R ] >> -% 1568 0 obj +% 1938 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 494.069 359.001 506.129] -/A << /S /GoTo /D (descdata) >> +/Rect [342.753 438.278 420.271 450.338] +/A << /S /GoTo /D (spdata) >> >> -% 1572 0 obj +% 1942 0 obj << -/D [1570 0 R /XYZ 98.895 753.953 null] +/D [1940 0 R /XYZ 149.705 753.953 null] >> -% 396 0 obj +% 521 0 obj << -/D [1570 0 R /XYZ 99.895 716.092 null] +/D [1940 0 R /XYZ 150.705 716.092 null] >> -% 1573 0 obj +% 1943 0 obj << -/D [1570 0 R /XYZ 99.895 258.556 null] +/D [1940 0 R /XYZ 150.705 676.015 null] >> -% 1569 0 obj +% 1944 0 obj +<< +/D [1940 0 R /XYZ 150.705 679.195 null] +>> +% 1939 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1577 0 obj +% 1947 0 obj << /Type /Page -/Contents 1578 0 R -/Resources 1576 0 R +/Contents 1948 0 R +/Resources 1946 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1575 0 R ] +/Parent 1952 0 R +/Annots [ 1945 0 R ] >> -% 1575 0 obj +% 1945 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 
573.77 409.811 585.83] -/A << /S /GoTo /D (descdata) >> +/Rect [291.943 571.778 369.462 583.837] +/A << /S /GoTo /D (spdata) >> >> -% 1579 0 obj +% 1949 0 obj << -/D [1577 0 R /XYZ 149.705 753.953 null] +/D [1947 0 R /XYZ 98.895 753.953 null] >> -% 400 0 obj +% 525 0 obj << -/D [1577 0 R /XYZ 150.705 716.092 null] +/D [1947 0 R /XYZ 99.895 716.092 null] >> -% 1580 0 obj +% 1950 0 obj << -/D [1577 0 R /XYZ 150.705 358.183 null] +/D [1947 0 R /XYZ 99.895 676.015 null] >> -% 1581 0 obj +% 1951 0 obj << -/D [1577 0 R /XYZ 150.705 314.403 null] +/D [1947 0 R /XYZ 99.895 679.195 null] >> -% 1576 0 obj +% 1946 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1584 0 obj +% 1955 0 obj << /Type /Page -/Contents 1585 0 R -/Resources 1583 0 R +/Contents 1956 0 R +/Resources 1954 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1582 0 R ] +/Parent 1952 0 R +/Annots [ 1953 0 R ] >> -% 1582 0 obj +% 1953 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 573.77 359.001 585.83] -/A << /S /GoTo /D (descdata) >> +/Rect [342.753 438.278 420.271 450.338] +/A << /S /GoTo /D (spdata) >> >> -% 1586 0 obj +% 1957 0 obj << -/D [1584 0 R /XYZ 98.895 753.953 null] +/D [1955 0 R /XYZ 149.705 753.953 null] >> -% 405 0 obj +% 529 0 obj << -/D [1584 0 R /XYZ 99.895 716.092 null] +/D [1955 0 R /XYZ 150.705 716.092 null] >> -% 1587 0 obj +% 1958 0 obj << -/D [1584 0 R /XYZ 99.895 370.138 null] +/D [1955 0 R /XYZ 150.705 678.98 null] >> -% 1588 0 obj +% 1959 0 obj << -/D [1584 0 R /XYZ 99.895 338.313 null] +/D [1955 0 R /XYZ 150.705 679.195 null] >> -% 1583 0 obj +% 1954 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1591 0 obj +% 1962 0 obj << /Type /Page -/Contents 1592 0 R -/Resources 1590 0 R +/Contents 1963 0 R +/Resources 1961 0 R /MediaBox [0 0 595.276 
841.89] -/Parent 1574 0 R -/Annots [ 1589 0 R ] +/Parent 1952 0 R +/Annots [ 1960 0 R ] >> -% 1589 0 obj +% 1960 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 491.971 420.271 504.031] -/A << /S /GoTo /D (spdata) >> +/Rect [301.961 426.323 378.015 438.383] +/A << /S /GoTo /D (vdata) >> >> -% 1593 0 obj +% 1964 0 obj << -/D [1591 0 R /XYZ 149.705 753.953 null] +/D [1962 0 R /XYZ 98.895 753.953 null] >> -% 409 0 obj +% 533 0 obj << -/D [1591 0 R /XYZ 150.705 716.092 null] +/D [1962 0 R /XYZ 99.895 716.092 null] >> -% 1590 0 obj +% 1965 0 obj +<< +/D [1962 0 R /XYZ 99.895 678.98 null] +>> +% 1966 0 obj +<< +/D [1962 0 R /XYZ 99.895 679.195 null] +>> +% 1961 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F61 1360 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1595 0 obj +% 1969 0 obj << /Type /Page -/Contents 1596 0 R -/Resources 1594 0 R +/Contents 1970 0 R +/Resources 1968 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R +/Parent 1952 0 R +/Annots [ 1967 0 R ] >> -% 1597 0 obj +% 1967 0 obj << -/D [1595 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 574.778 420.271 586.838] +/A << /S /GoTo /D (spdata) >> >> -% 1598 0 obj +% 1971 0 obj << -/D [1595 0 R /XYZ 99.895 496.698 null] +/D [1969 0 R /XYZ 149.705 753.953 null] >> -% 1599 0 obj +% 537 0 obj << -/D [1595 0 R /XYZ 99.895 438.313 null] +/D [1969 0 R /XYZ 150.705 716.092 null] >> -% 1600 0 obj +% 1972 0 obj << -/D [1595 0 R /XYZ 99.895 418.388 null] +/D [1969 0 R /XYZ 150.705 679.441 null] >> -% 1594 0 obj +% 1973 0 obj +<< +/D [1969 0 R /XYZ 150.705 679.657 null] +>> +% 1968 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R /F233 1044 0 R >> /ProcSet [ /PDF /Text ] >> -% 1605 0 obj +% 1976 0 obj << /Type /Page -/Contents 1606 0 R -/Resources 1604 0 R 
+/Contents 1977 0 R +/Resources 1975 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1601 0 R 1602 0 R 1603 0 R ] +/Parent 1952 0 R +/Annots [ 1974 0 R ] >> -% 1601 0 obj +% 1974 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 527.942 420.271 540.002] -/A << /S /GoTo /D (spdata) >> +/Rect [396.215 574.59 472.269 586.65] +/A << /S /GoTo /D (vdata) >> >> -% 1602 0 obj +% 1978 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 460.196 409.811 472.256] -/A << /S /GoTo /D (descdata) >> +/D [1976 0 R /XYZ 98.895 753.953 null] >> -% 1603 0 obj +% 541 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [371.126 404.405 438.184 416.465] -/A << /S /GoTo /D (precdata) >> +/D [1976 0 R /XYZ 99.895 716.092 null] >> -% 1607 0 obj +% 1979 0 obj << -/D [1605 0 R /XYZ 149.705 753.953 null] +/D [1976 0 R /XYZ 99.895 679.413 null] >> -% 413 0 obj +% 1980 0 obj << -/D [1605 0 R /XYZ 150.705 716.092 null] +/D [1976 0 R /XYZ 99.895 679.628 null] >> -% 1604 0 obj +% 1975 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R /F233 1044 0 R >> /ProcSet [ /PDF /Text ] >> -% 1609 0 obj +% 1982 0 obj << /Type /Page -/Contents 1610 0 R -/Resources 1608 0 R +/Contents 1983 0 R +/Resources 1981 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R ->> -% 1611 0 obj -<< -/D [1609 0 R /XYZ 98.895 753.953 null] +/Parent 1952 0 R >> -% 417 0 obj +% 1984 0 obj << -/D [1609 0 R /XYZ 99.895 716.092 null] +/D [1982 0 R /XYZ 149.705 753.953 null] >> -% 1608 0 obj +% 1981 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F233 1044 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1614 0 obj +% 1986 0 obj << /Type /Page -/Contents 1615 0 R -/Resources 1613 0 R +/Contents 1987 0 R +/Resources 1985 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Parent 1989 0 R >> -% 1616 0 obj +% 
1988 0 obj << -/D [1614 0 R /XYZ 149.705 753.953 null] +/D [1986 0 R /XYZ 98.895 753.953 null] >> -% 1617 0 obj +% 545 0 obj << -/D [1614 0 R /XYZ 150.705 701.929 null] +/D [1986 0 R /XYZ 99.895 716.092 null] >> -% 1618 0 obj +% 1985 0 obj << -/D [1614 0 R /XYZ 150.705 668.729 null] +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1619 0 obj +% 1994 0 obj << -/D [1614 0 R /XYZ 150.705 624.894 null] +/Type /Page +/Contents 1995 0 R +/Resources 1993 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1989 0 R +/Annots [ 1990 0 R 1991 0 R 1992 0 R ] >> -% 1620 0 obj +% 1990 0 obj << -/D [1614 0 R /XYZ 150.705 555.872 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 442.264 443.414 454.323] +/A << /S /GoTo /D (precdata) >> >> -% 1621 0 obj +% 1991 0 obj << -/D [1614 0 R /XYZ 150.705 500.082 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [222.066 352.6 228.343 366.303] +/A << /S /GoTo /D (Hfootnote.4) >> >> -% 1622 0 obj +% 1992 0 obj << -/D [1614 0 R /XYZ 150.705 468.201 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [221.319 235.698 233.275 245.108] +/A << /S /GoTo /D (table.21) >> >> -% 1623 0 obj +% 1996 0 obj << -/D [1614 0 R /XYZ 150.705 425.023 null] +/D [1994 0 R /XYZ 149.705 753.953 null] >> -% 1624 0 obj +% 549 0 obj << -/D [1614 0 R /XYZ 150.705 382.522 null] +/D [1994 0 R /XYZ 150.705 716.092 null] >> -% 1625 0 obj +% 1997 0 obj << -/D [1614 0 R /XYZ 150.705 354.627 null] +/D [1994 0 R /XYZ 165.051 129.79 null] >> -% 1613 0 obj +% 1993 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1627 0 obj +% 2005 0 obj << /Type /Page -/Contents 1628 0 R -/Resources 1626 0 R +/Contents 2006 0 R +/Resources 2004 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R ->> -% 1629 0 obj -<< -/D [1627 0 R /XYZ 98.895 753.953 null] ->> -% 
421 0 obj -<< -/D [1627 0 R /XYZ 99.895 716.092 null] +/Parent 1989 0 R +/Annots [ 1999 0 R 2000 0 R 2001 0 R 2002 0 R ] >> -% 1626 0 obj +% 1999 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [283.359 567.422 295.314 580.259] +/A << /S /GoTo /D (table.21) >> >> -% 1631 0 obj +% 2000 0 obj << -/Type /Page -/Contents 1632 0 R -/Resources 1630 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [366.137 520.379 378.292 532.438] +/A << /S /GoTo /D (table.21) >> >> -% 1633 0 obj +% 2001 0 obj << -/D [1631 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [379.319 483.736 386.292 496.573] +/A << /S /GoTo /D (section.8) >> >> -% 425 0 obj +% 2002 0 obj << -/D [1631 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 451.412 148.712 460.822] +/A << /S /GoTo /D (table.21) >> >> -% 1634 0 obj +% 2007 0 obj << -/D [1631 0 R /XYZ 150.705 222.691 null] +/D [2005 0 R /XYZ 98.895 753.953 null] >> -% 1635 0 obj +% 553 0 obj << -/D [1631 0 R /XYZ 150.705 200.171 null] +/D [2005 0 R /XYZ 99.895 716.092 null] >> -% 1630 0 obj +% 2004 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F85 814 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1637 0 obj +% 2009 0 obj << /Type /Page -/Contents 1638 0 R -/Resources 1636 0 R +/Contents 2010 0 R +/Resources 2008 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R ->> -% 1639 0 obj -<< -/D [1637 0 R /XYZ 98.895 753.953 null] ->> -% 429 0 obj -<< -/D [1637 0 R /XYZ 99.895 716.092 null] ->> -% 1640 0 obj -<< -/D [1637 0 R /XYZ 99.895 348.22 null] ->> -% 1641 0 obj -<< -/D [1637 0 R /XYZ 99.895 313.8 null] +/Parent 1989 0 R +/Annots [ 2003 0 R ] >> -% 1636 0 obj +% 2003 0 obj << -/Font << /F51 584 0 
R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [404.253 212.933 412.423 220.405] +/A << /S /GoTo /D (cite.BERTACCINIFILIPPONE) >> >> -% 1643 0 obj -<< -/Type /Page -/Contents 1644 0 R -/Resources 1642 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +% 2011 0 obj +<< +/D [2009 0 R /XYZ 149.705 753.953 null] >> -% 1645 0 obj +% 1998 0 obj << -/D [1643 0 R /XYZ 149.705 753.953 null] +/D [2009 0 R /XYZ -3900.052 719.08 null] >> -% 433 0 obj +% 2008 0 obj << -/D [1643 0 R /XYZ 150.705 716.092 null] +/Font << /F233 1044 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1646 0 obj +% 2018 0 obj << -/D [1643 0 R /XYZ 150.705 441.869 null] +/Type /Page +/Contents 2019 0 R +/Resources 2017 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1989 0 R +/Annots [ 2013 0 R 2014 0 R 2015 0 R 2016 0 R ] >> -% 1647 0 obj +% 2013 0 obj << -/D [1643 0 R /XYZ 150.705 395.439 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [314.678 576.377 392.197 588.437] +/A << /S /GoTo /D (spdata) >> >> -% 1648 0 obj +% 2014 0 obj << -/D [1643 0 R /XYZ 150.705 363.559 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [395.856 505.741 468.144 517.801] +/A << /S /GoTo /D (precdata) >> >> -% 1642 0 obj +% 2015 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.454 435.105 438.512 447.165] +/A << /S /GoTo /D (descdata) >> >> -% 1650 0 obj +% 2016 0 obj << -/Type /Page -/Contents 1651 0 R -/Resources 1649 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [317.418 128.475 389.706 140.535] +/A << /S /GoTo /D (precdata) >> >> -% 1652 0 obj +% 2020 0 obj << -/D [1650 0 R /XYZ 98.895 753.953 null] +/D [2018 0 R 
/XYZ 98.895 753.953 null] >> -% 437 0 obj +% 557 0 obj << -/D [1650 0 R /XYZ 99.895 716.092 null] +/D [2018 0 R /XYZ 99.895 716.092 null] >> -% 1649 0 obj +% 2017 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1655 0 obj +% 2022 0 obj << /Type /Page -/Contents 1656 0 R -/Resources 1654 0 R +/Contents 2023 0 R +/Resources 2021 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R ->> -% 1657 0 obj -<< -/D [1655 0 R /XYZ 149.705 753.953 null] +/Parent 1989 0 R >> -% 441 0 obj +% 2024 0 obj << -/D [1655 0 R /XYZ 150.705 716.092 null] +/D [2022 0 R /XYZ 149.705 753.953 null] >> -% 1654 0 obj +% 2021 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1659 0 obj +% 2031 0 obj << /Type /Page -/Contents 1660 0 R -/Resources 1658 0 R +/Contents 2032 0 R +/Resources 2030 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/Parent 2034 0 R +/Annots [ 2025 0 R 2026 0 R 2027 0 R 2028 0 R 2029 0 R ] >> -% 1661 0 obj +% 2025 0 obj << -/D [1659 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 573.77 392.605 585.83] +/A << /S /GoTo /D (precdata) >> >> -% 445 0 obj +% 2026 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.493 517.98 418.548 530.039] +/A << /S /GoTo /D (vdata) >> +>> +% 2027 0 obj << -/D [1659 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [324.013 462.189 391.071 474.248] +/A << /S /GoTo /D (descdata) >> >> -% 1658 0 obj +% 2028 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.493 272.899 418.548 284.958] +/A << /S /GoTo /D (vdata) >> >> -% 1663 0 obj +% 2029 0 obj << -/Type /Page -/Contents 1664 0 R -/Resources 
1662 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [431.552 183.235 443.387 196.238] +/A << /S /GoTo /D (section.11) >> >> -% 1665 0 obj +% 2033 0 obj << -/D [1663 0 R /XYZ 149.705 753.953 null] +/D [2031 0 R /XYZ 98.895 753.953 null] >> -% 449 0 obj +% 561 0 obj << -/D [1663 0 R /XYZ 150.705 716.092 null] +/D [2031 0 R /XYZ 99.895 716.092 null] >> -% 1662 0 obj +% 2030 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1667 0 obj +% 2037 0 obj << /Type /Page -/Contents 1668 0 R -/Resources 1666 0 R +/Contents 2038 0 R +/Resources 2036 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R ->> -% 1669 0 obj -<< -/D [1667 0 R /XYZ 98.895 753.953 null] +/Parent 2034 0 R +/Annots [ 2035 0 R ] >> -% 453 0 obj +% 2035 0 obj << -/D [1667 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 573.77 443.414 585.83] +/A << /S /GoTo /D (precdata) >> >> -% 1666 0 obj +% 2039 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [2037 0 R /XYZ 149.705 753.953 null] >> - -endstream -endobj -1672 0 obj +% 565 0 obj << -/Length 5526 +/D [2037 0 R /XYZ 150.705 716.092 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.9)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(bcast)-250(\227)-250(Broadcast)-250(data)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -20.269 Td [(call)-525(psb_bcast\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-221(subr)18(outine)-222(implements)-221(a)-221(br)18(oadcast)-222(operation)-221(based)-221(on)-222(the)-221(underlying)]TJ -14.944 -11.955 Td [(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ 
-0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.619 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(the)-250(r)18(oot)-250(pr)18(ocess,)-250(the)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-344(a)-344(rank)-344(1)-344(or)-344(2)-344(array)111(,)-367(or)-344(a)-344(character)-344(or)-344(logical)-344(variable,)-367(which)-344(may)-344(be)]TJ 0 -11.955 Td [(a)-377(scalar)-377(or)-377(rank)-377(1)-377(array)111(.)-1067(T)90(ype,)-409(kind,)-409(rank)-377(and)-376(size)-377(must)-377(agr)18(ee)-377(on)-377(all)]TJ 0 -11.955 Td [(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ -0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Root)-250(pr)18(ocess)-250(holding)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 
0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.503 0 Td [(\000)]TJ/F54 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(0)]TJ -0 g 0 G -/F51 9.9626 Tf -246.025 -22.618 Td [(mode)]TJ -0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td 
[(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(120)]TJ -0 g 0 G -ET - -endstream -endobj -1677 0 obj +% 2036 0 obj << -/Length 4820 +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R /F78 686 0 R /F190 941 0 R >> +/ProcSet [ /PDF /Text ] >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(all)-250(pr)18(ocesses)-250(other)-250(than)-250(r)18(oot,)-250(the)-250(br)18(oadcasted)-250(data.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td 
[(or)-315(a)-314(rank)-315(1)-315(or)-314(2)-315(array)111(,)-331(or)-314(a)-315(character)-315(or)-314(logical)-315(scalar)74(.)-819(T)90(ype,)-330(kind,)-331(rank)]TJ 0 -11.955 Td [(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ -0 g 0 G - -12.453 -19.925 Td [(2.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td 
[(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 124.802 417.212 cm -0 0 318.804 27.895 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 137.205 434.448 Td [(call)]TJ -0 g 0 G - [-525(psb_bcast\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(3.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 124.802 306.627 cm -0 0 318.804 60.772 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 137.205 356.739 Td [(call)]TJ -0 g 0 G - [-525(psb_bcast\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(bcast_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ -0 g 0 G - [-525(psb_bcast\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 
0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(bcast_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 103.537 -222.465 Td [(121)]TJ -0 g 0 G -ET - -endstream -endobj -1686 0 obj +% 2041 0 obj << -/Length 5829 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.10)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_sum\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -22.401 Td [(This)-353(subr)18(outine)-353(implements)-354(a)-353(sum)-353(r)18(eduction)-353(operation)-353(based)-354(on)-353(the)-353(un-)]TJ -14.944 -11.955 Td [(derlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.408 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(sum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 
9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -20.408 Td [(root)]TJ -0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(to)-250(hold)-250(the)-250(\002nal)-250(sum,)-250(or)]TJ/F83 10.3811 Tf 143.744 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-250(to)-250(make)-250(it)-250(available)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -150.284 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ -0 g 0 G -/F51 9.9626 Tf -254.343 -20.409 Td [(mode)]TJ -0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 
9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -32.364 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.408 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(sum)-250(operation.)]TJ 
5.54 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.781 0 Td [(.)]TJ -0 g 0 G - 79.264 -29.887 Td [(122)]TJ -0 g 0 G -ET +/Type /Page +/Contents 2042 0 R +/Resources 2040 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2034 0 R +>> +% 2043 0 obj +<< +/D [2041 0 R /XYZ 98.895 753.953 null] +>> +% 569 0 obj +<< +/D [2041 0 R /XYZ 99.895 716.092 null] +>> +% 2040 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> endstream endobj -1690 0 obj +2048 0 obj << -/Length 4455 +/Length 2715 >> stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(10.7)-1000(free)-250(\227)-250(Free)-250(a)-250(preconditioner)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td 
[(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F145 9.9626 Tf 0 -18.964 Td [(call)-525(prec%free\050info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +/F84 9.9626 Tf 29.439 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ +/F75 9.9626 Tf -29.439 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G + 0 -19.925 Td [(prec)]TJ +0 g 0 G +/F84 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.511 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 441.123 cm -0 0 318.804 27.895 re f +1 0 0 1 388.441 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 458.358 Td [(call)]TJ -0 g 0 G - [-525(psb_sum\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.179 Td [(3.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F145 9.9626 Tf 391.579 577.576 Td [(Tprec)]TJ ET q -1 0 0 1 124.802 330.537 cm -0 0 318.804 60.772 re f +1 0 0 1 418.358 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 380.649 Td [(call)]TJ +/F145 9.9626 Tf 421.497 577.576 Td [(type)]TJ 0 g 0 G - [-525(psb_sum\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.958 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F75 9.9626 Tf -291.713 -19.925 Td [(On)-250(Exit)]TJ 0 g 0 G - [(sum_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 
g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ + 0 -19.926 Td [(prec)]TJ 0 g 0 G - [-525(psb_sum\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 24.348 0 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.182 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.511 0 Td [(psb)]TJ +ET +q +1 0 0 1 388.441 502.059 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 391.579 501.86 Td [(Tprec)]TJ +ET +q +1 0 0 1 418.358 502.059 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 421.497 501.86 Td [(type)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(sum_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F75 9.9626 Tf -291.713 -19.925 Td [(info)]TJ 0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -30.634 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F75 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ/F84 9.9626 Tf 34.363 0 Td [(Releases)-250(all)-250(internal)-250(storage.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -246.376 Td [(123)]TJ + 130.02 -333.713 Td [(160)]TJ 0 g 0 G ET endstream endobj -1697 0 obj +2057 0 obj << -/Length 5548 +/Length 6107 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.11)-1000(psb)]TJ +/F75 11.9552 Tf 99.895 
706.129 Td [(10.8)-1000(allocate)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 175.32 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_max\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-354(subr)18(outine)-354(implements)-354(a)-354(maximum)-354(valuer)18(eduction)-354(operation)-354(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.619 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +/F75 11.9552 Tf 178.907 706.129 Td [(wrk)-250(\227)-250(preconditioner)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td 
[(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +/F145 9.9626 Tf -79.012 -18.964 Td [(call)-525(prec%allocate_wrk\050info[,vmold]\051)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-255(to)-255(hold)-255(the)-255(\002nal)-255(maximum,)-257(or)]TJ/F83 10.3811 Tf 170.502 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-255(to)-255(make)-255(it)-255(available)-255(on)-255(all)-255(pr)18(o-)]TJ -177.042 -11.955 Td [(cesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td 
[(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(124)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G +/F84 9.9626 Tf 24.349 0 Td 
[(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.511 0 Td [(psb)]TJ ET - -endstream -endobj -1701 0 obj -<< -/Length 4718 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 337.631 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 
Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F145 9.9626 Tf 340.77 577.576 Td [(Tprec)]TJ +ET +q +1 0 0 1 367.549 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 370.687 577.576 Td [(type)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ +/F75 9.9626 Tf -291.714 -19.925 Td [(vmold)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F84 9.9626 Tf 34.013 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -9.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td 
[(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F145 9.9626 Tf 201.393 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 429.167 cm -0 0 318.804 27.895 re f +1 0 0 1 342.513 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 446.403 Td [(call)]TJ -0 g 0 G - [-525(psb_max\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ +/F145 9.9626 Tf 345.652 509.83 Td [(T)]TJ +ET +q +1 0 0 1 351.51 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 354.648 509.83 Td [(base)]TJ +ET +q +1 0 0 1 376.197 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 379.335 509.83 Td [(vect)]TJ +ET +q +1 0 0 1 400.884 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 404.022 509.83 Td [(type)]TJ/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F75 9.9626 Tf -325.049 -19.925 Td [(On)-250(Exit)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf -48.393 -32.179 Td [(3.)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.956 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F84 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.183 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td 
[(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.511 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 318.582 cm -0 0 318.804 60.772 re f +1 0 0 1 337.631 434.313 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 368.694 Td [(call)]TJ -0 g 0 G - [-525(psb_max\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(max_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F145 9.9626 Tf 340.77 434.114 Td [(Tprec)]TJ +ET +q +1 0 0 1 367.549 434.313 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 370.687 434.114 Td [(type)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.958 Td [(call)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - [-525(psb_max\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F75 9.9626 Tf -291.714 -19.925 Td [(info)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -30.635 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F75 11.9552 Tf 1.02 0 0 1 99.895 356.405 Tm [(Notes)]TJ/F84 9.9626 Tf 1.02 0 0 1 
135.411 356.405 Tm [(Pr)18(econditioners)-296(often)-296(need)-296(internal)-295(work)-296(storage)-296(during)-296(their)-295(appli-)]TJ 1.02 0 0 1 99.895 344.45 Tm [(cation)-257(at)-258(each)-257(iteration)-258(of)-257(a)-258(linear)-257(solver)-257(method:)-329(in)-257(many)-258(situations)-257(this)-258(can)]TJ 1.02 0 0 1 99.895 332.495 Tm [(be)-264(accomplished)-263(by)-264(allocating)-263(and)-264(r)18(eleasing)-264(memory)-263(\223on)-264(the)-263(\003y\224.)-360(However)73(,)]TJ 0.992 0 0 1 99.477 320.54 Tm [(when)-252(r)8(unning)-253(on)-252(an)-253(accelerator)-252(thr)18(ough)-252(e.g.)-314(the)-253(CUDA)-252(enabled)-252(data)-253(str)8(utur)18(es)]TJ 1.013 0 0 1 99.895 308.585 Tm [(of)-248(Sec.)]TJ +0 0 1 rg 0 0 1 RG + [-248(12.4)]TJ 0 g 0 G - [(max_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-248(and)]TJ +0 0 1 rg 0 0 1 RG + [-495(13)]TJ 0 g 0 G + [(,)-249(memor)1(y)-248(allocation)-248(and)-248(deallocation)-248(usually)-248(have)-248(a)-247(much)]TJ 1.005 0 0 1 99.895 296.63 Tm [(lar)18(ger)-247(over)18(head,)-248(signi\002cantly)-247(af)18(fecting)-247(performance.)-308(T)92(o)-247(alleviate)-247(this)-247(pr)17(oblem)]TJ 1.02 0 0 1 99.477 284.674 Tm [(we)-301(de\002ne)-300(this)-301(method)-300(that)-301(pr)18(eallocates)-301(internal)-300(storage;)-329(it)-300(is)-301(intended)-300(to)-301(be)]TJ 0.98 0 0 1 99.895 272.719 Tm [(invoked)-245(p)1(rior)-245(to)-244(the)-245(iterative)-244(solver)-245(method,)-246(so)-245(that)-244(the)-245(necessary)-244(internal)-245(work)]TJ 1 0 0 1 99.895 260.764 Tm [(storage)-250(is)-250(available)-250(thr)18(oughout)-250(the)-250(iterative)-250(method)-250(application.)]TJ 1.002 0 0 1 114.839 248.809 Tm [(When)-250(using)-251(GPUs)-250(or)-251(other)-250(specialized)-250(devices,)-251(the)]TJ/F145 9.9626 Tf 1 0 0 1 342.254 248.809 Tm [(vmold)]TJ/F84 9.9626 Tf 1.002 0 0 1 370.906 248.809 Tm [(ar)18(gument)-250(is)-251(also)]TJ 1.019 0 0 1 99.895 236.854 Tm 
[(necessary)-246(to)-246(ensur)17(e)-246(the)-246(internal)-246(work)-246(vectors)-247(ar)18(e)-246(of)-246(the)-246(appr)17(opriate)-246(dynamic)]TJ 1 0 0 1 99.895 224.899 Tm [(type)-250(to)-250(exploit)-250(the)-250(accelerator)-250(har)18(dwar)18(e.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -234.421 Td [(125)]TJ + 164.384 -134.461 Td [(161)]TJ 0 g 0 G ET endstream endobj -1708 0 obj +2064 0 obj << -/Length 5813 +/Length 3194 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.12)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(10.9)-1000(deallocate)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 239.412 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_min\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -22.401 Td [(This)-328(subr)18(outine)-327(implements)-328(a)-328(minimum)-327(value)-328(r)18(eduction)-328(o)1(peration)-328(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ +/F75 11.9552 Tf 242.998 706.129 Td [(wrk)-250(\227)-250(preconditioner)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +/F145 9.9626 Tf -92.293 -18.964 Td [(call)-525(prec%allocate_wrk\050info\051)]TJ 0 -11.955 Td [(call)-525(prec%free_wrk\050info\051)]TJ 0 g 0 G +/F75 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -20.408 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 
-11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -20.408 Td [(root)]TJ -0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 
11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ -0 g 0 G -/F51 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ -0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +/F75 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td 
[(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.408 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.781 0 Td [(.)]TJ -0 g 0 G - 79.264 -29.887 Td [(126)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G +/F84 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.319 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.511 0 Td [(psb)]TJ ET - -endstream -endobj -1712 0 obj -<< -/Length 4437 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 388.441 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F54 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td 
[(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ -0 g 0 G - -12.453 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 
0.95 rg 0.95 0.95 0.95 RG +/F145 9.9626 Tf 391.579 565.621 Td [(Tprec)]TJ ET q -1 0 0 1 124.802 441.123 cm -0 0 318.804 27.895 re f +1 0 0 1 418.358 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 458.358 Td [(call)]TJ +/F145 9.9626 Tf 421.497 565.621 Td [(type)]TJ 0 g 0 G - [-525(psb_min\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F75 9.9626 Tf -291.713 -19.925 Td [(On)-250(Exit)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf -48.393 -32.179 Td [(3.)]TJ + 0 -19.926 Td [(prec)]TJ 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F84 9.9626 Tf 24.348 0 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.182 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 196.511 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 330.537 cm -0 0 318.804 60.772 re f +1 0 0 1 388.441 490.104 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 380.649 Td [(call)]TJ +/F145 9.9626 Tf 391.579 489.905 Td 
[(Tprec)]TJ +ET +q +1 0 0 1 418.358 490.104 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 421.497 489.905 Td [(type)]TJ 0 g 0 G - [-525(psb_min\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.958 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F75 9.9626 Tf -291.713 -19.925 Td [(info)]TJ 0 g 0 G - [(min_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F84 9.9626 Tf 23.8 0 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -30.634 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F75 11.9552 Tf 0.98 0 0 1 150.705 412.196 Tm [(Notes)]TJ/F84 9.9626 Tf 0.98 0 0 1 184.378 412.196 Tm [(Deallocates)-250(pr)19(econditioner)-250(internal)-250(work)-250(storage;)-251(to)-250(be)-250(invoked)-249(after)-250(an)]TJ 1 0 0 1 150.705 400.241 Tm [(iterative)-250(solver)-250(has)-250(completed)-250(execution,)-250(see)-250(the)-250(discussion)-250(in)-250(Sec.)]TJ +0 0 1 rg 0 0 1 RG + [-250(10.8)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ + [(.)]TJ 0 g 0 G - [-525(psb_min\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + 164.383 -309.803 Td [(162)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +ET + +endstream +endobj +2068 0 obj +<< +/Length 607 +>> +stream 0 g 0 G - [(min_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +BT +/F75 14.3462 Tf 99.895 705.784 Td 
[(11)-1000(Iterative)-250(Methods)]TJ/F84 9.9626 Tf 1.006 0 0 1 99.895 683.082 Tm [(In)-249(this)-250(chapter)-249(we)-250(pr)18(ovide)-249(r)18(outines)-250(for)-249(pr)18(econditioners)-250(and)-249(iterative)-249(methods.)]TJ 0.98 0 0 1 99.587 671.127 Tm [(The)-194(interfaces)-194(for)-194(it)1(erative)-194(methods)-194(ar)18(e)-194(available)-194(in)-193(the)-194(module)]TJ/F145 9.9626 Tf 1 0 0 1 369.457 671.127 Tm [(psb_linsolve_mod)]TJ/F84 9.9626 Tf 0.98 0 0 1 453.142 671.127 Tm [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -246.376 Td [(127)]TJ + 1 0 0 1 264.279 90.438 Tm [(163)]TJ 0 g 0 G ET endstream endobj -1720 0 obj +2076 0 obj << -/Length 5616 +/Length 8466 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.13)-1000(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(11.1)-1000(psb)]TJ ET q 1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ +/F75 11.9552 Tf 207.803 706.129 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ/F84 9.9626 Tf 1.013 0 0 1 150.396 687.165 Tm [(This)-247(subr)18(outine)-247(is)-248(a)-247(driver)-247(that)-247(pr)18(ovides)-247(a)-247(general)-247(interface)-248(for)-247(all)-247(the)-247(Krylov-)]TJ 1 0 0 1 150.705 675.21 Tm [(Subspace)-250(family)-250(methods)-250(implemented)-250(in)-250(PSBLAS)-250(version)-250(2.)]TJ 14.944 -11.955 Td [(The)-250(stopping)-250(criterion)-250(can)-250(take)-250(the)-250(following)-250(values:)]TJ +0 g 0 G +/F75 9.9626 Tf -14.944 -18.774 Td [(1)]TJ +0 g 0 G +/F84 9.9626 Tf 0.99 0 0 1 160.667 644.481 Tm [(normwise)-253(backwar)19(d)-253(err)18(or)-252(in)-253(the)-252(in\002nity)-253(norm;)-252(the)-253(iteration)-252(is)-253(stopped)-252(when)]TJ/F78 9.9626 Tf 1 0 0 1 269.703 617.617 Tm [(e)-15(r)-25(r)]TJ/F192 10.3811 Tf 15.141 0 Td [(=)]TJ/F190 10.3811 Tf 40.619 6.745 Td [(k)]TJ/F78 9.9626 Tf 5.34 0 Td [(r)]TJ/F78 7.5716 Tf 4.042 -1.96 Td 
[(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(k)]TJ +ET +q +1 0 0 1 297.001 620.108 cm +[]0 d 0 J 0.398 w 0 0 m 74.372 0 l S +Q +BT +/F192 10.3811 Tf 297.125 610.783 Td [(\050)]TJ/F190 10.3811 Tf 4.274 0 Td [(k)]TJ/F78 9.9626 Tf 5.938 0 Td [(A)]TJ/F190 10.3811 Tf 7.442 0 Td [(k)-24(k)]TJ/F78 9.9626 Tf 11.048 0 Td [(x)]TJ/F78 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F192 10.3811 Tf 7.377 0 Td [(+)]TJ/F190 10.3811 Tf 10.255 0 Td [(k)]TJ/F78 9.9626 Tf 5.44 0 Td [(b)]TJ/F190 10.3811 Tf 4.862 0 Td [(k)]TJ/F192 10.3811 Tf 5.439 0 Td [(\051)]TJ/F148 10.3811 Tf 8.237 6.834 Td [(<)]TJ/F78 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ +0 g 0 G +/F75 9.9626 Tf -235.841 -29.908 Td [(2)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F78 9.9626 Tf 136.209 -26.865 Td [(e)-15(r)-25(r)]TJ/F192 10.3811 Tf 15.14 0 Td [(=)]TJ/F190 10.3811 Tf 13.446 6.745 Td [(k)]TJ/F78 9.9626 Tf 5.34 0 Td [(r)]TJ/F78 7.5716 Tf 4.042 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(k)]TJ +ET +q +1 0 0 1 324.174 563.335 cm +[]0 d 0 J 0.398 w 0 0 m 20.025 0 l S +Q +BT +/F190 10.3811 Tf 324.299 554.01 Td [(k)]TJ/F78 9.9626 Tf 5.439 0 Td [(b)]TJ/F190 10.3811 Tf 4.862 0 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F148 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F78 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ +0 g 0 G +/F75 9.9626 Tf -208.667 -29.848 Td [(3)]TJ +0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(Relative)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F78 9.9626 Tf 134.486 -26.865 Td [(e)-15(r)-25(r)]TJ/F192 10.3811 Tf 15.141 0 Td [(=)]TJ/F190 10.3811 Tf 15.169 6.745 Td [(k)]TJ/F78 9.9626 Tf 5.34 0 Td [(r)]TJ/F78 7.5716 Tf 4.042 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(k)]TJ +ET +q +1 0 0 1 322.451 506.622 cm +[]0 d 0 J 0.398 w 0 0 m 23.472 0 l S +Q +BT +/F190 10.3811 Tf 322.575 497.297 Td 
[(k)]TJ/F78 9.9626 Tf 5.34 0 Td [(r)]TJ/F84 7.5716 Tf 4 -1.744 Td [(0)]TJ/F190 10.3811 Tf 4.409 1.744 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F148 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F78 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ/F84 9.9626 Tf 1.02 0 0 1 150.396 474.283 Tm [(The)-333(behaviour)-333(is)-333(contr)18(olled)-333(by)-333(the)-333(istop)-333(ar)18(gument)-333(\050see)-333(later\051.)-567(In)-333(the)-333(above)]TJ 0.98 0 0 1 150.705 462.327 Tm [(formulae,)]TJ/F78 9.9626 Tf 1 0 0 1 194.836 462.327 Tm [(x)]TJ/F78 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F84 9.9626 Tf 0.98 0 0 1 204.675 462.327 Tm [(is)-199(the)-199(tentative)-198(solution)-199(and)]TJ/F78 9.9626 Tf 1 0 0 1 324.514 462.327 Tm [(r)]TJ/F78 7.5716 Tf 4.042 -1.96 Td [(i)]TJ/F192 10.3811 Tf 5.642 1.96 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(b)]TJ/F190 10.3811 Tf 6.29 0 Td [(\000)]TJ/F78 9.9626 Tf 10.245 0 Td [(A)-42(x)]TJ/F78 7.5716 Tf 12.759 -1.96 Td [(i)]TJ/F84 9.9626 Tf 0.98 0 0 1 379.269 462.327 Tm [(the)-199(corr)19(esponding)-199(r)18(esidual)]TJ 1 0 0 1 150.705 450.372 Tm [(at)-250(the)]TJ/F78 9.9626 Tf 27.083 0 Td [(i)]TJ/F84 9.9626 Tf 2.964 0 Td [(-th)-250(iteration.)]TJ -28.306 -17.357 Td [(c)-175(a)-175(l)-174(l)-880(p)-105(s)-105(b)]TJ +ET +q +1 0 0 1 201.089 433.215 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 205.122 433.015 Td [(k)-105(r)-105(y)-105(l)-104(o)-105(v)-238(\050)-156(m)-21(e)-22(t)-22(h)-21(o)-22(d)-218(,)-208(a)-242(,)-255(p)-80(r)-81(e)-80(c)-335(,)-191(b)-206(,)-203(x)-231(,)-234(e)-60(p)-59(s)-293(,)-273(d)-98(e)-97(s)-98(c)]TJ +ET +q +1 0 0 1 402.829 433.215 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 406.792 433.015 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-273(,)-57(&)]TJ -227.086 -11.955 Td 
[(&)-580(i)-69(t)-69(m)-70(a)-69(x)-313(,)-327(i)-151(t)-152(e)-151(r)-478(,)-281(e)-107(r)-106(r)-387(,)-321(i)-145(t)-146(r)-146(a)-145(c)-146(e)-466(,)-336(i)-161(r)-160(s)-161(t)-496(,)-291(i)-116(s)-116(t)-116(o)-116(p)-407(,)-219(c)-43(o)-44(n)-43(d)-177(\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F75 9.9626 Tf -29.001 -25.88 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F75 9.9626 Tf -29.828 -19.349 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.349 Td [(method)]TJ +0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 190.555 356.482 Tm [(a)-253(string)-254(that)-253(de\002nes)-253(the)-253(iterative)-254(method)-253(to)-253(be)-254(used.)-315(Supported)-254(values)]TJ 1 0 0 1 175.611 344.526 Tm [(ar)18(e:)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -19.349 Td [(CG:)]TJ +0 g 0 G +/F84 9.9626 Tf 22.964 0 Td [(the)-250(Conjugate)-250(Gradient)-250(method;)]TJ +0 g 0 G +/F75 9.9626 Tf -22.964 -15.364 Td [(CGS:)]TJ 0 g 0 G +/F84 9.9626 Tf 29.051 0 Td [(the)-250(Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amx\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-342(subr)18(outine)-342(implements)-342(a)-342(maximum)-341(absolute)-342(value)-342(r)18(eduction)-342(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +/F75 9.9626 Tf -29.051 -15.365 Td [(GCR:)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ +/F84 9.9626 Tf 30.157 0 Td [(the)-250(Generalized)-250(Conjugate)-250(Residual)-250(method;)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F75 9.9626 Tf -30.157 -15.364 Td [(FCG:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 28.503 0 Td [(the)-250(Flexible)-250(Conjugate)-250(Gradient)-250(method)]TJ +0 0 1 rg 0 0 1 RG +/F84 7.5716 Tf 176.855 3.616 Td [(5)]TJ +0 g 0 G +/F84 9.9626 Tf 4.284 -3.616 
Td [(;)]TJ 0 g 0 G +/F75 9.9626 Tf -209.642 -15.364 Td [(BICG:)]TJ 0 g 0 G - 0 -22.619 Td [(ctxt)]TJ +/F84 9.9626 Tf 33.485 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(method;)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F75 9.9626 Tf -33.485 -15.365 Td [(BICGST)92(AB:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +/F84 9.9626 Tf 59.696 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +/F75 9.9626 Tf -59.696 -15.364 Td [(BICGST)92(ABL:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +/F84 9.9626 Tf 0.999 0 0 1 241.394 232.991 Tm 
[(the)-250(Bi-Conjugate)-249(Gradient)-250(Stabilized)-250(metho)1(d)-250(with)-250(r)18(estart-)]TJ 1 0 0 1 197.529 221.036 Tm [(ing;)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F75 9.9626 Tf -21.918 -15.365 Td [(RGMRES:)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ +/F84 9.9626 Tf 52.294 0 Td [(the)-250(Generalized)-250(Minimal)-250(Residual)-250(method)-250(with)-250(r)18(estarting.)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +/F75 9.9626 Tf -77.2 -19.349 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +/F84 9.9626 Tf 9.962 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(sparse)-250(matrix)]TJ/F78 9.9626 Tf 178.414 0 Td [(A)]TJ/F84 9.9626 Tf 7.318 0 Td [(.)]TJ -170.788 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 138.701 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 138.501 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 138.701 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 398.354 138.501 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 
9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +ET +q +1 0 0 1 150.705 130.091 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +Q +BT +/F84 5.9776 Tf 161.564 123.219 Td [(5)]TJ/F84 7.9701 Tf 3.487 -2.893 Td [(Note:)-310(the)-250(implementation)-250(is)-250(for)]TJ/F78 7.9701 Tf 113.298 0 Td [(F)-31(C)-45(G)]TJ/F192 8.3049 Tf 16.386 0 Td [(\050)]TJ/F84 7.9701 Tf 3.32 0 Td [(1)]TJ/F192 8.3049 Tf 4.085 0 Td [(\051)]TJ/F84 7.9701 Tf 3.319 0 Td [(.)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(128)]TJ +/F84 9.9626 Tf 9.629 -29.888 Td [(164)]TJ 0 g 0 G ET endstream endobj -1724 0 obj +2087 0 obj << -/Length 4760 +/Length 7173 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ +/F75 9.9626 Tf 99.895 706.129 Td [(prec)]TJ 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td 
[(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +/F84 9.9626 Tf 24.04 0 Td [(The)-250(data)-250(str)8(uctur)18(e)-250(containing)-250(the)-250(pr)18(econditioner)74(.)]TJ 0.867 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 658.308 Td [(Tprec)]TJ +ET +q +1 0 0 1 339.176 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 342.314 658.308 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F75 9.9626 Tf -263.34 -22.202 Td [(b)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F84 9.9626 Tf 10.76 0 Td 
[(The)-250(RHS)-250(vector)74(.)]TJ 14.147 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 218.688 0 Td [(psb)]TJ +ET +q +1 0 0 1 359.808 588.484 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 362.947 588.285 Td [(T)]TJ +ET +q +1 0 0 1 368.804 588.484 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 371.943 588.285 Td [(vect)]TJ +ET +q +1 0 0 1 393.492 588.484 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 396.63 588.285 Td [(type)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ +/F75 9.9626 Tf -317.656 -22.202 Td [(x)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 
+/F84 9.9626 Tf 9.654 0 Td [(The)-250(initial)-250(guess.)]TJ 15.253 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(inout)]TJ/F84 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 218.688 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 417.212 cm -0 0 318.804 27.895 re f +1 0 0 1 359.808 518.461 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 434.448 Td [(call)]TJ -0 g 0 G - [-525(psb_amx\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ +/F145 9.9626 Tf 362.947 518.262 Td [(T)]TJ +ET +q +1 0 0 1 368.804 518.461 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 371.943 518.262 Td [(vect)]TJ +ET +q +1 0 0 1 393.492 518.461 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 396.63 518.262 Td [(type)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G +/F75 9.9626 Tf -317.656 -22.203 Td [(eps)]TJ 0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(3.)]TJ +/F84 9.9626 Tf 20.165 0 Td [(The)-250(stopping)-250(tolerance.)]TJ 4.742 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G - 
[-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F75 9.9626 Tf -24.907 -22.203 Td [(desc)]TJ ET q -1 0 0 1 124.802 306.627 cm -0 0 318.804 60.772 re f +1 0 0 1 120.408 426.236 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 127.791 356.739 Td [(call)]TJ +/F75 9.9626 Tf 123.397 426.036 Td [(a)]TJ 0 g 0 G - [-525(psb_amx\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 378.415 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 378.216 Td [(desc)]TJ +ET +q +1 0 0 1 333.945 378.415 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 337.084 378.216 Td [(type)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.536 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(amx_request\051)]TJ -14.122 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F75 9.9626 Tf -258.11 -22.203 Td 
[(itmax)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F84 9.9626 Tf 30.127 0 Td [(The)-250(maximum)-250(number)-250(of)-250(iterations)-250(to)-250(perform.)]TJ -5.22 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F78 9.9626 Tf 38.57 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F192 10.3811 Tf 27.743 0 Td [(=)]TJ/F84 9.9626 Tf 10.962 0 Td [(1000.)]TJ -77.275 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F78 9.9626 Tf 142.349 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F190 10.3811 Tf 27.744 0 Td [(\025)]TJ/F84 9.9626 Tf 10.961 0 Td [(1.)]TJ 0 g 0 G - [-525(psb_amx\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F75 9.9626 Tf -205.961 -22.202 Td [(itrace)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 0.98 0 0 1 129.773 274.035 Tm [(If)]TJ/F148 10.3811 Tf 1 0 0 1 138.672 274.035 Tm [(>)]TJ/F84 9.9626 Tf 0.98 0 0 1 149.634 274.035 Tm [(0)-229(print)-228(out)-229(an)-229(informational)-228(message)-229(about)-229(conver)19(gence)-229(every)]TJ/F78 9.9626 Tf 1 0 0 1 419.702 274.035 Tm [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F84 9.9626 Tf -294.9 -11.955 Td [(iterations.)-310(If)]TJ/F192 10.3811 Tf 56.313 0 Td [(=)]TJ/F84 9.9626 Tf 10.962 0 Td [(0)-250(print)-250(a)-250(message)-250(in)-250(case)-250(of)-250(conver)18(gence)-250(failur)18(e.)]TJ -67.275 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.956 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Default:)]TJ/F78 9.9626 Tf 38.57 0 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F192 10.3811 Tf 26.797 0 Td [(=)]TJ/F190 10.3811 Tf 11.086 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1.)]TJ 0 g 0 G - [(amx_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F75 9.9626 Tf -109.554 -34.158 Td [(irst)]TJ 0 g 0 G +/F84 9.9626 Tf 19.527 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(r)18(estart)-250(parameter)74(.)]TJ 5.38 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.02 0 0 1 124.443 132.281 Tm [(V)90(alues:)]TJ/F78 9.9626 Tf 1 0 0 1 162.528 132.281 Tm [(i)-22(r)-35(s)-25(t)]TJ/F148 10.3811 Tf 20.115 0 Td [(>)]TJ/F84 9.9626 Tf 1.02 0 0 1 196.049 132.281 Tm [(0.)-694(Th)1(is)-375(is)-375(employed)-375(for)-375(the)-375(BiCGST)72(ABL)-375(or)-374(RGMRES)]TJ 1 0 0 1 124.802 120.326 Tm [(methods,)-250(otherwise)-250(it)-250(is)-250(ignor)18(ed.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -222.465 Td [(129)]TJ + 139.477 -29.888 Td [(165)]TJ 0 g 0 G ET endstream endobj -1731 0 obj +2092 0 obj << -/Length 5619 +/Length 4580 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.14)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amn\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-360(subr)18(outine)-360(impl)1(ements)-360(a)-360(minimum)-360(absolute)-360(value)-359(r)18(eduction)-360(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td 
[(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +BT +/F75 9.9626 Tf 150.705 706.129 Td [(istop)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +/F84 9.9626 Tf 27.277 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(stopping)-250(criterion.)]TJ -2.371 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ 1.012 0 0 1 175.253 658.308 Tm [(V)91(alues:)-307(1:)-307(use)-247(the)-248(normwise)-247(backwar)18(d)-247(err)17(or)73(,)-247(2:)-307(use)-247(the)-248(scaled)-247(2-norm)-247(of)]TJ 1 0 0 1 175.611 646.353 Tm [(the)-250(r)18(esidual,)-250(3:)-310(use)-250(the)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm.)-310(Default:)-310(2.)]TJ 0 g 0 G +/F75 9.9626 Tf -24.906 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -22.619 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ + 0 -19.926 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +/F84 9.9626 Tf 9.654 0 Td [(The)-250(computed)-250(solution.)]TJ 15.252 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ 
-56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 218.688 0 Td [(psb)]TJ +ET +q +1 0 0 1 410.618 558.881 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 413.756 558.682 Td [(T)]TJ +ET +q +1 0 0 1 419.614 558.881 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 422.752 558.682 Td [(vect)]TJ +ET +q +1 0 0 1 444.301 558.881 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 447.439 558.682 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +/F75 9.9626 Tf -317.656 -19.925 Td [(iter)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ 
-65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F84 9.9626 Tf 20.164 0 Td [(The)-250(number)-250(of)-250(iterations)-250(performed.)]TJ 4.742 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ +/F75 9.9626 Tf -24.906 -19.925 Td [(err)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 
Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +/F84 9.9626 Tf 17.404 0 Td [(The)-250(conver)18(gence)-250(estimate)-250(on)-250(exit.)]TJ 7.502 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +/F75 9.9626 Tf -24.907 -19.925 Td [(cond)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +/F84 9.9626 Tf 0.985 0 0 1 177.434 403.265 Tm [(An)-253(estimate)-254(of)-253(the)-253(condition)-253(number)-254(of)-253(matrix)]TJ/F78 9.9626 Tf 1 0 0 1 382.801 403.265 Tm [(A)]TJ/F84 9.9626 Tf 0.985 0 0 1 390.119 403.265 Tm [(;)-253(only)-254(available)-253(with)-253(the)]TJ/F78 9.9626 Tf 1 0 0 1 175.696 391.309 Tm [(C)-45(G)]TJ/F84 
9.9626 Tf 17.001 0 Td [(method)-250(on)-250(r)18(eal)-250(data.)]TJ -17.086 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Returned)-249(as:)-310(a)-249(r)18(eal)-249(number)74(.)-310(A)-249(corr)18(ect)-250(r)18(esult)-249(will)-249(be)-249(gr)18(eater)-250(than)-249(or)-249(equal)]TJ 0.98 0 0 1 175.611 331.534 Tm [(to)-255(one;)-255(if)-255(speci\002ed)-254(for)-255(non-r)18(eal)-255(data,)-254(or)-255(an)-255(err)18(or)-254(occurr)18(ed,)-255(zer)18(o)-254(is)-255(r)18(eturned.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +/F75 9.9626 Tf 1 0 0 1 150.705 311.608 Tm [(info)]TJ 0 g 0 G +/F84 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(130)]TJ + 139.865 -173.35 Td [(166)]TJ 0 g 0 G ET endstream endobj -1735 0 obj +2100 0 obj << -/Length 4753 +/Length 8759 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td 
[(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(11.2)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 156.993 706.129 Td [(richardson)-250(\227)-250(Richardson)-250(Iteration)-250(Driver)-250(Routine)]TJ/F84 9.9626 Tf -57.406 -19.209 Td [(This)-250(subr)18(outine)-250(is)-250(a)-250(driver)-250(implementig)-250(a)-250(Richar)18(dson)-250(iteration)]TJ/F78 9.9626 Tf 111.157 -22.171 Td [(x)]TJ/F78 7.5716 Tf 5.2 -2.085 Td [(k)]TJ/F192 7.8896 Tf 3.589 0 Td [(+)]TJ/F84 7.5716 Tf 6.228 0 Td [(1)]TJ/F192 10.3811 Tf 7.176 2.085 Td [(=)]TJ/F78 9.9626 Tf 11.534 0 Td [(M)]TJ/F190 7.8896 Tf 9.674 4.115 Td [(\000)]TJ/F84 9.9626 Tf 6.726 -4.115 Td [(1)]TJ/F192 10.3811 Tf 5.105 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(b)]TJ/F190 10.3811 Tf 6.799 0 Td [(\000)]TJ/F78 9.9626 Tf 10.754 0 Td [(A)-42(x)]TJ/F78 7.5716 Tf 12.812 -2.085 Td [(k)]TJ/F192 10.3811 Tf 4.117 2.085 Td [(\051)-209(+)]TJ/F78 9.9626 Tf 16.636 0 Td [(x)]TJ/F78 7.5716 Tf 5.201 -2.085 Td [(k)]TJ/F84 9.9626 Tf 3.992 2.085 Td [(,)]TJ -231.084 -22.17 Td [(with)-250(the)-250(pr)18(econditioner)-250(operator)]TJ/F78 9.9626 Tf 147.599 0 Td [(M)]TJ/F84 9.9626 Tf 12.07 0 Td [(de\002ned)-250(in)-250(the)-250(pr)18(evious)-250(section.)]TJ -144.307 -12.082 Td [(The)-250(stopping)-250(criterion)-250(can)-250(take)-250(the)-250(following)-250(values:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +/F75 9.9626 Tf -14.944 -20.304 Td [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td 
[(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F84 9.9626 Tf 0.99 0 0 1 109.858 610.193 Tm [(normwise)-252(backwar)18(d)-253(err)18(or)-252(in)-253(the)-252(in\002nity)-253(norm;)-252(the)-253(iteration)-252(is)-253(stopped)-252(when)]TJ/F78 9.9626 Tf 1 0 0 1 218.894 581.636 Tm [(e)-15(r)-25(r)]TJ/F192 10.3811 Tf 15.14 0 Td [(=)]TJ/F190 10.3811 Tf 40.62 6.745 Td [(k)]TJ/F78 9.9626 Tf 5.34 0 Td [(r)]TJ/F78 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(k)]TJ +ET +q +1 0 0 1 246.191 584.127 cm +[]0 d 0 J 0.398 w 0 0 m 74.372 0 l S +Q +BT +/F192 10.3811 Tf 246.316 574.802 Td [(\050)]TJ/F190 10.3811 Tf 4.274 0 Td [(k)]TJ/F78 9.9626 Tf 5.938 0 Td [(A)]TJ/F190 10.3811 Tf 7.442 0 Td [(k)-24(k)]TJ/F78 9.9626 Tf 11.048 0 Td [(x)]TJ/F78 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.876 1.96 Td [(k)]TJ/F192 10.3811 Tf 7.376 0 Td [(+)]TJ/F190 10.3811 Tf 10.256 0 Td [(k)]TJ/F78 9.9626 Tf 5.44 0 Td [(b)]TJ/F190 10.3811 Tf 4.861 0 Td [(k)]TJ/F192 10.3811 Tf 5.44 0 Td [(\051)]TJ/F148 10.3811 Tf 8.236 6.834 Td [(<)]TJ/F78 9.9626 Tf 11.087 0 Td [(e)-80(p)-25(s)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F75 9.9626 Tf -235.842 -32.142 Td [(2)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td 
[(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +/F84 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F78 9.9626 Tf 136.209 -28.557 Td [(e)-15(r)-25(r)]TJ/F192 10.3811 Tf 15.141 0 Td [(=)]TJ/F190 10.3811 Tf 13.446 6.745 Td [(k)]TJ/F78 9.9626 Tf 5.34 0 Td [(r)]TJ/F78 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(k)]TJ +ET +q +1 0 0 1 273.365 523.428 cm +[]0 d 0 J 0.398 w 0 0 m 20.025 0 l S +Q +BT +/F190 10.3811 Tf 273.49 514.103 Td [(k)]TJ/F78 9.9626 Tf 5.439 0 Td [(b)]TJ/F190 10.3811 Tf 4.862 0 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F148 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F78 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ +/F75 9.9626 Tf -208.668 -32.082 Td [(3)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F84 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F78 9.9626 Tf 134.486 -28.556 Td [(e)-15(r)-25(r)]TJ/F192 10.3811 Tf 15.14 0 Td [(=)]TJ/F190 10.3811 Tf 15.17 6.744 Td [(k)]TJ/F78 
9.9626 Tf 5.34 0 Td [(r)]TJ/F78 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F190 10.3811 Tf 2.875 1.96 Td [(k)]TJ ET q -1 0 0 1 124.802 417.212 cm -0 0 318.804 27.895 re f +1 0 0 1 271.641 462.789 cm +[]0 d 0 J 0.398 w 0 0 m 23.472 0 l S +Q +BT +/F190 10.3811 Tf 271.766 453.464 Td [(k)]TJ/F78 9.9626 Tf 5.34 0 Td [(r)]TJ/F84 7.5716 Tf 4 -1.744 Td [(0)]TJ/F190 10.3811 Tf 4.408 1.744 Td [(k)]TJ/F84 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F148 10.3811 Tf 8.371 8.579 Td [(<)]TJ/F78 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.587 428.217 Tm [(The)-333(behaviour)-333(is)-333(contr)18(olled)-333(by)-333(the)-333(istop)-333(ar)18(gument)-333(\050see)-333(later\051.)-567(In)-333(the)-333(above)]TJ 0.98 0 0 1 99.895 416.261 Tm [(formulae,)]TJ/F78 9.9626 Tf 1 0 0 1 144.027 416.261 Tm [(x)]TJ/F78 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F84 9.9626 Tf 0.98 0 0 1 153.866 416.261 Tm [(is)-199(the)-199(tentat)1(ive)-199(solution)-199(and)]TJ/F78 9.9626 Tf 1 0 0 1 273.705 416.261 Tm [(r)]TJ/F78 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F192 10.3811 Tf 5.643 1.96 Td [(=)]TJ/F78 9.9626 Tf 11.086 0 Td [(b)]TJ/F190 10.3811 Tf 6.29 0 Td [(\000)]TJ/F78 9.9626 Tf 10.245 0 Td [(A)-42(x)]TJ/F78 7.5716 Tf 12.758 -1.96 Td [(i)]TJ/F84 9.9626 Tf 0.98 0 0 1 328.46 416.261 Tm [(the)-199(corr)19(esponding)-199(r)18(esidual)]TJ 1 0 0 1 99.895 404.306 Tm [(at)-250(the)]TJ/F78 9.9626 Tf 27.083 0 Td [(i)]TJ/F84 9.9626 Tf 2.964 0 Td [(-th)-250(iteration.)]TJ -28.305 -18.185 Td [(c)-175(a)-175(l)-174(l)-888(p)-113(s)-113(b)]TJ +ET +q +1 0 0 1 150.598 386.32 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F84 9.9626 Tf 154.71 386.121 Td [(r)-113(i)-113(c)-112(h)-113(a)-113(r)-113(d)-113(s)-113(o)-112(n)-247(\050)-166(a)-242(,)-255(p)-80(r)-81(e)-80(c)-335(,)-191(b)-206(,)-203(x)-231(,)-234(e)-60(p)-59(s)-293(,)-273(d)-98(e)-98(s)-97(c)]TJ +ET +q +1 0 0 1 334.087 386.32 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 
137.205 434.448 Td [(call)]TJ +/F84 9.9626 Tf 338.05 386.121 Td [(a)-370(,)-283(i)-108(n)-109(f)-108(o)-273(,)-57(&)]TJ -209.153 -11.956 Td [(&)-580(i)-69(t)-69(m)-70(a)-69(x)-313(,)-327(i)-151(t)-152(e)-151(r)-478(,)-281(e)-107(r)-106(r)-387(,)-321(i)-145(t)-146(r)-146(a)-145(c)-146(e)-466(,)-311(i)-135(s)-135(t)-136(o)-135(p)-269(\051)]TJ 0 g 0 G - [-525(psb_amn\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F75 9.9626 Tf -29.002 -28.653 Td [(T)90(ype:)]TJ 0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(3.)]TJ +/F75 9.9626 Tf -29.828 -20.431 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G + 0 -20.431 Td [(a)]TJ +0 g 0 G +/F84 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(sparse)-250(matrix)]TJ/F78 9.9626 Tf 178.414 0 Td [(A)]TJ/F84 9.9626 Tf 7.317 0 Td [(.)]TJ -170.787 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 306.627 cm -0 0 318.804 60.772 re f +1 0 0 1 309.258 257.028 cm +[]0 d 0 J 
0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 356.739 Td [(call)]TJ -0 g 0 G - [-525(psb_amn\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F145 9.9626 Tf 312.397 256.829 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 257.028 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 347.544 256.829 Td [(type)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - [(amn_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F75 9.9626 Tf -268.571 -20.431 Td [(prec)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F84 9.9626 Tf 24.04 0 Td [(The)-250(data)-250(str)8(uctur)18(e)-250(containing)-250(the)-250(pr)18(econditioner)74(.)]TJ 0.867 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 188.777 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 312.397 188.577 Td [(Tprec)]TJ +ET +q +1 0 0 1 339.176 188.777 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 342.314 188.577 Td [(type)]TJ 0 g 0 G - [-525(psb_amn\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F75 9.9626 Tf -263.34 -20.431 
Td [(b)]TJ 0 g 0 G - [(amn_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F84 9.9626 Tf 10.76 0 Td [(The)-250(RHS)-250(vector)74(.)]TJ 14.147 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 218.688 0 Td [(psb)]TJ +ET +q +1 0 0 1 359.808 120.525 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 362.947 120.326 Td [(T)]TJ +ET +q +1 0 0 1 368.804 120.525 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 371.943 120.326 Td [(vect)]TJ +ET +q +1 0 0 1 393.492 120.525 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 396.63 120.326 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -222.465 Td [(131)]TJ + -153.272 -29.888 Td [(167)]TJ 0 g 0 G ET endstream endobj -1742 0 obj +2109 0 obj << -/Length 5776 +/Length 6806 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.15)-1000(psb)]TJ +/F75 9.9626 Tf 150.705 706.129 Td [(x)]TJ +0 g 0 G +/F84 9.9626 Tf 9.654 0 Td [(The)-250(initial)-250(guess.)]TJ 15.252 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 218.688 0 Td [(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 410.618 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q 
BT -/F51 11.9552 Tf 207.803 706.129 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(reduction)]TJ -0 g 0 G +/F145 9.9626 Tf 413.756 658.308 Td [(T)]TJ +ET +q +1 0 0 1 419.614 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 422.752 658.308 Td [(vect)]TJ +ET +q +1 0 0 1 444.301 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 447.439 658.308 Td [(type)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_nrm2\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -22.401 Td [(This)-297(subr)18(outine)-296(implements)-297(a)-297(2-norm)-296(value)-297(r)18(eduction)-297(operation)-296(based)-297(on)]TJ -14.944 -11.955 Td [(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ +/F75 9.9626 Tf -317.656 -19.427 Td [(eps)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F84 9.9626 Tf 20.164 0 Td [(The)-250(stopping)-250(tolerance.)]TJ 4.742 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +/F75 9.9626 Tf -24.906 -19.427 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 571.832 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F75 9.9626 Tf 174.207 571.633 Td [(a)]TJ 0 g 0 G +/F84 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 
9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 524.012 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 363.206 523.813 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 524.012 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 387.893 523.813 Td [(type)]TJ 0 g 0 G - 0 -20.408 Td [(ctxt)]TJ +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F75 9.9626 Tf -258.11 -19.428 Td [(itmax)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +/F84 9.9626 Tf 30.127 0 Td [(The)-250(maximum)-250(number)-250(of)-250(iterations)-250(to)-250(perform.)]TJ -5.221 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F78 9.9626 Tf 38.57 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F192 10.3811 Tf 27.744 0 Td [(=)]TJ/F84 9.9626 Tf 10.961 0 Td [(1000.)]TJ -77.275 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F78 9.9626 Tf 142.35 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F190 10.3811 Tf 27.743 0 Td [(\025)]TJ/F84 9.9626 Tf 10.962 0 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td 
[(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-370(as:)-551(a)-371(r)18(eal)-370(variable,)-401(which)-370(may)-371(be)-370(a)-371(scalar)74(,)-400(or)-371(a)-370(rank)-371(1)-370(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +/F75 9.9626 Tf -205.961 -19.428 Td [(itrace)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.408 Td [(root)]TJ +/F84 9.9626 Tf 0.98 0 0 1 180.583 425.182 Tm [(If)]TJ/F148 10.3811 Tf 1 0 0 1 189.481 425.182 Tm [(>)]TJ/F84 9.9626 Tf 0.98 0 0 1 200.443 425.182 Tm [(0)-229(print)-228(out)-229(an)-229(informational)-228(message)-229(about)-229(conver)19(gence)-229(every)]TJ/F78 9.9626 Tf 1 0 0 1 470.511 425.182 Tm [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F84 9.9626 Tf -294.9 -11.955 Td [(iterations.)-310(If)]TJ/F192 10.3811 Tf 56.313 0 Td [(=)]TJ/F84 9.9626 Tf 10.962 0 Td [(0)-250(print)-250(a)-250(message)-250(in)-250(case)-250(of)-250(conver)18(gence)-250(failur)18(e.)]TJ -67.275 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F78 9.9626 Tf 38.57 0 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F192 10.3811 Tf 26.797 0 Td [(=)]TJ/F190 10.3811 Tf 11.086 0 Td [(\000)]TJ/F84 9.9626 Tf 8.194 0 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 
9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F75 9.9626 Tf -109.553 -31.383 Td [(istop)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ +/F84 9.9626 Tf 27.277 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(stopping)-250(criterion.)]TJ -2.371 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.186 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(in)]TJ/F84 9.9626 Tf 9.404 0 Td [(.)]TJ 1.012 0 0 1 175.253 286.204 Tm [(V)91(alues:)-307(1:)-307(use)-247(the)-248(normwise)-247(backwar)18(d)-247(err)17(or)73(,)-247(2:)-307(use)-247(the)-248(scaled)-247(2-norm)-247(of)]TJ 1 0 0 1 175.611 274.248 Tm [(the)-250(r)18(esidual,)-250(3:)-310(use)-250(the)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm.)-310(Default:)-310(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 
-11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +/F75 9.9626 Tf -24.906 -19.427 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -32.364 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ + 0 -19.427 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ +/F84 9.9626 Tf 9.654 0 Td 
[(The)-250(computed)-250(solution.)]TJ 15.252 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(inout)]TJ/F84 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F145 9.9626 Tf 218.688 0 Td [(psb)]TJ +ET +q +1 0 0 1 410.618 187.773 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 413.756 187.573 Td [(T)]TJ +ET +q +1 0 0 1 419.614 187.773 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 422.752 187.573 Td [(vect)]TJ +ET +q +1 0 0 1 444.301 187.773 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F145 9.9626 Tf 447.439 187.573 Td [(type)]TJ 0 g 0 G +/F84 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 0 -20.408 Td [(dat)]TJ +/F75 9.9626 Tf -317.656 -19.427 Td [(iter)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(2-norm)-250(r)18(eduction.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.781 0 Td [(.)]TJ +/F84 9.9626 Tf 20.164 0 Td [(The)-250(number)-250(of)-250(iterations)-250(performed.)]TJ 4.742 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.74 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.78 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.801 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - 79.264 -29.887 Td [(132)]TJ + 139.477 -29.888 Td [(168)]TJ 0 g 0 G ET endstream endobj -1746 0 obj +2113 0 obj << -/Length 5783 +/Length 1090 >> stream 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 
39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(variable,)-250(which)-250(may)-250(be)-250(a)-250(scalar)74(,)-250(or)-250(a)-250(rank)-250(1)-250(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +BT +/F75 9.9626 Tf 99.895 706.129 Td [(err)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F84 9.9626 Tf 17.405 0 Td [(The)-250(conver)18(gence)-250(estimate)-250(on)-250(exit.)]TJ 7.502 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F75 9.9626 Tf -24.907 -19.925 Td [(info)]TJ 0 g 0 G - [-500(This)-345(r)18(eduction)-345(is)-346(appr)18(opriate)-345(to)-345(compute)-345(the)-345(r)18(esults)-346(of)-345(multiple)-345(\050local\051)]TJ 12.453 -11.955 Td 
[(NRM2)-250(operations)-250(at)-250(the)-250(same)-250(time.)]TJ +/F84 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf -31.741 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf -26.779 -11.956 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(out)]TJ/F84 9.9626 Tf 14.944 0 Td [(.)]TJ -47.133 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ + 139.866 -500.124 Td [(169)]TJ 0 g 0 G - [-500(Denoting)-249(by)]TJ/F52 9.9626 Tf 69.789 0 Td [(d)-40(a)-25(t)]TJ/F52 7.5716 Tf 13.536 -1.96 Td [(i)]TJ/F54 9.9626 Tf 5.23 1.96 Td [(the)-249(value)-249(of)-248(the)-249(variable)]TJ/F52 9.9626 Tf 108.808 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 15.973 0 Td [(on)-249(pr)18(ocess)]TJ/F52 9.9626 Tf 49.078 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(,)-249(the)-249(output)]TJ/F52 9.9626 Tf 53.71 0 Td [(r)-17(e)-25(s)]TJ/F54 9.9626 Tf -306.635 -11.955 Td [(is)-250(equivalent)-250(to)-250(the)-250(computation)-250(of)]TJ/F52 9.9626 Tf 124.796 -25.468 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 15.061 0 Td [(=)]TJ/F1 9.9626 Tf 11.086 10.922 Td [(r)]TJ ET -q -1 0 0 1 285.832 490.532 cm -[]0 d 0 J 0.389 w 0 0 m 30.512 0 l S -Q + +endstream +endobj +2124 0 obj +<< +/Length 8172 +>> +stream +0 g 0 G +0 g 0 G BT -/F96 13.9477 Tf 285.957 477.344 Td [(\345)]TJ/F52 7.5716 Tf 4.245 -8.764 Td [(i)]TJ/F52 9.9626 Tf 8.364 10.836 Td [(d)-40(a)-25(t)]TJ/F54 7.5716 Tf 13.495 3.473 Td [(2)]TJ/F52 7.5716 Tf 0.041 -7.027 Td [(i)]TJ/F54 9.9626 Tf 4.243 3.554 Td [(,)]TJ -191.543 -30.806 Td [(with)-250(car)18(e)-250(taken)-250(to)-250(avoid)-250(unnecessary)-250(over\003ow)92(.)]TJ +/F75 14.3462 Tf 150.705 706.042 Td [(12)-1000(Extensions)]TJ/F84 9.9626 Tf 0.987 0 0 1 150.396 683.34 Tm 
[(The)-254(EXT)75(,)-254(CUDA)-254(and)-253(RSB)-254(subdir)18(ectories)-254(contains)-254(a)-254(set)-254(of)-253(extensions)-254(to)-254(the)-254(base)]TJ 1.02 0 0 1 150.705 671.385 Tm [(library)109(.)-500(Th)1(e)-311(extensions)-310(pr)18(ovide)-311(additi)1(onal)-311(storage)-310(formats)-310(beyond)-310(the)-311(ones)]TJ 1 0 0 1 150.705 659.43 Tm [(alr)18(eady)-250(contained)-250(in)-250(the)-250(base)-250(library)111(,)-250(as)-250(well)-250(as)-250(interfaces)-250(to:)]TJ 0 g 0 G - -12.453 -19.926 Td [(3.)]TJ +/F75 9.9626 Tf 0 -19.102 Td [(SPGPU)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +/F84 9.9626 Tf 1.02 0 0 1 189.997 640.328 Tm [(a)-255(CUDA)-255(library)-255(originally)-254(published)-255(as)]TJ +0 1 0 0 k 0 1 0 0 K +/F145 9.9626 Tf 1 0 0 1 369.934 640.328 Tm [(https://code.google.com/)]TJ -194.323 -11.955 Td [(p/spgpu/)]TJ 0 g 0 G - -12.453 -19.925 Td [(4.)]TJ +/F84 9.9626 Tf 1.02 0 0 1 221.323 628.373 Tm [(and)-381(now)-380(included)-381(in)-381(the)]TJ/F145 9.9626 Tf 1 0 0 1 340.526 628.373 Tm [(cuda)]TJ/F84 9.9626 Tf 1.02 0 0 1 365.316 628.373 Tm [(subdir)73(,)-415(for)-381(computations)-380(on)]TJ 1 0 0 1 175.611 616.418 Tm [(NVIDIA)-250(GPUs;)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 
Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 124.802 333.043 cm -0 0 318.804 27.895 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F75 9.9626 Tf -24.906 -19.514 Td [(LIBRSB)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 137.205 350.279 Td [(call)]TJ +0 1 0 0 k 0 1 0 0 K +/F145 9.9626 Tf 41.514 0 Td [(http://sourceforge.net/projects/librsb/)]TJ 0 g 0 G - [-525(psb_nrm2\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F84 9.9626 Tf 1.02 0 0 1 396.202 596.904 Tm [(,)-324(for)-309(computations)-308(on)]TJ 1 0 0 1 175.611 584.949 Tm [(multicor)18(e)-250(parallel)-250(machines.)]TJ 1.02 0 0 1 150.396 565.847 Tm [(The)-350(infrastr)8(uctur)18(e)-350(laid)-350(out)-350(in)-349(the)-350(base)-350(library)-350(to)-350(allow)-349(for)-350(these)-350(extensions)]TJ 1.02 0 0 1 150.705 553.891 Tm [(is)-306(detailed)-306(in)-306(the)-306(r)18(efer)17(ences)-306([)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 281.586 553.891 Tm [(21)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ + 1.02 0 0 1 291.549 553.891 Tm [(,)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 297.199 553.891 Tm [(22)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + 1.02 0 0 1 307.161 553.891 Tm [(,)]TJ +1 0 0 rg 1 0 0 RG + 1 0 0 1 312.812 553.891 Tm [(11)]TJ 0 g 0 G + 1.02 0 0 1 322.774 553.891 Tm [(];)-336(the)-306(CUDA-speci\002c)-306(data)-306(formats)-306(ar)17(e)]TJ 1 0 0 1 150.705 541.936 Tm [(described)-250(in)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(23)]TJ +0 g 0 G + [(].)]TJ/F75 11.9552 Tf 0 -28.94 Td [(12.1)-1000(Using)-250(the)-250(extensions)]TJ/F84 9.9626 Tf 1.01 0 0 1 150.316 494.032 Tm [(A)-249(sample)-248(application)-249(using)-249(the)-249(PSBLAS)-248(extensions)-249(will)-249(contain)-248(the)-249(following)]TJ 1 0 0 1 150.705 482.077 Tm [(steps:)]TJ +0 g 
0 G + 13.888 -19.102 Td [(\225)]TJ +0 g 0 G +/F145 9.9626 Tf 11.018 0 Td [(USE)]TJ/F84 9.9626 Tf 18.182 0 Td [(the)-250(appr)18(opriat)-250(modules)-250(\050)]TJ/F145 9.9626 Tf 110.036 0 Td [(psb_ext_mod)]TJ/F84 9.9626 Tf 57.534 0 Td [(,)]TJ/F145 9.9626 Tf 4.981 0 Td [(psb_cuda_mod)]TJ/F84 9.9626 Tf 62.764 0 Td [(\051;)]TJ 0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(5.)]TJ + -264.515 -19.514 Td [(\225)]TJ 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ + 0.98 0 0 1 175.611 443.461 Tm [(Declar)18(e)-214(a)]TJ/F78 9.9626 Tf 0.98 0 0 1 217.329 443.461 Tm [(mold)]TJ/F84 9.9626 Tf 0.98 0 0 1 238.952 443.461 Tm [(variable)-215(of)-214(the)-215(necessary)-214(type)-215(\050e.g.)]TJ/F145 9.9626 Tf 1 0 0 1 388.614 443.461 Tm [(psb_d_ell_sparse_mat)]TJ/F84 9.9626 Tf 0.98 0 0 1 493.22 443.461 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 175.611 431.506 Tm [(psb_d_hlg_sparse_mat)]TJ/F84 9.9626 Tf 104.607 0 Td [(,)]TJ/F145 9.9626 Tf 4.982 0 Td [(psb_d_vect_cuda)]TJ/F84 9.9626 Tf 78.455 0 Td [(\051;)]TJ +0 g 0 G + -199.062 -19.513 Td [(\225)]TJ +0 g 0 G + 0.98 0 0 1 175.611 411.993 Tm [(Pass)-215(the)-215(mold)-214(variable)-215(to)-215(the)-215(base)-214(library)-215(interface)-215(wher)19(e)-215(needed)-215(to)-215(ensur)19(e)]TJ 1 0 0 1 175.611 400.037 Tm [(the)-250(appr)18(opriate)-250(dynamic)-250(type.)]TJ 1.02 0 0 1 150.705 380.935 Tm [(Suppose)-302(you)-301(want)-302(to)-301(use)-302(the)-302(CUDA-enabled)-301(ELLP)90(ACK)-302(data)-301(str)8(uctur)17(e;)-330(you)]TJ 1.02 0 0 1 150.286 368.98 Tm [(would)-323(u)1(se)-323(a)-322(piece)-323(of)-322(code)-323(like)-322(this)-323(\050and)-322(don't)-323(for)18(get,)-342(you)-322(need)-323(CUDA-side)]TJ 1 0 0 1 150.426 357.025 Tm 
[(vectors)-250(along)-250(with)-250(the)-250(matrices\051:)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 124.802 222.458 cm -0 0 318.804 60.772 re f +1 0 0 1 150.705 120.326 cm +0 0 343.711 225.156 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 127.791 272.57 Td [(call)]TJ +/F233 8.9664 Tf 153.694 334.821 Td [(program)]TJ 0 g 0 G - [-525(psb_nrm2\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(my_cuda_test)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(psb_collective_start_,&)]TJ 23.536 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.414 -10.959 Td [(use)]TJ 0 g 0 G - [(nrm2_request\051)]TJ -14.122 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ + [-525(psb_base_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ + 0 -10.959 Td [(use)]TJ 0 g 0 G - [-525(psb_nrm2\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(psb_util_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.958 Td [(use)]TJ 0 g 0 G - [(nrm2_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(psb_ext_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(use)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -138.296 Td [(133)]TJ + [-525(psb_cuda_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1757 0 obj -<< -/Length 5352 ->> -stream +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ 0 g 0 G + [(\050psb_dspmat_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 
RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.16)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(snd)-250(\227)-250(Send)-250(data)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_snd\050ctxt,)-525(dat,)-525(dst,)-525(m\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(sends)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ + [-525(a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ + [-525(agpu)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [(\050psb_d_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(dat)]TJ + [-525(xg,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(data)-250(to)-250(be)-250(sent.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td 
[(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.956 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F52 9.9626 Tf 197.687 0 Td [(m)]TJ/F54 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ + [-525(bg)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(dst)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -21.918 Td [(real)]TJ 0 g 0 G -/F54 9.9626 Tf 18.809 0 Td [(Destination)-250(pr)18(ocess.)]TJ 6.098 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(d)-25(s)-25(t)]TJ/F61 10.3811 Tf 15.689 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1.)]TJ + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -241.89 -31.88 Td [(m)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 
[-525(allocatable)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(m)]TJ/F61 10.3811 Tf 10.767 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.955 Td [(When)]TJ/F52 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F52 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 11.9552 Tf 0 -21.918 Td [(Notes)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ + [-525(xtmp\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 
[-500(This)-292(subr)18(outine)-292(impl)1(ies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.454 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(destination)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 158.309 0 Td [(d)-25(s)-25(t)]TJ/F54 9.9626 Tf 12.797 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ 0 g 0 G - -31.629 -104.573 Td [(134)]TJ + [(\050psb_d_vect_cuda\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1762 0 obj -<< -/Length 5356 ->> -stream +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-3675(::)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.17)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(rcv)-250(\227)-250(Receive)-250(data)]TJ + [-525(vmold)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_rcv\050ctxt,)-525(dat,)-525(src,)-525(m\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(r)18(eceives)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ + [(\050psb_d_elg_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(aelg)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ + [(\050psb_ctxt_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td 
[(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(src)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 17.704 0 Td [(Sour)18(ce)-250(pr)18(ocess.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(s)-15(r)-17(c)]TJ/F61 10.3811 Tf 15.141 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1.)]TJ + [-525(ctxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -241.342 -31.881 Td [(m)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(m)]TJ/F61 10.3811 Tf 10.767 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.956 Td [(When)]TJ/F52 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F52 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-6825(::)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(dat)]TJ + [-525(iam,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(The)-250(data)-250(to)-250(be)-250(r)18(eceived.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td 
[(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.955 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F52 9.9626 Tf 197.687 0 Td [(m)]TJ/F54 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ + [-525(np)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -32.876 Td [(call)]TJ 0 g 0 G - [-500(This)-292(subr)18(outine)-291(implies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.453 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(sour)18(ce)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 137.538 0 Td [(s)-15(r)-17(c)]TJ/F54 9.9626 Tf 12.249 0 Td [(.)]TJ + [-525(psb_init\050ctxt\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -10.31 -104.573 Td [(135)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ 0 g 0 G -ET - -endstream -endobj -1769 0 obj -<< -/Length 6241 ->> -stream + [-525(psb_info\050ctxt,iam,np\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ 0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td [(8)-1000(Error)-250(handling)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-382(PSBLAS)-382(library)-381(err)18(or)-382(handling)-382(policy)-382(has)-382(been)-382(comple)1(tely)-382(r)18(ewritten)-382(in)]TJ 0 -11.955 Td [(version)-359(2.0.)-638(The)-359(idea)-359(behind)-360(the)-359(design)-359(of)-359(this)-359(new)-360(err)18(or)-359(handling)-359(strategy)]TJ 0 -11.955 Td 
[(is)-303(to)-303(keep)-303(err)18(or)-303(messages)-303(on)-303(a)-303(stack)-303(allowing)-303(the)-303(user)-303(to)-303(trace)-303(back)-303(up)-303(to)-303(the)]TJ 0 -11.955 Td [(point)-317(wher)18(e)-318(the)-317(\002rst)-317(err)18(or)-318(message)-317(has)-318(been)-317(generated.)-512(Every)-317(r)18(outine)-318(in)-317(the)]TJ 0 -11.956 Td [(PSBLAS-2.0)-336(library)-336(has,)-358(as)-336(last)-337(non-optional)-336(ar)18(gument,)-358(an)-336(integer)]TJ/F59 9.9626 Tf 298.678 0 Td [(info)]TJ/F54 9.9626 Tf 24.271 0 Td [(vari-)]TJ -322.949 -11.955 Td [(able;)-364(whenever)74(,)-344(inside)-326(the)-326(r)18(outine,)-345(an)-326(err)18(or)-325(is)-326(detected,)-345(this)-326(variable)-326(is)-325(set)-326(to)]TJ 0 -11.955 Td [(a)-384(value)-384(corr)18(esponding)-384(to)-384(a)-384(speci\002c)-384(err)18(or)-384(code.)-711(Then)-384(this)-384(err)18(or)-384(code)-384(is)-384(also)]TJ 0 -11.955 Td [(pushed)-274(on)-273(the)-274(err)18(or)-274(stack)-274(and)-273(then)-274(either)-274(contr)18(ol)-274(is)-273(r)18(eturned)-274(to)-274(the)-273(caller)-274(r)18(ou-)]TJ 0 -11.955 Td [(tine)-342(or)-342(the)-342(execution)-343(is)-342(aborted,)-365(depending)-342(on)-342(the)-342(users)-342(choice.)-587(At)-342(the)-342(time)]TJ 0 -11.955 Td [(when)-243(the)-242(execution)-243(is)-242(aborted,)-244(an)-243(err)18(or)-242(message)-243(is)-243(p)1(rinted)-243(on)-243(standar)18(d)-242(output)]TJ 0 -11.956 Td [(with)-257(a)-256(level)-257(of)-256(verbosity)-257(than)-256(can)-257(be)-256(chosen)-257(by)-257(the)-256(user)74(.)-330(If)-256(the)-257(execution)-257(is)-256(not)]TJ 0 -11.955 Td [(aborted,)-259(then,)-259(the)-257(caller)-257(r)18(outine)-258(checks)-257(the)-257(value)-257(r)18(eturned)-257(in)-258(the)]TJ/F59 9.9626 Tf 284.621 0 Td [(info)]TJ/F54 9.9626 Tf 23.484 0 Td [(variable)]TJ -308.105 -11.955 Td 
[(and,)-290(if)-282(not)-282(zer)18(o,)-291(an)-282(err)18(or)-282(condition)-282(is)-282(raised.)-407(This)-282(pr)18(ocess)-282(continues)-282(on)-282(all)-282(the)]TJ 0 -11.955 Td [(levels)-203(of)-203(nested)-203(calls)-203(until)-203(the)-203(level)-203(wher)18(e)-202(the)-203(user)-203(decides)-203(to)-203(abort)-203(the)-203(pr)18(ogram)]TJ 0 -11.955 Td [(execution.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-286(5)]TJ + [-525(psb_cuda_init\050ctxt,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-285(shows)-286(the)-286(layou)1(t)-286(of)-286(a)-285(generic)]TJ/F59 9.9626 Tf 172.064 0 Td [(psb_foo)]TJ/F54 9.9626 Tf 39.458 0 Td [(r)18(outine)-286(with)-285(r)18(espect)-286(to)-286(the)]TJ -226.466 -11.956 Td [(PSBLAS-2.0)-258(err)18(or)-259(handling)-258(policy)111(.)-335(It)-258(is)-258(possible)-259(to)-258(see)-258(how)92(,)-261(whenever)-258(an)-258(err)18(or)]TJ 0 -11.955 Td [(condition)-298(is)-298(detected,)-311(the)]TJ/F59 9.9626 Tf 114.879 0 Td [(info)]TJ/F54 9.9626 Tf 23.893 0 Td [(variable)-298(is)-299(set)-298(to)-298(the)-298(corr)18(esponding)-299(err)18(or)-298(code)]TJ -138.772 -11.955 Td [(which)-309(is,)-324(then,)-324(pushed)-310(on)-309(top)-309(of)-310(the)-309(stack)-309(by)-309(means)-310(of)-309(the)]TJ/F59 9.9626 Tf 265.277 0 Td [(psb_errpush)]TJ/F54 9.9626 Tf 57.534 0 Td [(.)-488(An)]TJ -322.811 -11.955 Td [(err)18(or)-325(condition)-326(may)-325(be)-326(dir)18(ectl)1(y)-326(detected)-325(inside)-326(a)-325(r)18(outine)-325(or)-326(indir)18(ectly)-325(check-)]TJ 0 -11.955 Td [(ing)-331(the)-331(err)18(or)-331(code)-331(r)18(eturned)-331(r)18(eturned)-331(by)-331(a)-331(called)-331(r)18(outine.)-553(Whenever)-331(an)-331(err)18(or)]TJ 0 -11.956 Td [(is)-253(encounter)18(ed,)-255(after)-253(it)-254(has)-253(been)-254(pushed)-253(on)-254(st)1(ack,)-255(the)-253(pr)18(ogram)-254(execution)-253(skips)]TJ 0 -11.955 Td 
[(to)-264(a)-265(point)-264(wher)18(e)-264(the)-265(err)18(or)-264(condition)-264(is)-264(handled;)-272(the)-264(err)18(or)-265(condition)-264(is)-264(handled)]TJ 0 -11.955 Td [(either)-336(by)-336(r)18(eturning)-336(contr)18(ol)-336(to)-336(the)-336(caller)-335(r)17(o)1(utine)-336(or)-336(by)-336(calling)-336(the)]TJ/F59 9.9626 Tf 291.408 0 Td [(psb\134_error)]TJ/F54 9.9626 Tf -291.408 -11.955 Td [(r)18(outine)-273(which)-274(prints)-273(the)-274(content)-273(of)-273(the)-274(err)18(or)-273(stack)-274(and)-273(aborts)-273(the)-274(pr)18(ogram)-273(ex-)]TJ 0 -11.955 Td [(ecution,)-373(accor)18(ding)-348(to)-348(the)-348(choice)-348(made)-348(by)-348(the)-348(user)-348(with)]TJ/F59 9.9626 Tf 252.305 0 Td [(psb_set_erraction)]TJ/F54 9.9626 Tf 88.915 0 Td [(.)]TJ -341.22 -11.955 Td [(The)-297(default)-296(is)-297(to)-296(print)-297(the)-297(err)18(or)-296(and)-297(terminate)-296(the)-297(pr)18(ogram,)-308(but)-297(the)-297(user)-296(may)]TJ 0 -11.956 Td [(choose)-250(to)-250(handle)-250(the)-250(err)18(or)-250(explicitly)111(.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-347(6)]TJ + [-525(iam\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-348(r)18(eports)-347(a)-347(sample)-347(err)18(or)-348(message)-347(generated)-347(by)-348(the)-347(PSBLAS-2.0)-347(li-)]TJ -14.944 -11.955 Td [(brary)111(.)-539(This)-327(err)18(or)-326(has)-327(been)-326(generated)-327(by)-326(the)-326(fact)-327(that)-326(the)-327(user)-326(has)-327(chosen)-326(the)]TJ 0 -11.955 Td [(invalid)-379(\223FOO\224)-380(stor)1(a)-1(g)1(e)-380(format)-379(to)-379(r)18(epr)18(esent)-380(the)-379(sparse)-379(matrix.)-698(Fr)18(om)-380(this)-379(er)18(-)]TJ 0 -11.955 Td [(r)18(or)-394(message)-393(it)-394(is)-393(possible)-394(to)-394(se)1(e)-394(that)-394(the)-393(err)18(or)-394(has)-393(been)-394(detected)-394(inside)-393(the)]TJ/F59 9.9626 Tf 0 -11.955 Td [(psb_cest)]TJ/F54 9.9626 Tf 45.361 0 Td [(subr)18(outine)-353(called)-353(by)]TJ/F59 9.9626 Tf 95.326 0 Td 
[(psb_spasb)]TJ/F54 9.9626 Tf 50.591 0 Td [(...)-619(by)-354(pr)18(ocess)-353(0)-353(\050i.e.)-619(the)-353(r)18(oot)-354(pr)18(o-)]TJ -191.278 -11.956 Td [(cess\051.)]TJ 0 g 0 G - 164.384 -198.123 Td [(136)]TJ +/F84 9.9626 Tf 151.98 -58.082 Td [(170)]TJ 0 g 0 G ET endstream endobj -1775 0 obj +2138 0 obj << -/Length 6642 +/Length 9201 >> stream 0 g 0 G 0 g 0 G -0 g 0 G 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG q -1 0 0 1 99.895 417.212 cm -0 0 343.711 292.902 re f +1 0 0 1 99.895 421.197 cm +0 0 343.711 290.909 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F59 9.9626 Tf 102.884 698.757 Td [(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_foo\050some)-525(args,)-525(info\051)]TJ 0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 9.9626 Tf 15.691 -11.956 Td [(!...)]TJ +BT +/F279 8.9664 Tf 112.299 701.446 Td [(!)-525(My)-525(own)-525(home-grown)-525(matrix)-525(generator)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -11.955 Td [(if)]TJ +/F233 8.9664 Tf 0 -10.958 Td [(call)]TJ 0 g 0 G - [(\050error)-525(detected\051)]TJ + [-525(gen_matrix\050ctxt,)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ + [(idim)]TJ +0 g 0 G + [(,desc_a,a,x,info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 15.691 -11.955 Td [(info)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(/=)]TJ 0 g 0 G - [(errcode1)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_errpush\050)]TJ -0.25 0.44 0.63 rg 0.25 0.44 0.63 RG - [(\015psb_foo\015)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 
[(,)-525(errcode1\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(goto)]TJ + [-525(goto)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG [-525(9999)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(end)-525(if)]TJ + 0 -21.918 Td [(call)]TJ 0 g 0 G -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 9.9626 Tf 0 -11.956 Td [(!...)]TJ + [-525(a%cscnv\050agpu,info,mold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -11.955 Td [(call)]TJ + [(aelg\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_bar\050some)-525(args,)-525(info\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ + 0 -10.959 Td [(if)]TJ 0 g 0 G - [(\050info)-525(.ne.)-525(zero\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 15.691 -11.955 Td [(info)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(/=)]TJ 0 g 0 G - [(errcode2)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G - [-525(psb_errpush\050)]TJ -0.25 0.44 0.63 rg 0.25 0.44 0.63 RG - [(\015psb_foo\015)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(,)-525(errcode2\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.956 Td [(goto)]TJ + [-525(goto)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG [-525(9999)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(end)-525(if)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 9.9626 Tf 0 -11.955 Td [(!...)]TJ + 0 -10.959 Td [(xtmp)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.25 
0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf -15.691 -11.955 Td [(9999)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(continue)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 15.691 -11.955 Td [(if)]TJ + [-525(x%get_vect\050\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(\050err_act)-525(.eq.)-525(act_abort\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ + 0 -10.959 Td [(call)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 15.691 -11.955 Td [(call)]TJ + [-525(xg%bld\050xtmp,mold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-525(psb_error\050icontxt\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.956 Td [(return)]TJ + [(vmold\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(else)]TJ + 0 -10.959 Td [(call)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(return)]TJ + [-525(bg%bld\050size\050xtmp\051,mold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(end)-525(if)]TJ + [(vmold\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 0 -21.918 Td [(!)-525(Do)-525(sparse)-525(MV)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -23.91 Td [(end)-525(subroutine)]TJ +/F233 8.9664 Tf 0 -10.958 Td [(call)]TJ 0 g 0 G - [-525(psb_foo)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(psb_spmm\050done,agpu,xg,dzero,bg,desc_a,info\051)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + -9.415 -32.877 Td [(9999)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -41.729 Td [(Listing)-289(5:)-387(The)-289(layout)-289(of)-289(a)-289(generic)]TJ/F59 9.9626 Tf 149.96 0 Td [(psb)]TJ -ET -q -1 0 0 1 266.174 382.258 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 
9.9626 Tf 269.312 382.059 Td [(foo)]TJ/F54 9.9626 Tf 18.568 0 Td [(r)18(outine)-289(with)-289(r)18(espect)-288(to)-289(PSBLAS-2.0)]TJ -187.985 -11.955 Td [(err)18(or)-250(handling)-250(policy)111(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(continue)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG -/F59 9.9626 Tf 0 -19.609 Td [(==========================================================)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.959 Td [(if)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ + [-525(==)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(4010)]TJ + [-525(0)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ + [-525(then)]TJ 0 g 0 G - [-525(subroutine:)-525(df_sample)]TJ 0 -11.955 Td [(Error)-525(from)-525(call)-525(to)-525(subroutine)-525(mat)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(dist)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 14.122 -10.959 Td [(write)]TJ 0 g 0 G + [(\050)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.955 Td [(==========================================================)]TJ -0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ + [(*)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ + [(,)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(4010)]TJ + [(*)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [-525(\01542\015)]TJ 0 g 0 G - [-525(subroutine:)-525(mat_distv)]TJ 0 -11.955 Td 
[(Error)-525(from)-525(call)-525(to)-525(subroutine)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(psb_spasb)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.956 Td [(==========================================================)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(else)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 14.122 -10.959 Td [(write)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ + [(\050)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(4010)]TJ + [(*)]TJ 0 g 0 G + [(,)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [-525(\015Something)-525(went)-525(wrong)-525(\015)]TJ 0 g 0 G - [-525(subroutine:)-525(psb_spasb)]TJ 0 -11.955 Td [(Error)-525(from)-525(call)-525(to)-525(subroutine)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(psb_cest)]TJ + [(,info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.955 Td [(==========================================================)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(end)-525(if)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -32.876 Td [(call)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ + [-525(psb_cuda_exit\050\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(136)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(\051)]TJ + [-525(psb_exit\050ctxt\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 
[-525(in)]TJ + 0 -10.959 Td [(stop)]TJ 0 g 0 G - [-525(subroutine:)-525(psb_cest)]TJ 0 -11.955 Td [(Format)-525(FOO)-525(is)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(unknown)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(program)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.956 Td [(==========================================================)]TJ + [-525(my_cuda_test)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - 0 -11.955 Td [(Aborting...)]TJ +/F84 9.9626 Tf 1.02 0 0 1 114.839 403.191 Tm [(A)-337(full)-338(example)-337(of)-337(this)-338(strategy)-337(can)-338(be)-337(seen)-337(in)-338(the)]TJ/F145 9.9626 Tf 1 0 0 1 344.519 403.191 Tm [(test/ext/kernel)]TJ/F84 9.9626 Tf 1.02 0 0 1 426.402 403.191 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 99.895 391.236 Tm [(test/cuda/kernel)]TJ/F84 9.9626 Tf 1.008 0 0 1 186.064 391.236 Tm [(subdir)18(ectories,)-248(wher)18(e)-247(we)-247(pr)18(ovide)-248(sample)-247(pr)18(ograms)-247(to)-248(test)]TJ 1.003 0 0 1 99.895 379.281 Tm [(the)-250(speed)-249(of)-250(the)-250(sparse)-249(matrix-vector)-250(pr)18(oduct)-249(with)-250(the)-250(various)-249(data)-250(str)8(uctur)18(es)]TJ 1 0 0 1 99.895 367.325 Tm [(included)-250(in)-250(the)-250(library)111(.)]TJ/F75 11.9552 Tf 0 -29.238 Td [(12.2)-1000(Extensions')-250(Data)-250(Structures)]TJ/F84 9.9626 Tf 0.995 0 0 1 99.507 319.088 Tm [(Access)-250(to)-250(the)-250(facilities)-250(pr)18(ovided)-250(by)-250(the)-250(EXT)-250(library)-251(is)-250(mainly)-250(achieved)-250(thr)18(ough)]TJ 1.02 0 0 1 99.895 307.133 Tm [(the)-335(data)-336(types)-335(that)-335(ar)18(e)-336(pr)18(ovided)-335(within.)-575(The)-335(data)-335(classes)-335(ar)17(e)-335(derived)-335(fr)17(om)]TJ 1.004 0 0 1 99.895 295.178 Tm [(the)-250(base)-249(classes)-250(in)-249(PSBLAS,)-250(thr)18(ough)-249(the)-250(Fortran)-249(2003)-250(mechanism)-249(of)]TJ/F78 9.9626 Tf 1.004 0 0 1 400.534 295.178 Tm [(type)-250(exten-)]TJ 1 0 0 1 99.895 283.222 Tm [(sion)]TJ/F84 9.9626 Tf 19.098 0 Td 
[([)]TJ +1 0 0 rg 1 0 0 RG + [(18)]TJ 0 g 0 G -/F54 9.9626 Tf 0 -29.397 Td [(Listing)-364(6:)-537(A)-364(sample)-364(PSBLAS-3.0)-363(err)18(or)-364(message.)-651(Pr)18(ocess)-364(0)-364(detected)-363(an)-364(err)18(or)]TJ 0 -11.955 Td [(condition)-250(inside)-250(the)-250(psb)]TJ -ET -q -1 0 0 1 206.215 153.925 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 209.204 153.726 Td [(cest)-250(subr)18(outine)]TJ + [(].)]TJ 1.006 0 0 1 114.839 271.249 Tm [(The)-248(data)-247(classes)-248(ar)18(e)-247(divided)-248(between)-247(the)-248(general)-248(purpose)-247(CPU)-248(extensions,)]TJ 0.993 0 0 1 99.895 259.294 Tm [(the)-252(GPU)-251(interfaces)-252(and)-251(the)-252(RSB)-252(interfaces.)-312(In)-251(the)-252(description)-252(we)-251(will)-252(make)-251(use)]TJ 1 0 0 1 99.895 247.338 Tm [(of)-250(the)-250(notation)-250(intr)18(oduced)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(22)]TJ +0 g 0 G + [(.)]TJ/F75 11.9552 Tf 0 -29.237 Td [(12.3)-1000(CPU-class)-250(extensions)]TJ/F75 9.9626 Tf 0 -19 Td [(ELLP)74(ACK)]TJ/F84 9.9626 Tf 0.98 0 0 1 99.587 180.101 Tm [(The)-194(ELLP)94(ACK/ITP)94(ACK)-194(format)-194(\050shown)-194(in)-193(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-194(6)]TJ +0 g 0 G + [(\051)-194(comprises)-194(two)-193(2-dimensional)]TJ 1.02 0 0 1 99.895 168.146 Tm [(arrays)]TJ/F145 9.9626 Tf 1 0 0 1 130.767 168.146 Tm [(AS)]TJ/F84 9.9626 Tf 1.02 0 0 1 143.951 168.146 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 163.879 168.146 Tm [(JA)]TJ/F84 9.9626 Tf 1.02 0 0 1 177.063 168.146 Tm [(with)]TJ/F145 9.9626 Tf 1 0 0 1 200.445 168.146 Tm [(M)]TJ/F84 9.9626 Tf 1.02 0 0 1 208.399 168.146 Tm [(r)18(ows)-268(and)]TJ/F145 9.9626 Tf 1 0 0 1 253.216 168.146 Tm [(MAXNZR)]TJ/F84 9.9626 Tf 1.02 0 0 1 287.322 168.146 Tm [(columns,)-274(wher)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 361.667 168.146 Tm [(MAXNZR)]TJ/F84 9.9626 Tf 1.02 0 0 1 395.772 168.146 Tm [(is)-268(the)-268(max-)]TJ 1.02 0 0 1 99.895 156.191 Tm 
[(imum)-289(number)-288(of)-289(nonzer)17(os)-288(in)-289(any)-289(r)18(ow)-289([)]TJ/F75 9.9626 Tf 1 0 0 1 279.688 156.191 Tm [(?)]TJ/F84 9.9626 Tf 1.02 0 0 1 284.111 156.191 Tm [(].)-435(Each)-289(r)18(ow)-289(of)-288(the)-289(arrays)]TJ/F145 9.9626 Tf 1 0 0 1 399.875 156.191 Tm [(AS)]TJ/F84 9.9626 Tf 1.02 0 0 1 413.269 156.191 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 433.407 156.191 Tm [(JA)]TJ/F84 9.9626 Tf 1.02 0 0 1 99.895 144.236 Tm [(contains)-357(the)-358(coef)18(\002cients)-357(and)-357(column)-358(indices;)-413(r)17(ows)-357(shorter)-357(than)]TJ/F145 9.9626 Tf 1 0 0 1 394.811 144.236 Tm [(MAXNZR)]TJ/F84 9.9626 Tf 1.02 0 0 1 429.823 144.236 Tm [(ar)18(e)]TJ 0.98 0 0 1 99.596 132.281 Tm [(padded)-229(with)-229(zer)19(o)-229(coef)18(\002cients)-229(and)-229(appr)19(opriate)-229(column)-229(indices,)-234(e.g.)-307(the)-229(last)-229(valid)]TJ 1 0 0 1 99.895 120.326 Tm [(one)-250(found)-250(in)-250(the)-250(same)-250(r)18(ow)92(.)]TJ 0 g 0 G - 55.075 -63.288 Td [(137)]TJ + 164.384 -29.888 Td [(171)]TJ 0 g 0 G ET endstream endobj -1779 0 obj +2147 0 obj << -/Length 3570 +/Length 4837 >> stream 0 g 0 G 0 g 0 G +0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(8.1)-1000(psb)]TJ +/F84 9.9626 Tf 189.471 698.871 Td [(T)92(able)-250(22:)-310(Notation)-250(for)-250(parameters)-250(describing)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +0 g 0 G ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 222.652 684.904 cm +[]0 d 0 J 0.398 w 0 0 m 199.817 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(error)-250(code)-250(onto)-250(the)-250(error)-250(stack)]TJ/F54 9.9626 Tf -49.379 -24.942 Td [(c)-175(a)-175(l)-174(l)-874(p)-98(s)-99(b)]TJ +/F84 7.9701 Tf 228.629 678.079 Td [(Name)-3364(Description)]TJ ET q -1 0 0 1 200.841 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 222.652 675.041 cm +[]0 d 0 J 0.398 w 0 0 m 199.817 0 l S Q BT -/F54 9.9626 Tf 204.812 681.187 Td 
[(e)-99(r)-98(r)-99(p)-98(u)-99(s)-99(h)-232(\050)-266(e)-132(r)-132(r)]TJ +/F84 7.9701 Tf 228.629 668.216 Td [(M)-5111(Number)-250(of)-250(r)18(ows)-250(in)-250(matrix)]TJ 0 -9.464 Td [(N)-5226(Number)-250(of)-250(columns)-250(in)-250(matrix)]TJ 0 -9.465 Td [(NZ)-4559(Number)-250(of)-250(nonzer)18(os)-250(in)-250(matrix)]TJ 0 -9.464 Td [(A)111(VGNZR)-1739(A)92(verage)-250(number)-250(of)-250(nonzer)18(os)-250(per)-250(r)18(ow)]TJ 0 -9.465 Td [(MAXNZR)-1500(Maximum)-250(number)-250(of)-250(nonzer)18(os)-250(per)-250(r)18(ow)]TJ 0 -9.464 Td [(NDIAG)-2574(Numer)18(o)-250(of)-250(nonzer)18(o)-250(diagonals)]TJ 0 -9.465 Td [(AS)-4754(Coef)18(\002cients)-250(array)]TJ 0 -9.464 Td [(IA)-4942(Row)-250(indices)-250(array)]TJ 0 -9.465 Td [(JA)-4946(Column)-250(indices)-250(array)]TJ 0 -9.464 Td [(IRP)-4448(Row)-250(start)-250(pointers)-250(array)]TJ 0 -9.465 Td [(JCP)-4411(Column)-250(start)-250(pointers)-250(array)]TJ 0 -9.464 Td [(NZR)-3891(Number)-250(of)-250(nonzer)18(os)-250(per)-250(r)18(ow)-250(array)]TJ 0 -9.465 Td [(OFFSET)-2410(Of)18(fset)-250(for)-250(diagonals)]TJ ET q -1 0 0 1 270.843 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 222.652 551.604 cm +[]0 d 0 J 0.398 w 0 0 m 199.817 0 l S Q -BT -/F54 9.9626 Tf 275.151 681.187 Td [(c)-440(,)-825(r)]TJ -ET +0 g 0 G +0 g 0 G +1 0 0 1 247.614 395.491 cm q -1 0 0 1 299.7 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 303.188 681.187 Td [(n)-50(a)-50(m)-50(e)-276(,)-929(i)]TJ -ET +.33653 0 0 .33653 0 0 cm q -1 0 0 1 348.561 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 0 0 cm +/Im6 Do Q -BT -/F54 9.9626 Tf 353.087 681.187 Td [(e)-154(r)-155(r)-483(,)-920(a)]TJ -ET -q -1 0 0 1 392.305 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G +1 0 0 1 -247.614 -395.491 cm BT -/F54 9.9626 Tf 396.74 681.187 Td [(e)-145(r)-145(r)-279(\051)]TJ +/F84 9.9626 Tf 245.769 373.573 Td [(Figur)18(e)-250(5:)-310(Example)-250(of)-250(sparse)-250(matrix)]TJ 0 g 0 G 0 g 0 G + 0.98 0 0 1 
165.649 339.81 Tm [(The)-252(matrix-vector)-252(pr)19(oduct)]TJ/F78 9.9626 Tf 1 0 0 1 282.309 339.81 Tm [(y)]TJ/F192 10.3811 Tf 7.997 0 Td [(=)]TJ/F78 9.9626 Tf 11.584 0 Td [(A)-42(x)]TJ/F84 9.9626 Tf 0.98 0 0 1 317.166 339.81 Tm [(can)-252(be)-252(computed)-251(with)-252(the)-252(code)-252(shown)-252(in)]TJ 1.003 0 0 1 150.316 327.855 Tm [(Alg.)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G -/F51 9.9626 Tf -246.035 -27.895 Td [(T)90(ype:)]TJ + [(;)-250(it)-250(costs)-250(one)-250(memory)-251(wri)1(te)-251(pe)1(r)-251(oute)1(r)-251(ite)1(ration,)-251(plus)-250(thr)18(ee)-250(memory)-250(r)18(eads)]TJ 1 0 0 1 150.705 315.9 Tm [(and)-250(two)-250(\003oating-point)-250(operations)-250(per)-250(inner)-250(iteration.)]TJ 1.02 0 0 1 165.649 303.412 Tm [(Unless)-327(all)-327(r)18(ows)-327(have)-327(exactly)-326(the)-327(same)-327(number)-327(of)-327(nonzer)18(os,)-347(some)-327(of)-327(the)]TJ 1.02 0 0 1 150.705 291.457 Tm [(coef)18(\002cients)-388(in)-387(the)]TJ/F145 9.9626 Tf 1 0 0 1 235.636 291.457 Tm [(AS)]TJ/F84 9.9626 Tf 1.02 0 0 1 250.035 291.457 Tm [(array)-388(will)-387(be)-388(zer)18(os;)-459(ther)18(efor)17(e)-387(this)-388(data)-388(str)8(uctur)18(e)-388(will)]TJ 1.02 0 0 1 150.705 279.501 Tm [(have)-270(an)-269(over)17(head)-269(both)-270(in)-270(ter)1(ms)-270(of)-270(memory)-269(space)-270(and)-270(r)18(edundant)-270(operations)]TJ 1 0 0 1 150.376 267.546 Tm [(\050multiplications)-250(by)-250(zer)18(o\051.)-310(The)-250(over)18(head)-250(can)-250(be)-250(acceptable)-250(if:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 12.782 -21.523 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + 1.018 0 0 1 175.303 246.023 Tm [(The)-244(maximum)-245(number)-244(of)-245(nonzer)18(os)-244(per)-245(r)18(ow)-244(is)-245(not)-244(much)-245(lar)18(ger)-244(than)-245(the)]TJ 1 0 0 1 175.611 234.067 Tm [(average;)]TJ 0 g 0 G + -12.453 -22.056 Td [(2.)]TJ 0 g 0 G - 0 -19.925 Td [(err)]TJ -ET -q -1 0 0 1 164.035 613.641 cm -[]0 d 0 J 
0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 167.023 613.442 Td [(c)]TJ + 0.98 0 0 1 175.303 212.011 Tm [(The)-243(r)19(egularity)-243(of)-243(th)1(e)-243(data)-243(str)8(uctur)19(e)-243(allows)-243(for)-242(faster)-243(code,)-245(e.g.)-312(by)-243(allowing)]TJ 1 0 0 1 175.333 200.056 Tm [(vectorization,)-250(ther)18(eby)-250(of)18(fsetting)-250(the)-250(additional)-250(storage)-250(r)18(equir)18(ements.)]TJ 1.02 0 0 1 150.705 178.532 Tm [(In)-323(the)-323(extr)18(eme)-324(case)-323(wher)18(e)-323(the)-323(input)-323(matrix)-323(has)-323(one)-323(full)-323(r)17(ow)91(,)-343(the)-323(ELLP)90(ACK)]TJ 1.01 0 0 1 150.705 166.577 Tm [(str)8(uctur)18(e)-246(would)-246(r)18(equir)17(e)-246(mor)18(e)-246(memory)-246(than)-246(the)-246(normal)-246(2D)-246(array)-246(storage.)-307(The)]TJ 1.02 0 0 1 150.705 154.621 Tm [(ELLP)90(ACK)-246(storage)-245(format)-246(was)-246(very)-246(popular)-245(in)-246(the)-246(vector)-246(computing)-245(days;)-247(in)]TJ 0.98 0 0 1 150.705 142.666 Tm [(modern)-231(CPUs)-231(it)-231(is)-231(not)-231(quite)-231(as)-231(popular)75(,)-236(but)-231(it)-231(is)-231(the)-231(basis)-231(for)-231(many)-231(GPU)-231(formats.)]TJ 1 0 0 1 165.649 130.178 Tm [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F145 9.9626 Tf 110.952 0 Td [(psb_T_ell_sparse_mat)]TJ/F84 9.9626 Tf 104.607 0 Td [(:)]TJ 0 g 0 G -/F54 9.9626 Tf 9.405 0 Td [(the)-250(err)18(or)-250(code)]TJ -0.817 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ + -66.12 -39.74 Td [(172)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(r)]TJ ET -q -1 0 0 1 155.178 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 158.167 545.696 Td [(name)]TJ + +endstream +endobj +2131 0 obj +<< 
+/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/mat.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2152 0 R +/BBox [0 0 438 395] +/Resources << +/ProcSet [ /PDF /ImageC ] +/ExtGState << +/R7 2153 0 R +>>/XObject << +/R8 2154 0 R +>>>> +/Length 3551 +/Filter /FlateDecode +>> +stream +xœí\K®,· ÷*j®èÿ™ðØö ¸pìÁívÙ~HЇ¬R 'žƒ¤ø·É–ÈsDEIÕþég<ÿ§?Þ¯?|ÛþöúéŽ~Ž0ê{tùãÕz:spMËálÍ @TÔŠžÏÑë^c=kq*¼ ‚jô²°“øxýùÃñ÷ HH¡Ò¨œa†0úä1 +uÖ~|ûõSÉÿøúîÇ8K!-yKdïýŠ9žü©3·œÑΜµh‘G%Çõ¨sž-f²’â{œÇÌ4j:Ê9ú˜ñˆyfêéòlõœ<ФZÈy>`!‡0Ï1ÔCÎ5œ‰ÛAn“Q@˜}æä°LÌ0~|¾"yŠ“\ô˜Îž;ó=r-áé˜Ô±„väT‡ƒ†³žmÎt¤YÉÛ<|ÊôMª4òÉDbÏ.ƒ¦¤~†|ÒˆM ‘᲋'M•XÎLãB0)"•8SC¡žfƤÑÌl¼" $óçéú =ª÷ñy¿öñ»oÙ"0.@Œö^bLãë9ÐãŠ@•)Rg9öIäèyH.äíJþ>8e ÿmôyäp`tM¾Ž¿h4@{üþ9·9ÐÉÕ>Gn³h\çÕšhŸ/LÄ}¢îy%Ó,£,ÚtéS¤(I˜M_?ÿh=‹öDŽÖ®"ògóùº|\=: +q””_R ÇW4¥Z¯b›gHy­ãøã_y‚ý¦Ö+_¿¾jý+¥Nš¼G!äü´ÿü=%™o~}}1ÿOFÿKɈ—. Jó¼­0n +‚VÒ§ZɼxÍKØJ®í$ã¶LË]÷¨©èA[ +Ä Ý-ª j€ˆ¨) d‹-š“3bFOÆ]±zPá!õ<,î>’Í +-Ë›£O×JXü°j™‰™›a• \«NÔÀæsI•Š žZ<™eU„¢uHª Ír~6d S«RHA¦½ 4cUŸ/ë£wÒ%h›SE²™y¿vWd¸.T K/TÌ Ç +v]¡Žµ?pm.}¯•(_œ¼5´¹fÅ9MÿbqM“ãS/q6†ƒà£j̦Fh÷Ê[‰ÿ$¢ÍÎûµû¢Ç!ðjàšxìD{žTIŠî¶¹\@:-‘T!çØí¹y bšFU£¸Ç,V!†®^a@Ÿ4Ð_Þ¦$2>Ó§Ù:xšé7oר-^brW–_Äû{X•ÕòÎëíŠ$ݤ9$Àjâ/býò6ñ n_¼_[×ÏF·å{ÇË\ø’Våê´í¼Žƒ)V¯Ô¦"°šZ‘ÂÌ"Õ¹ÎТV¹¿0 &ïitVÀ¦|ø´ +k¡ aàNB’Ô#yÉB'•„”ŸÂÔÂÞ5eÕå™Ï&ø”f®ºRº@æ’Š¾c²T¸ÏáLž¨ì¡‰ÊA-ä8Vi¯> SEa-€6µçr»F.õ*'‡ÎŽæÁ‹Ø:9%e\ +.Óh¹#ßÔZ55ÌóRoÍ9Vyµ“þðfG»C´Z +-–Ð{•Rf¥¼ë×À¦wôò€=Œ‘fÃÆ÷NЀS‰|ÔdœrËz +ˆGZîÏ/¬ùvª{Í-À…î +ö•úׯ½7üšÆINAJÜX™FQÑÚze•ôšßY¥Ôo¬¿ºà¬ š[(—u_°Í>XÁ¿~ tè½á—ÇqZ‘ +rµçõ“iP·f¾È*^×ò2¦xå˯¹„ä-Rì‚ {åk­|a•/|Xýd(ÐÂp«…ÇŠØS¹IäÂ\ñtvÐ[¨2=„5_¼ð嫼|)g”ù†ºWE/{¡ÐšÝœ™"°€¬Ä6 +´'ÒÒÛY>|¼T¼PhÅøì2YAËê½xšSß[_йÎFÑÛD”5PhÝc½¥.Ên^e¾¥Uÿh¡ðPìÞá¯ãYŒ8J•b-/göQù•½÷E“ ϦÂ+ù4~ߌ+—©­Rú@‘97àòrA ¹óÛ 00úàùdTþxÕF5°³àh=•Ù-j¥QníWn%È5§ÙÍM²³yVÙ¹©Â«——‹ ·åÕV¨•²ut*_¸©Æ0ªÅ-jcÆQÃïhœ›k4üÐñ[ç†_ó@£?öá +fݸ90jA5Q-ì$8h%¦;eÙ?;5×,d”uåM^§¼,ç*NmRZnò³Ê&L„_(˜uWàæÀ¨´Dµ°“Ð ó‰­}sËÒË©™F©•Ä»qc–»ìÖmo9„öô×ûC„[(€ Ýœ™"@„ÂÚ:?–äŤْ9+§h¸¢éGšnC-¼µèY²Êœ=0U´¬ }õ7Qpö€†&X­ú§²^—„•){‚jó2°s´øH^ˆmÐFŽgmY‰ñÏ0‘¿²Á뙞ޞÅ-¿½£"Óš‹ïRjÚq ;dØW^¬IÞ W3ù:€¨´Š)€W{oð«'1ÒµKöe5´³7=M¡ý\®|îxcWb>iåi&;?Èœ€CZEÖÒLznùa€ 
”ã¡©üþo9`1£sÝ~Aò2kÔ0°±ÐÅë‘ÌtßøžØ1M“ÖdŠaŒ57i®%>ÊÎò.(d‚:Ö{ÜÐÌBé£N³`rÇ|&¥5¡`!5šoâsù€Ì¸PX E ;ÝA?”›Þ„ñÛÓ=+[çe{Ždžæ ¼gû4ùµ{o0æªqW-»f%kô×\n,ÛÕâÕ# —`ã´ÞÊR‡xº‡ ;çÂ… òyNS*Oø9eYZ­Eœ\*Yÿ%zB†B6º#ŸÃ'|E` 6 +녜ҺÆËó=4H×jÙ~=ß™µXÀ­ÿ=C¡¹Ý‘ËáÀ“½"°€ …k¼EKâÕ£ß ýrR“÷J¬EJ·“—±O7îäÍ‚îô͇ +´JXØxèóC¹ù[— S¬ë¹Ïæ^zݰ.¼¯÷ëFÜ$ä 5`2. L£× 0 · æÁî# Z(Dô¿Sð÷žÅjý²¥¬³*'fÖåÃÝÖ;?buÞîR­rœ†þ—* Ýœ—"@„ÂúaËiý(ÿ« o^ÿ/o*o +endstream +endobj +2154 0 obj +<< +/Subtype /Image +/ColorSpace /DeviceGray +/Width 454 +/Height 425 +/BitsPerComponent 1 +/Interpolate true +/Filter /CCITTFaxDecode +/DecodeParms << +/K -1 +/Columns 454 +>> +/Length 164 +>> +stream +& ÙÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿøÿÀ@ +endstream +endobj +2158 0 obj +<< +/Length 7547 +>> +stream 0 g 0 G -/F54 9.9626 Tf 29.888 0 Td [(the)-250(soutine)-250(wher)18(e)-250(the)-250(err)18(or)-250(has)-250(been)-250(caught.)]TJ -12.444 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(i)]TJ -ET +0 g 0 G +1 0 0 1 154.285 609.491 cm q -1 0 0 1 154.62 466.194 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +.52 0 0 .52 0 0 cm +q +1 0 0 1 0 0 cm +/Im7 Do +Q Q +0 g 0 G +1 0 0 1 -154.285 -609.491 cm BT -/F51 9.9626 Tf 157.609 465.994 Td [(err)]TJ +/F84 9.9626 Tf 152.938 587.573 Td [(Figur)18(e)-250(6:)-310(ELLP)92(ACK)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +/F75 8.9664 Tf -16.48 -31.498 Td [(d)-11(o)]TJ/F84 8.9664 Tf 17.426 0 Td [(i)-243(=)-89(1)-178(,)-98(n)]TJ -5.537 -10.959 Td [(t)-168(=)-32(0)]TJ/F75 8.9664 Tf -1.13 -10.958 Td 
[(d)-11(o)]TJ/F84 8.9664 Tf 17.682 0 Td [(j)-272(=)-89(1)-177(,)-121(m)-32(a)-32(x)-32(n)-32(z)-32(r)]TJ -5.792 -10.959 Td [(t)-734(=)-734(t)-734(+)-1289(a)-92(s)-226(\050)-236(i)-381(,)-358(j)-342(\051)]TJ 85.313 -2.332 Td [(*)]TJ 5.293 2.332 Td [(x)-176(\050)-288(j)-156(a)-289(\050)-236(i)-381(,)-358(j)-361(\051)-178(\051)]TJ/F75 8.9664 Tf -102.419 -10.959 Td [(e)-19(n)-20(d)-630(d)-11(o)]TJ/F84 8.9664 Tf 0.022 -10.959 Td [(y)-156(\050)-288(i)-288(\051)-730(=)-734(t)]TJ/F75 8.9664 Tf -10.782 -10.959 Td [(e)-19(n)-20(d)-630(d)-12(o)]TJ +0 g 0 G +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ 0.289 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(a)]TJ +0 g 0 G +/F75 9.9626 Tf 16.498 -17.519 Td [(Algorithm)-250(1:)]TJ/F84 9.9626 Tf 60.055 0 Td [(Matrix-V)111(ector)-250(pr)18(oduct)-250(in)-250(ELL)-250(format)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 156.284 398.448 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 99.895 316.473 cm +0 0 343.711 126.526 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F51 9.9626 Tf 159.273 398.249 Td [(err)]TJ +/F233 8.9664 Tf 112.299 432.339 Td [(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ +0 g 0 G + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_d_ell_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 
0.63 0.69 RG +/F279 8.9664 Tf 9.414 -10.959 Td [(!)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(ITPACK/ELL)-525(format,)-525(extended.)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F233 8.9664 Tf 0 -21.918 Td [(integer)]TJ +0 g 0 G + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ -1.375 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -271.945 Td [(138)]TJ + [-525(irn\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1785 0 obj -<< -/Length 1332 ->> -stream + [-525(ja\050:,:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(idiag\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(8.2)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(error)-306(\227)-306(Prints)-306(the)-306(error)-307(s)1(tack)-307(content)-306(and)-306(aborts)-306(exe-)]TJ -24.221 -13.948 Td [(cution)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-900(p)-126(s)-125(b)]TJ -ET -q -1 0 0 1 151.092 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 155.328 667.24 Td 
[(e)-125(r)-125(r)-126(o)-125(r)-259(\050)-279(i)-146(c)-146(o)-147(n)-146(t)-146(x)-146(t)-280(\051)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(val\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -55.433 -27.896 Td [(T)90(ype:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -21.918 Td [(contains)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + 9.414 -10.959 Td [(....)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.958 Td [(end)-525(type)]TJ 0 g 0 G - 0 -19.925 Td [(icontxt)]TJ + [-525(psb_d_ell_sparse_mat)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ +/F75 9.9626 Tf -12.404 -39.81 Td [(Hacked)-250(ELLP)74(ACK)]TJ/F84 9.9626 Tf 0.98 0 0 1 99.587 263.883 Tm [(The)]TJ/F78 9.9626 Tf 0.98 0 0 1 118.234 263.883 Tm [(hacked)-236(ELLP)132(ACK)]TJ/F84 9.9626 Tf 0.98 0 0 1 190.781 263.883 Tm [(\050)]TJ/F75 9.9626 Tf 0.98 0 0 1 194.032 263.883 Tm [(HLL)]TJ/F84 9.9626 Tf 0.98 0 0 1 214.096 263.883 Tm [(\051)-236(format)-236(alleviates)-236(the)-236(main)-236(pr)19(oblem)-236(of)-236(the)-236(ELLP)94(ACK)]TJ 0.989 0 0 1 99.895 251.928 Tm 
[(format,)-252(that)-253(is,)-252(the)-252(amount)-253(of)-252(memory)-252(r)18(equir)18(ed)-252(by)-252(padding)-252(for)-253(sparse)-252(matrices)]TJ 1 0 0 1 99.895 239.973 Tm [(in)-250(which)-250(the)-250(maximum)-250(r)18(ow)-250(length)-250(is)-250(lar)18(ger)-250(than)-250(the)-250(average.)]TJ 1.02 0 0 1 114.839 227.97 Tm [(The)-421(number)-421(of)-422(elements)-421(allocated)-421(to)-421(padding)-421(is)]TJ/F192 10.3811 Tf 1 0 0 1 345.511 227.97 Tm [([)-24(\050)]TJ/F78 9.9626 Tf 7.403 0 Td [(m)]TJ/F190 10.3811 Tf 10.6 0 Td [(\003)]TJ/F78 9.9626 Tf 8.039 0 Td [(m)-40(a)-42(x)-70(N)-76(R)]TJ/F192 10.3811 Tf 34.072 0 Td [(\051)]TJ/F190 10.3811 Tf 6.874 0 Td [(\000)]TJ/F192 10.3811 Tf 10.919 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(m)]TJ/F190 10.3811 Tf 10.6 0 Td [(\003)]TJ/F78 9.9626 Tf -338.123 -11.955 Td [(a)-25(v)-47(g)-60(N)-76(R)]TJ/F192 10.3811 Tf 31.104 0 Td [(\051)-537(=)]TJ/F78 9.9626 Tf 23.266 0 Td [(m)]TJ/F190 10.3811 Tf 10.451 0 Td [(\003)]TJ/F192 10.3811 Tf 7.891 0 Td [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(m)-40(a)-42(x)-70(N)-76(R)]TJ/F190 10.3811 Tf 36.523 0 Td [(\000)]TJ/F78 9.9626 Tf 10.919 0 Td [(a)-25(v)-47(g)-60(N)-76(R)]TJ/F192 10.3811 Tf 31.103 0 Td [(\051)-23(])]TJ/F84 9.9626 Tf 1.02 0 0 1 266.856 216.015 Tm [(for)-382(both)]TJ/F145 9.9626 Tf 1 0 0 1 307.954 216.015 Tm [(AS)]TJ/F84 9.9626 Tf 1.02 0 0 1 322.291 216.015 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 343.374 216.015 Tm [(JA)]TJ/F84 9.9626 Tf 1.02 0 0 1 357.711 216.015 Tm [(arrays,)-416(wher)18(e)]TJ/F78 9.9626 Tf 1 0 0 1 424.587 216.015 Tm [(m)]TJ/F84 9.9626 Tf 1.02 0 0 1 436.341 216.015 Tm [(is)]TJ 0.995 0 0 1 99.895 204.06 Tm [(equal)-252(to)-252(the)-252(number)-252(of)-252(r)18(ows)-252(of)-252(the)-252(matrix)1(,)]TJ/F78 9.9626 Tf 1 0 0 1 287.784 204.06 Tm [(m)-40(a)-42(x)-70(N)-76(R)]TJ/F84 9.9626 Tf 0.995 0 0 1 324.229 204.06 Tm [(is)-252(the)-252(maximum)-252(number)-252(of)]TJ 0.998 0 0 1 99.895 192.104 Tm 
[(nonzer)18(o)-251(elements)-251(in)-251(every)-251(r)18(ow)-252(and)]TJ/F78 9.9626 Tf 1 0 0 1 257.847 192.104 Tm [(a)-25(v)-47(g)-60(N)-76(R)]TJ/F84 9.9626 Tf 0.998 0 0 1 291.322 192.104 Tm [(is)-251(the)-251(average)-251(number)-252(of)-251(nonzer)18(os.)]TJ 1.017 0 0 1 99.587 180.149 Tm [(Ther)18(efor)17(e)-245(a)-246(single)-245(densely)-246(populated)-245(r)17(ow)-245(can)-246(seriously)-246(af)18(fec)1(t)-246(the)-246(total)-245(size)-246(of)]TJ 1 0 0 1 99.895 168.194 Tm [(the)-250(allocation.)]TJ 1.02 0 0 1 114.839 156.191 Tm [(T)90(o)-335(limit)-336(this)-335(ef)18(fect,)-358(in)-336(t)1(he)-336(HLL)-335(format)-336(we)-335(br)18(eak)-336(the)-335(original)-336(matrix)-335(into)]TJ 0.985 0 0 1 99.895 144.236 Tm [(equally)-254(sized)-253(gr)18(oups)-253(of)-254(r)18(ows)-253(\050called)]TJ/F78 9.9626 Tf 0.985 0 0 1 259.082 144.236 Tm [(hacks)]TJ/F84 9.9626 Tf 0.985 0 0 1 280.514 144.236 Tm [(\051,)-254(and)-253(then)-254(stor)19(e)-254(these)-253(gr)18(oups)-254(as)-253(inde-)]TJ 1.02 0 0 1 99.596 132.281 Tm [(pendent)-249(matrices)-249(in)-249(ELLP)90(ACK)-249(format.)-315(The)-249(gr)18(oups)-249(can)-249(be)-249(arranged)-249(selecting)]TJ 1.02 0 0 1 99.895 120.326 Tm [(r)18(ows)-332(in)-331(an)-331(arbitrarily)-331(manner;)-374(indeed,)-353(if)-331(the)-331(r)18(ows)-332(ar)18(e)-331(sorted)-331(by)-331(decr)17(easing)]TJ 0 g 0 G - 139.477 -461.235 Td [(139)]TJ + 1 0 0 1 264.279 90.438 Tm [(173)]TJ 0 g 0 G ET endstream endobj -1674 0 obj +2133 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/ell.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2169 0 R +/BBox [0 0 447 205] +/Resources << +/ProcSet [ /PDF /ImageC /Text ] +/ExtGState << +/R7 2170 0 R +>>/XObject << +/R8 2171 0 R +>>/Font << /R9 2172 0 R/R11 2173 0 R>> +>> +/Length 2281 +/Filter /FlateDecode +>> +stream +xœÝYËŠ%¹ÝçWäÎU‹’õ~,ÛØoº§À˜Á«kOý6=^ø÷!Å )E—™M3Pôâv…â)ŒÌúrZãNËÿä÷ö8~ÿ©œŸÿs|9Zñ&”vúêM 
+î|-7¬;}¡ßâΖ‹)…Äd²_Dg|¨çí³©]]¶ç`l©§:HÍ4Çúd(f²)‰Ö›ñŽÅd)´Lš"é×bZ^š3ÅÍý-’ßÕ~«Æ¦%€ÖLŽuyf ˆ¤ ¨\Hx‡ÄPiGZb¤_NIˆÈIK–P@``kÃMã8¯Ø–Æ8N¼6tÂ%Ê3L1V“óì‹‹Ñø®-»£5Iý¹@ñøº´Å…h\ñڼɶ!%ˆ³/ª 5QRµÅ…ÔUBªk€*#HÛ‘?H…f[µÞùÙ—cÇ‘ƒˆ³-ª€"ÀÀÖ„ÙNª¸õ¾PÚÞYíC°&²i½»]ï‹3¹»D_(Ô®—QveíŒ3©¶ª¡j¥ÂJVggTenº@in‚Ô;¤Y¢iŠ­‚ø@fs¸ùa¹3Ñg]FgsTAê ¶VÜŽ¿žÿ¢òÚÓ“+nßãpÖNñ>Ä.Pˆ÷®;¥Ÿú~R·‘À@»c,S¼‘õ ócß—3‘õίޓÞKH…NùK®ÄR1 Ò­çÿ}|¤_Ži½,¼K@4y²ÉtҔÁ[г©a>¸3†˜-Ý0'‰ÑÄI¬$W²³Poˆ%Rè*NÕ¿Ú§†¼ø¯‘ÎÓŸÈK@£XÐ ÊŠð¦F(¼ª9(ój–ScA-\úpCc&à”¼¤2ˆkƒÙÔ@ç=ØWZDD ND bEN“z‘õÔU™zÙV£®ˆ`T}ÆY@3”íZa_YQÞÔ…W5e^MrjŒ"¨…K´-“wQU0ªFš·œ¥ˆ°šöEX>•5(!N bEZ“z‘øÔ¨…Q¹Å…”Vc¡1âÍ,2ófº‹ÉŠˆ¼‰Á«Èb2/ò\4z¦…K/ñ¸2oˆmeÞ.N®íÊ;õzç‹©îW0´¯4Lè¯eð®úÿI¼û GÌ,Nûÿ{PeZ®D‡>ÝùR2³áƒ…ûÿïÇ÷$]×.‹¹¬‹,-‹­­‹,-‹.¸uµ‹ër¾„ÔÅu¹]¢êâ²ìRÎðŒNUšJ¬bÏ’Ö%‘Æ’³e]ƒ(‹Ñ^E”Åâ/‹"ö "Y¡–EÛ(:ß{œ/ȪUâ®dÕr63¸h¹h…¶k rÑ¢[»i ÒµøÄøäø8ÑÜ i ®Ù®Z‚¬Z9úb ÈE+ò#÷¢%ÈE«Ô-. «V¤Ú\m¹hE¿ÅdÕ¢7Rëyl´Æ6ky`d(µTÎ_>Ó”äÆe”ŸÛãüÃ+]@zšÇóõÇc¼ÎòÀC׸ËÆl’¥¥Çñäž_&û‰ž_6Ó4å™äõ²xnôë_Ž'ÿü»ãeÑõüÔº¡ë:9¶Fæ=Q/xؾ±_ýÜã ¼•†Ý¾Pyá…êN†ŸhÇž(~Jò7õ+n$Ö­ÑLÜxxÑ„8CÛÍyS¨ÚׄÆÒ ]¨¾f;XûÓkL'/ƒ<íùË?O¦G7øîíž}"´Ö„ÄÃ5ž^ B‡Üò|IgÊñ8D7†*ãm‘7Ó!ßGó&?-8ÇkšX˜òðA;€doië©r¦¹®L"ßBR,ìyÜèÁÔôùÈ.~‹²ô-ü &gjg*±ûªŽêR㙼'ƒS¾|÷È$j­¡ìöI]žVè*ÉîFÇ„Ÿ#0ùv Xöoñߎß_JÜ«F>,¿kV@V¥w‚è½FÍüjý’U¡Q‹/Bñ ö‹8ݰ°Q‹ý™¢!‹=îÕ»L‹ûå|L†»O/ÉsbŠøj p4°žbÍ&z½p‰î®ŠLÖf.(Åí$}6Os¸XÈ…_­\@¤ ƒ*HŒj`Ë‚[ö^3[oÙÌM §¡Ëq™¹á<‘ã†ýÕ­ˆ Û·ØŸ‰! 
öÖ[ö®Òâ~ UμHX`ZD .žy Y*Ç~ˆêTD%»4ÌÏ´‰û·ø—§×ûI‰û„ùcf¥ˆ„… QcÆYHC¦ É*cJRÒ€G ;É¿õ©˜Ï@3~ýZL²kÄÅŽÿ8EÜóú8~xúðìÓ÷ç‡OŸž_([zR6 +Íy-?ýíùï¯ßñGz·5®„ùËéOß}àMžså¾Atû·ÔÇÿ|THF +endstream +endobj +2050 0 obj << /Type /ObjStm /N 100 -/First 975 -/Length 9667 ->> -stream -1671 0 1673 118 457 177 1670 235 1676 382 1678 500 1679 558 1680 616 1681 674 1675 731 -1685 866 1687 984 461 1043 1684 1101 1689 1248 1691 1366 1692 1424 1693 1482 1694 1540 1688 1597 -1696 1732 1698 1850 465 1909 1695 1967 1700 2114 1702 2232 1703 2290 1704 2348 1705 2406 1699 2464 -1707 2599 1709 2717 469 2776 1706 2834 1711 2981 1713 3099 1714 3157 1715 3215 1716 3273 1710 3330 -1719 3465 1721 3583 473 3642 1718 3700 1723 3847 1725 3965 1726 4023 1727 4081 1728 4139 1722 4196 -1730 4331 1732 4449 477 4508 1729 4566 1734 4713 1736 4831 1737 4889 1738 4947 1739 5005 1733 5062 -1741 5197 1743 5315 481 5374 1740 5432 1745 5579 1747 5697 1748 5755 1749 5813 1751 5870 1752 5928 -1753 5986 1744 6043 1756 6218 1758 6336 485 6395 1759 6453 1755 6512 1761 6659 1763 6777 489 6835 -1764 6892 1760 6950 1768 7097 1765 7245 1766 7393 1770 7541 493 7600 1767 7658 1774 7752 1776 7870 -1771 7928 1772 7986 1773 8044 1778 8140 1780 8258 497 8317 1781 8375 1782 8434 1777 8493 1784 8574 -% 1671 0 obj +/First 991 +/Length 12168 +>> +stream +2047 0 2044 148 2045 293 2049 440 573 499 2046 557 2056 652 2051 818 2052 963 2053 1110 +2054 1264 2058 1413 577 1471 2055 1528 2063 1623 2059 1780 2060 1927 2061 2074 2065 2228 581 2287 +2062 2345 2067 2440 2069 2558 585 2616 2066 2673 2075 2768 2071 2916 2072 3066 2077 3211 589 3270 +2078 3328 2079 3387 2080 3446 2081 3505 2074 3563 2086 3714 2073 3880 2082 4027 2083 4171 2084 4315 +2088 4460 2085 4518 2091 4669 2089 4808 2093 4952 2090 5011 2099 5119 2094 5276 2095 5421 2096 5568 +2101 5710 593 5768 2102 5825 2103 5883 2104 5941 2098 5999 2108 6150 2097 6307 2105 6451 2106 6598 +2110 6742 2107 6801 2112 6952 2114 7070 2111 7128 2123 7209 2116 7402 2126 7578 2117 7753 2118 7936 +2119 8088 2120 8243 
2121 8395 2125 8549 597 8608 601 8666 2122 8724 2137 8847 2129 9004 2130 9153 +2132 9300 2139 9447 606 9505 610 9562 2140 9619 2141 9677 2136 9735 2146 9873 2152 10012 2153 10192 +2135 10235 2148 10382 2143 10441 2149 10500 2150 10559 2151 10618 2145 10677 2157 10815 2169 10954 2170 11134 +% 2047 0 obj << /Type /Page -/Contents 1672 0 R -/Resources 1670 0 R +/Contents 2048 0 R +/Resources 2046 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/Parent 2034 0 R +/Annots [ 2044 0 R 2045 0 R ] >> -% 1673 0 obj +% 2044 0 obj << -/D [1671 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 573.77 443.414 585.83] +/A << /S /GoTo /D (precdata) >> >> -% 457 0 obj +% 2045 0 obj << -/D [1671 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 498.054 443.414 510.114] +/A << /S /GoTo /D (precdata) >> >> -% 1670 0 obj +% 2049 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R /F83 813 0 R >> -/ProcSet [ /PDF /Text ] +/D [2047 0 R /XYZ 149.705 753.953 null] >> -% 1676 0 obj +% 573 0 obj << -/Type /Page -/Contents 1677 0 R -/Resources 1675 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/D [2047 0 R /XYZ 150.705 716.092 null] >> -% 1678 0 obj +% 2046 0 obj << -/D [1676 0 R /XYZ 98.895 753.953 null] +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1679 0 obj +% 2056 0 obj << -/D [1676 0 R /XYZ 99.895 528.579 null] +/Type /Page +/Contents 2057 0 R +/Resources 2055 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2034 0 R +/Annots [ 2051 0 R 2052 0 R 2053 0 R 2054 0 R ] >> -% 1680 0 obj +% 2051 0 obj << -/D [1676 0 R /XYZ 99.895 494.104 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 573.77 392.605 585.83] +/A << /S /GoTo /D (precdata) >> >> -% 1681 0 obj +% 2052 0 obj << -/D [1676 0 R /XYZ 99.895 407.25 null] +/Type /Annot +/Subtype /Link +/Border[0 0 
0]/H/I/C[1 0 0] +/Rect [320.317 430.308 392.605 442.368] +/A << /S /GoTo /D (precdata) >> >> -% 1675 0 obj +% 2053 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [129.909 304.779 149.563 316.839] +/A << /S /GoTo /D (subsection.12.4) >> >> -% 1685 0 obj +% 2054 0 obj << -/Type /Page -/Contents 1686 0 R -/Resources 1684 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [172.161 304.779 184.246 316.839] +/A << /S /GoTo /D (section.13) >> >> -% 1687 0 obj +% 2058 0 obj << -/D [1685 0 R /XYZ 149.705 753.953 null] +/D [2056 0 R /XYZ 98.895 753.953 null] >> -% 461 0 obj +% 577 0 obj << -/D [1685 0 R /XYZ 150.705 716.092 null] +/D [2056 0 R /XYZ 99.895 716.092 null] >> -% 1684 0 obj +% 2055 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1689 0 obj +% 2063 0 obj << /Type /Page -/Contents 1690 0 R -/Resources 1688 0 R +/Contents 2064 0 R +/Resources 2062 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 2034 0 R +/Annots [ 2059 0 R 2060 0 R 2061 0 R ] >> -% 1691 0 obj +% 2059 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 561.815 443.414 573.875] +/A << /S /GoTo /D (precdata) >> +>> +% 2060 0 obj << -/D [1689 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 486.099 443.414 498.159] +/A << /S /GoTo /D (precdata) >> >> -% 1692 0 obj +% 2061 0 obj << -/D [1689 0 R /XYZ 99.895 552.489 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [443.742 396.435 463.169 408.495] +/A << /S /GoTo /D (subsection.10.8) >> >> -% 1693 0 obj +% 2065 0 obj << -/D [1689 0 R /XYZ 99.895 518.014 null] +/D [2063 0 R 
/XYZ 149.705 753.953 null] >> -% 1694 0 obj +% 581 0 obj << -/D [1689 0 R /XYZ 99.895 431.16 null] +/D [2063 0 R /XYZ 150.705 716.092 null] >> -% 1688 0 obj +% 2062 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1696 0 obj +% 2067 0 obj << /Type /Page -/Contents 1697 0 R -/Resources 1695 0 R +/Contents 2068 0 R +/Resources 2066 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 2070 0 R >> -% 1698 0 obj +% 2069 0 obj << -/D [1696 0 R /XYZ 149.705 753.953 null] +/D [2067 0 R /XYZ 98.895 753.953 null] >> -% 465 0 obj +% 585 0 obj << -/D [1696 0 R /XYZ 150.705 716.092 null] +/D [2067 0 R /XYZ 99.895 716.092 null] >> -% 1695 0 obj +% 2066 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1700 0 obj +% 2075 0 obj << /Type /Page -/Contents 1701 0 R -/Resources 1699 0 R +/Contents 2076 0 R +/Resources 2074 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 2070 0 R +/Annots [ 2071 0 R 2072 0 R ] >> -% 1702 0 obj +% 2071 0 obj << -/D [1700 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [379.973 275.278 386.249 288.868] +/A << /S /GoTo /D (Hfootnote.5) >> >> -% 1703 0 obj +% 2072 0 obj << -/D [1700 0 R /XYZ 99.895 540.534 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 134.696 420.271 146.755] +/A << /S /GoTo /D (spdata) >> >> -% 1704 0 obj +% 2077 0 obj << -/D [1700 0 R /XYZ 99.895 506.059 null] +/D [2075 0 R /XYZ 149.705 753.953 null] >> -% 1705 0 obj +% 589 0 obj << -/D [1700 0 R /XYZ 99.895 419.205 null] +/D [2075 0 R /XYZ 150.705 716.092 null] >> -% 1699 0 obj +% 2078 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> -/ProcSet [ /PDF 
/Text ] +/D [2075 0 R /XYZ 150.705 444.811 null] >> -% 1707 0 obj +% 2079 0 obj << -/Type /Page -/Contents 1708 0 R -/Resources 1706 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/D [2075 0 R /XYZ 150.705 444.971 null] >> -% 1709 0 obj +% 2080 0 obj << -/D [1707 0 R /XYZ 149.705 753.953 null] +/D [2075 0 R /XYZ 150.705 433.015 null] >> -% 469 0 obj +% 2081 0 obj << -/D [1707 0 R /XYZ 150.705 716.092 null] +/D [2075 0 R /XYZ 165.051 129.79 null] >> -% 1706 0 obj +% 2074 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F148 1490 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1711 0 obj +% 2086 0 obj << /Type /Page -/Contents 1712 0 R -/Resources 1710 0 R +/Contents 2087 0 R +/Resources 2085 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Parent 2070 0 R +/Annots [ 2073 0 R 2082 0 R 2083 0 R 2084 0 R ] >> -% 1713 0 obj +% 2073 0 obj << -/D [1711 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 654.503 364.232 666.562] +/A << /S /GoTo /D (precdata) >> >> -% 1714 0 obj +% 2082 0 obj << -/D [1711 0 R /XYZ 99.895 552.489 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.493 584.479 418.548 596.539] +/A << /S /GoTo /D (vdata) >> >> -% 1715 0 obj +% 2083 0 obj << -/D [1711 0 R /XYZ 99.895 518.014 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.493 514.456 418.548 526.516] +/A << /S /GoTo /D (vdata) >> >> -% 1716 0 obj +% 2084 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 374.41 359.001 386.47] +/A << /S /GoTo /D (descdata) >> +>> +% 2088 0 obj << -/D [1711 0 R /XYZ 99.895 431.16 null] +/D [2086 0 R /XYZ 98.895 753.953 null] >> -% 1710 0 obj +% 2085 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/Font << /F75 
685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F148 1490 0 R >> /ProcSet [ /PDF /Text ] >> -% 1719 0 obj +% 2091 0 obj << /Type /Page -/Contents 1720 0 R -/Resources 1718 0 R +/Contents 2092 0 R +/Resources 2090 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Parent 2070 0 R +/Annots [ 2089 0 R ] >> -% 1721 0 obj +% 2089 0 obj << -/D [1719 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [393.303 554.876 469.357 566.936] +/A << /S /GoTo /D (vdata) >> >> -% 473 0 obj +% 2093 0 obj << -/D [1719 0 R /XYZ 150.705 716.092 null] +/D [2091 0 R /XYZ 149.705 753.953 null] >> -% 1718 0 obj +% 2090 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1723 0 obj +% 2099 0 obj << /Type /Page -/Contents 1724 0 R -/Resources 1722 0 R +/Contents 2100 0 R +/Resources 2098 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Parent 2070 0 R +/Annots [ 2094 0 R 2095 0 R 2096 0 R ] >> -% 1725 0 obj +% 2094 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 253.023 369.462 265.083] +/A << /S /GoTo /D (spdata) >> +>> +% 2095 0 obj << -/D [1723 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 184.772 364.232 196.831] +/A << /S /GoTo /D (precdata) >> >> -% 1726 0 obj +% 2096 0 obj << -/D [1723 0 R /XYZ 99.895 528.579 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.493 116.52 418.548 128.58] +/A << /S /GoTo /D (vdata) >> >> -% 1727 0 obj +% 2101 0 obj << -/D [1723 0 R /XYZ 99.895 494.104 null] +/D [2099 0 R /XYZ 98.895 753.953 null] >> -% 1728 0 obj +% 593 0 obj << -/D [1723 0 R /XYZ 99.895 407.25 null] +/D [2099 0 R /XYZ 99.895 716.092 null] >> -% 1722 0 obj +% 2102 0 obj +<< +/D [2099 0 R /XYZ 99.895 397.916 null] +>> +% 2103 0 obj 
+<< +/D [2099 0 R /XYZ 99.895 398.076 null] +>> +% 2104 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/D [2099 0 R /XYZ 99.895 386.121 null] +>> +% 2098 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F148 1490 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1730 0 obj +% 2108 0 obj +<< +/Type /Page +/Contents 2109 0 R +/Resources 2107 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2070 0 R +/Annots [ 2097 0 R 2105 0 R 2106 0 R ] +>> +% 2097 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [393.303 654.503 469.357 666.562] +/A << /S /GoTo /D (vdata) >> +>> +% 2105 0 obj << -/Type /Page -/Contents 1731 0 R -/Resources 1729 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 520.007 409.811 532.067] +/A << /S /GoTo /D (descdata) >> >> -% 1732 0 obj +% 2106 0 obj << -/D [1730 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [393.303 183.768 469.357 195.827] +/A << /S /GoTo /D (vdata) >> >> -% 477 0 obj +% 2110 0 obj << -/D [1730 0 R /XYZ 150.705 716.092 null] +/D [2108 0 R /XYZ 149.705 753.953 null] >> -% 1729 0 obj +% 2107 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F148 1490 0 R >> /ProcSet [ /PDF /Text ] >> -% 1734 0 obj +% 2112 0 obj << /Type /Page -/Contents 1735 0 R -/Resources 1733 0 R +/Contents 2113 0 R +/Resources 2111 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R ->> -% 1736 0 obj -<< -/D [1734 0 R /XYZ 98.895 753.953 null] ->> -% 1737 0 obj -<< -/D [1734 0 R /XYZ 99.895 528.579 null] ->> -% 1738 0 obj -<< -/D [1734 0 R /XYZ 99.895 494.104 null] +/Parent 2115 0 R >> -% 1739 0 obj +% 2114 0 obj << -/D [1734 0 R /XYZ 99.895 407.25 null] +/D [2112 0 
R /XYZ 98.895 753.953 null] >> -% 1733 0 obj +% 2111 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/Font << /F75 685 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1741 0 obj +% 2123 0 obj << /Type /Page -/Contents 1742 0 R -/Resources 1740 0 R +/Contents 2124 0 R +/Resources 2122 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Parent 2115 0 R +/Annots [ 2116 0 R 2126 0 R 2117 0 R 2118 0 R 2119 0 R 2120 0 R 2121 0 R ] >> -% 1743 0 obj +% 2116 0 obj << -/D [1741 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Border[0 0 0]/H/I/C[0 1 1] +/Rect [368.938 636.522 495.412 648.582] +/Subtype/Link/A<> >> -% 481 0 obj +% 2126 0 obj << -/D [1741 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Border[0 0 0]/H/I/C[0 1 1] +/Rect [174.615 624.567 218.45 636.627] +/Subtype/Link/A<> >> -% 1740 0 obj +% 2117 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Border[0 0 0]/H/I/C[0 1 1] +/Rect [191.223 593.098 397.198 605.158] +/Subtype/Link/A<> >> -% 1745 0 obj +% 2118 0 obj << -/Type /Page -/Contents 1746 0 R -/Resources 1744 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [280.59 552.835 292.545 561.841] +/A << /S /GoTo /D (cite.DesPat:11) >> >> -% 1747 0 obj +% 2119 0 obj << -/D [1745 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [296.203 552.835 308.158 561.692] +/A << /S /GoTo /D (cite.CaFiRo:2014) >> >> -% 1748 0 obj +% 2120 0 obj << -/D [1745 0 R /XYZ 99.895 564.444 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [311.815 552.835 323.771 561.841] +/A << /S /GoTo /D (cite.Sparse03) >> >> -% 1749 0 obj +% 2121 0 obj << -/D [1745 0 R /XYZ 99.895 529.97 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [209.414 540.781 221.369 549.737] +/A << /S 
/GoTo /D (cite.OurTechRep) >> >> -% 1751 0 obj +% 2125 0 obj << -/D [1745 0 R /XYZ 99.895 441.815 null] +/D [2123 0 R /XYZ 149.705 753.953 null] >> -% 1752 0 obj +% 597 0 obj << -/D [1745 0 R /XYZ 99.895 409.935 null] +/D [2123 0 R /XYZ 150.705 716.092 null] >> -% 1753 0 obj +% 601 0 obj << -/D [1745 0 R /XYZ 99.895 323.08 null] +/D [2123 0 R /XYZ 150.705 525.151 null] >> -% 1744 0 obj +% 2122 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R /F1 1750 0 R /F96 1154 0 R /F94 915 0 R /F112 1682 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R /F78 686 0 R /F233 1044 0 R >> /ProcSet [ /PDF /Text ] >> -% 1756 0 obj +% 2137 0 obj << /Type /Page -/Contents 1757 0 R -/Resources 1755 0 R +/Contents 2138 0 R +/Resources 2136 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/Parent 2115 0 R +/Annots [ 2129 0 R 2130 0 R 2132 0 R ] >> -% 1758 0 obj +% 2129 0 obj << -/D [1756 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [121.315 282.067 133.27 291.173] +/A << /S /GoTo /D (cite.MRC:11) >> >> -% 485 0 obj +% 2130 0 obj << -/D [1756 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [253.836 246.183 265.791 255.592] +/A << /S /GoTo /D (table.22) >> >> -% 1759 0 obj +% 2132 0 obj << -/D [1756 0 R /XYZ 150.705 222.691 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [310.367 176.296 317.241 188.355] +/A << /S /GoTo /D (figure.6) >> >> -% 1755 0 obj +% 2139 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F61 1360 0 R /F85 814 0 R /F83 813 0 R >> -/ProcSet [ /PDF /Text ] +/D [2137 0 R /XYZ 98.895 753.953 null] >> -% 1761 0 obj +% 606 0 obj << -/Type /Page -/Contents 1762 0 R -/Resources 1760 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/D [2137 0 R /XYZ 99.895 349.244 null] >> -% 1763 0 obj +% 610 0 obj << -/D [1761 0 R /XYZ 98.895 753.953 null] +/D [2137 0 R /XYZ 99.895 231.907 null] 
>> -% 489 0 obj +% 2140 0 obj << -/D [1761 0 R /XYZ 99.895 716.092 null] +/D [2137 0 R /XYZ 99.895 211.056 null] >> -% 1764 0 obj +% 2141 0 obj << -/D [1761 0 R /XYZ 99.895 222.691 null] +/D [2137 0 R /XYZ 99.895 120.166 null] >> -% 1760 0 obj +% 2136 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F279 1813 0 R /F233 1044 0 R /F84 687 0 R /F145 940 0 R /F75 685 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1768 0 obj +% 2146 0 obj << /Type /Page -/Contents 1769 0 R -/Resources 1767 0 R +/Contents 2147 0 R +/Resources 2145 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R -/Annots [ 1765 0 R 1766 0 R ] +/Parent 2115 0 R +/Annots [ 2135 0 R ] >> -% 1765 0 obj +% 2152 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [196.011 499.949 202.985 512.009] -/A << /S /GoTo /D (listing.5) >> +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133929+01'00') +/ModDate (D:20140329133929+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 1766 0 obj +% 2153 0 obj +<< +/Type /ExtGState +/OPM 1 +>> +% 2135 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [196.625 344.532 203.599 356.591] -/A << /S /GoTo /D (listing.6) >> ->> -% 1770 0 obj -<< -/D [1768 0 R /XYZ 149.705 753.953 null] ->> -% 493 0 obj -<< -/D [1768 0 R /XYZ 150.705 716.092 null] +/Rect [170.554 324.049 177.543 336.109] +/A << /S /GoTo /D (algocf.1) >> >> -% 1767 0 obj +% 2148 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [2146 0 R /XYZ 149.705 753.953 null] >> -% 1774 0 obj +% 2143 0 obj << -/Type /Page -/Contents 1775 0 R -/Resources 1773 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/D [2146 0 R /XYZ 150.705 716.092 null] >> -% 1776 0 obj +% 2149 0 obj << -/D [1774 0 R /XYZ 98.895 753.953 null] +/D [2146 0 R /XYZ 397.506 407.446 null] >> -% 1771 0 obj +% 2150 0 obj << -/D [1774 0 R /XYZ 99.895 411.235 null] 
+/D [2146 0 R /XYZ 150.705 260.219 null] >> -% 1772 0 obj +% 2151 0 obj << -/D [1774 0 R /XYZ 99.895 182.902 null] +/D [2146 0 R /XYZ 150.705 226.207 null] >> -% 1773 0 obj +% 2145 0 obj << -/Font << /F59 812 0 R /F112 1682 0 R /F54 586 0 R >> +/Font << /F84 687 0 R /F78 686 0 R /F192 942 0 R /F145 940 0 R >> +/XObject << /Im6 2131 0 R >> /ProcSet [ /PDF /Text ] >> -% 1778 0 obj +% 2157 0 obj << /Type /Page -/Contents 1779 0 R -/Resources 1777 0 R +/Contents 2158 0 R +/Resources 2156 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/Parent 2115 0 R +/Annots [ 2134 0 R ] >> -% 1780 0 obj +% 2169 0 obj << -/D [1778 0 R /XYZ 149.705 753.953 null] +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133928+01'00') +/ModDate (D:20140329133928+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 497 0 obj +% 2170 0 obj << -/D [1778 0 R /XYZ 150.705 716.092 null] +/Type /ExtGState +/OPM 1 >> -% 1781 0 obj + +endstream +endobj +2171 0 obj << -/D [1778 0 R /XYZ 150.705 690.058 null] +/Subtype /Image +/ColorSpace /DeviceGray +/Width 510 +/Height 227 +/BitsPerComponent 1 +/Interpolate true +/Filter /CCITTFaxDecode +/DecodeParms << +/K -1 +/Columns 510 >> -% 1782 0 obj +/Length 48 +>> +stream +ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ;Y¯ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿà +endstream +endobj +2176 0 obj << -/D [1778 0 R /XYZ 150.705 693.143 null] +/Filter /FlateDecode +/Length 171 >> -% 1777 0 obj +stream +xœ]Mƒ F÷œ‚àOÛĸ±mš¶Àa0,‚¸èí; vфߛ|ˆ~¸ÖD.ÁÁ #ׯª€‹[ q2–•WâNy‡Yz&ú›ôïGNÔßåŒâYžóM¹9à.^i'dmQt­ÖC«þžªMõž¬)YŸ:2èDØ^26Y?‚iRªt4à°†€6æÞ¹Wêc,þ¾æO§Å¾=+V +endstream +endobj +2178 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Filter /FlateDecode +/Length 191 >> -% 1784 0 obj +stream +xœ]= …{NÁ Ñ™ahbc¡ã¨ °d( !…·w!‰…ÅÛáƒ}ûC3\/Wï2m)èdj7 –°& t„ÉyÂ[jœÎ;Õ¨gI3ÜT|"PL»ñ]ÍÐ> - +stream +xœÍzy|TU²pÕ9÷ö’îNwÖî$$Ý›ÎÖ Y€Ä–ܬ, $,B‚b:@@0$(&¸¡ Oq‚‹C§#ØŸDŸ:ƒ:Ž3nÏ‘8â¸Ìd†qÐGHuo”ï7¿÷{ïû}¼{RUçTÕÙêÔ©sN ˜ 8ÔÕÎÏÉõË<.xvC +œÇIð + Ã,Ø¥P;a:œ†#ðM@‚ +8.´ƒ*°¢Ãp¬ƒÏ`Ò¡>ÆHj§Ú!Š‚_®†{ƒÇI+ 
Êág0„«q>äP~ËB7õÜ+¤|ŸJOÁg˜€”ûD@tÿA$¬‚7‚ B3ìÇMø%8Á Û„¡7x\Gá·XM¹Ù°A|_VS­gЊÃÁ³ÁÏá%¡…Zºî¥ûa˜Mäåb?8 ®…9ÐDÒÛàŒÂI\¦Ë‚w?|ÍÜìu®¥q¸a&4Âý°‡¬ñ.œƒoЀ…ø¤ô6þY|ŸÆV ]°‘üê)²Þ~8ÇqNbVf%kY!®#Yì¥þá VcãË|¯˜;VŒÆ?!êi„»áeêãæ’õÀ“y§$tŠy—¶Ð —Á“pÞ¦q|Lvÿþ™”>a·³îà¢ààg4Øa*Ì…ÅÐëáxšVõxþŠß3=iž^7Šçƒ’mS¡ŒÆ^KÚó©ím´J~Pz—fšÅTœƒópöá#Àð¦aN¶–}Å}üMþ‘0YƒÅÔR,$Q¿,‚VZÛÉÚÒ|Àkp +c0³iFïRýoÙ5¬‚Ò3ì4û˜ßÍû„‹â=c#cû>Ø Zò²éd‡.xެðŒ¥1dà*ìÀOiä;Øó<œ[¸Ä y)_Àø½|'ÿÿ•°N8(|(Λăڦ±›ÇÞVï"[ hh\i0…üg9yÓM4¾vJë`l^x€üåA臃4ï“p +~ ¿ƒ?Ñ +:iÌ+©÷5äuwã”ÇCø2¾†§ðüVI,™R:›ÌJX9«b+ØÝ”v²3ì]öŸÀ—ònÞCi?Æ?@„ ˜Gi†¸MܯyS›®¡mÖ½uqôR楆KÁXüØõcŒ½<öypapßÙ0‘Fº•Fù8ùà^JÏ‘'ƒ×á-xOë×ÈP$·¡DÞE«V‚Óq&¥Ù8—Òu”ábJMØŒ­”º±ïÀ;ñ.¼VÓc4·½ø,£ôQú-žÅ?àWø5#'fœ¼ÙÅÒX+¢™–³é¬–Í£´‚µQjgëØzZ¡ýlgïò(îâÙ¼‰¯åóŸñWøoøw²„Á#,Vw +§…·…÷…ïE»X)¶Š»ÄW4 šÍušUšÇ4G4_h.j5Ú:m³v“ö7Ú ÎEÑêç4ï£ðã/Gs;ÄháVv–ö…·‹[ñ:²˜†-à«ùü×âr<Ïø!öò•ü¦à3¼Šýƒ·áBv“¹],æËa;ñ û„]`Ÿ 1¸€}‰éÂ¿á ¬—3Ò‰øŽ#Ü)~ÀÞƒb¶‡ÙküN~gðß¡XÜ…gÅ]ìmp#, +ÎÒ®ÞÊ¥J¿b+Ù6¨ +Äïa%ÙýYñV²÷4v/fòß»à3.±¿áy|„¢Æ/q–ÂndEx"î%L‚Q\ íø0Èx‡@<À÷c 3Òjù˜ §Ð1öKîÄßð0hPƈ©,ëØyvQs†"R”ø5lD޹ä;—¿1¸™vÀN–F1­’¢É;˜6x”âý…±•ˆ-¾/n#?Ûó`äÂö&ÓÞøŒR=Üy0D>x/ä²Ç`S°—QÜŸMñ“AWA(ZZilÝt^IJdŠ…Ôë?(þ¿AQ¿ÿ · ƒvÖ0¤ Šd»PI‘ÉKñw¥e°„JOƒš£â;P‹VÁ1¶‹¼ü#¸‘ΜO©ÿxðÐøÃ!‹Fí È¼–j<96dJ÷À›È`3yíó:aEÞG‚«h†+錪¡3ñ¬ > +å´vó‚w·AcpOðXóƒ(þ®úa2lØBÑ-PŒ=…¯ÒyôŸ¸âö øâ‘ mð¥ŸÑˆ¦‰' WxbgIp{ð·CöH& 5Ó)zÖÀŸÉn3ø0äÍaÁ*ÞN'ÔY˜Ü´c´WSä}öjEŠ==$î•e¹dÚµžkŠ‹¦N™\XŸ7)7gbv–;3#=-Õ•"%;ö¤Ä ñq6kltTd„Ån2Âô:­F8CȪ”ª¼_ª×'¤J3fd+e©‰M?bx}bU]­ãsxU5ÇÕš2i.ÿ¿4å¦|E-x²³•’Ã÷Ë +ÉÀÅsë)…ÔàðªùÙj~‡š7QÞé¤ +ŽJ[k…Ç^G¥¯j}ko¥·‚š0„•Kå-aÙY0f ¬r>«Ô>€Öi¨f˜µ²x€ÎDƒòÅK•¾8©B»*›–ùêæÖWV$8 ÙY>,_*5û@*ó™Ýª +”«Ýø4å>­Úc¥2ØæÈîݰ@³×m\&-kº¡ÞÇ›”>"ÜÔo…Ϻñœí‡"5Y^¿õÇÒÞ[i[éPн½[¾þ¹õ?–:ÜÐ@mø˜«ÊÛ[Eo'VÏwP_ìî†zÞM:”y(s +Í®EªT8ÞUŸ^*“Z{Wyiaâ{}0oƒÓ/Ž@|¥£wA½äô•$H M¢¡wÞ†Á8Ùwµ$;kÀ2ë@¸y·Û—™©8ˆ¶œV”Æ8M-fg­0ŸÔnq!óAÙ¶©¡8‡Œït*Ë»- C3|=sëCe4'øAÎq7ø˜W‘ _–Ä\§Hz.K®T÷JäÇσò²ˆñéR¯ü™-±Q•­Å>Œý/Ä-!yõ|©zîâzGe¯wÜ¶Õ ®*…äS¯ÈÆsÁ}‚‹,5S"×›·¸^aП誒*WzgÐV£1ú¢ÊëykåXW›"ÿ½áJËJ¡Þ¨´%¸4ªÿ/ huäÀ*U>‹wF7„9ÿÍJày¥–J~¨6>'_±ûêò5W•¯ž±—Ó€…TV½`qooØU²* +V½½U’£ª×ÛÛö4K‹Ô{œ×óúÞöJïå凶%øª¶7Ð$Z±8[¹#‚Ö9V ‹,ð}çØC0Bpž@€6¾º úŽ`&l'È!¨U8ü ?HãÜKõÍ„sÚúXÀŸ#þM 
+æø*:í|;=Lcˆnã©ô§Dã‰>M|z>ð=TVèîñòDùOÆùS9–ècãôQâ'}D}ðÚùÃãåõ¼K­×9Nûy‡?Én)M"¹ƒ —€Sn'åv’évR #]ìV«= Í#º&DÉ\›ýNI]£ÍƒÖ¸¼~2éf2ýf²Üf²ÜfH´é²Î¦N6ßD:›Hgél"«äòꯃ [œìÞAvWø>ÂÃgTþ]„wô+%~ Ù1ƒFu_åO·““­,’óJNÐ=©Ùåƒq‰y}?”ôaŠ# §fE·E•¶ ê +·e0>1DIë¦Òp¾n#`M8… € ‚@àKý)9ö!>Öè@·w³nÞ-t‹BnFžäyP§rÉHž RȰ7zpŠWß®ïÑs‹Þ¡ÏÕËú:½ØFO¢>Îí<‡—ðZÞÈÅ@pد-Î'"O×çï0ô|†aÃèÓ kÎhF4ç5¢C“«‘5u¯¦]Ó£Ù¡é×èwhvh™×Ðnè1p‹ÁaÈ5Ȇ:ƒh×béݼYÙ´„-í;²q#ñüF‚FZF2ÅÄÂ@% ÁÊ©d&=3陉k&®™¸@X‘Ôx ÚÇ¥š+’ËuýóŠ„ ¤áÄ 'ÛŽ>¯äfQÉD%•L¤u†]¤Z;ê¸Ê! ¯!|Y–;.÷hTùyUç²LV겋rSÚpú2°?wd ì))Í““ EFF6J®ÆôƽB›ÔæjKoÛ+ÔJµ®ÚôÚ½B‰Tâ*I/Ù+äH9®œôœ½‚]²»ìéö½B_Í‘š“5§k„Æš¶šî>…–nÐïÎÍSi²K¡GýqñyS̥ײ#4F» Îp0¶ä”´ˆìˆÊ=LÜÃÄ= µ"Õ:¬„Âöq™Âß­Ê”œ"gWÉ9Mþ¿8¿¶´†Ân#ÁnNm"ù!U;”;¢ò}„GT~í¸~¿ÊW´ì—ë)Ap±îÓ6\ %í"œæ‹à,µNØNÐNp„@à‹)-â‹ØaJ‡Ø!ž%›&ÅØ!6–N–È¥ÔÂŒä &< âÇT|ŸŠKTœ"‡Ï2};ËôÒ,Ó=³Li”aét™p§Š²¡Ôô|©©¶Ô”Qj¢Ö¬à‹Q±FÁøGÏQq–í4}ç4ýÍiú«Óô”Ó´ÖiºÖ©Ô›@{ØÄ¢UlP0=RÐ@%»7€° žTaÝ <îŽbÎÝ÷'(tÓÝ÷74`µox)T7;|ßΧy„Ñ%U”Êl»¾ÄV9-¢¨ªâ_ ï8vÿðÙÜ?þl‰¾Gªç×ûžKlðå)™`bCµoºò,<ÎÖ²¶ÊŠã¬]! õÇq#[[9OáãÆŠ†+jÌÚI < +QÔ!YQƒdTÕjT5rÓäÊŠääÒ+8SQ"÷yEUZj+…º ¶êBj, RÔ¶RX’¢FþjÌüãÆŒ€fµ1³ÔÆ&(J.©d¹•).RpMQÅK®ÐpÀ¥öãµÄtÒC:äã:LG:îÿŸ_KÙÿ@›>Z¶Tyœ{¥Ê¯oÛúV›¯§ÙáXöÑø«=ÕÛ¼´U¡M-¾¤– +ß2©Â1дô_ˆ—*â&©b–V.¨X*·Tø›ä¦J©©¢ap_wyõU}Ýw¥¯òîÑX·ÒX¹Ò×¾ê!®VÄû”¾ª•¾ª•¾öÉûÔ¾ªç•au]ý€Êèu§ÒAf£ýàMp6”ÅZÚ§©›ã§íö„!èØ2ÐËØ(•ùLŠ(»4»TÑîTDáÊÏ/ã"Ûí×8†ðÀ¸ÈBì© Ü`«\Yq寣££S®.7áÎ.›Êë¤Mëœ_í«R‹Ÿ§Ò'{+PYŽ®ñ¯¼^¶œôœö°6O·§Ï³ÛsÄ#vu5;òdòéd֘ܖÜÜ—¼;ùH²FÜPLöìNþK2ï"oÂNú*+Ô>»ˆÒŸRììêP> :Bݹ»Üåõ¥É°”n½H7ôlˆ"ò æˆð„ß!ø”àoÜIø!‚gÏæÙôt¯Pzlp+AÇÆós ó¦ˆ6-Ñù‹C´rNˆzJólDý%ùa¥fº€# ~ƒàC‚¯þI ò<ž§6ÞòÚ†èp# ¨Ð© w'º)ƒŠ¹;;ÜnP@qpZRuãÕ~ØÑd +Z"¤¤r;”j] +½¢Ç”‡³¨Þ™A ³ž`/ÑeUËNúAì¥ç9„i•ÌQ„8F|Qá{pÃjð?O]jÄVz&>*GË6¯­ß6bÀ&ÛØz¸Xxi®¤›ŠûéiÊÕ¼ŽòUþ½®WB,q¿–é 63=CQ¯32CøwRŸ)G†‡›åˆÂ\s·y‡¹ß,˜ã¬C,Ïшi°KÜžÙ–ÑseÄ%žˆÈ"Œ(‚oF/â7n÷¤\X‚k—D¹ò#¢cc­1ÎÂi¬0¢ -5UJÖžÇYÎ(Ï cÌ;56LëŠw• ?ßóýÖuS“˜ËÅ'mdíÌt$Ù•9fÑÒ“°J¾Ck3Ym®-°É„âdNŠÍÐz´3µÏj5²ãza±îzëbÛMºÎˆÎÈ' O…?qÈp(ü”xÊú ÛÖl#Žï„ï¬1tâÄ„˜¸Ø8k¢M«·l†Ä‚¸éq÷YûZ[cÖø8cœÆÄ㘨±Ycc¢µQ‚)€­²^/GKzô¨ð|ÙhãûâpwÜ‘87ÄóÉp÷"3&ð~Ùšß×F5FµEuG QÔÊQ2M*²£ÇÁ½Ž~sÄÀïÈwL(ËѬu³>v’fgÙ_˜ŽÅÙ‡ðºH…L>ûœgtŽeÉÚo—̾°dÔ2J¦½´d­§äÒZ÷¹ˆHk‘º ‘EEÌîÙj7¿þê¤\\»n ­‡â­näÎB€ÂZVš<9?O™šF˴μɓ§ðƒG° 
»n^¶;Õwú‰½¿Ëµï»iؼzQU<Šcß»° {v˾®µÇ_ÿÍŽ+ž>:v~ªeR¶ê“óƒ_ð…´^yXs‚#~c‘^yõzŒE¥úʰ*Cu²pZS3äoÁé‚‘‚¿‡i¡KõÝÒÆ‰Ï¥OšxjâYé¬ë?'~•ü¥Ë8S—Àíƒéé°sƒgr17À ŽrÑ‹±Ü}4Qvç$ÒõsÐbÊH?­ zö)=”ÉÆl‡jcZ©AŸÜAüìžl¶#»?›eÿh£¶›f`ŸÉarö °à´䨓Q,*.“ð‹+  ZtÉÚ +:G{JFÝ£ëJF—ŒFåŒNÊ-ß Ož˜“”f4ÉNÉ™ât9è +OM s4cŽÝŒIfÊ9 iͦŸ¨ÉmF»)±™¢‘Å3‡2·Ð§î¡u°ÖíŽRJuÂXu¹œÉ©…*+Ö›Ÿ7¹¶–²¹$eŸ)k«m-¸ë™EeC›{Úûã}Ksœqñ·Z]™Ë•âíîGæ8jwÏØâ}¢U˜ußëjïÜ5éØm¾-*Ò³tb‰Æ°kumõÔÄôÒ¤°ïª]ѽOù•Ã&Zß^Zß8H…|Ü(5 êóíù™imù“{ =Æžøž„;\=©½ùÏÚöÆïw Ÿ!õDÚka¯Þ3Åj! 5&¯O‹5Yã]&Wx5nÇ;Mw‡? á×@1Òýg¦7âõi7䯂U¸’­H]•ÖšnJ[Ÿµ)¿Oè{´=º;"îˆì‹î‹}LxD·3â‘È'b÷¥N;œŽé¾4|eü2üË´/ó2´&}Z1áÔ<±BÆø4AE«ƒÞ~˜­(Sb©ž®yz”UÈ¥¼—Ë(” ™\è-ì/) +¥IÀqd¢Y˵ÊÖVn+Â?ã——=dô¨â#£ç.Ðö-U¶%Z‹TÇÈsç$%GÄ +º—S”šÁ®MlƬèÌf˜I^,[$é¹c³›!'‚¨·0Õ)Ÿ ú[‡©! wÐhc×ÐHÉi +Ï5yrh{ÇDÇZ£4 +÷¼oÏ’·žýé/VôÕ|8ðòê…pÒ­òúåË{ +'Mž_wÿšÕw¤Ngïê_x×Iÿºš]7Ý;gùÚ¾774u,xwõæÚ•·¬¯-hÍû¼j¯wËÍ(Z¥üCŽqœ|"Œ^ÆïÉé±&2R¥I6sÙŒ™FŒÑ"Ó ×‹ŒF“ 1š(–N#µºh­V§ã‚VcÔÝ„¦ø$žÜ-›DÔèuNŒFáÝé9_Ëeƒ^o減áŒðï² KÔ jF/R#fnÖÈZÔÆ…ÿ(r®õ¨ûÖCa“²°(çlIQŽÅCiÔri'¢(Bœ['º…Í–W•¬Ùl¦slÝ\²vÆHR„³ó‰ ?~lï¥WX×Í{ÇRðÂc?Áå=üŽ‹ÛÙžK¡_ÓçÏñd+¤¡Qο-íñ½äÒ„Vaƒ¸Y·Q‹ñVÓ†¨[ÛtwF…éu}ì˜fs¦ÙDžä@+ÑãÛ†òóiu4Šn²>ÇÕæ¢’— ‘ÂâóV+˜lCä‘ñh~"-‘ŽHÀ92䌞 .gx3ú3F2„ "Ïv’šv2Œ…Å¥_×FCíÒ9:R,äµË–Œ®³\ ÷-R=X9UTÎLHÑES-® ©RªÝäl†Ds|3¦è(ç0$5cB¡d½+ä»—ïU!×]b-œ<9r +ù*¹ê”ñ Æ´ § y­ÑVß1òvÆSÝ}o-¿íõý·<øñë{^bù‘ef7ÜÓPÚ8ñö .Ö…)GZ~÷‚Û³½¿ÿý؆-«Øñ;æ4}rkÿ®wnY˜¥®Ã,ŠW/Ð:X †üM:u׋bŒBL¦øíãH}<¤Ê©LNõ¦ö§Ž¤ +© +;¼Ú ú Ÿîvq.ÅXWNáÐ<;´¿U£Ô`Š”’œÂ4 9¹»Ö5!!1!)k¢RÍ.Cª-ÎÇ4N!‚¶¼†ŒN¹X£b6¤Ýž lùHKL3Ä…Å4ÿ°ã3UÈÌÜU2VD4S÷ù‹j¥)“#ÔÈO_Ãfmï\ì}rÓ÷¾ÓüÊ–5¯V­Ü™417¥(£¸¢pFÛõÖÎ+ÝýÚØ‘?{ø³—ÿ>öÅÀÃMëaÑOtä:¯?ö¤z†×ïîkÀ‡ƒŒJ˜&ª¿HP&N‡"ÏÔ—lòšúMoà)ö>¾ÏFL†x@‚I6q& +B’ã9‹æœ Ü$ÊÓ Åߣ†ˆæ÷Êm6€ë7 !Î(±/€³Ïe#Aê„~A^dã¸É-tÞžSí~A¹ï¸-£îÏVq¢{køæË·›ud¯(g Jè$ShÓ~ÅÞó´ãÃcÛÖæ.ÈOkRÿù’ðZÂD¯!ôï]øÞÃÖó!Lø•Ø*~xåßÀ€®\ý'ñøî8½tÂÿCÝÿ•Ÿ*ûœ/ + ç*XùIаVý9Š+O Pþož¡™#D*ÿ­ŠrˆX°rMKÇœ–[æµ­iº¹nþìêX¸c¼ÞçÓ]]<çƒW1ÆÿUSS„EœóaOsaÔ„Öõ¿ø”úìí? øŽ 5š=ßèâB>ýiâ+ +=zí |ßyi»¥XÊ¿–Ž÷÷ +“žÑ +endstream +endobj +2180 0 obj +<< +/Filter /FlateDecode +/Length1 3512 +/Length 2264 +>> +stream +xœÍW{l[Wÿν~Û±ïÃ×vìÚ¹×7±Û\;îüHâÒG–¦éÒ”.khewi×4MÓŽf ]ºiÝØXe ¬PU“@BC“àƹN¶!Ĩ!e â! 
¡b­T›–„ï\»i7!þã^ùœóûÎwÎ÷ø}÷žk Ðg‡±ûÇsy°.ßulöOÍNÎ5±W ¯MÍŸV½²ïü/››™ ,澉s'¸é™“OkêÛÏàïåãÓ“GyÏW.¡³(ì=ŽçŸà¯"î<>{úLS¿m›u'šlÙ°f'ÏÌ‘o WPÿ_ˆÕG&g§[þ½MlîÑÇN·Öfós§¦çj“Ñ=¶útÍËÜ%ÈHà»6ód{`Þˆfàx ʆ$ÄàØ 2A…mp/l€O 8GAÚá0<9‚N8ypÀN8_€sPƒ ð9˜ƒÇá‡OcfŸ‚çàyð‡B†‚xêÝŒÅÎ6mÍçÌ^gƒ†s&8õ0xŒ«nƒ–rfÁÖ ´?GóK‹½"P»WÀGÛXŒZ¨M2íhµ£‚™%†™7<é¬ËXt 0 Ò‚D9lP^0ûpí=ÖZ³ŒROV”h¸LÝâø;»;q˜–ꮀX.—iP¤Î2˜áNQ2yw¹¼ñ’)A?q:œzq«½¯·TLéI‡D©CO{å|s”*{‰¨‰vMÔˆ¶ûd2Ù•ˆ¼Ë°…¸®mÙ®M©°´ñàîÚ %¤kù:ùËÊRç^Ý7Ü9šËÕ¤”öæûvæ‡S÷FQ +?5¾ü-ŽÇC•ûü?ãKÝË +VsϤËÜ7 +qØusKIaQrBS| ŸÏ×CœBõ2ñ»¼;U3Œ´¸´àÄÁÌ´‚bÝ_ψtKÔÇ,öðHL!*äû +a½‡4É 'H“5„N=]HB¾—<9xèààÌp÷OÔ¡„Ú#S›{6 Ý?U™ßŸŸñyJ„»Žlïz17RìÛ_ÞýP2ù›Ê'ŠÖß—)Ílýø‰¤ì¿úàþm-~.àé¬B7|êa–a1bñc¶)È~•`éºÖ`±ÃšY´0 [‚Ù2r4¶dJþÕòu)Æ2&…ÜF=&±a,Œ@GžJr†i7Ì öV®€•Kí" `j„¦¦½L5‘ºY¥S»UÔ N ::æ!¬è¼“¥DÑX-ˆÅTº$³¢°êœèÆžñ<å¹+JWü•ýꦕwûãñþï=ñ©ê¡„Â]òë»J/hd¹=ÈɤÈý¸WµÉ²MßJÌR‡C^ù!·y|x{@Áºð`N0'ìË¢îeoŸ4u7K•BaQi0‰èXÀ„våh`ÉtøuG€íð`ü¼nÃLaÄŽ€UÌ`*¾VUÅOd-¤-$ÝÃ¥Kš‚Äwë N—¸…t¿q±sûÊ•!í³ùb2J";Å‹_ò'ÒÜ%™Ï¾s¹ÜÁÉ2¯ÝûÚ[#Û‚D–íírûŸÞ û%?V9;L#©uÉi1êGE§Å¨“E´gŒ2#òÚq aD}ÈW!ßäFÃèñÖ+çcJÄå÷¥Ô-¤cKGå£?~2ªFÉ/û:}+çͬWƒ‰æÙHa…œ%g9wŠû9b®uz±YPQü9à®7…µ3ùÖf6Îêìwßûÿâ²ÁE«µ±HnVWW±UY‹ØÿEîz}ô«åvªnûÀÝì­UJ9KÛ 0|jzz|úÔ‰cÖ~ä¥ÿ!¿®Ûpsõ?ùÇU Éý +œˆÿ0ÞGüU<äY|žï–æÓ×Ê7Lú7 +lþ'÷uþoL|E÷¾r«Ço¸ß9®ñߺӣú¾Á( endstream endobj -1793 0 obj +2187 0 obj << -/Length 1526 +/Length 8640 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(8.3)-1000(psb)]TJ +/F84 9.9626 Tf 1.002 0 0 1 150.705 706.129 Tm [(number)-250(of)-250(nonzer)18(os)-251(we)-250(obtain)-250(essentially)-250(the)-250(JAgged)-250(Diagonals)-251(format.)-311(If)-250(the)]TJ 1.02 0 0 1 150.705 694.174 Tm [(r)18(ows)-295(ar)18(e)-294(not)-294(in)-295(the)-294(original)-294(or)18(der)72(,)-306(then)-294(an)-295(additional)-294(vector)]TJ/F78 9.9626 Tf 1.02 0 0 1 423.988 694.174 Tm [(rIdx)]TJ/F84 9.9626 Tf 1.02 0 0 1 444.477 694.174 Tm [(is)-294(r)17(equir)18(ed,)]TJ 1 0 0 1 150.705 682.219 Tm [(storing)-250(the)-250(actual)-250(r)18(ow)-250(index)-250(for)-250(each)-250(r)18(ow)-250(in)-250(the)-250(data)-250(str)8(uctur)18(e.)]TJ 0.981 0 0 1 165.649 
670.198 Tm [(The)-255(multiple)-255(ELLP)94(ACK-like)-255(buf)18(fers)-255(ar)18(e)-255(stac)1(ked)-255(together)-255(inside)-255(a)-255(single,)-255(one)]TJ 0.993 0 0 1 150.705 658.242 Tm [(dimensional)-252(array;)-252(an)-251(additional)-252(vector)]TJ/F78 9.9626 Tf 0.993 0 0 1 326.015 658.242 Tm [(hackOffsets)]TJ/F84 9.9626 Tf 0.993 0 0 1 374.299 658.242 Tm [(is)-252(pr)18(ovided)-251(to)-252(keep)-252(track)-252(of)]TJ 1.009 0 0 1 150.705 646.287 Tm [(the)-248(individual)-248(submatrices.)-308(All)-248(hacks)-249(have)-248(the)-248(same)-248(number)-248(of)-248(r)18(ows)]TJ/F78 9.9626 Tf 1.009 0 0 1 458.348 646.287 Tm [(hackSize)]TJ/F84 9.9626 Tf 1.009 0 0 1 493.148 646.287 Tm [(;)]TJ 0.985 0 0 1 150.705 634.332 Tm [(hence,)-255(the)]TJ/F78 9.9626 Tf 0.985 0 0 1 196.944 634.332 Tm [(hackOffsets)]TJ/F84 9.9626 Tf 0.985 0 0 1 244.865 634.332 Tm [(vector)-254(is)-255(an)-254(array)-255(of)]TJ/F192 10.3811 Tf 1 0 0 1 333.788 634.332 Tm [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(m)]TJ/F84 9.9626 Tf 8 0 Td [(/)]TJ/F78 9.9626 Tf 6.336 0 Td [(h)-40(a)-25(c)-25(k)-30(S)-18(i)-32(z)-25(e)]TJ/F192 10.3811 Tf 36.682 0 Td [(\051)-210(+)]TJ/F84 9.9626 Tf 0.985 0 0 1 405.427 634.332 Tm [(1)-254(elements,)-255(each)-254(one)]TJ 0.985 0 0 1 150.406 622.377 Tm [(pointing)-253(to)-253(the)-253(\002rst)-253(index)-253(of)-253(a)-253(submatrix)-254(ins)1(ide)-254(the)-253(stacked)]TJ/F78 9.9626 Tf 0.985 0 0 1 408.705 622.377 Tm [(cM)]TJ/F84 9.9626 Tf 0.985 0 0 1 421.963 622.377 Tm [(/)]TJ/F78 9.9626 Tf 0.985 0 0 1 427.91 622.377 Tm [(rP)]TJ/F84 9.9626 Tf 0.985 0 0 1 440.206 622.377 Tm [(buf)18(fers,)-253(plus)]TJ 1.02 0 0 1 150.705 610.422 Tm [(an)-269(additional)-269(element)-268(pointing)-269(past)-269(the)-269(end)-269(of)-268(the)-269(last)-269(block,)-275(wher)18(e)-269(the)-269(next)]TJ 1.006 0 0 1 150.705 598.467 Tm [(one)-248(would)-249(begin.)-308(W)92(e)-249(thus)-248(have)-249(the)-248(pr)18(operty)-249(that)-248(the)-249(el)1(ements)-249(of)-248(the)]TJ/F78 9.9626 Tf 1 0 0 1 
456.901 598.467 Tm [(k)]TJ/F84 9.9626 Tf 1.006 0 0 1 461.498 598.467 Tm [(-th)]TJ/F78 9.9626 Tf 1.006 0 0 1 476.426 598.467 Tm [(hack)]TJ/F84 9.9626 Tf 0.995 0 0 1 150.705 586.511 Tm [(ar)18(e)-252(stor)18(ed)-251(between)]TJ/F145 9.9626 Tf 1 0 0 1 236.017 586.511 Tm [(hackOffsets[k])]TJ/F84 9.9626 Tf 0.995 0 0 1 311.738 586.511 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 331.016 586.511 Tm [(hackOffsets[k+1])]TJ/F84 9.9626 Tf 0.995 0 0 1 414.702 586.511 Tm [(,)-252(similarly)-252(to)-252(what)]TJ 1 0 0 1 150.705 574.556 Tm [(happens)-250(in)-250(the)-250(CSR)-250(format.)]TJ +0 g 0 G ET +1 0 0 1 197.579 452.455 cm q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(set)]TJ -ET +.50096 0 0 .50096 0 0 cm q -1 0 0 1 217.809 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 0 0 cm +/Im8 Do Q -BT -/F51 11.9552 Tf 221.396 706.129 Td [(errverbosity)-190(\227)-190(Sets)-190(the)-190(verbosity)-190(of)-190(error)-190(messages)]TJ/F54 9.9626 Tf -68.95 -24.942 Td [(c)-175(a)-175(l)-174(l)-921(p)-147(s)-146(b)]TJ -ET -q -1 0 0 1 202.736 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G +1 0 0 1 -197.579 -452.455 cm BT -/F54 9.9626 Tf 207.181 681.187 Td [(s)-146(e)-146(t)]TJ +/F84 9.9626 Tf 185.456 430.537 Td [(Figur)18(e)-250(7:)-310(Hacked)-250(ELLP)92(ACK)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ +0 g 0 G +0 g 0 G +0 g 0 G + 0.982 0 0 1 165.649 406.495 Tm [(W)56(ith)-254(this)-254(data)-254(str)8(uctur)19(e)-254(a)-254(very)-254(long)-254(r)18(ow)-254(only)-254(af)19(f)-1(ects)-254(one)-254(hack,)-254(and)-254(ther)19(efor)18(e)]TJ 1 0 0 1 150.705 394.54 Tm [(the)-250(additional)-250(memory)-250(is)-250(limited)-250(to)-250(the)-250(hack)-250(in)-250(which)-250(the)-250(r)18(ow)-250(appears.)]TJ 14.944 -12.021 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F145 9.9626 Tf 110.952 0 Td [(psb_T_hll_sparse_mat)]TJ/F84 9.9626 Tf 104.607 0 Td 
[(:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 224.391 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 150.705 232.948 cm +0 0 343.711 137.484 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F54 9.9626 Tf 228.836 681.187 Td [(e)-146(r)-146(r)-146(v)-147(e)-146(r)-146(b)-146(o)-146(s)-146(i)-146(t)-147(y)-279(\050)-151(v)-151(\051)]TJ +/F233 8.9664 Tf 163.108 359.772 Td [(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ 0 g 0 G + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 9.9626 Tf -78.131 -27.895 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(psb_d_hll_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 9.415 -10.959 Td [(!)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(v)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.958 Td [(!)-525(HLL)-525(format.)-525(\050Hacked)-525(ELL\051)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(verbosity)-250(level)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -475.183 Td [(140)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ 0 g 0 G -ET - -endstream -endobj -1799 0 obj -<< -/Length 2016 ->> -stream +0.73 0.73 0.73 
rg 0.73 0.73 0.73 RG 0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F233 8.9664 Tf 0 -10.959 Td [(integer)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(8.4)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(set)]TJ -ET -q -1 0 0 1 166.999 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 170.586 706.129 Td [(erraction)-223(\227)-223(Set)-222(the)-223(type)-223(of)-223(action)-223(to)-222(be)-223(taken)-223(upon)]TJ -43.792 -13.948 Td [(error)-250(condition)]TJ/F54 9.9626 Tf -25.157 -24.941 Td [(c)-175(a)-175(l)-174(l)-926(p)-151(s)-151(b)]TJ -ET -q -1 0 0 1 152.113 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 156.605 667.24 Td [(s)-151(e)-151(t)]TJ -ET -q -1 0 0 1 173.955 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 178.447 667.24 Td [(e)-151(r)-151(r)-150(a)-151(c)-151(t)-151(i)-151(o)-151(n)-284(\050)-296(e)-163(r)-162(r)]TJ -ET -q -1 0 0 1 257.102 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 261.712 667.24 Td [(a)-163(c)-162(t)-296(\051)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(hksz)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ +0 g 0 G + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(irn\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(ja\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(idiag\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 18.829 -10.959 Td 
[(hkoffs\050:\051)]TJ/F148 5.1905 Tf -15.277 0 Td [(,)]TJ/F190 5.1905 Tf 0.61 0 Td [(!)]TJ 0 g 0 G -/F51 9.9626 Tf -161.817 -27.896 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F233 8.9664 Tf -4.162 -10.959 Td [(real)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - 0 -19.925 Td [(err)]TJ -ET -q -1 0 0 1 113.225 599.693 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 116.214 599.494 Td [(act)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(val\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -21.918 Td [(contains)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 4.708 -10.959 Td [(....)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -4.708 -10.959 Td [(end)-525(type)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +/F75 9.9626 Tf -12.403 -39.966 Td [(Diagonal)-250(storage)]TJ/F84 9.9626 Tf 1.012 0 0 1 150.396 180.167 Tm [(The)-248(DIAgonal)-248(\050DIA\051)-247(format)-248(\050shown)-248(in)-248(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-248(8)]TJ +0 g 0 G + [(\051)-248(has)-248(a)-247(2-dimensional)-248(array)]TJ/F145 9.9626 Tf 1 0 0 1 483.955 180.167 Tm [(AS)]TJ/F84 9.9626 Tf 0.98 0 0 1 150.705 168.212 Tm [(containing)-223(in)-222(each)-223(column)-223(the)-222(coef)18(\002cients)-223(along)-223(a)-222(diagonal)-223(of)-223(the)-222(matrix,)-229(and)-223(an)]TJ 0.98 0 0 1 150.705 156.257 Tm [(integer)-254(array)]TJ/F145 9.9626 Tf 1 0 0 1 208.91 156.257 Tm [(OFFSET)]TJ/F84 9.9626 Tf 0.98 0 0 1 242.771 156.257 Tm 
[(that)-254(determines)-254(wher)19(e)-254(each)-254(diagonal)-254(starts.)-316(The)-253(diagonals)]TJ 1 0 0 1 150.705 144.302 Tm [(in)]TJ/F145 9.9626 Tf 11.188 0 Td [(AS)]TJ/F84 9.9626 Tf 12.951 0 Td [(ar)18(e)-250(padded)-250(with)-250(zer)18(os)-250(as)-250(necessary)111(.)]TJ 1.01 0 0 1 165.649 132.281 Tm [(The)-248(code)-248(to)-249(compute)-248(the)-248(matrix-vector)-248(pr)17(oduct)]TJ/F78 9.9626 Tf 1 0 0 1 378.488 132.281 Tm [(y)]TJ/F192 10.3811 Tf 8.011 0 Td [(=)]TJ/F78 9.9626 Tf 11.598 0 Td [(A)-42(x)]TJ/F84 9.9626 Tf 1.01 0 0 1 413.412 132.281 Tm [(is)-248(shown)-249(in)-248(Alg.)]TJ +0 0 1 rg 0 0 1 RG + [-248(2)]TJ 0 g 0 G -/F54 9.9626 Tf 17.703 0 Td [(the)-250(type)-250(of)-250(action.)]TJ -9.115 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)-310(Possible)-250(values:)]TJ/F59 9.9626 Tf 179.116 0 Td [(psb_act_ret)]TJ/F54 9.9626 Tf 57.534 0 Td [(,)]TJ/F59 9.9626 Tf 4.981 0 Td [(psb_act_abort)]TJ/F54 9.9626 Tf 67.995 0 Td [(.)]TJ + [(;)]TJ 1.02 0 0 1 150.705 120.326 Tm [(it)-311(costs)-311(one)-311(memory)-311(r)18(ead)-311(per)-311(outer)-311(iteration,)-327(plus)-311(thr)18(ee)-311(memory)-311(r)18(eads,)-328(one)]TJ 0 g 0 G - -170.149 -461.235 Td [(141)]TJ + 1 0 0 1 315.088 90.438 Tm [(174)]TJ 0 g 0 G ET endstream endobj -1805 0 obj +2181 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (../figures/hll.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2191 0 R +/BBox [0 0 494 214] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 2192 0 R +>>/Font << /R8 2193 0 R/R10 2194 0 R>> +>> +/Length 2880 +/Filter /FlateDecode +>> +stream +xœÝZËŽ%· Ý×WÔ²{Ñe½ËN`;pAz0 #«›x‚ 
o€qùýIQÂŒ‘Ezfq›,Š<<,Q*U}:ÝåO‡ÿù÷v?¾y©çÇŸw†«ô™zÁGwþú·óÇóŸpéã‘rM0*”zùÎûTÄ+—x‚®–AvWª$»+Âõ\¯Óyƒ0¶VÐä+9“K‚ˆ 'r ²¿\VGÀàTŒü›ñýÕÒ#õ+ƒ"Á¥jPŠ\5kn éT5^ݧ£–+å8P µ]Àש(›»Bó3–)ó‘é´L¨æÊÄ`6Å [A.Ÿ¿H9|Š—ó¦>>ö+… õñ±\1Θ>Æ«öhê㣿€q­Ü—sIóyÖgZ f¦‡Á1Ø(û%˜ƬOå*mÖÇÃO¥ +sŒ [?-Ïúø\.Ÿò̃åYc1˜PÌ•‰Ál* +f_@.Å0õ ¨ñÕÔ÷}Ög/Ï^¤6öY¤~È#+–MuÔ‚yQR½8[m!ÏNÁÌÌ:–9wˆ”Yÿˆ´…lj‘X_´6‰7µyÖÆXÓÃ ÊÆà¹£(x²È¥X›Pøè4Ÿ +ø¸MºÍL¸A=µ•žHoÐwõ¡ØW¨35è3 ƒe£ÎU<Ê¡^QEª·î*Öo—ëKŸ¯œ ŸF«ˆ"$hÄÐT¨x'ŸÃCŘÐv°ïª¢BÍ Z2Jh\5'MƒEÊ{ä©̃:D™ƒH…À<+ĵ¶:8›ÜM³:Ø5ZÏRl*-% êJtÙTûR-A«ƒ} W NKÄYc@¼Lƒ9bp;1 î'D–9‰YŸFWy,šöüY÷H¤{„QBÛ27™ˆ³:Ó€yPƒ(a©„y¸Vb™;¤Zªƒ™§Y$†–Í™KÕV¹õuV¹UÓbÑTg ^Ô3gB0·Š¹WˆÜ•{/Ðx¿?wý67m¢ð™v¥oLJ£æÍFÆÆ×Š0G;nBÀH41á¨@VÁ;º:­D³X¥€û9kŚŠ+—‚%¢h¬UIJ±µ¬ÐY¤sÙç~h|ÉØ¤}jÆJ5Ö +±.F¬°65€÷bDc¬b4·ÌÐk_Ö&Ç͆Ö&øÍ†ƒØÚc&­4¸™l?§b1Êq3bÅbÜfÄ +‚TœNéÞ§ÆÏx±G!\¬Dc¬nÿ­‘(¬MÎB¸‰ÆÆË™I¨\–³äšŠÅ(§Íˆ‹Qð›+è.€MI…éŒáÇõæ¤ ;ì_Tü’Uõ¨ŠÆJ4Ö*¶ÝJ4»@,~›Œ¢Y&PÛdµJãôÈX‰f±Êe›Œ¢Y¬Zƒ-KµV¬Y¦?“LÃ~®òRÇ-_[È«IžXYòÄj’'V–Õ”7YàZ( í@¦ç•o‡o}XŠEËãW<ˆÌ1p„h +í£ÔCs…j(1X†‚‚5÷ð°çq;þŽÜGðS`­«™XïS­.â“[tÖœ³¶äqÑQöáú‚§YµÀÅ|©‘9ŽšøAŠ=t˜À-!2Œ`jÁ(ÕÖ-ï67ª\){v¢l%¬Ùá¾§›]ökv©®Ù‰<³cbcŠc˜ì…Z0Jõ°åA•{·¹Qå ý 6§BÜ»jRçÓ¯ÉoÊòCkYdˆÜaS‹Ú»z`YbàÓ2k`+™ÈçðPð!Gp ‘a£P Á-¶<¨rï6·1çðp5ÛìX£Ø +lþ0[ÁŽ¡Úì2,ë}$XÇK¦á€e“œhÚp È9‚É1¨… [cƽÏÌxç'›kùëø%ÔkrwUðþQ¸‘ý¥x™;ÐÏrü¼"Zü$e¿r?öÑo‡°¡»báOvÅŸ7s_üoý޹×ËUñ™´v¼ËïcyÁ—e•¾8U.°UpØ=pT±$žÔâb(`FÁ&ûœ<çJ÷€C\"…ÑÝ<žÊœÕÐÍð-™È¶Û—<¯Ð1;Â#|¹&D(šÂçýnÚû± Äæö529¸´²,ämI3é©î¢áãxðiWÐÓŠKÇÞ3Ï3TÃg o‡ ‘ÃÁ‡[LFBnæ¡Æ}j8ÔnùÆw1Ö •Eõ« +,ÃØRúi¨¶ ã”9Ó—„¯¦/«,}Ù·8κt ˜š1Æ7 !ÌudÊX6?ØÑÖXoSÃ+Ÿ¢K:–•O#óu !÷zŸÀn|YpUÃ$dO[ +% ÉuÁ.Œ¢é>Cx²„o1i»ôƒï×F/Guk#йžË Ÿ@ÁÛÑ™½„ʺÛ …Æ3öÀÈÖ˜È'<¹Äÿüs»Ÿ¿{=¾yÁ£s˜…¯Ð³Çåó _ŽWXTàŽ…?á÷õ~üüð‡çßÿñüÓwß}x|‚ýc¯5<|ûzÎ?<ŸÏ//Ï¿•*þá§sû÷üOؽ{ÿð-‡›Üüé½”‡ŸÿòúÃñíëñgøÿ[‡¬¿~übB †dó‰;ö&X&a· +Ù<øÇ×O?Ó‚•2.šg9_ÿ*×Ps ŸOøYT¨•®U¼v²rƈºŸÂãSóøbzh”8†Ñ½ÂXZÊü°ó`—ú矆@8èwàô)aÿ…¢ªküœËEvëñ»›šK°|dG˜s%´D YšVòÔ@( ázé‹ñCüÿæ×¬á¿8Ç7•xVÇ‹ éÑi3kè4½ðw(Ölú¸V*|ik¬Ô¦âÓ*økµøåÀ—žÑÑ÷K•_á•Box:ÀÀ¯¿Æë‘HÏ+x´“êˆÆi[‡;¬<ž¿Á²ÐùÅ+i œ°ùŸø`8¼=ÿ³·‡ +endstream +endobj +2196 0 obj << -/Length 507 +/Filter /FlateDecode +/Length 214 >> stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td 
[(9)-1000(Utilities)]TJ/F54 9.9626 Tf 0 -22.702 Td [(W)92(e)-323(have)-322(some)-323(utilities)-322(available)-323(for)-322(input)-323(and)-323(output)-322(of)-323(sparse)-322(matrices;)-359(the)]TJ 0 -11.955 Td [(interfaces)-250(to)-250(these)-250(r)18(outines)-250(ar)18(e)-250(available)-250(in)-250(the)-250(module)]TJ/F59 9.9626 Tf 242.009 0 Td [(psb_util_mod)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)]TJ -0 g 0 G - -140.39 -580.689 Td [(142)]TJ -0 g 0 G -ET - +xœ]Á‚0 †ï{нÁ@²ì¢£¾ÀÙ±L<øö¶E> +stream +xœ]Mƒ F÷œbn€?MLãÆn\Ø4m/€0qÑÛP»èƒïM>x?Ü2øÃ[ùÂÚò¸ÚÍK„ gC¬¬@Ê»\„c¼…{B  Þù.äϲÉ7åîH«puB¢4#k‹¢kµî’ú{ªvaÒG²ŽÉúÒE#ž"6¯›¬ŸÁ4)U:€Ü¼G +¹wî•úÂßלuÉ‚¸Ø<~V +endstream +endobj +2199 0 obj +<< +/Filter /FlateDecode +/Length1 5268 +/Length 3426 +>> +stream +xœÍX}l[×u?÷=~“z|||)’ïñ‰’%’¢ý(‘¢"ÉŠ,ËVì9Ž;dª8‘dEqj·ž›zIÐdmƒ[“¸ Ò! Ú¥hÑ­²ûhKܬuÝu¶Åm‡Lè†.²Dk6¬A$íÜGJv²Æþ ½wϹ_çüÎïžs) Їo?Z´Àù(€ãKgζeéÈ‹KçÒƒJèS¨ø þ=}ÿÙ•3áVñØw€[^9ýÈýíñî‡BûX^8ɾø,@d•åPá]â‡ø×Pî}àÌC·ÇG\ø=ýÑ¥…Î~Wð‘:³ððYÒ¯áøÿBYÿÈ™åŽ}Oã#qö£{¨3œõŸ=·|¶±?àŠ¡M÷°r•{òÀRøíÃoÆaîwt‡à~çQZÀÇ  +d wÀAP`tØ ·Â|NÁ"œ Tè†ûà( ô °ÀÜ ûàIø-¸ ¸¿gáãð)8 +¿†È~~> !PÈSPÐÁÃ5zÛù:swŒzrµ‰º£ûd]e(V $¯ÿ å +”Ë8RÛkÖåó§b::\3èT½@]y6Õ0GkŸxµžÀqµõĿצAݹ=_w:êu\Ïïš¿»@=y;C.àîú…ùù\Æ›·{ÕÔ¶Ê——%½Z,P^ÿ$ÛäOqòÙ9S§®¾Û(®5– :kŒ& £žh8Ò‘¶Ä6 ´­¢+óú_;î„òz‘zsó5]ßgÎ.<¨×ô“‹í%ظ.¶3n­7ô}Ù³¡7Lg;“-N§p$úÇtj™ 8GpvšXFB_m 8i­9Ö±Íp†…ó¦¾ÚÙÜÔkŽ& Jêµ:4g6L½1×0Ø„öö*P‘…AF»%ækÈp Á^æÂƒ÷Ýè ›ªäщÆ ¶ÛNš /ÕׯW°'’¿SdjzšxI„%pžlð±{©™‹h½9À1§ù©#µ&2qÏÒt“è_T_¢ÝË=[{©yŠZćôxb>»9 +'=?~ïâºêy“iÞ÷@þ¦<û‘áÊDÙTßoc>p¿D6?GÉçáõ®Ù„7CúÞ¦k…Ë9‚_ß{ŸÞ„ŽìÃNµ#xY§ïz§;½™ŽàFÁó‹ŽdkŽv„ +A«#t¡êi „ +"¨Ê\ÎægÏ£¨ 1·E™í«8ûv4»·EE)¼-†Ùx‘˜×Ç«L#ܸB”iT.z]£á"ѹm1†¢–ßë4’cpAfóe®Ì½ &æ’Tàehȵz»@qå¨U´ËÞ5ªmð®55ä.¦ûst¤h—\k„Ž©µÚ*KÆÑev®w¤f<ÃFÇ“8:.Ú’³ûµµKþ‚/×ò‹p'ô‹võ|dò¢]Á¹»œ¹vµ‚$S­JýÒ%z{±Ù/7}a©Z­ÒˆD½U°µ^I¶yµºs±¢jD ^מtWÊ#Ã}fÆ£FPë13ÃeÅj·úF†ËD2$·!Ä8x:“ÉZa‰÷¹Â®(—Ý]ÈŽõiòÎ{Fõt,šJ‘ìúÏÈ?m"MîkÇf{ Sw+zFî=ÇöY³}1ÿNIÖ>qtý;FTK&£1ûÍ÷þ’\W‘Èa¬I—1ûçàëÐL"²T,µR^È @a9aYV³G­§ÔʶµnßÔ¶@Köuål` ç‹ÔXµc«3Ö©XÂÚo5“ rÖ¢1ÑVIŽË kÍÊúB"öy­¦b’*¡ä¶X8½"œ®ÚY@ý½ EÅ@ÜúG U 
ª‰jª12I&ˆ¡nµG¤íþWI15F¢c)}tã߯R?j¿Û:²rµœv+Š;]Þ˜ѱåÊìþÞµ­±·z9§«ÜW!Iƒ¦Ä"¥–ÜÆø0C*ŠHµ )ŠÀ0©¢fŽDèHÍϪ&³q¸H /ƾ䈥¤`K‹YBaÕckTí 9 Â]ÈpŸ¾*1¶ß‹¤FSmŸ(É—ø.5t¶~‡á`UÔ‚¨Å˜–ïB­‡Åb¸\²¢%«RÒÌ6—SJš@Åæ•§¦Îú ¿ê×*¿{æñÂÁ;†ÎŸ’‡ž<öÌKdïóß3Œ7.õÂ]Ÿ¾5¹,OL_vò1Ãø +r±@Ó`‡J­Lá fòmܶpÏ£Ö9åÙU» X´š=YF¤–rV3Ûä¬Ò€ÅÎ-ؤS|§s(¦„‰–"Œ>Ö¤éU$Þ_B§$qÎl†4ˆg¸´ëÛk3VÿÆx9øÝsÕãúØÆ¿Þ’ÖÇHÏDÚ>¤h÷äR÷®WÞËŒ áx·×%ß7zJýV%íÂ>s’Ø#k¥+¯¾°?îâk,ž6Æ3LCXD“G¹h{üȪL‘òxÜ0–xžB4lÍ6ñŠ¡cà©bÄä$;2<óÑí¤”’“^”á!ÞÌ|;T“9ùÄå—žzú_Þד«$w*¤‚‘ü]ãõBDøÎs_ºúÝç¿ômr|ÿÙ™¼8¸ûøí¥ÅÅññÅÅíxe1^½ð"4ƒ,+‡œ°P¿ÕŒ` ì„Íi´”v¸‹„æÏµÒî`OIÚµÖt§™ÖMü¹fÚÍšiÓŸ#4[¤áUÛƒ1MYMO˜ux8&ìaÍpªXÔ#bÝÍQR›Ð ÚÒ;*Âa¤t‹·;„„íªÚ +;…2;…æÉ‘ÆXs´O’”¬r¥DÞÐü;&.coUSz…Søî®ò± ‘ÐÅñ¿öæ„ùÉ^þ†>µ§¯K}Ýoß-R›?'ÿ€1‚Ç¡ÙǸ¬—ì<1R´ýx:„ÈωHÌÙb‘zV1_Úš¼ÖêáCè‰&Ú€AM +köN|k}h½Y¥ ýïÄõ<+8IùRH”Øa;ß‘šžäV„ü2 1w1è#7œÕ!Ò.Cˆ aBV“8Ì'nt¥ôX‘ºW[.îÄ¡.‡ö¶€eUÐdDJXSSp† Ú~tÞç ·»q¬ËíäqÛDz<^V0¡;D¨tüÜ.¯#ï•ï¬Ê17ì‰IÕcåÌäÆO&uîôБo~óhaƒTtvŒ)æKy}‰ã₩ÊbîÆðºY¥7 I!~;üxMilÕö¢=2Ú.{™Á²€½2kzñœÛ=ØëÅÜFUV‹ÜS\±X»¡ÞK[×§[Ô˜Oõé$=‘®Ý‘MžŽëñiòƒvÖMW6ž<±²C¤®×ö‹h³ƒðhjì´ +¥VÌ1ÒîR±î C?z­´ÓÓ "Ëɹ"M¬2¨a5å„ã2¸™·RM[Tñ‡$B¢­Ùy|÷ʘ¤E¼R·DÃè›Èœì®RC¢~v›¤nçâ˜âÔˆ×c:EÊä?Xœv¡pê/1s‡ŽZ”箩Ùä obAúéh29ú­G~µ~"¥rÏ +æm#Ÿ3Èzw„SÈ0÷ý²¾]…Òeã»ÜøÑÙ=av/ &—œÜ~âävvÍ¡Pj©íóJb¦S„·ö/ÏÖ,Ûž°s[ unDÄ©¿êõ«\ÿÇ≠*Ý©¸#Ü¥þÑÜ3½{6®ÍŸ±†3qÛ'=ó”êçžUøÂôW«iNQxãÖ¯ÌíŽ`­uw+ݯ¿¡ ²€7$ö¡d†|ü'÷(~¯ñóüë®s®»-÷+ž^h³ ýë/‚f <ޏéÇ}³®ç÷ââAöß\1ä 0}×Í×þñqÁ3ÎÓÅðy§¾¹‰O=QfÝ€›~8üc˜ûÁë¬ +xïquúÚÛãÛÈsæzp&Ìž[^>º|îÔýÎ +äéÿƒ]¾÷‹ïÀ;›ïStþ+ÀÕ Ãý„¹áß—Áä¾yã]Ô¿†íaÔIâñ}Âäqp¿ƒí?A»ù›ìÏì}¾ú¶Mÿðò½áñÿà¾Â¿ÅÔ×Ìà [ï÷.®ÿ­çMþ÷n´ö¿j¬O +endstream +endobj +2200 0 obj +<< +/Filter /FlateDecode +/Length1 11124 +/Length 7803 +>> +stream +xœÍzy|TU²pÕ9÷ö’N§;kw’îÎMw–NHÈ$¶äfeI aÓ ‚IH‚‚`‚ÐðÜF… ¨àÐé6Á'ѧÎà2à8ãö‰#ŽËLfq„ôW÷v@ù~ó{¿÷¾ß÷Ç»‡ª:§ªÎVUgëFèuµ sò@ý2Z¼|]S{¨œ1ç-ßÐeÿÛÜ·ã#í++ÛW­›Ö} @7 v®Z»qeH?‰êžkmiZñú…±*·‡˜S[‰¾ "†Ê©­ëºné»BqkÛ–7Mô—J(f]ÓÍíšµfKõö›ÖµLŒïnBR{[g×DýIм}}Kû¯WºÏ“~=€É*ÞG£ªÁ$¾ ‚Ÿœ%øb|Nð‚xHãk‚£<šj?;Nxö@*œÃ)ð2ŒÀØ¥P»`&œ‚Ãñ @‚ +x +œhU`Aà:XŸÁ(¤C5|ŒQÔN%´C¿$\ w‘V”ÃÏa×âBÈ¡ü,–…nê¹?8H¾|ŸJÃg˜„Y”ûDBôÀ¿A¬×ƒ B3ÀÍø%8À Û…¡/x\Gà·XM¹¹°Q|_ÖR­'Ñ‚#Á3ÁÏáE¡…Zº î¦ûa„MæåâØÁWÃ&»ÿÀLJŸ°[YOpIð©àg4Ø`:̇¥Ðà&x‚¼ú2¼Å4O 
¯Š›ÄsÁûɶ.(£±×’öBj{;yÉJïÒ,#ÑN³˜Žóp®Â~|ø~À4ÌÁ:ØWÜÇßà SE1XL-ÅA2õ+Áh%ÜJÖ¾Ÿæû¼ +'1]˜M3z—êË®b”žd§ØÇüNÞ/\ïÿãø÷Á>ÐR”Í$;tÃ3d…¿`!×`'~J#ßÉžãÜÌ%^ÈKù"ÞÀïæ»ø/ù¯„õÂAáCq¶Ø$Ô6ß8þv°:xÙACãJƒ,(€i?+)šn ñµSZ›a+ôÁ}/÷äyŸ€“ð[øü‰<è 1¯¦Þ×QÔ݉÷QzáKø*žÄOð[%±Jél*+a嬊­bwRÚÅN³wÙ|_Î{x/¥Ýü(ÿ@A‚b¥Yâvñ€æ mºv–¶Y÷æ…±‹™.~<ã ã׎?8þÒøçÁÅÁ4~'dÃdé6å#ƒû(=C‘x^ƒ7á=u¬_#C‘"ÞŠECy­gâlJsq>¥k(-Á¥”š°[)õ`/Þ†·ãx/> ¦‡inûði «x;Pg`~ð@ІaÐ\K;ï °O+ÒÞÓ Éâ>Y–Kf\í¹ª¸hú´©…ùySrs&gg¹33ÒÓ\ÎT)Åa·%'MJLˆ·Zâb¢£"ͦc¸!L¯ÓjD3„¬J©Êk÷¹¼>Á%Íš•­”¥&b4ýˆáõÙ‰Uu¥ŽÏîUÕìWjʤ¹òÿÒ”CšòeM4Û=àÉβWJvß[’=€Kç×SþÞ +©ÁîSósÕüN5o¤¼ÃAì•ÖÖ +»½öJ_Õ†Ö¾Jo57h+—Ê[²³`0Ì@Yå|©}-3PÍ0Keñ ‘åK**}ñR…2wV6­ðÕͯ¯¬Ht8²³|X¾\jöTæ3¹U(W»ñiÊ}Zµûje6°Ý>˜5Ò·#`†f¯;|…´¢éºzojPúˆtS¿>˦³ÖŠÔxTyý¶Ky_¥uµ])öõm³ûæ×ÿXêPpCµácÎ*o_u¼ƒLX½ÐN}±;ê}x'uhWæ¡Ì)4»©Ráxר}z©Ljí[ã%Ç$ôù`ÁF‡?!A>…„J{ߢzÉá+I”š*& Æ@ß‚Cñ²=þJIvÖ 92dÖÁÓD&ÜøãLËe™šSÕ•\õ‚ËvEeDÒl +Ÿ}¹FR/Ñœ¦+¨e:ô-ŸNjô5 Õò­ ¬öé˽}æb⛕ú>Ñi–ì}ßù_ûÓ•œ¦ ŽÆiþ”¬%—ä—ò>·Û—™©ˆ¶œæU$#—$±×(’ÞK’ËÕ½Åñs ¼,b}:×å&s\tek±ãþ qKH^½Pªž¿´Þ^Ùç°mõ¢+J!ùô˲‰†dpŸà$KÍ–(ô,­WôOtVI•«½³h©Ñ}Ñåõ<‘5„r,‘«MQü^w¹e¥P®´%85jü¯huÀ*íU>³wV7„9ÿÍJà9¥–J~¨61'_±ûÊòUW”¯^x§ .V½hi__ز*Ú¬úúª${UŸ·¯)ìm–ìf©ï¯çõ}í•ÞK'úªv4Ð$Z±8[¹#‚Ö1^ KÌð}×øs±ê¬Låèák:}Ú@¤²™Þ1¥t7xËd¥·$;០UVæÉ¢îÉ*õ§g䩤¼矰CtµãŒ?.Q•|ì/+›ÈLÊ efç) ãÃ_ÿ˜Ÿ¡CE­5”>9ï\©‘Èoñ6à¿™8”êÊÛs‚¿Iò×ùI:O•j'ýÆÈ=ø^*+tÏDùQ¢Šü§üG¨Gôá úñ‰>¨>xmü‰òÞ­Öëš ¼ÓŸl3—&“ÜNKÀ)·‹r»Èt»¨„‘.vkÕž‰æ]¢d®-~‡¤úhË%>o€Lº…L¿…,·…,·m¾¤³9¤“Í7“ÎfÒÙL:›É*¹¼“úë$‡a3€“Ý;Éî +ßGx„à´Ê¿ƒðN‚¥Äo";fШîáküé6 +²UCEr^ÉqºÇ"5»r(>)¯ÿ‡’>L D¢Ԥ趨Җ!}¸ÂmJH +QÒº¡4‚/‡[ÄN%( ¨ ørjŽm˜Ïƒu:#l=¬‡÷=¢[Q'xÔé€B2Šgƒ‡2lœæÕ·ë{õܬ·ësõ²¾N/¶Ñ“¨ŸsÏá%¼–7r1ñk‹ó‰È35Åù; ŸaÄpÚ ú4#šÓšQÍ9h×äjdMÆ«i×ôjvj4úšZæ5´z Ül°r ²¡Î Ú´8Pz'oV-a3A;ÁNlÜH|;¿ž ‘¼ÑH¦¸žø@¨d&8MùQ¢"•L¤g"=qMÄ5+’:/Aû„TsYr©Ž¢N‘¤‘4‚¸dÛQÂç”Á*©d¤’‘´N³ 4B3a;AWy£5„/Ér'ä^*?§ê\’ÉJ]vAnJÉ@_dàÎ ”=%¥yr +¡¨¨¨F©ÑÙ˜Þ¸Oh“Úœmémû„Z©ÖY›^»O(‘Jœ%é%û„)Ç™“ž³O°I6§-ݶOè¯9\s¢æTÐXÓVÓSç‘ë†üîÜ<•¦8zÄŸ7ÍTz5;LÓi$¼‡à aAA AÈ«Üg‰û,qŸ…Z‚F‘j=«l1„m2…¿G•)9Eήsšü!q~mi m»{8µ}ˆä‡TíPî°Ê÷Uùµú*_Ѳ\ª§l‚KÕín)-Ã¥PBÐHÐN Â)¾ÎPë„mí‡ 
¾”Ò¾„=Ké;ijdã”XÄÅÑÉ©3—šY8Å‚ŸRñÃ*¾GÅ%*N•#æ¿c|qŽñ®9Æ4ʰt:ƒŒ¸KÅÙPj|®ÔX[jÌ(5Rkp€‘ŪX£`ü£Šç©8KŽq¿sÿæ0þÕa|Üaìp¯v(õ&Ñ6²LTÏQ±K6ØŒ¯ÙŒKlÆi6c©w#õe*NVq¢‚ñëçL&Ðǯ¡‚ZB¿'Ã`  ú=¥DÆýž™D.ú=»‰üÓïù‰íüÕ£ ¿õ§žµ•Æâyœ-(å¿Mпâlz.Ùèñ<›n6Ütý™ß³UÑ’êÿ”ÊO@ŠNÑßKA…îÁÙ*ÿñ‰zù³š©×GýY©×ŸB–ÚëCþ¬³Äý‰?ë"÷û³Öé÷;•®ñ{2m¥‘¸ +R™¢»œLIÍD³¨åµDg†*Wú³”ZJ,÷KSˆ¤)£|%¨S»³ù%u’I ©MLIt"8U&uðFHQ©Î/m¥V4Ï9ÏÚþî9®L¾A“·íÓh~‹©ø{œí?h{û˜b.¿íTVGm¿’ŽÛ^M àb¿m$+ #Á‰¬Ã#¶A2²tµÎZe{VR¥û$’’«÷x²mJKm8©ì·mÍzA¬£/&qCÖ [ç ­Ê@ËêL³KëmEÄžÀÙCmSRÊPr©ƒGm™Ô£KR‡rÍ´aVZì–³´]Úfíbí|íUÚ|m¶Ö®MÒNÒÆè¢tf]„.\¦Óé4:AÇt ‹ Ge·ro‹Ñ˜ÕŸø jÞÌÌÔŸþ€¡ŽÑÚñEójV½° }QÕP½¨Ì7Í]Ðø¦»«}ººkëïk ’Ý@XTOª°îLTwÇ1çÎ{ºùÎ{°Ú7²ª›í¾oÒ<Âè’*JeVˆÛPb-‰šYTUñ/w»ø¬îÖ$ßƒÕ ë}Ï$5øò”L0©¡Ú7Syc¬­²âkWHCý1ÜÄ:*(|ÜTÑpY RX;©G!ŠÚ¤(j‚CªZªFašRY1˜’Rzg+J>/«J«Bm¥RÔVBH%CªÚV*KVÔ(B™~ÜX8 ImÌjc“¥A§“T²œŠÊà4') :§©âƒ?ˆ%gh8 àTûqbƒÚâ:é!Š‚ ¦#÷ÿϯ¥ì ŒCM­X®<νRe ×·}C«Õ×Ûl·®øhâÕîò6/oUhS‹ï#©¥Â·Bª°6-ÿâ劸Iª„å•‹ê—Ë-þ&¹©RjªhÚßS^}E_÷\ç_4Ö£4V®ôµ¿ú_ˆ«ñ~¥¯j¥¯j¥¯ýò~µ¯êeX]W?¨ƒ²zÝ©tˆÂh=x eqæöêâ¸Êa½5qX:¶ ô2—Ê|FE”]š]ªˆhu*¢åç— ‘õÖ«‰ÃøÔ„ÈLìH© Ü`­\]qù_ggg—ÝÝnÂ]ÝV•×E‹Ö±°ÚW¥<=>O¥OöV4 âŽî‰¯¼^6Ÿðœò°6O§ß³ÇsØ#vw7;êDʩ֘Җғҟ²'åpŠF\WTöìIùK +ï¦hÂ.ú*+Ô>»‰Ò?¥ØÕÝ©|@t„ºsw»ËëKS`9Ýz‘nèÙM ä,$á?¿Cð)Á߸ðOž$R8<›gÓÓ½Bé±Á­l:Vž7”[˜7=@´ieˆ.\¢•óBÔSšg%ê/É+5Ña˜ðë|EðO‘çñ<µñîPÔ6tB§iø@….uº»ÐMTÌÝÕévƒJ€“HÕWÆ=`g7)È!DHIåv*ÕºzY)gQ½3ƒæ2<Î^¤Ëª–ðƒ(Ø‹ÏqÓ*™#ñ:x‚ä 8f€oÀëÁê6ë¹è™g>ï™{Ñ%”7_ 4%×éˆt¢­.ØùÈY„ïÁ.Œ¨›ÿ9êR#¶Ò3ñ!9F¶z­ÖQ«VÙÊ6À]À"J£q5ÝTô8@OS®æu”—¨ò?èu½âˆøµLg°‰éŠz]8ã0Œ'õÙrTD„IŽ,Ì5õ˜všL‚)Þ2ÌRñ,˜»Ìí™k;kVF\≌*ÂÈ"øfì~ãvOÉ…eر,Ú™g‰uÎ`…‘i.—”¢=‡sÑžëÆ™wz\˜Ö™à,~±÷ûmë§'3§“%MÙÄ>Ú•iO¶)s¬ žåûÄ0À³Çèý9:8CTOGÊÄëPä™ú2^ã€ñu<ÉÞÇ÷Ù¨Ñh@0ÊFÎDA ËŽœÀY çLàFQžY(þ5D4¿W,ÀGŽÐ.³/è=ñ¹Ng­ u€ +/°?@8&ÓmFñÔ²±³Êäçž§i¹ÍcîÏ6q²{[Ä–W¦äbÇúe°žB)Ú‹:R´mÚ¯Ø{ãžv|`|{Gî¢ü$±ÆõÏ…W'{ ê6 ‹üxü˜ŒUòmZ«¡ÈbtuU&¯ Sr\\†Ö£­}Z«‘í× +Ku×Z–ZoÐuEvE=fx<â‘ÈC†C'Å“–_Z?°|`µ'|g‰¥;—/&ÆÆÇÅ[’¬Z½Å`5$ÄÏŒ¿ÇÒo×Zã³$ćÇkŒ<ž‰«%.6F-Ø*ëõrLxI¯õž/‡›Å„þxÜ8žÅó| +Ž{‡…'ð^Ùšß×F7F·E÷D ÑÔÊÑ2M*ì²½×νö;³ÇÇïh}Q–cYëaýì;Åΰ¿0‹· ã}ªyUËžõŒÍ3/ëø–l¼lÌ<¦Øùâ²OÉÅ÷ÙÈ(K‘jQEEÌáÙf·¼qÉòË”E näŽB€Â +6Vš:5?O™šFË´Ž¼©S§ñƒF± í»o\±ÇåŒ?õè¾ßåÎÙÿÝ l^»¤*ÅñïX†?½uwDZ×~³sÕª'ŽŒŸ›nž’ 
Ê*ßü‚÷‘¿âÁù¸In@ÔçÛò3ÓÚò7¥ôzÃ{zosöºúòŸ¶îK8à +.áy×ñ´WÃ^5¼gŒÓBjŒ,AŸg´$8ΈjÜ·ïŒx"®‚b¤;ÎNoÄkÓ®Ë_kp5[åZ“Öš nNÛµ9¿_è{µ½ºÛ"o‹êé{XxP·+òÁ¨Gãö»žM{6? Õ}iø*üˈ/Ó¾ÌËÐõiÅP„ÓóÄ +„'¤ *2[ìtÏökÄl…D“JõtÕÑ£¬B.å͸R6C¡\ÈäBoá@áh¡P(½@Ž3 MrX®E¶ì´pK|Á0þ¿¼ä±ócŠÇΞ'÷•Œ)nCKÑ”Üòrž;'9%2NÐÅ:¢Ô 6mR3fÅd6Ãä¨ÜfLì͘¬#äŽËn†œHB ÞDÔýv+}´³@‡²Î\®Â‚w5Ú8%~5RJšÂsNrlLœ%Z£üCO‘-†árþ-iˆï¥|&´ +Å-ºMú›Âo6nŒ¾É¾]w{t˜^ןÁ®Ò‰iVGšUäÉN´â0=¦­(?—V§E-yMÖç8ÛœtâA²²ü"D$w?g±€Ñ:L«+MÏC”9ÊÅ£Ø"GA†œÑ›Áå oÆ@Æh†Ã´J¤&‡cañéWøk,ä°‹giû4Ó +$->OK±H]ʪ:-31Uî2;'¹$—Íèh†$SB3¦ê(g7$7cb$¡½3´/Ý“BËp™¥pêÔ¨i´îÈ1Ó&œÅ´ •Zª§ÖÞ6úvÆã=ýo®¼åµ7Ýÿñk{_dùQeç6ÜÕPÚ8ùÖINÖ©‡[~÷¼ûÓ}¿ÿýøÆ­kØ±Ûæ5}róÀîwnZœ¥úa­³çÉfH…a“NÝÁD1V!FcB€ö¤(}¸d“]^×€kÔ%¸"vD#´AôÃÝÕâÃzÈXêy37´W©F©ÁT)5%•ird­sRbRbr"×D»LNƒËo‰g‡IÛ—†ŒA¹¸pÅlHž¨l_QæØfˆ‹mþa÷ÊT!3sktATÈX‘1Lݳ¦™U+M›©F4²†ÍÙѵÔûØæGï~§ùå­ë^©,ê˜Ú•<97µ(£¸¢pVÛýÖ.(Ýóêøá?}à³—þ>þÅàMëaÑvæ:®^8þ˜j3;ZÇÈfa`„÷äô8#mà•FÙÄef†c¬–&ˆ\/jP7A7 +šp#Ý&ÉQZ]ŒV«ÓqA« ×͈ÆãøÝn ¸G6ЍÑë4(„‡ ÇéÍÍé~¹R6èõ&Ž{øaÎxÿ.[±D½˜ÐK·ÈQ7idZñ?:õ;> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(9.1)-1206(hb)]TJ +/F84 9.9626 Tf 1.02 0 0 1 99.895 706.129 Tm [(memory)-286(write)-286(and)-286(two)-286(\003oating-point)-286(operations)-286(per)-286(inner)-287(iter)1(ation.)-427(The)-286(ac-)]TJ 0.984 0 0 1 99.895 694.174 Tm [(cesses)-255(to)]TJ/F145 9.9626 Tf 1 0 0 1 139.647 694.174 Tm [(AS)]TJ/F84 9.9626 Tf 0.984 0 0 1 152.603 694.174 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 171.695 694.174 Tm [(x)]TJ/F84 9.9626 Tf 0.984 0 0 1 179.42 694.174 Tm [(ar)18(e)-254(in)-255(strict)-254(sequential)-255(or)19(der)75(,)-255(ther)19(efor)18(e)-255(no)-254(indir)18(ect)-254(addr)18(essing)]TJ 1 0 0 1 99.895 682.219 Tm [(is)-250(r)18(equir)18(ed.)]TJ +0 g 0 G ET +1 0 0 1 146.769 562.733 cm q -1 0 0 1 144.589 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 148.175 706.129 Td [(read)-206(\227)-207(Read)-206(a)-207(sparse)-206(matrix)-207(fr)1(om)-207(a)-206(\002le)-207(in)-206(the)-207(Harwell\226)]TJ -21.381 -13.948 Td [(Boeing)-250(format)]TJ/F54 9.9626 Tf -25.157 -24.941 Td 
[(c)-175(a)-175(l)-174(l)-865(h)-90(b)]TJ -ET +.49594 0 0 .49594 0 0 cm q -1 0 0 1 144.379 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 0 0 cm +/Im9 Do Q +Q +0 g 0 G +1 0 0 1 -146.769 -562.733 cm BT -/F54 9.9626 Tf 148.265 667.24 Td [(r)-90(e)-90(a)-90(d)-223(\050)-167(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-871(f)-97(i)-96(l)-96(e)-96(n)-96(a)-97(m)-96(e)-367(,)-791(b)-206(,)-919(m)-143(t)-144(i)-143(t)-143(l)-144(e)-277(\051)]TJ +/F84 9.9626 Tf 166.233 540.815 Td [(Figur)18(e)-250(8:)-310(DIA)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -48.37 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 114.839 389.447 cm +0 0 313.823 115.567 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(\002lename)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 136.657 494.353 Td [(do)]TJ 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.185 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ + [-525(j)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 
0 G -/F51 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ + [(,ndiag)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.959 Td [(if)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(a)]TJ + [-525(\050offset\050j\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ -ET -q -1 0 0 1 344.406 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 347.544 442.084 Td [(type)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(>)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -268.571 -19.925 Td [(b)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td 
[(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(2)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.956 Td [(attribute;)-361(will)-324(be)-324(al)1(located)-324(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side,)-250(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(mtitle)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F54 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(A)-337(charachter)-338(variable)-337(of)-338(l)1(ength)-338(72)-337(holding)-338(a)-337(copy)-338(of)-337(the)-337(matrix)-338(title)-337(as)]TJ 0 -11.956 Td [(speci\002ed)-250(by)-250(the)-250(Harwell-Boeing)-250(format)-250(and)-250(contained)-250(in)-250(the)-250(input)-250(\002le.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(iret)]TJ + 9.414 -10.959 Td [(ir1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G - 139.477 -184.274 Td [(143)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1817 0 obj -<< -/Length 4948 ->> -stream +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(1)]TJ 0 g 0 G + [(;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(9.2)-1000(hb)]TJ -ET -q -1 0 0 1 192.93 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 196.517 706.129 Td 
[(write)-298(\227)-297(W)74(rite)-298(a)-298(sparse)-298(matrix)-297(to)-298(a)-298(\002le)-298(in)-297(the)-298(Harwell\226)]TJ -18.913 -13.948 Td [(Boeing)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-884(h)-109(b)]TJ -ET -q -1 0 0 1 195.753 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 199.827 667.24 Td [(w)-109(r)-109(i)-109(t)-109(e)-242(\050)-167(a)-241(,)-927(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-129(t)-435(,)-871(f)-96(i)-96(l)-96(e)-97(n)-96(a)-96(m)-96(e)-368(,)-816(k)-42(e)-42(y)-259(,)-855(r)-79(h)-80(s)-335(,)-918(m)-144(t)-143(i)-144(t)-143(l)-143(e)-277(\051)]TJ + [-525(ir2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -49.122 -27.896 Td [(T)90(ype:)]TJ + [-525(m)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(-)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(offset\050j\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.958 Td [(else)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 575.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 575.584 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 575.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 575.584 Td [(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +0 g 0 G + 9.414 -10.959 Td 
[(ir1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -19.926 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(1)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.955 Td [(attribute;)-361(will)-324(be)-324(allocated)-323(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side.)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(1)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(\002lename)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.368 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.765 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-234(\050i)1(.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.955 Td [(is)-250(used.)-310(Default:)]TJ/F59 9.9626 Tf 74.799 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(-)]TJ 0 g 0 G -/F51 9.9626 Tf -104.935 -19.926 Td [(iunit)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 
-11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.758 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ + [-525(offset\050j\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -317.894 -19.925 Td [(key)]TJ + [-525(ir2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(Matrix)-250(key)111(.)]TJ 3.317 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(A)-291(charachter)-291(variable)-291(of)-291(length)-291(8)-291(holding)-291(the)-290(matrix)-291(key)-291(as)-291(speci\002ed)-291(by)]TJ 0 -11.955 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(mtitle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 32.089 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(A)-239(charachter)-239(variable)-239(of)-239(length)-240(72)-239(holding)-239(the)-239(matrix)-239(title)-239(as)-239(speci\002ed)-239(by)]TJ 0 -11.956 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ + [-525(m;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -21.917 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(if)]TJ 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(do)]TJ 0 g 0 G - 0 -19.926 Td [(iret)]TJ + [-525(i)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.433 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td 
[(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [(ir1,ir2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -128.483 Td [(144)]TJ + 9.414 -10.959 Td [(y\050i\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1824 0 obj -<< -/Length 3542 ->> -stream +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(9.3)-1000(mm)]TJ -ET -q -1 0 0 1 148.768 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 152.354 706.129 Td [(mat)]TJ -ET -q -1 0 0 1 173.658 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 177.245 706.129 Td [(read)-202(\227)-203(Read)-202(a)-203(sparse)-202(matrix)-203(from)-202(a)-203(\002le)-202(in)-203(the)-202(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-810(m)-35(m)]TJ -ET -q -1 0 0 1 149.022 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 152.359 667.24 Td [(m)-35(a)-35(t)]TJ -ET -q -1 0 0 1 171.029 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.367 667.24 Td [(r)-35(e)-35(a)-35(d)-169(\050)-166(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-107(a)-106(m)-107(e)-241(\051)]TJ + [-525(y\050i\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(+)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(alpha)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ 0 g 0 G -/F51 9.9626 Tf -74.472 -27.896 Td [(T)90(ype:)]TJ + [(as\050i,j\051)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(x\050i)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + 
[(offset\050j\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(do)]TJ 0 g 0 G - 0 -19.925 Td [(\002lename)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(do)]TJ 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.185 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ 0 g 0 G -/F54 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G +/F75 9.9626 Tf 16.096 -32.463 Td [(Algorithm)-250(2:)]TJ/F84 9.9626 Tf 60.055 0 Td [(Matrix-V)111(ector)-250(pr)18(oduct)-250(in)-250(DIA)-250(format)]TJ 0 g 0 G - 0 -19.926 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ +0 g 0 G + -97.969 -26.977 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F145 9.9626 Tf 110.953 0 Td [(psb_T_dia_sparse_mat)]TJ/F84 9.9626 Tf 104.607 0 Td [(:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 344.406 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 99.895 208.684 cm +0 0 343.711 115.567 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 347.544 442.084 Td [(type)]TJ +/F233 8.9664 Tf 112.299 313.591 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -268.571 -19.925 Td [(iret)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -307.811 Td [(145)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -ET - -endstream -endobj -1832 0 obj -<< -/Length 4155 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(psb_d_dia_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(9.4)-1000(mm)]TJ -ET -q -1 0 0 1 199.577 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 203.164 706.129 Td [(array)]TJ -ET -q -1 0 0 1 231.784 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 
235.371 706.129 Td [(read)-222(\227)-223(Read)-222(a)-223(dense)-222(array)-223(from)-222(a)-223(\002le)-222(in)-223(the)-222(Ma-)]TJ -57.767 -13.948 Td [(trixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-845(m)-71(m)]TJ -ET -q -1 0 0 1 200.884 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 204.572 667.24 Td [(a)-70(r)-70(r)-71(a)-70(y)]TJ -ET -q -1 0 0 1 232.04 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 235.728 667.24 Td [(r)-70(e)-70(a)-71(d)-203(\050)-150(b)-206(,)-926(i)-152(r)-151(e)-152(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-106(a)-107(m)-107(e)-241(\051)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 9.414 -10.959 Td [(!)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(DIA)-525(format,)-525(extended.)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -85.023 -27.896 Td [(T)90(ype:)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F233 8.9664 Tf 0 -21.918 Td [(integer)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(\002lename)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 
9.9626 Tf 298.533 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.184 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ + [-525(offset\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -317.894 -21.918 Td [(On)-250(Return)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G - 0 -19.926 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-398(array)-398(of)-397(type)-398(r)18(eal)-398(or)-398(complex,)-434(rank)-398(1)-398(or)-398(2)-398(and)-398(h)1(a)-1(v)1(ing)-398(the)-398(ALLO-)]TJ 0 -11.955 Td [(CA)74(T)74(ABLE)-257(attribute,)-258(or)-257(an)-257(object)-257(of)-257(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 177.91 0 Td [(psb)]TJ -ET -q -1 0 0 1 369.841 430.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 372.979 430.129 Td [(T)]TJ -ET -q -1 0 0 1 378.837 430.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 381.975 430.129 Td 
[(vect)]TJ -ET -q -1 0 0 1 403.524 430.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 406.663 430.129 Td [(type)]TJ + [-525(nzeros)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.958 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)-259(of)-257(type)-256(r)18(eal)-257(or)]TJ -251.973 -11.955 Td [(complex.)]TJ 0 -11.955 Td [(W)55(ill)-275(be)-276(allocated)-275(and)-276(\002ll)1(ed)-276(in)-275(if)-276(the)-275(input)-275(\002le)-276(contains)-275(a)-275(right)-276(hand)-275(side,)]TJ 0 -11.956 Td [(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(data)]TJ +0 g 0 G + [(\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -21.918 Td [(end)-525(type)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.88 Td [(iret)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F75 9.9626 Tf -12.404 -39.731 Td [(Hacked)-250(DIA)]TJ/F84 9.9626 Tf 0.985 0 0 1 99.895 156.191 Tm [(Storage)-254(by)-253(DIAgonals)-254(is)-253(an)-254(attractive)-253(option)-254(for)-253(matrices)-254(whose)-253(coef)18(\002cients)-254(ar)18(e)]TJ 1.015 0 0 1 99.895 144.236 Tm [(located)-247(on)-246(a)-247(small)-247(set)-246(of)-247(diagonals,)-247(since)-246(they)-247(do)-247(away)-246(with)-247(storing)-247(explicitly)]TJ 0.988 0 0 1 99.895 132.281 
Tm [(the)-253(indices)-254(and)-253(ther)18(efor)19(e)-254(r)19(educe)-254(signi\002cantly)-253(memory)-253(traf)18(\002c.)-315(However)75(,)-253(having)]TJ 0.98 0 0 1 99.895 120.326 Tm [(a)-246(few)-246(coef)19(\002)-1(cients)-245(outside)-246(of)-246(the)-246(main)-246(set)-245(of)-246(diagonals)-246(may)-246(signi\002cantly)-246(incr)19(ease)]TJ 0 g 0 G - 139.476 -248.035 Td [(146)]TJ + 1 0 0 1 264.279 90.438 Tm [(175)]TJ 0 g 0 G ET endstream endobj -1839 0 obj +2203 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/dia.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2212 0 R +/BBox [0 0 499 211] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 2213 0 R +>>/Font << /R8 2214 0 R/R10 2215 0 R>> +>> +/Length 2502 +/Filter /FlateDecode +>> +stream +xœÝZ¹neÇÍïWÜ xÕûJ€$À‰á†a(¢íQ@9Ðﻪ»NuÝÖÈv Áཪ®åœîêõñóé.:þ“Ï—·ã«õüô¯#¸óçÃ]¹÷âúùýá[¾Jð§ï1\ù|[ŠVI~=>Ùo&ªP“7U¨‰÷‰U&Ë$í&é&µl&P,“Îòχ!¹Ak€Ò°’Ö\m+K¶µ;ÛÊ’mõ!Øæ!ÞÚs¹µ³xkoíÖÎ"·>ÜY¯FÝ\ÎÔ»;ƒ§Ì?ýýüóùOjútÔ\®”úYc»Zån€¦P7¤’ÏêÓÕœ? 
u`mù,%^±7•_ŽâýÕÛÒõ+º2r¼ªéî +5žˆÐ"}æ¨9 ¿@â–;—㇟ê]p™Š÷rÔë­v.e—{®ç‡ïß1ÿ¹<\m—³ì 6ï"9.v*³#ÍoU´NÉM×@^IU#°$PsôÅ ¹¡Q´â¼£Ÿcú¾ÙQJ®_5&Ã)æv…¶8E_®â&'|WNP1œ!#ü┊¿|¬à”]¸jœð}q‚FÑŠóŽÞŽÒ{a4F)¶F“ÔrrŠ©^QQûÐóÕ +RºR7€yÃ_ä•W5@&ùLa¨MÚÌðß8Œ±z—¼Æˆ•Ö¯vc&EV}I yîåj–Z¦…"îÙÅ«j"jКDPè3…¡&Ô¨`c1Æì2£6ÇÞ§F%1x)hfñЙ'%Ž»äH‹@l£¼fQ¼Þæ/´«Î5Ð,²•çä×ÂnÙߌ&ö«-nr§îè7©ŒÀé‘oéÞT!~¯˜XJâñì_œ-†W |⑳4ã,±Â¨ˆ(8½,¸K3]pžAÄ=çD²Ç‘³W3¡R¤2&WÇH3‹FsH„=ç}ö54®Èµ_ùRùd û•/tâ¦3dsl~×ý +бyõ…(Áe·bEãc6+ߟfhù¾6+hÔé¼CÇ$ŠtÊ¥‹@íéªãŠÔÇäõ•æw$>Kï#<;8ê7îRYÕ%„h^W™J*Ì+šÎÕ¦Š!_‘9Ð)hl¥Œù4äpnØÖ¼º‡ÝqùKœÊ56ÎÓ,@ …/¾)ߎÝç•©3w§¼Í+Ï+(ÅÕRœ +­æm¥Ð°K㩎½®8½†Ëŵ2@þ2Úß'.#æ»õÞòvìÞ¯ºöB£ý7ÓoÝùb+â7ŒÊˆ÷çÚÅ☦xQ¿¸4ÞBL›ƨ_¥ó+J§•Êw“˹4ŒÛM3ß\¦ÄÝ +kE7úÍ +k%¯+0Y¯+hŸ¯+Ú®¯+hŸ¯+ڎוÿå!8:äÐW_i›œj +mQ´YOë"¿áê:ÚÆ-õãGä—C5-t^_õÍÊBýNÛÜžU†x‹óvì¹(2àX¥&TÚÓX5®*$1ÜlK‰Zx±ù­Sbè·N ­Ý:EdÓ)©¦­SD£1S ·N‘¬<›þ_܆G¼ÌðH.3<@#ݰ +$†;€m)ˆ8¹‚’…ÂKÌ(½ÆqW@È/Újˆ rAAãý,$t!]+nHE~Y] ‰wc°ß%¯/.Þ|ÖâSB£Ž æ0_¶#­q¦]EmOÍú«¨íµßü!¢®tÖ_EmOÞú«ˆöÀ'+bç[w£ºâ­}>ºk;^ÝïÛ&>DÍßã-?DmÏu*Z›ím‰·vÉväÿÓÜ~út|æÍƒÿäãåíüæùøêC£Íð|¦þl<.|·+éÊ%žÏoÇ_žÂãSæÃªËOþñ©ÐZÀßÝãSãóJϤ­|ËèáŒ}udáêãÏ8¾}&|ÿõ—:®§ËSõíxcXš8{ƒ¾7»Y¨$§+•_ø÷ˆÆ¢eAÓ„b­*ÃC4´ µt®µëŽæ˜2{,€î<ÆûIëeqS ã·‰d·Yp†[åG\c1—ÐAeå !#Çâ°JD¸óÜz–(Êmi&²žh³ yO‚K¹õØæeÔ‡ã=T7Õ2 È5‡rS°JD¸ó˜Ü®&g_妚‰Œ¯„nÕ §ðÍŽÁ ÕzY8õu&—SË~â\•‰æ`“;jfRG˜n˜Í@C«|*yu†¯ÑGÕZe*¿9‰³‘%åÒLP+Â=ƒÑÊ-Šm:ïèÇØ$gÇ%­~å)³Yâ€bËætqÆ"ÐRi梊(6(¤Õ]ú_è¤5¼ö8#”2õÕSEGÛx:IvãÑ¡ºÌÏ.¡P•r‹LÛwI“„ZLA܇àó4äïÄ…îbË‹ÈôhâN™Í‘@†ø`2 ƒ2QQ  ¿ºp²™¡1Ì¢iæÏfÜ!O2* +u´3º’Ñäh8¸ì“LË‹I3=Êï`Ù H­À©Lht‹7tíµý ¢ð 8à œˆ¾˜ ?,šUE=8xÚ‘k9#_ÎSçû^¹ ¡¡‹©¿J;#ô8úMytFžçù©ñü0•è| h r.òídæ€5:*™j!¯qãÿKq­Øq‹\Oq­L|Ó"›qS 7 òÊ PèÈ Êsë|Ž¥ó¡¤&;ÝGh-„‚O¶•ÿÛ%Ïb€ì;:H5i”ðnö92@&‡J—Óê—ÝdúøG@^)D£ $‚‚ÜXXf­£"ÀMÅ1JAåT45Ír’üÅÕ† 3µÜ@d“€ +rca™ñ8ÏzD—‹bH…°d'¥¢šÃ\LÀ/ó¹¬1ÙŒ,0F c¨)T#  7Ì,¥j7ß%Î¥šäÛæ›RÙ6ßq4›/ÉÙ¸g]åû\¦—×\ÃW\¬ò+3,êmó½Lr´›ïJŽ·Í7ñÅÓÐÈþ¶ù&þ?9³ù.y’QQШ» Ñ•Œ&Gs¼m¾7ì“LÏ‹I7=šûmóM¹m›/]”o›/ÉvóUQxT»ù._àlÛæ»òâ›QUÔÌá×.Ü$Rg{çæñÊØcé‚Lçúqçþú1º‡ç×><>¾bûJ:ªŠÞËÃ_ƽ:dº’·p>yºovª†ç¿‘ã¿ûîã·ÏÓ‘¯ì-u„縑óß¿Ë&& +endstream +endobj +2217 0 obj +<< +/Filter /FlateDecode +/Length 177 +>> +stream +xœ]1à EwNÁ ‰Ô,KºdhUµ½1!Co_I‡ßâë›Óur6Qöˆ^½ Qcްù=* 
3,Ö‘¦¥ÚªtP©j•°ñ&Ãû€â˜Êw¹{6—ÒiªGy [ +¢t s1#8ý÷ÕUÃlŽÉV‹"α’¡Cc×fÄb/Šû²íôåÅ9ሪ=Fp©œQbæxÖÁïÒàCvQùðY¢ +endstream +endobj +2219 0 obj +<< +/Filter /FlateDecode +/Length 197 +>> +stream +xœ]Mƒ …÷œ‚ˆFmL»qѦi{ÄÁ° ꢷï0j]¼ Ìß#k»kçÝʳGœÍ Vn",ó ðFçY^ðÁ™õ ŠfÒeíM‡÷'ǰ;ßõÙ3¿ÐM¾×˜y€%hQû˜BIk?ü=Õ{AoÌ3“„ÀÈd™+’+Äš°Jh ›ËªP²*â ±Q$ĆFŸCÒÉι=7[ŒàWòLž’çá÷-a©Š£ØsÇbã +endstream +endobj +2220 0 obj +<< +/Filter /FlateDecode +/Length1 9528 +/Length 6467 +>> +stream +xœÍY{xTÕµ_{ïsf&™Lfòž$™ÉÉ„À$&„wÉä1á‘á!$TÌ$!¼HB«L¨E4 p->¨ +Q/`ËÉDh@¯D¯¶­«µêµ+VÚ[ÚÔWkÌýí“€r¿~÷»ý¾ûÇ=;k­½×Z{¯µ×^gŸ½'ĈÈF$¨zÞÂüB2ž¬6 Åëê[†Û€U7nls6ç½-h¼Od~yEËÊuSÚæYº‰ÔÖ•ko[1¬?ýÔÎUMõË_½t¡‚È+™“W‘06f/Ql¢4²j]ÛwFìý (~mscýp;s(v]ýwZL«‡QжûÖúuM#úÕÒ©–æÖ¶‘ö€”·lhjùÅ +ßçЯ!²;ÕûàE¹£ÄnJ'Š|88?4;rI½…´¡5‘‘€ÞÏŒ€töAÚGY4ÈÆÓKÔO³é•P5í¦tšŽP,ÝÆ^#A)§§ÈË\Ä©‚R˜J{è]º‰6ÐÇ4@9TI°xŒ¤J¦i‘ßWÒÝ‘ãЊ¦2ú`kÙBÊG}&Ïe>XÞé§ʉ¼y­Çèc–é¡™¨ý–âh uÐ?S<­¡W#—ä´©²Mìwä¡mW&*]‘[èz:J¿d•¨Í¡ÛÔw¢ŽÒZôz’¥°þÈÙÈ'ô‚¨ #}—î†Çaêç׉2µ›Ü”M7Ð\ª‡ôŸè]–ÀÆ‹@dL¤4²܃ô)÷ñŸ3üðÑ,ª£{éqDãm:G_0+›Äc‡PÞ`Tßo•ÔN·#¯CôÒa:ÎÆ³ñ<…§ Z)4–n„l'í‡ý^:Ã*Y-ëg/ŠýjÁPq$1’ù$¡qT÷Ñ‹°ñ9+€,ˆLѦd(mjáå-˜árz”ÎÐðãÄý ú+‡ò!¿ƒwD–DžŠ| _,䢩4Ÿ–R3m¤oÓXÕ—èeú3»È£ yZyE½]ŒÜØfS)|Ÿí…{;V)L}(oc–qÌYLesÙ¶’íd²>ö.{—›¸‡¯ç¿ºxM¼¯LVÕHFJ¦ ØÕh ­Â +Ühßù>E¯Ð)–IJYfô6úɯçå(OòÓü±UìT.©w ýçÐÅH™‘e3‡vzQøK†cÙÖÊ>‚ç»ø³"V8„&&‰±HÔŠ»ÅnñïâçÊåòž:K­W™ë‡nz#Rùž|É¿ÆP.M¤)ÈŸȦ[à_ ÊÚD[¨‹îC¾ÜOÝtó>I§è—ôkúV€˜>¯†õuȺ­ì>”=ì0{‘½ÂN±Ù—²ðL”>™ó2^ÁWò­(»ùþ6?/F‰FÑ!:QöŠcâ]…E‰¨…(3ÕíêAÓkæóLsƒåg—.\w¹öòC4”6ô­¡‡^ú$²8rü÷R]O·ÁË=ÈÁý(O#ÑOègô+Ã×Og*2ÞÉ4dC.V­˜Í`³Pæ°ù(7¢,aKQêY[…ÒÁ:ÙwÙì{ì^ö€QÆÜö³²c(?f'P~Éβ߲߳O9’˜ d³—áù|fZÆgðy|ÊJÞŒÒÂ7ðX¡ƒ¼—ço‹áy¢^¬{ÄÄKâ-ñ••\%_ñ+‹••ÊÊiå åå¢êRƒê*u¯ú’)Ý4Ñt£iéaÓÓyÓ%³É\mn0o2¿eŽX¼Ø­~Šy¥o>ù¦Ó¬UMT¾ÃÏâ½pŠu»3ñEb­¸OüB]Á…›½ÇºÄjqKäIQÁÿ*šÙb~’e +—Z$VÐаCüCþ9ÿDIb‹øïXŽòÏìǼY”q“4¢¾©$)wªç‰ø¯¨ˆofýüq§¸3ò¯T¤îegÕ½ü r+<Îâ­ÞÆB§ŸóÕ|;Õ(Õ‹´qÿ¡úÄ{:¿›o){éc¡ñÏØ {»Æël¶’ÅoæÓØ!츗Y]`ë©…=@öû5ë#ÆžYÁjéÜÆ¦à3öºð°·D4ÕJY6ObÕ|ß(ž7“Ã.ñ º V€Ü¹ò Ñ­xvó1ØÓ‚ØMÞd…䤇°ß>ô¼Ü±ÕwÔíȳÇE.- ZÆ_£"¼£ÔÐ]TH'ƒwS˜6E:Ùrìûs°rêck(ŸY±[¦À·|/’y&öÂ:Xý+öÿW±ëW²?Ò·™oV?å(R²C bg +aÿÝŽ²œ–¡õ(Ýo:ª¾IóX +‘âÚ‹,ŸnÆ7ç#ØO#?ü[J+¹ðÚy=z<:4“(wÑkŒÓfø<ïyµ2;5˜áj|£ªðM¶t~ 
ê÷–kµný‚QŸcÔwuê:¸ƒÎUån…ÜA½b㪮`¨ÃõX£Ë´²¦è¼\ꉶ¢jEMOÑZzXÊtfTxJ°¨‡“ŧô4­<¨§jåÒ]xƒõËõêù5Áòt§6/WgeZƒNZ©n÷*Tf˜ÑMeºÙ0ã^-gCÛÝ=¹ý];úÔòÅ,×–×ßT£‹úZi#λåzÊíçœ_71x|YͶoJÓEWйÚ-›]]ÛÜz÷üšoJ=×Öb {+B]0¼!¬\è†-¾µ¶Fg[aÐ-ç!ç4<»&-(9¡5n=J+ÕVu­ aaÒºtZp›'œ–8 ´ »kQæÑ‹ÓµÚúòQ=‰Ôµà¶ÞÔ€;õZI^n#n8¬=±ö‘JŒí›•¦«2£f¨ËZå‚«qeÒ#mÒAw7ºáI†9M•¨i*u5N…žZ†^úr¬Çj=ª,Ôå(ß!ûëª×¡¹»¾ ¬¿vá×rêG8&¯ã ’U™%W ò+uÝçÓÇ“ b.ÊÂÇéF{R^îÆ>®k-7ÂGÕˆm}mQ>‚ïñÈåÝÞ 4ôÎù5Ãm75¤‡)ï«ÕyHJú¯H’n”’Î+’«ÝCòøY’7‹$Ý’}õÏîHN®*ÒYòÿ n–W.Ô*ç/­q»B#±­\tMkX>õªl¤Æ†¸®x©YRoÁÒÉÀŸê­Ð‚«C3ñªÁG=¡¬F¤óÚáOÆPÈß›®Ž,51r,Åk2òyŸÙ‚68Ì]¡;B3‡qm´Çó¿ìÔ”½ òu·‘9éE¾kÛ×_Ӿƽ˜.‡•l^¹hiWWô5² +lV]]š»¢+ÔUßélÐܭ븨5]-ÁЕå؞®Wì¨Å$V±¢Âà˜èS|}šIÅ÷Î{L ίÛòã‘~ñao0Xèõ]gÐpÎØBCNUø¯âC~QgÃÉé†äƒpiéHeòÔáJ︼³%Ñâú€‹ÄY|TŒ^½9×–ØÀ`â²ãï¢nñkÒœâ½Þ¬ìÂ}'ÅÏ UœÂ÷Tv;¶ÅbÀŸŠã”îÂ9ôèˆähol\!•´Š{1½~à3€À @¡fq:;G +Ù]€|À<ɇÄ!ø¹ýíÀù€fÀN€B‹ÄÓàß"±xJ¬Á‡Ñ%vàbšº]|ß ÿšúø¸>ˆÇÑ–tßHûP)ÿÁÚÉ ЇÀO}иðºÄ#í¢Ýè×6B»Ek8Ãå(É€Ü (Ôv£¶¡Û3ìÖ–z@ A× S„ksØ£k´¹7%µ°!ÝŒÐoFä6#r›IhÓMÃ:ybt6Agt6!*¢öZ±`ì¸qoEÜ%_îœ1øßÞè–-ñmÄq,¼ºG¬ ç¸d+{§ +‹ŸÃ9–aؽ©£ w~ÝŠŠ–‰;BíR·É6õFÅHnSoÚèa +­[JbE#ý€S"p`"  ˆÆpV¾ë„˜Kë,ˆuuðÑ¡t¨JA9‹?) +©ÚBHÉx‘G~(ŒuÕùÙ”PTKTg”pD¹£ +¢QÕQj3®D;…p‰|Q,æ‰:¡öEúÃæ¢ ¦¢ »¬ÝVÝÚo=cUuS¿éŒiÀ4hRݦSÀTm +™ZL¦]¦nSÔ.Ó.3Y[¬Vá°º­Ö€µÚªºÌ¬»d«h/-°ÐØPã:ðÝâf@V£¡¸|&´€3¨€ªhÙ¡g‡ž\;¸vp XJª!@ˈÔtUr¥Ô”ÀHcÁEl€e 0-Z6´lÐ:Ã/ÁC°P o€¬¾"+‘‡&C>hè\‘d_~)P?¦,ÓDzî±l×Xð—2âããë´:o]NÝ~¥Ykö6ç4ïWæió¼óræíWеboqNñ~%_Ë÷æçäïW\šËëÊqíWvV©:YuºJ©«j®ê¨S°t½a_A¡A3½’ §¦N±—ÜÀ`:uÀûg‚ìÀ.@> Ð Pùƒû ¸Ï€û ÍÔTôzFn1À®™äï3d²&åü¹À䇋&Ì+©Â¶[Øû0ä‡ íáÚƒ¯üy#úÝ_j¹WúÉMp©±Ý-Åk¸”Šu€€J§Å: ÀèÀ.@ à@KQ–ˆ%ü”Ãü°È ØÆ'¹(9Yþgq”8x rÁÆž2ðþÇÀÅÎ +Äζ}9ÛöÂlÛ]³mcPá9øÙØn{ÖÛ³%¶y%¶±%6Œ–B²ñ$›$fÿià¹Î $zl_ylŸylöØóØÖ{l7xd¿Qx‡m<ÑÀV‰qI•x¶³V—í'.Û—mŠËVbc{¬S©3 œ.1ûôY{¹¢žcŸR9FbaÿXW'ƒ°HØ_2öϹöïù[Øÿ}×óì+f|ÚØ—á¬s®’$ö9›¥Èög#ôÏl®K.\žgáâæbÈϼ ÿöo‘úO¢ÿÐ~‚2-Rÿq\%ÝÇfüÇFú=Îm€ÕG¹·Áê(×°úP8÷¸ßçÞr8w-ÈΰW:¸&ìç*‰c+)‹KÝFòréIՈřy-èŒáÎÁp®ìU. 
ô±²°6dŒôòy¦QµaÎÖŒIŽ&Íbi†Óéä5h,³ÎÛ(Ó –°¶£˜žõžsýÅÿœœ8}Áìá½®žÇü£ù6+|ÈõÆq®°ëtnósý\{ÎõJV[võçöY 8™ÛÇÙQW‚¬C—³c®#¹+]Ïh†t¿)–zŸ?Ïõˆ¶ÔµÇ‹vص%÷yé­ÃŒC\›;ÝUå?äªðö1ˆ~ D»Š´ ®i`Oíc³z¹ÆgõIW +0Æ¡c®q°˜­®Ü8åŸDfÖÈ5·™Ì‹ÍóÍ×›'˜óÌnóhó(s¢%Þâ°ÄZb,Ñ‹ÅdQ,ÜB–ľÈ@À'Ïm‰&‡ñŸ"±bÔ\bnüôGœY8Þ=ATòÊ…¥L¯¤ÊE¥ú_eŸ9²@Ÿê«Ô-Õߪéaì¾Z´t~w£E5HPÉÚš./wlj±ü­÷¦Kºië½µµ¬Rïo¤Ê·þåBÌ#‡TU+uRòÆbgqüô¸iå…F°ïëÇéûæã­?X¹°Fzt­^(+‘ѵ•ú y-<Î×óæ`ùqÞ"ImÍqv;_\ ùìöòÚ«j”É[ F~I¤Z/eJ5Êd½†Z•¡†4Í –÷df+½ÄfI%¤ÏK†ÒÊá±²`cUK5žAYÆXY[VÜŒc´RÝ¢¼’¼)ÂÛ)E±òç—‘óŽë=é'ØS#"ØqZ)ùÈ\]~õ¯µµµMB{»¸­ÝiðÚðÒzVêò²è×ýA=*¯er9ÚGž²š€ã¤ÿ´Ÿ7û;ü;ýûüGüj{{-Øñ'3OgòºÌæÌŽÌ™û2dš¤à¦šcÿ¾Ì?eŠvdkÃ,7l¶ƒâO6ÛÚ[åC0Ð +6çk÷•Õ”dR#N½ 'ô’ +U»6¶B’Ám•ÝÚ%½ªÇåÅY5ÎÌd¦9=œ=Ç_ÀaÕÌO†IUúø Ï +Š6ËÊQF©“zrN‚¥(v »™œ>Ç—þËþ¹ŽÏýs.û©uÇ% ñž8Oœ[?]r‹þK•.’[é76ÿ…¬“×à$¨8àæjçèå“;TÆŒ nìVÍBlëfg˜‰õ±‰G©SY´TZ»¼ÌïðSþàñl™/Á“äYÈÕËyÊCÆí”‹éRWá¼Sø®Ùi–âuÃDg(U"{FròX³ß<ËüC³)àþ–²Ôò­”¥Î[,mqmñZ‹ÝwØz8ö”z*åß寮ëp¥|•’„s€’ª¦'¥&§¦Œvš£R¬Në艩3RïIÙé6;S9OIKI5ÙD*WMΔä¤Ds‚bëc«QQĘâÎ(Õ'&bjÚÎT¶/õH*O=!& ž÷ö2“ÑÇî ØÈô›y u Í  JB3˜T¹îN·¹»ÝÜúû +±³±@ ±Ž7󾓟ä§ùYþ'ná©®ì>Óéú,›sÎa®cÙú/—Íù|ÙÇ,Ò…ËËÖû‹/¯÷‹‹O™?¦qG¬›CÝürìˈìú Ëh™L,ò1á™D4ib¶–i2k“'O(”S3™¹ÙS8yòq¨îÒ«gî½·.ß—íM=ýÈþ_Ì>ðÕtÖ°vIES‡.zY){ø‡[´¯?þ“·v­\ùÄÑ¡Á©ŽñyÆz b½LX¯dz(p†œÝΧB΀“o¤»ˆÇ–$°Õ8µF±nÊD¬dÝ‚º†Î%;[MÉàû4€ó˜Gq¦FYb¸ ì/PŸˆµâ&Ø;ì»ìÝvÅžšr‚g±s#òùç8.œsÈì-öÁ˜F_\¸Ä¾ðùÆ ë—%x'Ä%&'§$y&Mç“â&ŽÉF(̃l¶'ÁÓMMŽ6{Ó¼¥ÊO¿¸mÃÔ îõòÑãoçïïçÎpá¤C³#çÅ1Geщp½ÅSbXU“$±ÙÒú˜=•FÙlÈewgd+Ùq’[GÍÔA;©ïiª÷Ëøze‡×u–ôBñ…ñe·ªX––•™ÅMœ ÆMfï¨ôÑéé”m÷Z³©)©ÜäQâÈeJk`‰±¨%Ç –ÅÜ ,ÝïHj Ôh ã*Ñ8ÆÛ’01~ +–?%9.‘›´Ì1ÙS)É +'O™,ƒ2FFÅÄgïh[ztÓ#w¿ÙðÒ–u/§­ŸÜ–q]AÖ´±Eå“fNä{ϳy Jö½2täCÇøøÅ¿ ïy ~Ãa6íü#­ž=jäEUäœØ¯V‘•ž9NJd 7!}ºjœ.QIµ0UŒ‹*¥€-dë¶½ÊNñwØ;|ÀfM#fed ØW—…@šà‰BpEØÔÀŒIêo˜ Äô¹3õ±=Ǻ­Ìš£žàçqÿ$ƒ³ªPª•nEUžç¿¥˜‘;¾\váœ÷Ïå;äs\ðû·©×ù¶Ån¾òÆl@¼°#1y +ó˜Ÿó_ ù[ØCÛ×,š0Z­ÊþÛ Ê+é×…¬Ã¿]úi ^gù\Þo´ùȯ›‰ò'/! 
`¢ìÁŠàÆ?GÿÁžÿ…&X‘³œ‰çI,¯ÖÆåbøQ ~¼ü÷8É߇㈭^×Ô:·éÛ š×ÕßZ½pÎ"£'ÛõDÊrms#×0F~6Mc£øÓ´ÎåBi6U ¯éÿðȾü?ôèGNÔÙý_XR‡=ñÑè—$=zC/]l»¼ÃQd‰E3ꊭÿîi +endstream +endobj +2221 0 obj +<< +/Filter /FlateDecode +/Length1 4304 +/Length 2777 +>> +stream +xœÍW}l[Õ?÷=ûÙÏIì÷e?'®÷üòÑÄvÜÚIwiÒ6mÚ® M[l(…$MK¡…¬”°nƒµò6J7TCš˜¶ñ»Ïõ4`@µ¡IÛ*6uÑ61&±M­CiÒ@$Ù¹ÏNÊ×Ôí¿Ùò½çwî¹÷žó;çÞ÷ šàð0¾c"çø6{¦LÎÖ°? @ž>~ÌhP@Åkø{äÀìÁ#Jú»8v€›9xøž5{÷ÝÞ'o™™ÜÏû¾y@z•ý· Â3ͯà/"n»åȱ»köÒ86+ß1=YßOÃ&tdòîYB¥ hÏü1nŸ<2S÷홽ãÎcõùlcöèÌli²e;€+Œ>ÝÈFÈyî,$bøíÀo? ÂptÛa7~÷":·Ãâka¨Ð ¬ƒk  nƒC0ûA† 4ÃÍp¤a´Á>È€üp=l‚‡ákp +Jp¾³p<ðydö>ø<ࣤ n¥Ýãºåx‘‚µ.L…DamÑÑÝ_4.R¢ö„S”$?ÑÆDŠrÉ­; ­¢™¢|òPØ Ãã“SÔ•dSM˼·ðçÈoŠ´+ÌGþQŒX&u' +tôxÑ(q=w²iïõ)*$í89…»§öîPÀe'Á48-3Þ]`í΂5…Þ[#ìˆ5‚Ìï,”±×O”‰A°£Æ4mžY±´W0IQ‹¼`“rŠÕ‹°_øÃgæƒÂ%¦ùÈÇçh|äï0,lÆ +çsbS½ã}à~Ž,>DÉ×a+õŽlB.Ú£,£TÂbÕv¢p¢Eæ÷€ºŒ”Ol >ccÙuK8@46¾¾/,B{qPÌׇ z¯ +8è‰×ð^4°5ê AC¦š4®¨Býu4Ôúû¹„ÍG¨"Ô¬e¨°}UgߺFF¥yJåÀ2 0{‰XWìƒLãÿð +!¦ r¡+ -Ã0B=¹‹TK° ¸ ¾ø<×Ͻ Þ%YÈÁóPî&¨´5êJÐLÚî÷T©ž¶ÁS-ëàKœÝ´/mg]UBÒ43Wé—!€ÖýíD¥ÅAå–8³n‰¢u‹d§HÂîÔ«ç|)o¢"Jp'tJ¶†z^«R^²s8wµ3×Σ֗’ªç©(Ÿ[wŠJÙóù<ÕdêɃ­·ÉŠÍ‹ùüªÕ$ +j~âî\ç@âѶõ 6˜_ÎôÆ[Hx“üèi¬“;«ò©‘7Îç[9UåÍkž}ilFTÕݬ6¿þ†îWüXå +X‹ïr6Öy˜0eŒ+Ë\IÛË_=KTÍPAÂçl‚Z™ +Hpî ’­ã½’`Üåäl7Ë_SÞVÙí¤°Ã‹Ç5A>Vš¦îhãB”Ù\–¼¡‹+מ1×,\ÎÇŒ§òÍMý»Oig»DýÒZëÕµmüREš.õôúަàëÖ ,sS@nºÀ†²È2È#L¨4ÔÈPC-¨dÂÒÛ¢²àZð4Vb]¢Š Å¡®Óvµcô±.&ÆâŒ¡î4õÏÙ€ E3e?{Ö û‘!ªd íHK> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(9.5)-1000(mm)]TJ +/F84 9.9626 Tf 1.004 0 0 1 150.705 706.129 Tm [(the)-248(amount)-247(of)-248(needed)-248(padding;)-248(mor)18(eover)74(,)-248(while)-248(the)-248(DIA)-248(code)-247(is)-248(easily)-248(vector)18(-)]TJ 0.999 0 0 1 150.705 694.174 Tm [(ized,)-249(it)-249(does)-249(not)-249(necessarily)-250(make)-249(optimal)-249(use)-249(of)-249(the)-249(memory)-249(hierar)18(chy)111(.)-310(While)]TJ 0.99 0 0 1 150.406 682.219 Tm [(pr)18(ocessing)-252(each)-252(diagonal)-252(we)-252(ar)18(e)-252(updating)-252(entries)-252(in)-252(the)-252(output)-252(vector)]TJ/F145 9.9626 Tf 1 0 0 1 457.277 682.219 Tm [(y)]TJ/F84 9.9626 Tf 0.99 0 0 1 462.508 682.219 Tm 
[(,)-252(which)]TJ 0.98 0 0 1 150.705 670.263 Tm [(is)-248(then)-248(accessed)-247(multiple)-248(times;)-250(if)-248(the)-248(vector)]TJ/F145 9.9626 Tf 1 0 0 1 342.631 670.263 Tm [(y)]TJ/F84 9.9626 Tf 0.98 0 0 1 350.281 670.263 Tm [(is)-248(too)-248(lar)19(ge)-248(to)-248(r)19(emain)-248(in)-248(the)-248(cache)]TJ 1 0 0 1 150.705 658.308 Tm [(memory)111(,)-250(the)-250(associated)-250(cache)-250(miss)-250(penalty)-250(is)-250(paid)-250(multiple)-250(times.)]TJ 1.02 0 0 1 165.649 646.353 Tm [(The)]TJ/F78 9.9626 Tf 1.02 0 0 1 186.392 646.353 Tm [(hacked)-367(DIA)]TJ/F84 9.9626 Tf 1.02 0 0 1 239.757 646.353 Tm [(\050)]TJ/F75 9.9626 Tf 1.02 0 0 1 243.141 646.353 Tm [(HDIA)]TJ/F84 9.9626 Tf 1.02 0 0 1 271.929 646.353 Tm [(\051)-367(format)-368(was)-367(designed)-367(to)-367(contain)-368(the)-367(amount)-367(of)]TJ 1.02 0 0 1 150.406 634.398 Tm [(padding,)-346(by)-325(br)18(eaking)-326(the)-325(original)-326(matrix)-325(into)-326(equally)-325(sized)-325(gr)17(oups)-325(of)-326(r)18(ows)]TJ 1.013 0 0 1 150.376 622.443 Tm [(\050)]TJ/F78 9.9626 Tf 1.013 0 0 1 153.737 622.443 Tm [(hacks)]TJ/F84 9.9626 Tf 1.013 0 0 1 175.778 622.443 Tm [(\051,)-246(and)-245(then)-245(storing)-246(these)-245(gr)18(oups)-246(as)-245(independent)-246(matrices)-245(in)-245(DIA)-246(format.)]TJ 1.004 0 0 1 150.396 610.488 Tm [(This)-250(appr)18(oach)-250(is)-250(similar)-250(to)-250(that)-250(of)-250(HLL,)-250(and)-250(r)18(equir)18(es)-250(using)-250(an)-250(of)17(f)1(set)-250(vector)-250(for)]TJ 1.013 0 0 1 150.705 598.532 Tm [(each)-247(submatrix.)-306(Again,)-247(similarly)-246(to)-247(HLL,)-247(the)-247(various)-247(submatrices)-246(ar)17(e)-246(stacked)]TJ 0.991 0 0 1 150.705 586.577 Tm [(inside)-253(a)-252(linear)-253(array)-253(to)-252(impr)18(ove)-253(memory)-252(management.)-314(The)-253(fact)-252(that)-253(the)-253(matrix)]TJ 0.98 0 0 1 150.705 574.622 Tm 
[(is)-212(accessed)-212(in)-212(slices)-212(help)1(s)-212(in)-212(r)18(educing)-212(cache)-212(misses,)-220(especially)-212(r)18(egar)19(ding)-212(accesses)]TJ 1 0 0 1 150.705 562.667 Tm [(to)-250(the)-250(vector)]TJ/F145 9.9626 Tf 57.424 0 Td [(y)]TJ/F84 9.9626 Tf 5.23 0 Td [(.)]TJ 1.003 0 0 1 165.649 550.712 Tm [(An)-248(a)-1(ddi)1(tional)-249(vector)]TJ/F78 9.9626 Tf 1.003 0 0 1 259.673 550.712 Tm [(hackOffsets)]TJ/F84 9.9626 Tf 1.003 0 0 1 308.411 550.712 Tm [(is)-249(pr)18(ovided)-248(to)-249(complete)-248(the)-249(matrix)-248(format;)]TJ 0.999 0 0 1 150.705 538.757 Tm [(given)-250(that)]TJ/F78 9.9626 Tf 0.999 0 0 1 197.561 538.757 Tm [(hackSize)]TJ/F84 9.9626 Tf 0.999 0 0 1 234.51 538.757 Tm [(is)-250(the)-251(number)-250(of)-251(r)18(ows)-250(of)-251(each)-250(hack,)-251(the)]TJ/F78 9.9626 Tf 0.999 0 0 1 408.824 538.757 Tm [(hackOffsets)]TJ/F84 9.9626 Tf 0.999 0 0 1 457.388 538.757 Tm [(vector)-250(is)]TJ 1.013 0 0 1 150.705 526.801 Tm [(made)-246(by)-246(an)-246(array)-246(of)]TJ/F192 10.3811 Tf 1 0 0 1 242.857 526.801 Tm [(\050)]TJ/F78 9.9626 Tf 4.274 0 Td [(m)]TJ/F84 9.9626 Tf 8 0 Td [(/)]TJ/F78 9.9626 Tf 6.336 0 Td [(h)-40(a)-25(c)-25(k)-30(S)-18(i)-32(z)-25(e)]TJ/F192 10.3811 Tf 36.682 0 Td [(\051)-209(+)]TJ/F84 9.9626 Tf 1.013 0 0 1 314.476 526.801 Tm [(1)-246(elements,)-246(pointing)-246(to)-246(the)-246(\002rst)-246(diagonal)]TJ 0.98 0 0 1 150.705 514.846 Tm [(of)18(fset)-215(of)-215(a)-214(submatrix)-215(inside)-215(the)-215(stacked)]TJ/F78 9.9626 Tf 0.98 0 0 1 318.648 514.846 Tm [(offsets)]TJ/F84 9.9626 Tf 0.98 0 0 1 345.153 514.846 Tm [(buf)18(fers,)-223(plus)-215(an)-214(additional)-215(element)]TJ 0.98 0 0 1 150.705 502.891 Tm [(equal)-226(to)-225(the)-226(number)-225(of)-226(nonzer)18(o)-225(diagonals)-226(in)-226(the)-225(whole)-226(matrix.)-306(W)94(e)-226(thus)-226(have)-225(the)]TJ 0.98 0 0 1 150.406 490.936 Tm [(pr)18(operty)-202(that)-202(the)-201(number)-202(of)-202(diagonals)-202(of)-202(the)]TJ/F78 9.9626 Tf 1 0 0 1 341.485 490.936 Tm 
[(k)]TJ/F84 9.9626 Tf 0.98 0 0 1 346.083 490.936 Tm [(-th)]TJ/F78 9.9626 Tf 0.98 0 0 1 360.171 490.936 Tm [(hack)]TJ/F84 9.9626 Tf 0.98 0 0 1 379.718 490.936 Tm [(is)-202(given)-202(by)]TJ/F78 9.9626 Tf 0.98 0 0 1 427.587 490.936 Tm [(hackOffsets[k+1])]TJ 1 0 0 1 149.709 478.981 Tm [(-)-250(hackOffsets[k])]TJ/F84 9.9626 Tf 62.983 0 Td [(.)]TJ +0 g 0 G ET +1 0 0 1 197.579 370.389 cm q -1 0 0 1 148.768 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 152.354 706.129 Td [(mat)]TJ -ET +.4451 0 0 .4451 0 0 cm q -1 0 0 1 173.658 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 0 0 cm +/Im10 Do Q -BT -/F51 11.9552 Tf 177.245 706.129 Td [(write)-275(\227)-275(W)74(rite)-275(a)-275(sparse)-275(matrix)-275(to)-275(a)-275(\002le)-275(in)-275(the)-275(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.48 Td [(c)-175(a)-175(l)-174(l)-828(m)-52(m)]TJ -ET -q -1 0 0 1 149.539 667.901 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G +1 0 0 1 -197.579 -370.389 cm BT -/F54 9.9626 Tf 153.049 667.701 Td [(m)-52(a)-53(t)]TJ +/F84 9.9626 Tf 198.751 348.472 Td [(Figur)18(e)-250(9:)-310(Hacked)-250(DIA)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ +0 g 0 G +0 g 0 G +0 g 0 G + -33.102 -23.941 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F145 9.9626 Tf 110.952 0 Td [(psb_T_hdia_sparse_mat)]TJ/F84 9.9626 Tf 109.837 0 Td [(:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 172.236 667.901 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 150.705 120.326 cm +0 0 343.711 192.279 re f Q -BT -/F54 9.9626 Tf 175.746 667.701 Td [(w)-52(r)-53(i)-52(t)-52(e)-186(\050)-167(a)-242(,)-900(m)-126(t)-125(i)-126(t)-125(l)-126(e)-426(,)-926(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-130(t)-434(,)-882(f)-107(i)-107(l)-106(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 
0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 163.108 301.945 Td [(type)]TJ 0 g 0 G + [-525(pm)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -75.851 -26.279 Td [(T)90(ype:)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 14.122 -10.959 Td [(real)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.464 Td [(On)-250(Entry)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.464 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 578.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 578.584 Td [(Tspmat)]TJ -ET -q -1 0 0 1 344.406 578.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 347.544 578.584 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(data)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -19.464 Td [(mtitle)]TJ + [(\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(A)-231(charachter)-230(variable)-231(holding)-231(a)-230(descriptive)-231(title)-231(for)-230(the)-231(matrix)-231(to)-230(be)-231(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(end)-525(type)]TJ 0 g 0 G -/F51 
9.9626 Tf -24.907 -19.464 Td [(\002lename)]TJ + [-525(pm)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-233(\050i.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.956 Td [(is)-250(used.)-310(Default:)]TJ/F59 9.9626 Tf 74.799 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -104.936 -19.463 Td [(iunit)]TJ + [-525(po)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 14.122 -10.959 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -317.895 -20.764 Td [(On)-250(Return)]TJ + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - 0 -19.463 Td [(iret)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td 
[(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -20.763 Td [(Notes)]TJ/F54 9.9626 Tf 14.944 -11.956 Td [(If)-283(this)-282(function)-283(is)-283(called)-282(on)-283(a)-282(matrix)-283(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-283(on)-282(a)-283(distributed)-283(communicator)-282(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-225(matrix)-225(when)-225(appr)18(opriate,)-230(e.g.)-302(for)-225(debugging)-225(purposes,)-230(one)-225(could)]TJ/F52 9.9626 Tf 318.257 0 Td [(gather)]TJ/F54 9.9626 Tf -318.257 -11.955 Td [(the)-339(whole)-338(matrix)-339(on)-338(a)-339(single)-338(rank)-339(and)-338(then)-339(write)-338(it.)-576(Consider)-339(the)-338(following)]TJ 0 -11.955 Td [(example)-250(for)-250(a)]TJ/F52 9.9626 Tf 62.495 0 Td [(double)]TJ/F54 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(matrix)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 99.895 178.717 cm -0 0 343.711 82.69 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(off\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 102.884 250.747 Td [(type)]TJ + -14.122 -10.959 Td [(end)-525(type)]TJ 0 g 0 G - [(\050psb_ldspmat_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ + [-525(po)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(aglobal)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -21.918 Td [(call)]TJ + 0 -21.918 Td [(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gather\050aglobal,a,desc_a,info\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(if)]TJ + [-525(extends)]TJ 0 g 0 G - [-525(\050iam)]TJ -0.40 0.40 0.40 rg 
0.40 0.40 0.40 RG - [-525(==)]TJ + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_root_\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ + [-525(::)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 37.659 -10.959 Td [(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_d_hdia_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F279 8.9664 Tf 9.415 -10.959 Td [(!)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.958 Td [(!)-525(HDIA)-525(format,)-525(extended.)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(mm_mat_write\050aglobal,mtitle,info,filename\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -37.659 -10.959 Td [(end)-525(if)]TJ +/F233 8.9664 Tf 0 -21.918 Td [(type)]TJ +0 g 0 G + [(\050pm\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(call)]TJ + [-525(allocatable)]TJ 0 g 0 G - [-525(psb_spfree\050aglobal,)-525(desc_a,)-525(info\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -23.747 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F59 9.9626 Tf 129.513 0 Td [(C)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 99.895 137.797 cm -0 0 343.711 16.936 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -BT -/F94 8.9664 Tf 102.884 144.073 Td [(psb_i_t)-525(psb_c_)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(<)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(s,d,c,z)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(>)]TJ + 
[-525(hdia\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(global_mat_write\050ah,cdh\051;)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -2.989 -23.747 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ + [(\050po\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 164.384 -29.888 Td [(147)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G -ET - -endstream -endobj -1846 0 obj -<< -/Length 6720 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(9.6)-1000(mm)]TJ -ET -q -1 0 0 1 199.577 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 203.164 706.129 Td [(array)]TJ -ET -q -1 0 0 1 231.784 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 235.371 706.129 Td [(write)-374(\227)-375(W)74(rite)-374(a)-375(dense)-374(array)-374(from)-375(a)-374(\002le)-375(in)-374(the)]TJ -57.767 -13.948 Td [(MatrixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.509 Td [(c)-175(a)-175(l)-174(l)-858(m)-83(m)]TJ -ET -q -1 0 0 1 201.262 667.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 205.076 667.672 Td [(a)-83(r)-83(r)-83(a)-83(y)]TJ -ET -q -1 0 0 1 233.175 667.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 236.989 667.672 Td [(w)-83(r)-83(i)-83(t)-82(e)-217(\050)-149(b)-206(,)-941(v)-165(t)-165(i)-165(t)-166(l)-165(e)-505(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-130(u)-129(n)-130(i)-129(t)-435(,)-881(f)-107(i)-107(l)-107(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(offset\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -86.284 -26.38 Td [(T)90(ype:)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 
0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.493 Td [(On)-250(Entry)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(nblocks,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.493 Td [(b)]TJ + [-525(nzeros)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-190(array)-190(of)-190(type)-190(r)18(eal)-190(or)-190(complex,)-202(rank)-190(1)-190(or)-190(2,)-202(or)-190(an)-190(object)-190(of)-190(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 277.745 0 Td [(psb)]TJ -ET -q -1 0 0 1 469.676 578.595 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 472.814 578.396 Td [(T)]TJ -ET -q -1 0 0 1 478.672 578.595 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 481.81 578.396 Td [(vect)]TJ -ET -q -1 0 0 1 503.359 578.595 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 506.497 578.396 Td [(type)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(,)]TJ -351.808 -11.955 Td [(of)-250(type)-250(r)18(eal)-250(or)-250(complex;)-250(its)-250(contents)-250(will)-250(be)-250(written)-250(to)-250(disk.)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -31.448 Td [(\002lename)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -44.274 -31.448 Td [(vtitle)]TJ + [-525(hack)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 28.772 0 Td [(Matrix)-250(title.)]TJ -3.865 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 
-11.955 Td [(A)-244(charachter)-243(variable)-244(holding)-244(a)-243(descriptive)-244(title)-244(for)-243(the)-244(vector)-244(to)-243(be)-244(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)-310(T)90(ype:)]TJ/F51 9.9626 Tf 70.763 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -108.95 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.533 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.956 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.184 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -95.322 -19.492 Td [(iunit)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(64)]TJ 0 g 0 G -/F51 9.9626 Tf -317.894 -20.836 Td [(On)-250(Return)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G 0 g 0 G - 0 -19.492 Td [(iret)]TJ +/F84 9.9626 Tf 142.565 -36.164 Td [(176)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -20.836 Td [(Notes)]TJ/F54 9.9626 Tf 14.944 -11.955 Td 
[(If)-290(this)-289(function)-290(is)-290(call)1(ed)-290(on)-290(a)-289(vector)-290(v)]TJ +ET + +endstream +endobj +2222 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (../figures/hdia.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2229 0 R +/BBox [0 0 556 211] +/Resources << +/ProcSet [ /PDF /ImageC /Text ] +/ExtGState << +/R7 2230 0 R +>>/XObject << +/R8 2231 0 R +>>/Font << /R9 2232 0 R/R11 2233 0 R>> +>> +/Length 3027 +/Filter /FlateDecode +>> +stream +xœÝZK9Ý¿¢–E×øýX˜ 4IKE¬&êeXÌßç\û>\î$HhFBQé{¿ãû´OÙ®úx¸ÓŽþñÿÏ·oÞÔãý¿oÞ?ßÜñýÍ÷~ºpøÖÛéòñ¬ +߆âéöö–ý†Å‚ñîÌ6c‰á¬õð.1X4!“ÂtìyG±fE¥P7”h.¨Üvk.¨öÕ^¢Ø†Í•ýŽbÍŠªg«(ˆO¥Npög£³¢xÈnb}mˆ+(xw‰âJ~±âª;¨¾E·DqŸX±‚êYú¬DÓ¦ûÈsM4©GHµï(Ñ,¨àÆRÍŠJqG‰fEÕ¨ú… žÚ˜à¡œ™ÐaLùgÓø44<Áw”hT +mC©fEå¾£D³¢Ú T{‰ÂÂõsVú™a+šs»hZP9í(ÖŒúá¯4mŽEÐ[?1WÆdè ÛºRÓ†ÍJ'Ó*¯aæRVÅÀ2FƒÌ£`BØ0¬XóˆY+!•챯šÑᬕÈŠ5Tp»-Öê#¨¸œÁ·šœ‹;°²ÝñÓß?ÿ„ )"E:Ù&_4Ó±ì(Ѭ¨v”hõJ‰‡Wy×)îÔñAÜX²9ö#?Ï!û¿™œ±þ›?o¦I'ÖÖ¡Ð +ˆ hHz>iÊ9og¤U˜(”?a†EÀ³u¶ßaÈFg0Pð«ùŒõ]½ùÏ>[pSÐØ‡(©É@I]LsqF(ÓwépF­ºOM‹å‘ëLN’»X¸ÖÿQ‚§sé>- é@¤–´D–ÕE“Ý;~·†€bšsMÒ(»ÖÄgCyj¯KC*’K®hGj…I’9-‘­' bF-pé\\ ‚*¯’°$0DÉjþlZ*´tÅ…3DkŠ‹´ôµ'S\Z¿KGxôµþÚlnböeiHèg-=kðtCTÆJó¾®+¤Ÿ)øn+¤ Žæu ÏÖýºBi yš´œÔ…&­ˆY± sÙ|èlç ÆRIX¢äÇ56-²†àéÚxvŒàbA —ù$²õD\µpmÂãeuXXĨƒ¹r0ñiŠÂ¶Olâ‡aa00)CŸ¦HÓœ{º­ϱ<[ÐÖ±ãë;÷ØjlK{ˆ®÷¹oÇïþ²%º eá[hPצ¤9c()©¬œešIijùN}(#†°KKB™QÖ&„ +±ÑžG 7DÔÙgû=fZ:š·‚‹yl$oApYÔOíŇL~‰‚‡F9å5Öh¦Ó‚Ubú0º”(„O%Já[ÉÃY2]£fáÚIÉЮ”b^)yˆÆÁü%JFšqGê5 …žÎÈ4C`ôc^¹Çs.­ ¨ÝDbš¦kAGã’ÐE5þìiàË\+ˆ ÁA£Á,Ä4zŠãИ’S],¹q +àÕÀ–ÅèÚWšÙ‡ñZ1m¹©†CË8?¬¹e—¶Ühc·æÖó57‘-7ÖhhlABg–›Ä ŽQ ìYP×¾ÖÌÆZkaðfÃÞ%÷1#YSñüõtÃIoÃ`·FzT#Ô:ž©""ÒÒw+ ÄÁíj€eqA#Xƒ#͸g =з‰ 1€cP€D-¶,ÆZûJ3k-Í^Zn¢‘Ðrœ³ABÏqLË-‡9Ÿæ|S,›cÕphbABg–›Ä ‰Z lYŒµö•fF]æf^˜ò¨gÚ8ų7ÕÄŠýlT3"ª± Ú¿þV)Z?·¾ãf̺¡1Ö°X]]ƒùLÈ¿†ÙYáKjÏ¢ÖKedjˆ›<Ÿ®ð/o•¢‡‹í—çÛ>úéÆÅPó\+u¿Uóq™¿¨Ùñ¼(q|KàcÉtõ¼h|¤;ÓDŸ8TÜè3¦ÆÃYË«HŒÇŠ”Û+½’¬Q‹‰n¨û±ûìþǨÙ¡k£1*R$¢™®`˜jš“†Òè •f‘ª,vU#žÅûØ}JoÒ|íYÖ„5%sMè%Çxé?k‚SW-"RMXQ»Ö„G°Æ|d®ÉÕ§Ô„ì5kšéŠjÒãéÕwjt·n²¨VUÁne4;Ørô!”6t„A· ¡Y!éeB±:³h ¬yâJà×:ùD#†R[Dï°a§˜Ì–Isµ<H«eÕðd¡ èm8Ï% P§õ§#Þ´Ï·màxG¼ÎZË‘ýn9s…#Àƒjõ§á‚N5‚ÌcG,r©™®Çum”Ò|ûi¬½–»8¾PÃôE!øæ*]Šâ 
ÛæGR,_#ù¬ÝÍ?fCésöâÄñßâÝ´Ï·}$½”Ç®`Ly®…ÈR+Q—æ#­ÍK³ÙzT&zµ)ŠH_x6£=“¥O±ôù¢Og„ixLEð8•ˆÉ«KŽc³2žÙOO¦áU ¡òjSÃJ[êYâE,l^yÖÑgeƒ”´Ø¢þð™‰o² }DêBtôú§ëÏÎ'ºÖÑ"ý¨†ù‡Ç +Š}–éæƒ„†Ì¶fbôBÍ™þïñùøÍÃí›7ýéxøñ6¿qžßŸ`Ú dœöPÀ‡çÛ{õð›Ïø%¥ƒ^w¬ã_îÎ…)ôzÜãÉŽ-îú ƒʪ#Oú! smèéRßa÷ý»;ÿê¾aÃì\º«¯þúð‡Û=–ŒsØï®†ßÝݦ@ïwñÕ}Íî.ã?„NC ¬Ô,PŒ˜{¤ …áÛÔ…öJŸ«÷ôº8¬å¹×ô}¯ ÏHzw÷û׿ýãñ§ï¾{‹ÀU­áîÛ‡}±£ÇÀ„nbURA4Ûƒãǹ@7ú#Ãׯ"íW}¼{{¼~ófZÍÍã‡êðg)wæG?Üþ4º‘ +endstream +endobj +2231 0 obj +<< +/Subtype /Image +/ColorSpace /DeviceGray +/Width 2362 +/Height 946 +/BitsPerComponent 1 +/Interpolate true +/Filter /CCITTFaxDecode +/DecodeParms << +/K -1 +/Columns 2362 +>> +/Length 150 +>> +stream +ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ 0l?ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿü@ +endstream +endobj +2235 0 obj +<< +/Filter /FlateDecode +/Length 177 +>> +stream +xœ]1à EwNÁ €0´C”%]24ªÚ^€€‰ˆ¡·/8I‡¶ülë›õÃmð.SöHA¿ Së¼I°†-i ÌÎÑPãt>³^T$¬¿«øþD eìΣZ€=Å;b×è``JCR~ÒrÞµÖv¼ùÉ]0Ùc³1ç%“V¡KUP¼ J¼vêêáêð4Dõ–øŒo ÍjÏyø}C¬*Z‚|ïY› +endstream +endobj +2237 0 obj +<< +/Filter /FlateDecode +/Length 213 +>> +stream +xœ]1Â0 E÷œ"7hZ(¢Rä.¦ÊÐ4 +eàöØ.00<«/µ¥oW»Ãþ⬫s™ügb +>¦gñ¨{¼Ç¤êFÑÏ“êG—Uµ;º|{eÔÔ€añ“±ºÔ¼ÔËŒŸ|d籸tGe(LÃ߯nè糡NƪʮkŒ¡JºtÅÚ’nD[Ö-¤[ÖÒž5€@JIlÛ€m׬ôEÚ@ÚIÎo"ŽÌ»WÕþY +¦Y$àÅcÂß ó”yJê Ûál« +endstream +endobj +2238 0 obj +<< +/Filter /FlateDecode +/Length1 10604 +/Length 7344 +>> +stream +xœÍzyxTU²xÕ9÷ÞîNÒéξ§»sÓÙ:!!„%±%7+K !áÓIXÂf!( LÂ(¢…qpâ‚ð‡NG° >‰>ynê¸>G¢âˆÎd†qw„ô¯ÎM@ù}ó½ïÍ÷ýþøÝCUSUg«S§Î9À =À¡¶¦.7ô/ãB [×4wŒ•Ó3°¶uÃzû—sÞÛJŒ÷ Ï/ëX¾fj×\c€Ü¹|õÆecúITÏôhÛÒæ%/]©È, æ”6bDd†<IåÔ¶5ëoïï+BöÕí­Íãý yäšæ;”ÕÖä?Mȯo^³t\_ÔS;Ú;ו3…¾½cÝҎח¹¨­ÐzK¬|ªl‰|$>$8Kpntvà‚¼ +ÔÑ•aAµŸ'Ü û ÎãDx†`6<%P {`œ‚# +ñe@…r8N´ƒJˆAîƒwáZXŸÀ0d@|€áÔNt@4>#\·Ž“V”Áo`WcäR~&ËFõ¼+01x5ð•„O05Ð3)÷'ƒtè†_B8¬„—„¡àfü àRÔXWÁQx«(76ÊÂjªõÆàPàLàSxFBXJ-ýn¥û`ˆMàerØ! 
®†¹ÐLÒŸÁ»¹H”î#îø‚¹Øo¹Æá‚YзÃCd·à,|Á8ÄC”^ÿÊïÐØª  6‘_=HÖ;‡á8Nĉ,†Åµb ®!Ù.ØOýÀi¬ÂÂgù~9o´8ˆ +|@ÔÓ÷Á³ÔÇW˜G:ÔOáë¥di½œq+Íp <§á5Çd÷¯á;Ì¢ô!û9ë, + |Bc1‚ ¦Áp'Ù6 Jiì5¤]Gmï UòŸÒ[4Ë0´Ó,¦á\œËqÞ~|ße +s°µìsîå/ó÷¥)²(¢–¢!™úUa´Ñ +üœ¬}'Í÷ ¼/b¦aÍè-ªÿ »Š•Sz„bðm|—tA¾etxôÏ£?zÁ@^6ƒìГþ†Ñ4†L\‰ø1|7{’‡r+Wùd^Âð~+ßÃÿ‹ÿ^Z'’Þ“gÉÍò!Cóèõ£¯ª7“-W:dCL%ÿYFÞ´ŠÆ×Ail†­Ð w¿Ü }pˆæ}^„7áðZ@yõ¾†¼nÞAé><ŒÏâ ø"~ˆßˆÄR(e°)¬˜•±J¶œm£´‡fo±s<‘·ònÞCi/?Æß•@’¤€œOi¦¼C> ¼lÈ0Ì4´_¹0r1ëbÃÅFa4~ôßFï}vôÓÀÂÀF¿r`t;ò>òÁý”'O<¿…Wàm}¬_ C™<>Uò†lZµbœ³(ÍÁy”®¡´SjÆl£Ô=ø ¼ oÆÛñ.=ÝKsÛÿŽÇ(=…ƒ”ÞÄ3ø'ü¿`äÄŒ“7;Y:Ëe…4Ó26ƒÕ°ù”–³vJlÛ@+t€ °ãì-Á<‡7óµü>þþÿÿ^bR¶”+¹¥…Òré&é”ôšôŽôƒl“+ä6y¯üœ’ (×(+•{•#Ê9å‚A1ÔZ › 0ŒNŠV¿£y…Ÿ~¹Ê)ì”#¥ÙÚ±¼CÞŽ×Ŷ€¯æwð×åexžÛñ=ìå+øªÀ#¼’}ÇÛq!;‰)Ü&ñe°xˆ}ȾbŸJQ¸€}†Ò/ñ)Ö΢":‘ߢ¤›äsìm(b[pˆ½Àoâ7þŠä½xFÞË^»4Ì"à íêíìªô{¶‚í€z©@þVÝÿ]¾‘ì=ÝŠYüÒ^ø„«ìK‘¢Äë° 9æ‘ï\úFázÚ{X:Å´ +Š&o`>ÄÂ=ï¿}ZDlùyùÙC<æC4²—¡ˆöÆ'”êáȇAòÁ[!Ý ›=¸„âþŠŸ ü¸r1˜¢e ­›Î‹h–B±°‰zýŽâÿKõ«ð¯pÚig A†$$;¥ +ŠLŠ¿;(-F*=w*Gå7 c$ûè^òò÷á::s>¦þãÁMã[ IÙ4j;EæµTãÑ™ Qº^F[hÌÓiŸ×J3)òÞXI3\AgT5‰/ŠÀ=PFk7?pS`4 +\ Ë¡.pâ¦Àv¹-”]RÅØñy:þwPÜž ïQí²l<‡c2¸Wr’¥f©äzó× ý“•jÅ +ÏLÚj4FoDY=O` c9–Àõ¦È¯½Ü²(Ô‡ˆ¶$§¢ûÿ¿ÁH¬sÐ^éµzfŽá† ‡ãYÉ8/jéäÇjãsò¹®,_uEùŠá…ôr°”ƪ,îí ºBVIÁª··RµWözz›ýžÕnU{óz^ßÛQá¹´üþÀàŽoåΚDåˆ;"£°È +?¬o-Òë§ŸAç˜à :}ÚA¦óÎJ¼j‰¥·$;âTTäk~¢® :õedæë_|bþðÙaºˆÚˆqÆ K>ð•–Žg¦LË dåäŸ) âÀßÿ€Ÿ¡CE¯51!ÿ|‰™È:ãmÐÇÿ^o 5-ßIþ +É_â/Òy*ª½è3‡åSƒ¿ãOÑ-ÝF÷У㒣¡aùPÒÉo§é >M0Lpž@‚v~º v!ÀBØFKP#8ü?DãÜOõ-„s Ú vH°€?NüUóƒ|%Œ6¾“¦QDwð_éôQ¢ñD&>=øCTtßxù~¢Bþëqþ}TŽ&zï8½‡ø DïÖ¼6~×xyïÒë­§}¼Ó—l³–$“ÜNGÀ)·‡r{Èt{¨„‘.v«õžú‰æ]3FÉ\[|U_£-1qù}dÒ-dú-d¹-d¹- ‘hó%Íc:9|3él&ͤ³™¬’Ç;©¿NZ0 l%°p²{'Ù]𽄇Nëü› ï&è%~Ù1“Fu_é˰‘“-(Ôò‹OÐ=©ÙeqIù»~,™‚„# §¡»T—.0…îÒø¤1JZ«JBy+üŒ€A$áT‚‚r‰·úRsmƒ|.¬1‚jëfݼ[ê–¥¼r ?Éó¡Öä’á<ܤikrãT©ÃÔcâV“Ý”gÒLµ&¹žD»8·ñ\^Ìkx—ý!Ÿ¡hm†R4iwp_°7x(øt°ìU†”Óʰr^‘íJž¢)µŠGéPz”ÝJŸbÚ­ì60OpGpO0·Ûƒó‚µàÚ`ÙfÀ¾’m¼ElZÂV‚‚ÝÙ¸‰øv~A­F™â:âa ’•à4凉ÊT²ž…ô,ĵ×B\ ,$µ‚Žq©rYr©ŽÐ?/$é$ %n(Ùv˜ðy‘#˜M%3•ÌT2“ÖivFh%l'¨%à:o˜€¼†ð%YÞ¸ÜC èòóºÎ%™&ê² ZsúP&z3±/wg¢æ..É×R…‡‡7©MΦŒ¦ýR»ÚîlÏhß/Õ¨5ΚŒšýR±Zì,Î(Þ/媹ÎÜŒÜý’Mµ9m¶ýÒ®ê#Õ'«OUKMÕíÕÝÕ|*-݀ϕ—¯Ó§ G}qñùS-%W³#4&ÂûÎp°¶ä´ÈìˆÎ}‚¸O÷ 
¨!h"©Ö"Ķ˟.9!gWÈ9Mþ°¯hRMI5…Ý&‚}œÚ>LòúöXîˆÎ÷Öù5ãú}:_hÙ.ÕAp±îÓ6\ ÅM2œâ‹à µNØFÐAp„@â‹)-â‹Ø”³Ã<[3OŒ²At4,áaFk‰•…/˜ñ ŽïÕñm:.Öqª:ÛüÍló3³Í·Ì6§S†eÐdÆ=:vhÁ%æ'KÌ5%æÌ3µ0³(+ãŸuèpê4-úàÍ¢S£OÝJ­(O:ÏÚ¾uŸ‡¯ÑâÛkûøišßB*~„³|‡l¯æòÙNeûÑyÌö{õ„í…T?.ôÙ†²ýFœÌö3wŽí~u±í>'•}¶­ÙO‹aÀšñB7dO·U»Ù*~$±æ¦Î´ [‘ºÎVHìi~œ5pÈ61Õ/†’Gm:fË¢ÓT}(×Ld“Á€]Z¶a½¡Å°Ð0Ïp•a’!Ç`7$ ‘Æp£Õj 1FÅ(™Œ‘þÀ°æ÷¶HŪÿÄ' ,éy+˜é?ýC#£½ãàU¬ª®½áUPµ Ô;ÕUå7æ{§¹ª¼ÆÚ«ïG¼£J^v«aA=9¨`mK»ã€˜»íöA7o»½¡«¼C­PÕb÷~SGó¢Kª¬–ÆBô†âØâðéa…•åÿyƱëÇ/ÖõÓ/6É{wU]½÷ñ¤o¾È’ª¼3ijð8[ËÚ+ʳAêã&¶¶b¾àã¦ò†ËjÂ:H Ü‚µHj‚ºZµ®FnšRQÞŸ’2¦ôÎJä>ÏéJËÇÚJ¥.¨­ZAH%CªÞV*KjäcY~ÚX EoÌzc‰B©ßé$•l§PéŸê$…~çT]|èG±êN8õ~œØ ÷ƒø£NƘyÁ¸3’Žëÿå·´ô_PÆæ÷—´ŠÇ¹G­XJàñîØÐëíi±Ûû—¼?þjOó´´¶ Ú¼Ôû¾º´Ü»D-·÷7·þq«7«åýÐZ± ¾¿U[ZîkÖš+Ôæò†ÇºËª®èë¶Ë}•uÿ“ƺEce¢¯Çªþ‰¸Jˆ}U‰¾ªD_ié}UÍ/ŪÚú~#”6ÐëN§,8ˆöƒ'ÁÑPm혮oŽ«±?O”€Ž­`z‡¨¥^3å”ä”íN! +?¿Œ‹b~•#aŽ‹¬ÄSKÁ±+Ê/ÿëìì\/ «ËEx}W¬Î[O›ÖQWå­E·×]áÕ<å (–£kü+«×¬'ݧܬÝÝíÞåÞç>â–»ºˆ~2åT +kJiOéNÙ•²/åHŠ"×ÖÓÜûRþ–»țp=}åzŸ]DéŸ(®ïêPcݹº\eõ%)ÐJ·^¤zD¨“êdøOÂo|Lð%7þÁ#‚Ãsx=ÝËE .tbyþ@Þäüi~¢ÍËÆhÝâ1Z1wŒºKòc‰úŠ'•XèŽ0Hø%‚÷>'øÌóy¾Þxט×6tB§ iø@…õuºÖ£‹2(̽¾ÓåÂÁiHÕ…Wú=`g)hAˆ’ÎíÕº½¬ÇÄÃYÖïÌôŠžÓÏð{†.«vÒ²ägÏ<É!È 2G⌊|’ä 8f‚ Wáuë²~ã¾èžkýÊ=碊)o½@hbž#Ìæ$D¡.ØùÐM†À. 
Q—öçøSr½ÔSaÐ×l´Ó©é“å(AÌæx?Z´pS<¤iiLKó¤õ¥ §Iia‚ÚDünØ}4î8ç &ÓQë¢4Ιkm\ûÍœ()™˜W¶Q«ÆT55%•) 92ÅàLLHJHNàJDšÅœÇ‡Ö6%¾#C)B¹T´·`‚‘P¸5ªâ‚é1E ,²²¶F„O2)?&:,’)jJzÚTkLô¤ü)S§„¤§¥§©)…ÍÞ¹~±çÍ÷ßúFËs[×<_Q¸vÊúä y©…™Eå“g°½ç°f~ɾFüeôØ]Ÿ<ûíè¹þ»š×ÆÂs÷wæ9®®}@·™–é8Ù,ˆnqokÑf +îfÍÂ5 f…`”&ˆÜ$+(…›A +1KJˆÙìÇD-Ü`Œ4ŒF.”#ØÌh>ÐJã>Í,£b2*ŠQ–BB¤tþp0â2-Ød²pÜÇpÆýø­KW]¤»=–>˰…[Í€†¸ÐA¼ãÒ¬u‹htÏùª‘²² +Ÿ(.̵º)X/®s‡†bXxáö .i‹õy‘µX,ó`]#6®]‡Qj˜昌“ˆ ?~lÿÅçX×õûGSñ«;FËzø/.ìd]lÒÿ°çÉ& +Ù$îÑ"µXOl_ìp¬±Z,Û· -‰Àtã5a¤ßм‘ò*UþŽf²¢‰ø…Fw9 31”MÆÆa¿%õYZxh¨E ›œgé¶ì¦YK–¸˜A–ŠgÇ]Îåžc9kž_즹`X!|=r¿v¹hN¸¶1Â9),2::&Ê1y:›,ÜBxÅyœíˆp_;Ê<Ó¢ƒ Îxg©ô»‡~ؾnZ2s:YÒÄMìý=Yöd›˜cuà,ß/WC0‰pD¡ŠÚ†ôß³·GÝx×莵y &%ÉÕiÿxFz!a‚'xlë°‡ÕÓ£ˆC±fgrOÒ’)Ý2¢þƒ fÅZôànìÃÓ¨  ŽB´`±ËÅFò-È!L}7Š^uL¾ø‹¹g¼mŠ5 ÉGò±ú8†}!…&ñk†;¤°ÄTT\•"2afæ´L­ÀSpª`¸àÛ `‰©[Ý4áñÔ㩃^œpF=ãüï Ÿ§|æ ™eÌôãÎŒ +øÙÙÓy˜ççG¹lÆh?î;š¤¹r ’èY1`5gfœÀ6ˆûX ®µ÷ØÙn{ŸÙý|Ò€7Cü¸›ø9=9lwN_Ë!þÑ&C·üì-H+À¾‚¡V@k4ý)-âd‹ˆ›$ÂÞ¹KaמּçF×~%ÐYŠÅ]#ëŠGG sÇ"á” ¹ÉiAIIq¨ŽT‡Ó!)²34--ˆB\®”Ó‚ÉÊ9‚Ó[0È4AÉkA›9IÄ<«{ü|ÉÚJŸîÓë`--ï=òEE¢R)i“ tV´'ë0MU…ß§o+ê¿ù‘E¥ƒ[z:îýóm­¹Ž¸ø°cœYËîQãm®»çÚköÍÜ깿Mš}Û]+kïÙ;ñØÏ¼[–§'eåb%xïꚪiI%ÉA×Ý\³¼û±±õm¡õÝHûÃAþ\ö¨„á É+’»ån¥;i§t{’a2›ì¸†_c_äX•¸AÞ˜¸õÆ÷&>šúÔaÕBo;‹5,<"*:Æifœ‹€fwDÚ¹dwÄ'$rC¬$w߀ÝîˆÄïéôÐ(²âGÀ>r8hû âtzXÏ8Úcèë†_Óº©¨©•©äß³²>:D#šÉ®Yû¬Ì—2ˆwágú"žm¤0bm1R_ʳ´±(O§˜¾€UÂÂc +·'¸d +š +b£5B#]Ž´àUR{ø’ä¹#InlÀF48 ’XE1Щ4¶,—…Ö$ùƹ£m hºÛ¢›çunÜÔ>AOÏ­šÓÕ¿wÇš§Q’«?–¾÷Vÿªc=éSëò]VGA÷ÏÞ,Ê10‹8‹êÉæý´§b!F´¬.Ó† Baz×ù™SQ8nᛤMÑÛb$·1C‘¹—§p{“´'ŽÙÓ0-ÍBÛøöXÅÑ?`1Óí5±Z8…®,-‹iYž¬¾¬á,)+n̾$‚k„="/B‹ØÑaˆˆËüñp¡qÎųã7} Pp"ë5ެ#sá˜Íh#d':MáI‰É‰L sšÓœ&•\ݚЎPÊ¥¥µ`b¸½RBÁ¥#_x¿îûÊ ÂžÂçÅ‘Vž:e*Q‘—M¬¦(üî›<²*u÷/w¼²|ó+;šŸ¹-ß­ºøJøŒÊI³Ývë–´Er›Ó\óðïnkö>¾óñk0éέ¿X¾½Îóaiî£÷ú‡]x¸Þ¢à÷:+gëx‹”(½®ßÆþ +)~§MO À¿öѵ9(Ì`”þÅšÿÿ~Lѱ$ìs~j @8G`ñèsÖóÂj +YÑF.þÎ X°bÍÒιKo˜ß¾¦ùúÚº9 ô,p÷xýÿÍg¼²xή`ŒÿK)ÄDÀl;Áy‚jö8Ô­ƒ¨[ßÿáí°×þÒï=2ØdqmŒëøá“žôèÕðÃú‹;­EÆP%ï÷ÿ=X +endstream +endobj +2239 0 obj +<< +/Filter /FlateDecode +/Length1 5080 +/Length 3303 +>> +stream +xœÍW}pÇu{¾ûÀá@‚îp)!H€`HЦ(J´TY–,pdÙ$EËr¤„uÕöäÃi’Zƒ6¶•hœŽëIâL:I󇻢S[IGi:´V“ŽËq;©;ã~ØhR'ÓvEöí¤d7MÿëanoßÛ·ï÷~ûvBððpø¶£E œGþw,Ž/Ÿ[\íÈÒòüòù‡ô€ü$*~ŠïS÷­ž>i¿mg¸•Óg¹¯cï~ 
péþ•ÅS¼ÿ‹O(_Eeù~Tx–ùÿ +Êý÷Ÿ{èᎽr‹Ñ³Y^ìÎÄ¢ïÜâëdM¼Šöÿ…²þáÅs+Ýõ½ŠEbõ#}¨Ûÿ1Ö¾úàÊjc±÷€+Žkº›µ+ÜÓHáoe˜„¸ÏÑ‚cø;Òiø0|ª Ap;FA‡Ýp Á‡à ,Á)@…¸"ÌA?œ ¼†»`<¿ á·`>Ÿ„£ðˆìÇá7á³?…<å>\£·ž¯S0wÇ©«MÕÝ'êú+”(#ñ%yýïh0W \þÀ‘Ú^³n(Ÿ?×éÌášAgêêʳ®†iag¦©õ¸a$ôõ€p5Ǻk3³HÞÔ×»“›zíÀÑ„AI½Ö@‡̆©7æ"ëÐéÂ>*²0ȸn‰9À*òûh°¹øÀ½7zº*yt¢ñ8ƒíÖSfÃCõõÉÄKØͯÁ ™™%^aœ’«±òHÍ\ÂÕ›³ üs‘Ÿ9Rk"÷,Ï6‰NðCõeÚ³Ò·5—š§¨E\°(8¤ÇóÙÍq8%¼úîÅkªðÓ¼ç 9šùg˜ö#Ã9”9ˆ±®ÞßÅV÷ dós”|PïášMÈu{žE”ŠHÖ謄ZÅb´\²b%«RÒÌ—SJZ˜ ‹Í—žœYõ>Õ§U¾zîÓ…ƒ·œ?#$`Ñjöe‘úXÈYÍl“²JCÛ·`eN½;Mé0%B´aô±¦ÝH¯"ñü +:%‰³g3¤A„ÑÒ®ï¶ç¬Á¿âeÄïs/TèÕ'6þåi}‚ôM¥íCŠæ>ƒ\êÙõw2#H8ÞíqÉ÷ŽŸQ¿]I»°Íœ&ö˜ÁjéÊËÏíOGBrÅÓÆxÀ€Yh‹hã(mÁ‡¬Ê)Û c‰û)ˆAóÅÛ¶‰ß`¡Š““lËðÌG·“RJNzQFGx3æ;¡šæÈ©Ç/¿ðäSüâ¾¾ÜD%¹ëP!ˆæïœœª¢áï=ó¥+ßöKß%Ç÷¯ÎåÅáÝÇo*--MN.-mÇ+‹ñê‡ç¡`Y9è„…ú¬fe'|m§ÒR:áJDY,š/×J»QÜ%iW»éN3­›ørÍ´›UÓ¦/Gh¶H#ë¶€1MYM!Â?ÚDV0 ŠEÏÝ51!u ¢­!½c"FJ°x»ƒHØPÕVØ.”Ù.4GH޼?Æš£ÍIR²Ê•y]ó혺hLl¼YMéNá{Båc¢Á‹“C>í)óÇSýü 1|rÏ@H}ÍœìÜ-R›¿ ÿ€1OCs€qY/Ùy b´hûpw†"s$*1g‹E*¬c¾´5¹Ýáƒè‰&Ú€AM†ÛöNüj¸z³JAú#ÞíÕóìÀIÊkAQb›ì|× *5…ävùddîbÐÇnØ«#¤sð é;'Šs°”bŽ“GgOÞ={z~xÉß+¨±žøòäÈÄÜm˵óÇ­ÓÁ€Hb)-»´§2÷xqa´r¼zðžLæojwŒíØŸ;=ýkg2Jø•ßÝåG ù164}ŒÅ˜œJ+Ð!„ÂXÒ‹JVÙJ¦½ +Ã¥7†,I ùdI +Y2”bÚ¡,2 5Ī© n¸HÃë6 K’V3ÌÎÿ™0²„ÊcB©‘±Zýjô‹v ©¡v¨‘cÔàÈI¶Ç[­Úr/ÊbV‰Æ>@ <ÿ;CTGüKcâC'²}…kÿTÑ·yòx4ôÌLÖ§iþég'Ã¥Ûÿ@qcä;ש28þætfk݆8iøo¨©0x¢}$!ñB ´šÄa>q£{,¥Ç‹Ô½Þr‰pšºÚÛa> +stream 0 g 0 G 0 g 0 G - [-290(on)-289(a)-290(distributed)-290(communicator)-289(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-243(vect)1(or)-243(when)-243(appr)18(opriate,)-244(e.g.)-307(for)-243(debugging)-242(purposes,)-244(one)-243(could)]TJ/F52 9.9626 Tf 318.257 0 Td [(gather)]TJ/F54 9.9626 Tf -318.257 -11.955 Td [(the)-349(whole)-349(vector)-349(on)-349(a)-349(single)-349(rank)-349(and)-349(then)-349(writ)1(e)-349(it.)-607(Consider)-349(the)-349(following)]TJ 0 -11.956 Td [(example)-250(for)-250(a)]TJ/F52 9.9626 Tf 62.495 0 Td [(double)]TJ/F54 9.9626 Tf 28.692 0 Td 
[(pr)18(ecision)-250(vector)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET q -1 0 0 1 150.705 149.348 cm -0 0 343.711 82.69 re f +1 0 0 1 99.895 651.334 cm +0 0 343.711 60.772 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G 0.56 0.13 0.00 rg 0.56 0.13 0.00 RG BT -/F94 8.9664 Tf 153.694 221.378 Td [(real)]TJ +/F233 8.9664 Tf 121.713 701.446 Td [(integer)]TJ 0 g 0 G - [(\050psb_dpk_\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ + [(\050psb_long_int_k_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG [-525(::)]TJ 0 g 0 G - [-525(vglobal\050:\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -21.918 Td [(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gather\050vglobal,v,desc,info\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(if)]TJ + [-525(dim)]TJ 0 g 0 G - [-525(\050iam)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(==)]TJ + [(=)]TJ 0 g 0 G - [-525(psb_root_\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(0)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(mm_array_write\050vglobal,vtitle,info,filename\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.958 Td [(end)-525(if)]TJ + -9.414 -21.917 Td [(contains)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(call)-525(deallocate)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\050vglobal,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(stat)]TJ + 4.707 -10.959 Td [(....)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -4.707 -10.959 Td [(end)-525(type)]TJ 0 g 0 G - [(info\051)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -23.777 Td 
[(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F59 9.9626 Tf 129.513 0 Td [(C)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ 0 g 0 G - 29.64 -41.41 Td [(148)]TJ +/F84 9.9626 Tf 151.98 -567.173 Td [(177)]TJ 0 g 0 G ET endstream endobj -1852 0 obj +2246 0 obj << -/Length 604 +/Length 7873 >> stream 0 g 0 G 0 g 0 G +BT +/F75 11.9552 Tf 150.705 706.129 Td [(12.4)-1000(CUDA-class)-250(extensions)]TJ/F84 9.9626 Tf 1.012 0 0 1 150.705 687.165 Tm [(For)-248(computing)-248(with)-247(CUDA)-248(we)-248(de\002ne)-248(a)-248(dual)-248(memorization)-247(strategy)-248(in)-248(which)]TJ 1.02 0 0 1 150.705 675.21 Tm [(each)-322(variable)-323(on)-322(the)-323(CPU)-322(\050\223host\224\051)-323(side)-322(has)-323(a)-322(GPU)-323(\050\223device\224\051)-322(side.)-536(When)-323(a)]TJ 1.02 0 0 1 150.705 663.255 Tm [(GPU-type)-295(variable)-295(is)-295(initialized,)-307(the)-295(data)-295(contained)-295(is)-295(\050usually\051)-295(the)-295(same)-295(on)]TJ 0.998 0 0 1 150.705 651.3 Tm [(both)-251(sides.)-314(Eac)1(h)-252(operator)-251(invoked)-252(o)1(n)-252(the)-251(variable)-252(may)-251(change)-251(the)-252(data)-251(so)-251(that)]TJ 1 0 0 1 150.705 639.344 Tm [(only)-250(the)-250(host)-250(side)-250(or)-250(the)-250(device)-250(side)-250(ar)18(e)-250(up-to-date.)]TJ 0.981 0 0 1 165.649 627.389 Tm [(Keeping)-255(track)-256(of)-256(the)-255(updates)-256(to)-255(data)-256(in)-255(the)-256(variables)-255(is)-256(essential:)-317(we)-256(want)-255(to)]TJ 0.99 0 0 1 150.406 615.434 Tm [(perform)-252(most)-252(computations)-251(on)-252(the)-252(GPU,)-252(but)-252(we)-252(cannot)-252(af)19(f)-1(or)19(d)-252(the)-252(time)-252(needed)]TJ 1.02 0 0 1 150.705 603.479 Tm [(to)-270(move)-270(data)-271(between)-270(the)-270(host)-270(memory)-271(and)-270(the)-270(device)-270(memory)-270(because)-271(the)]TJ 0.981 0 0 1 150.705 591.524 Tm 
[(bandwidth)-254(of)-254(the)-254(inter)18(connection)-254(bus)-254(would)-255(become)-254(the)-254(main)-254(bottleneck)-254(of)-254(the)]TJ 0.984 0 0 1 150.705 579.569 Tm [(computation.)-315(Thus,)-254(each)-254(and)-255(every)-254(computational)-254(r)18(out)1(ine)-255(in)-254(the)-254(library)-254(is)-254(built)]TJ 1 0 0 1 150.705 567.613 Tm [(accor)18(ding)-250(to)-250(the)-250(following)-250(principles:)]TJ +0 g 0 G + 13.888 -18.472 Td [(\225)]TJ +0 g 0 G + 1.002 0 0 1 175.611 549.141 Tm [(If)-248(the)-249(data)-248(type)-248(being)-249(handled)-248(is)-248(GPU-enabled,)-249(make)-248(sur)18(e)-249(that)-248(its)-248(device)]TJ 1.02 0 0 1 175.611 537.186 Tm [(copy)-300(is)-300(up)-300(to)-300(date,)-313(perform)-300(any)-300(arithmetic)-300(operation)-300(on)-300(the)-300(GPU,)-300(and)]TJ 1.02 0 0 1 175.611 525.231 Tm [(if)-266(the)-266(data)-266(has)-267(been)-266(alter)18(ed)-266(as)-266(a)-266(r)17(esult,)-271(mark)-266(the)-266(main-memory)-267(copy)-266(as)]TJ 1 0 0 1 175.611 513.276 Tm [(outdated.)]TJ +0 g 0 G + -11.018 -19.199 Td [(\225)]TJ +0 g 0 G + 1.007 0 0 1 175.303 494.077 Tm [(The)-249(main-memory)-248(copy)-249(is)-249(never)-248(updated)-249(unless)-249(this)-248(is)-249(r)18(equested)-249(by)-248(the)]TJ 1 0 0 1 175.611 482.122 Tm [(user)-250(either)]TJ +0 g 0 G +/F75 9.9626 Tf 0 -19.198 Td [(explicitly)]TJ +0 g 0 G +/F84 9.9626 Tf 47.582 0 Td [(by)-250(invoking)-250(a)-250(synchr)18(onization)-250(method;)]TJ +0 g 0 G +/F75 9.9626 Tf -47.582 -15.214 Td [(implicitly)]TJ +0 g 0 G +/F84 9.9626 Tf 1.011 0 0 1 225.404 447.71 Tm [(by)-247(invoking)-247(a)-246(method)-247(that)-247(involves)-247(other)-247(data)-247(items)-246(that)-247(ar)18(e)]TJ 1 0 0 1 197.529 435.755 Tm [(not)-250(GPU-enabled,)-250(e.g.,)-250(by)-250(assignment)-250(ov)-250(a)-250(vector)-250(to)-250(a)-250(normal)-250(array)111(.)]TJ 1.007 0 0 1 150.705 416.556 Tm 
[(In)-247(this)-247(way)110(,)-248(data)-247(items)-247(ar)18(e)-247(put)-248(on)-247(the)-247(GPU)-247(memory)-247(\223on)-248(demand\224)-247(and)-247(r)18(emain)]TJ 1.02 0 0 1 150.705 404.601 Tm [(ther)18(e)-289(as)-289(long)-289(as)-288(\223normal\224)-289(computations)-289(ar)18(e)-289(carried)-289(out.)-435(A)1(s)-289(an)-289(example,)-300(the)]TJ 1 0 0 1 150.705 392.646 Tm [(following)-250(call)-250(to)-250(a)-250(matrix-vector)-250(pr)18(oduct)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET q -1 0 0 1 99.895 695.17 cm +1 0 0 1 150.705 364.481 cm 0 0 343.711 16.936 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -BT -/F94 8.9664 Tf 102.884 701.446 Td [(psb_i_t)-525(psb_c_)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(<)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(s,d,c,z)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(>)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F233 8.9664 Tf 172.523 370.757 Td [(call)]TJ 0 g 0 G - [(global_vec_write\050vh,cdh\051;)]TJ + [-525(psb_spmm\050alpha,a,x,beta,y,desc_a,info\051)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -24.209 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ +/F84 9.9626 Tf 1.02 0 0 1 150.286 347.275 Tm [(will)-258(transpar)18(ently)-258(and)-258(automatically)-257(be)-258(performed)-258(on)-258(the)-257(GPU)-258(whenever)-258(all)]TJ 1.014 0 0 1 150.705 335.32 Tm [(thr)18(ee)-246(data)-245(inputs)]TJ/F145 9.9626 Tf 1 0 0 1 228.932 335.32 Tm [(a)]TJ/F84 9.9626 Tf 1.014 0 0 1 234.162 335.32 Tm [(,)]TJ/F145 9.9626 Tf 1 0 0 1 239.17 335.32 Tm [(x)]TJ/F84 9.9626 Tf 1.014 0 0 1 246.88 335.32 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 266.463 335.32 Tm [(y)]TJ/F84 9.9626 Tf 1.014 0 0 1 274.173 335.32 Tm [(ar)18(e)-246(GPU-enabled.)-305(If)-246(a)-245(pr)18(ogram)-246(makes)-245(many)-246(such)]TJ 1 0 0 1 150.705 323.364 Tm [(calls)-250(sequentially)111(,)-250(then)]TJ 0 g 0 G - 164.384 -586.799 Td [(149)]TJ + 13.888 -18.472 Td 
[(\225)]TJ 0 g 0 G -ET - -endstream -endobj -1856 0 obj -<< -/Length 1109 ->> -stream + 1.02 0 0 1 175.303 304.892 Tm [(The)-273(\002rst)-273(kernel)-273(invocation)-273(will)-273(\002nd)-273(the)-273(data)-273(in)-273(main)-273(memory)109(,)-280(and)-273(will)]TJ 1.02 0 0 1 175.611 292.937 Tm [(copy)-303(it)-302(to)-302(the)-303(GPU)-302(memory)108(,)-316(thus)-303(incurring)-302(a)-303(signi\002cant)-302(over)17(head;)-331(the)]TJ 1 0 0 1 175.611 280.982 Tm [(r)18(esult)-250(is)-250(however)]TJ/F78 9.9626 Tf 78.156 0 Td [(not)]TJ/F84 9.9626 Tf 15.771 0 Td [(copied)-250(back,)-250(and)-250(ther)18(efor)18(e:)]TJ 0 g 0 G + -104.945 -19.198 Td [(\225)]TJ 0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td [(10)-1000(Preconditioner)-250(routines)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-228(base)-227(PSBLAS)-228(library)-227(contains)-228(the)-227(implementation)-228(of)-227(two)-228(simple)-227(pr)18(econdi-)]TJ 0 -11.955 Td [(tioning)-250(techniques:)]TJ + 1.02 0 0 1 175.611 261.784 Tm [(Subsequent)-308(kernel)-308(invocations)-308(involving)-307(the)-308(same)-308(vector)-308(will)-308(\002nd)-308(the)]TJ 1 0 0 1 175.611 249.828 Tm [(data)-250(on)-250(the)-250(GPU)-250(side)-250(so)-250(that)-250(they)-250(will)-250(r)8(un)-250(at)-250(full)-250(speed.)]TJ 1.016 0 0 1 150.705 231.356 Tm [(For)-246(all)-245(invocations)-246(after)-246(the)-245(\002rst)-246(the)-246(only)-245(data)-246(that)-246(will)-245(have)-246(to)-246(be)-246(transferr)18(ed)]TJ 1.02 0 0 1 150.705 219.401 Tm [(to/fr)18(om)-254(the)-254(main)-253(memory)-254(will)-254(be)-253(the)-254(scalars)]TJ/F145 9.9626 Tf 1 0 0 1 355.319 219.401 Tm [(alpha)]TJ/F84 9.9626 Tf 1.02 0 0 1 384.049 219.401 Tm [(and)]TJ/F145 9.9626 Tf 1 0 0 1 403.83 219.401 Tm [(beta)]TJ/F84 9.9626 Tf 1.02 0 0 1 424.752 219.401 Tm [(,)-256(and)-253(the)-254(r)18(eturn)]TJ 1 0 0 1 150.705 207.446 Tm [(code)]TJ/F145 9.9626 Tf 23.213 0 Td [(info)]TJ/F84 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - 13.888 -19.925 Td [(\225)]TJ +/F75 9.9626 Tf 
-44.134 -20.101 Td [(V)111(ectors:)]TJ 0 g 0 G - [-500(Diagonal)-250(Scaling)]TJ +/F84 9.9626 Tf 1.003 0 0 1 191.073 187.345 Tm [(The)-248(data)-248(type)]TJ/F145 9.9626 Tf 1 0 0 1 254.2 187.345 Tm [(psb_T_vect_gpu)]TJ/F84 9.9626 Tf 1.003 0 0 1 329.904 187.345 Tm [(pr)18(ovides)-248(a)-248(GPU-enabled)-248(extension)-249(of)]TJ 1.02 0 0 1 175.611 175.39 Tm [(the)-261(inner)-261(type)]TJ/F145 9.9626 Tf 1 0 0 1 241.259 175.39 Tm [(psb_T_base_vect_type)]TJ/F84 9.9626 Tf 1.02 0 0 1 345.866 175.39 Tm [(,)-265(and)-260(must)-261(be)-261(used)-261(together)-260(with)]TJ 1.02 0 0 1 175.611 163.435 Tm [(the)-274(other)-275(inner)-274(matrix)-274(type)-275(to)-274(make)-274(full)-275(use)-274(of)-274(the)-275(G)1(PU)-275(computational)]TJ 1 0 0 1 175.611 151.479 Tm [(capabilities;)]TJ 0 g 0 G - 0 -19.926 Td [(\225)]TJ +/F75 9.9626 Tf -24.906 -19.198 Td [(CSR:)]TJ 0 g 0 G - [-500(Block)-250(Jacobi)-250(with)-250(ILU\0500\051)-250(factorization)]TJ -13.888 -19.925 Td [(The)-356(supporting)-356(data)-356(type)-356(and)-356(subr)18(outine)-356(interfaces)-356(ar)18(e)-356(de\002ned)-356(in)-356(the)-356(mod-)]TJ 0 -11.955 Td [(ule)]TJ/F59 9.9626 Tf 16.301 0 Td [(psb_prec_mod)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)-350(The)-263(old)-263(interfaces)]TJ/F59 9.9626 Tf 87.314 0 Td [(psb_precinit)]TJ/F54 9.9626 Tf 65.386 0 Td [(and)]TJ/F59 9.9626 Tf 19.489 0 Td [(psb_precbld)]TJ/F54 9.9626 Tf 60.156 0 Td [(ar)18(e)-263(still)]TJ -311.41 -11.955 Td [(supported)-250(for)-250(backwar)18(d)-250(compatibility)]TJ +/F84 9.9626 Tf 0.982 0 0 1 178.341 132.281 Tm [(The)-255(data)-256(type)]TJ/F145 9.9626 Tf 1 0 0 1 240.361 132.281 Tm [(psb_T_csrg_sparse_mat)]TJ/F84 9.9626 Tf 0.982 0 0 1 352.697 132.281 Tm [(pr)18(ovides)-255(an)-256(interface)-255(to)-255(the)-256(GPU)]TJ 1 0 0 1 175.333 120.326 Tm [(version)-250(of)-250(CSR)-250(available)-250(in)-250(the)-250(NVIDIA)-250(CuSP)92(ARSE)-250(library;)]TJ 0 g 0 G - 164.383 -497.003 Td [(150)]TJ + 139.755 -29.888 Td [(178)]TJ 0 g 0 G ET endstream 
endobj -1862 0 obj +2250 0 obj << -/Length 5016 +/Length 5844 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(10.1)-1000(init)-250(\227)-250(Initialize)-250(a)-250(preconditioner)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%init\050icontxt,ptype,)-525(info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G +BT +/F75 9.9626 Tf 99.895 706.129 Td [(HYB:)]TJ 0 g 0 G - 0 -19.925 Td [(icontxt)]TJ -0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +/F84 9.9626 Tf 0.98 0 0 1 128.647 706.129 Tm [(The)-251(data)-252(type)]TJ/F145 9.9626 Tf 1 0 0 1 190.423 706.129 Tm [(psb_T_hybg_sparse_mat)]TJ/F84 9.9626 Tf 0.98 0 0 1 302.715 706.129 Tm [(pr)18(ovides)-251(an)-251(interface)-252(to)-251(the)-252(HYB)]TJ 1.02 0 0 1 124.802 694.174 Tm [(GPU)-295(storage)-296(available)-295(in)-295(the)-296(NVIDIA)-295(CuSP)90(ARSE)-295(library)109(.)-455(The)-296(internal)]TJ 1.001 0 0 1 124.802 682.219 Tm [(str)8(uctur)18(e)-249(is)-249(opaque,)-249(hence)-249(the)-250(h)1(ost)-250(side)-249(is)-249(just)-249(CSR;)-249(the)-249(HYB)-249(data)-249(format)]TJ 1 0 0 1 124.802 670.263 Tm [(is)-250(only)-250(available)-250(up)-250(to)-250(CUDA)-250(version)-250(10.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(ptype)]TJ +/F75 9.9626 Tf -24.907 -20.26 Td [(ELL:)]TJ 0 g 0 G -/F54 9.9626 Tf 30.994 0 Td [(the)-250(type)-250(of)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 151.121 0 Td 
[(global)]TJ/F54 9.9626 Tf -157.208 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(string,)-250(see)-250(usage)-250(notes.)]TJ +/F84 9.9626 Tf 1.02 0 0 1 125.32 650.003 Tm [(The)-265(data)-265(type)]TJ/F145 9.9626 Tf 1 0 0 1 190.038 650.003 Tm [(psb_T_elg_sparse_mat)]TJ/F84 9.9626 Tf 1.02 0 0 1 297.34 650.003 Tm [(pr)18(ovides)-266(an)-265(interface)-265(to)-265(the)-265(ELL-)]TJ 1 0 0 1 124.802 638.048 Tm [(P)92(ACK)-250(implementation)-250(fr)18(om)-250(SPGPU;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(On)-250(Exit)]TJ +/F75 9.9626 Tf -24.907 -20.261 Td [(HLL:)]TJ 0 g 0 G +/F84 9.9626 Tf 0.98 0 0 1 127.532 617.787 Tm [(The)-194(data)-194(type)]TJ/F145 9.9626 Tf 1 0 0 1 187.621 617.787 Tm [(psb_T_hlg_sparse_mat)]TJ/F84 9.9626 Tf 0.98 0 0 1 294.121 617.787 Tm [(pr)18(ovides)-193(an)-194(interface)-194(to)-194(the)-194(Hacked)]TJ 1 0 0 1 124.802 605.832 Tm [(ELLP)92(ACK)-250(implementation)-250(fr)18(om)-250(SPGPU;)]TJ 0 g 0 G - 0 -19.925 Td [(prec)]TJ +/F75 9.9626 Tf -24.907 -20.261 Td [(HDIA:)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ +/F84 9.9626 Tf 1.02 0 0 1 135.282 585.571 Tm [(The)-298(data)-297(type)]TJ/F145 9.9626 Tf 1 0 0 1 200.986 585.571 Tm [(psb_T_hdiag_sparse_mat)]TJ/F84 9.9626 Tf 1.02 0 0 1 319.077 585.571 Tm [(pr)18(ovides)-298(an)-298(in)1(terface)-298(to)-298(the)]TJ 1 0 0 1 124.802 573.616 Tm 
[(Hacked)-250(DIAgonals)-250(implementation)-250(fr)18(om)-250(SPGPU;)]TJ/F75 14.3462 Tf -24.907 -34.763 Td [(13)-1000(CUDA)-250(Environment)-250(Routines)]TJ/F75 11.9552 Tf 0 -24.857 Td [(psb)]TJ ET q -1 0 0 1 337.631 446.268 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 514.195 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 340.77 446.069 Td [(prec)]TJ +/F75 11.9552 Tf 124.116 513.996 Td [(cuda)]TJ ET q -1 0 0 1 362.319 446.268 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 514.195 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 365.457 446.069 Td [(type)]TJ +/F75 11.9552 Tf 154.315 513.996 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS-CUDA)-250(environment)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -54.42 -19.126 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [-525(psb_cuda_init\050ctxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(info)]TJ + [-525([,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ/F54 9.9626 Tf 34.311 0 Td [(Legal)-245(inputs)-244(to)-245(this)-245(subr)18(outine)-245(ar)18(e)-244(interpr)18(eted)-245(depending)-245(on)-244(the)]TJ/F52 9.9626 Tf 285.595 0 Td [(p)-25(t)-25(y)-80(p)-25(e)]TJ/F54 9.9626 Tf -319.906 -11.956 Td [(string)-250(as)-250(follows)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 72.358 3.617 Td [(4)]TJ + [-525(device]\051)]TJ/F84 9.9626 Tf 14.944 -22.253 Td [(This)-250(subr)18(outine)-250(initializes)-250(the)-250(PSBLAS-CUDA)-250(envir)18(onment.)]TJ +0 g 0 G +/F75 9.9626 Tf 
-14.944 -20.177 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 4.284 -3.617 Td [(:)]TJ +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -76.642 -19.925 Td [(NONE)]TJ +/F75 9.9626 Tf -29.828 -20.261 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(No)-250(pr)18(econditioning,)-250(i.e.)-310(the)-250(pr)18(econditioner)-250(is)-250(just)-250(a)-250(copy)-250(operator)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -35.965 -19.925 Td [(DIAG)]TJ + 0 -20.26 Td [(device)]TJ 0 g 0 G -/F54 9.9626 Tf 33.205 0 Td [(Diagonal)-371(scaling;)-432(each)-371(entry)-372(of)-371(the)-371(input)-371(vector)-372(is)-371(multiplied)-371(by)-371(the)]TJ -8.298 -11.955 Td [(r)18(ecipr)18(ocal)-266(of)-267(the)-266(sum)-267(of)-266(the)-266(absolute)-267(values)-266(of)-267(the)-266(coef)18(\002cients)-266(in)-267(the)-266(cor)18(-)]TJ 0 -11.955 Td [(r)18(esponding)-250(r)18(ow)-250(of)-250(matrix)]TJ/F52 9.9626 Tf 116.148 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(;)]TJ +/F84 9.9626 Tf 34.311 0 Td [(ID)-250(of)-250(CUDA)-250(device)-250(to)-250(attach)-250(to.)]TJ -9.404 -11.956 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(local)]TJ/F84 9.9626 Tf 21.579 0 Td [(.)]TJ -53.32 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(optional)]TJ/F84 9.9626 Tf 38.187 0 Td [(.)]TJ -64.966 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ 1.003 0 0 1 124.802 364.098 Tm [(Speci\002ed)-249(as:)-308(an)-249(integer)-248(value.)-558(Default:)-308(use)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf 1 0 0 1 317.836 364.098 Tm [(mod)]TJ 0 g 0 G -/F51 9.9626 Tf -148.373 -19.926 Td [(BJAC)]TJ + [(\050iam,ngpu\051)]TJ/F84 9.9626 Tf 1.003 0 0 1 388.314 364.098 Tm [(wher)18(e)]TJ/F145 9.9626 Tf 1 0 0 1 418.287 364.098 Tm [(iam)]TJ/F84 9.9626 Tf 1.003 0 0 1 436.462 364.098 Tm [(is)]TJ 1.011 0 0 1 124.802 352.143 Tm [(the)-247(calling)-248(pr)18(ocess)-247(index)-248(and)]TJ/F145 9.9626 Tf 1 0 0 1 256.205 352.143 Tm [(ngpu)]TJ/F84 9.9626 Tf 
1.011 0 0 1 279.618 352.143 Tm [(is)-247(the)-248(total)-247(number)-248(of)-247(CUDA)-247(devices)]TJ 1 0 0 1 124.802 340.187 Tm [(available)-250(on)-250(the)-250(curr)18(ent)-250(node.)]TJ/F75 11.9552 Tf -24.907 -20.176 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Pr)18(econdition)-211(by)-212(a)-211(factorization)-212(of)-211(the)-212(block-diagonal)-211(of)-212(matrix)]TJ/F52 9.9626 Tf 273.867 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(,)-219(wher)18(e)]TJ -286.723 -11.955 Td [(block)-347(boundaries)-348(ar)18(e)-347(determined)-347(by)-348(the)-347(data)-347(allocation)-348(boundaries)-347(for)]TJ 0 -11.955 Td [(each)-223(pr)18(ocess;)-232(r)18(equir)18(es)-222(no)-223(communication.)-301(Only)-223(the)-222(incomplete)-223(factoriza-)]TJ 0 -11.955 Td [(tion)]TJ/F52 9.9626 Tf 20.498 0 Td [(I)-96(L)-9(U)]TJ/F85 10.3811 Tf 18.202 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(0)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 6.64 0 Td [(is)-250(curr)18(ently)-250(implemented.)]TJ +/F84 9.9626 Tf 12.454 -20.177 Td [(1.)]TJ 0 g 0 G + [-461(A)-250(call)-250(to)-250(this)-250(r)18(outine)-250(must)-250(pr)18(ecede)-250(any)-250(other)-250(PSBLAS-CUDA)-250(call.)]TJ/F75 11.9552 Tf -12.454 -29.61 Td [(psb)]TJ ET q -1 0 0 1 99.895 130.181 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +1 0 0 1 120.53 270.423 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 124.116 270.224 Td [(cuda)]TJ +ET +q +1 0 0 1 150.729 270.423 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 5.9776 Tf 110.755 123.219 Td [(4)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(The)-250(string)-250(is)-250(case-insensitive)]TJ +/F75 11.9552 Tf 154.315 270.224 Td [(exit)-250(\227)-250(Exit)-250(from)-250(PSBLAS-CUDA)-250(environment)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -54.42 -19.126 Td [(call)]TJ +0 g 0 G + [-525(psb_cuda_exit\050ctxt\051)]TJ/F84 9.9626 Tf 14.944 -22.254 Td [(This)-250(subr)18(outine)-250(exits)-250(fr)18(om)-250(the)-250(PSBLAS)-250(CUDA)-250(context.)]TJ 
+0 g 0 G +/F75 9.9626 Tf -14.944 -20.176 Td [(T)90(ype:)]TJ +0 g 0 G +/F84 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F75 9.9626 Tf -29.828 -20.261 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 150.037 -29.888 Td [(151)]TJ +0 g 0 G + 0 -20.261 Td [(ctxt)]TJ +0 g 0 G +/F84 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F75 9.9626 Tf 31.432 0 Td [(global)]TJ/F84 9.9626 Tf 28.782 0 Td [(.)]TJ -60.523 -11.955 Td [(T)90(ype:)]TJ/F75 9.9626 Tf 27.088 0 Td [(required)]TJ/F84 9.9626 Tf 39.293 0 Td [(.)]TJ -66.072 -11.955 Td [(Intent:)]TJ/F75 9.9626 Tf 31.8 0 Td [(in)]TJ/F84 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G + 139.477 -29.888 Td [(179)]TJ 0 g 0 G ET endstream endobj -1872 0 obj +2255 0 obj << -/Length 7572 +/Length 5432 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(10.2)-1000(build)-250(\227)-250(Builds)-250(a)-250(preconditioner)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -20.364 Td [(call)-525(prec%build\050a,)-525(desc_a,)-525(info[,amold,vmold,imold]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -24.086 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.815 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.816 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(system)-250(sparse)-250(matrix.)-310(Scope:)]TJ/F51 9.9626 Tf 146.229 0 Td [(local)]TJ/F54 9.9626 Tf -131.285 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(sparse)-250(matrix)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 190.872 0 Td [(psb)]TJ +/F75 11.9552 Tf 150.705 706.129 Td [(psb)]TJ ET q -1 0 0 1 
382.802 580.382 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 385.94 580.183 Td [(Tspmat)]TJ +/F75 11.9552 Tf 174.926 706.129 Td [(cuda)]TJ ET q -1 0 0 1 417.95 580.382 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 421.088 580.183 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -291.305 -22.815 Td [(prec)]TJ +/F75 11.9552 Tf 205.125 706.129 Td [(DeviceSync)-250(\227)-250(Synchronize)-250(CUDA)-250(device)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F145 9.9626 Tf -54.42 -19.65 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(alr)18(eady)-190(initialized)-190(pr)18(econdtioner)-190(data)-190(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 277.288 0 Td [(psb)]TJ + [-525(psb_cuda_DeviceSync\050\051)]TJ/F84 9.9626 Tf 0.98 0 0 1 165.649 663.146 Tm [(This)-249(subr)18(out)1(ine)-249(ensur)18(es)-249(that)-249(all)-249(pr)19(eviosly)-249(invoked)-249(kernels,)-250(i.e.)-315(all)-249(invocation)]TJ 1 0 0 1 150.705 651.191 Tm [(of)-250(CUDA-side)-250(code,)-250(have)-250(completed.)]TJ/F75 11.9552 Tf 0 -31.147 Td [(psb)]TJ ET q -1 0 0 1 469.217 509.746 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 620.243 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 472.355 509.547 Td [(prec)]TJ +/F75 11.9552 Tf 174.926 620.044 Td [(cuda)]TJ ET q -1 0 0 1 493.904 509.746 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 620.243 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 
497.043 509.547 Td [(type)]TJ +/F75 11.9552 Tf 205.125 620.044 Td [(getDeviceCount)]TJ/F145 9.9626 Tf -54.42 -19.65 Td [(ngpus)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -346.338 -34.771 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 474.975 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 474.776 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 208.625 0 Td [(local)]TJ/F54 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(descriptor)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 247.649 0 Td [(psb)]TJ + [-1050(psb_cuda_getDeviceCount\050\051)]TJ/F84 9.9626 Tf 14.944 -23.333 Td [(Get)-250(number)-250(of)-250(devices)-250(available)-250(on)-250(curr)18(ent)-250(computing)-250(node.)]TJ/F75 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 439.579 439.11 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 546.113 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 442.717 438.911 Td [(desc)]TJ +/F75 11.9552 Tf 174.926 545.914 Td [(cuda)]TJ ET q -1 0 0 1 464.266 439.11 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 546.113 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 467.404 438.911 Td [(type)]TJ +/F75 11.9552 Tf 205.125 545.914 Td [(getDevice)]TJ/F145 9.9626 Tf -54.42 -19.65 Td [(dev)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -337.62 -22.816 Td [(amold)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td 
[(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -8.857 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F59 9.9626 Tf 201.393 0 Td [(psb)]TJ -ET -q -1 0 0 1 393.323 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 396.461 368.275 Td [(T)]TJ + [-1050(psb_cuda_getDevice\050\051)]TJ/F84 9.9626 Tf 14.944 -23.333 Td [(Get)-250(device)-250(in)-250(use)-250(by)-250(curr)18(ent)-250(pr)18(ocess.)]TJ/F75 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 402.319 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 405.457 368.275 Td [(base)]TJ -ET -q -1 0 0 1 427.006 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 471.983 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 430.144 368.275 Td [(sparse)]TJ +/F75 11.9552 Tf 174.926 471.784 Td [(cuda)]TJ ET q -1 0 0 1 462.154 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 471.983 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 465.292 368.275 Td [(mat)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ +/F75 11.9552 Tf 205.125 471.784 Td [(setDevice)]TJ/F145 9.9626 Tf -54.42 -19.65 Td [(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -330.278 -22.816 Td [(vmold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 34.321 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -9.415 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td 
[(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F59 9.9626 Tf 201.393 0 Td [(psb)]TJ -ET -q -1 0 0 1 393.323 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 396.461 297.638 Td [(T)]TJ -ET -q -1 0 0 1 402.319 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 405.457 297.638 Td [(base)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_cuda_setDevice\050dev\051)]TJ/F84 9.9626 Tf 14.944 -23.333 Td [(Set)-250(device)-250(to)-250(be)-250(used)-250(by)-250(curr)18(ent)-250(pr)18(ocess.)]TJ/F75 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 427.006 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 397.853 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 430.144 297.638 Td [(vect)]TJ +/F75 11.9552 Tf 174.926 397.654 Td [(cuda)]TJ ET q -1 0 0 1 451.693 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 397.853 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 454.832 297.638 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F75 11.9552 Tf 205.125 397.654 Td [(DeviceHasUV)129(A)]TJ/F145 9.9626 Tf -54.42 -19.65 Td [(hasUva)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -325.048 -22.815 Td [(imold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 32.099 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(integer)-250(vector)-250(storage.)]TJ -7.193 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 
9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(object)-190(of)-190(a)-190(class)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F59 9.9626 Tf 235.804 0 Td [(psb)]TJ -ET -q -1 0 0 1 427.733 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 430.872 227.002 Td [(T)]TJ -ET -q -1 0 0 1 436.73 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.868 227.002 Td [(base)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_cuda_DeviceHasUVA\050\051)]TJ/F84 9.9626 Tf 1.02 0 0 1 165.649 354.671 Tm [(Returns)-250(tr)8(ue)-251(if)-250(device)-251(curr)18(ently)-250(in)-251(use)-250(supports)-250(UV)108(A)-250(\050Uni\002ed)-250(V)54(irtual)-251(Ad-)]TJ 1 0 0 1 150.705 342.716 Tm [(dr)18(essing\051.)]TJ/F75 11.9552 Tf 0 -31.147 Td [(psb)]TJ ET q -1 0 0 1 461.417 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 311.768 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 464.555 227.002 Td [(vect)]TJ +/F75 11.9552 Tf 174.926 311.569 Td [(cuda)]TJ ET q -1 0 0 1 486.104 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 311.768 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 489.242 227.002 Td [(type)]TJ/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -359.459 -24.085 Td [(On)-250(Return)]TJ +/F75 11.9552 Tf 205.125 311.569 Td [(W)74(arpSize)]TJ/F145 9.9626 Tf -54.42 -19.65 Td [(nw)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G - 0 -22.816 Td [(prec)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(pr)18(econdtioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 193.612 0 Td [(psb)]TJ + [-525(psb_cuda_WarpSize\050\051)]TJ/F84 9.9626 Tf 14.944 -23.333 Td [(Returns)-250(the)-250(warp)-250(size.)]TJ/F75 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 385.542 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 237.638 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 388.68 132.281 Td [(prec)]TJ +/F75 11.9552 Tf 174.926 237.439 Td [(cuda)]TJ ET q -1 0 0 1 410.229 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 237.638 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 413.367 132.281 Td [(type)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -98.279 -41.843 Td [(152)]TJ -0 g 0 G -ET - -endstream -endobj -1876 0 obj -<< -/Length 1021 ->> -stream +/F75 11.9552 Tf 205.125 237.439 Td [(MultiProcessors)]TJ/F145 9.9626 Tf -54.42 -19.65 Td [(nmp)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(psb_cuda_MultiProcessors\050\051)]TJ/F84 9.9626 Tf 14.944 -23.333 Td [(Returns)-250(the)-250(number)-250(of)-250(multipr)18(ocessors)-250(in)-250(the)-250(CUDA)-250(device.)]TJ/F75 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ +ET +q +1 0 0 1 171.339 163.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F75 11.9552 Tf 174.926 163.309 Td [(cuda)]TJ +ET +q +1 0 0 1 201.538 163.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +/F75 11.9552 Tf 205.125 163.309 Td [(MaxThreadsPerMP)]TJ/F145 9.9626 Tf -54.42 -19.65 Td [(nt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td 
[(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -24.907 -21.918 Td [(The)]TJ/F59 9.9626 Tf 20.388 0 Td [(amold)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)]TJ/F59 9.9626 Tf 6.506 0 Td [(vmold)]TJ/F54 9.9626 Tf 29.862 0 Td [(and)]TJ/F59 9.9626 Tf 20.577 0 Td [(imold)]TJ/F54 9.9626 Tf 29.862 0 Td [(ar)18(guments)-372(may)-373(be)-372(employed)-373(to)-372(interface)-372(with)]TJ -133.347 -11.955 Td [(special)-250(devices,)-250(such)-250(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 164.384 -533.997 Td [(153)]TJ + [-525(psb_cuda_MaxThreadsPerMP\050\051)]TJ/F84 9.9626 Tf 14.944 -23.333 Td [(Returns)-250(the)-250(maximum)-250(number)-250(of)-250(thr)18(eads)-250(per)-250(multipr)18(ocessor)74(.)]TJ +0 g 0 G + 149.439 -29.888 Td [(180)]TJ 0 g 0 G ET endstream endobj -1884 0 obj +2259 0 obj << -/Length 5673 +/Length 2725 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(10.3)-1000(apply)-250(\227)-250(Preconditioner)-250(application)-250(routine)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%apply\050x,y,desc_a,info,trans,work\051)]TJ 0 -11.955 Td [(call)-525(prec%apply\050x,desc_a,info,trans\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 117.837 0 Td [(local)]TJ/F54 9.9626 Tf -117.279 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td 
[(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ +/F75 11.9552 Tf 99.895 706.129 Td [(psb)]TJ ET q -1 0 0 1 388.441 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 391.579 577.576 Td [(prec)]TJ +/F75 11.9552 Tf 124.116 706.129 Td [(cuda)]TJ ET q -1 0 0 1 413.128 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 416.266 577.576 Td [(type)]TJ +/F75 11.9552 Tf 154.315 706.129 Td [(MaxRegistersPerBlock)]TJ/F145 9.9626 Tf -54.42 -18.964 Td [(nr)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(x)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(sour)18(ce)-250(vector)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 111.142 0 Td [(local)]TJ/F54 9.9626 Tf -96.198 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ -ET -q -1 0 0 1 410.618 521.985 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 413.756 521.785 Td [(T)]TJ + [-525(psb_cuda_MaxRegistersPerBlock\050\051)]TJ/F84 9.9626 Tf 14.944 -21.918 Td [(Returns)-250(the)-250(maximum)-250(number)-250(of)-250(r)18(egister)-250(per)-250(thr)18(ead)-250(block.)]TJ/F75 11.9552 Tf -14.944 -29.133 Td [(psb)]TJ ET q -1 0 0 1 419.614 521.985 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 636.313 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 422.752 521.785 Td 
[(vect)]TJ +/F75 11.9552 Tf 124.116 636.114 Td [(cuda)]TJ ET q -1 0 0 1 444.301 521.985 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 636.313 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 447.439 521.785 Td [(type)]TJ +/F75 11.9552 Tf 154.315 636.114 Td [(MemoryClockRate)]TJ/F145 9.9626 Tf -54.42 -18.964 Td [(cl)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -317.656 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 502.059 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 501.86 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 208.625 0 Td [(local)]TJ/F54 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 200.207 0 Td [(psb)]TJ + [-525(psb_cuda_MemoryClockRate\050\051)]TJ/F84 9.9626 Tf 14.944 -21.917 Td [(Returns)-250(the)-250(memory)-250(clock)-250(rate)-250(in)-250(KHz,)-250(as)-250(an)-250(integer)74(.)]TJ/F75 11.9552 Tf -14.944 -29.133 Td [(psb)]TJ ET q -1 0 0 1 392.137 466.194 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 566.299 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 395.275 465.994 Td [(desc)]TJ +/F75 11.9552 Tf 124.116 566.1 Td [(cuda)]TJ ET q -1 0 0 1 416.824 466.194 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 566.299 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 419.963 465.994 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -290.179 -19.925 Td [(trans)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td 
[(Scope:)]TJ -2.76 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(work)]TJ -0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(an)-250(optional)-250(work)-250(space)-250(Scope:)]TJ/F51 9.9626 Tf 136.476 0 Td [(local)]TJ/F54 9.9626 Tf -140.352 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(double)-250(pr)18(ecision)-250(array)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -21.918 Td [(On)-250(Return)]TJ +/F75 11.9552 Tf 154.315 566.1 Td [(MemoryBusW)37(idth)]TJ/F145 9.9626 Tf -54.42 -18.964 Td [(nb)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G - 0 -19.925 Td [(y)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(destination)-250(vector)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 131.914 0 Td [(local)]TJ/F54 9.9626 Tf -117.528 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ -ET -q -1 0 0 1 410.618 276.904 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 413.756 276.704 Td [(T)]TJ + [-525(psb_cuda_MemoryBusWidth\050\051)]TJ/F84 9.9626 Tf 14.944 -21.918 Td [(Returns)-250(the)-250(memory)-250(bus)-250(width)-250(in)-250(bits.)]TJ/F75 11.9552 Tf -14.944 -29.133 Td [(psb)]TJ ET q -1 0 0 1 419.614 276.904 cm -[]0 d 0 J 0.398 w 0 
0 m 3.138 0 l S +1 0 0 1 120.53 496.284 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 422.752 276.704 Td [(vect)]TJ +/F75 11.9552 Tf 124.116 496.085 Td [(cuda)]TJ ET q -1 0 0 1 444.301 276.904 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 496.284 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 447.439 276.704 Td [(type)]TJ +/F75 11.9552 Tf 154.315 496.085 Td [(MemoryPeakBandwidth)]TJ/F145 9.9626 Tf -54.42 -18.964 Td [(bw)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -317.656 -19.925 Td [(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-525(psb_cuda_MemoryPeakBandwidth\050\051)]TJ/F84 9.9626 Tf 0 -21.918 Td [(Returns)-250(the)-250(peak)-250(memory)-250(bandwidth)-250(in)-250(MB/s)-250(\050r)18(eal)-250(double)-250(pr)18(ecision\051.)]TJ 0 g 0 G - 139.477 -118.52 Td [(154)]TJ + 164.384 -364.765 Td [(181)]TJ 0 g 0 G ET endstream endobj -1889 0 obj +2264 0 obj << -/Length 3387 +/Length 81 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(10.4)-1000(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(current)-250(preconditioner)]TJ +/F84 9.9626 Tf 315.088 90.438 Td [(182)]TJ 0 g 0 G +ET + +endstream +endobj +2268 0 obj +<< +/Length 7303 +>> +stream 0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%descr\050info\051)]TJ 0 -11.955 Td [(call)-525(prec%descr\050info,iout,)-525(root\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td 
[(T)90(ype:)]TJ +BT +/F75 14.3462 Tf 99.895 705.784 Td [(References)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F84 9.9626 Tf 4.982 -23.579 Td [([1])]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + 1.02 0 0 1 121.474 682.205 Tm [(G.)-316(Bella,)-333(S.)-316(Filippone,)-334(A.)-315(De)-316(Maio)-316(and)-316(M.)-316(T)91(esta,)]TJ/F78 9.9626 Tf 1.02 0 0 1 343.652 682.205 Tm [(A)-316(Simulation)-315(Model)-316(for)]TJ 0.986 0 0 1 121.195 670.25 Tm [(For)18(est)-255(Fir)19(es)]TJ/F84 9.9626 Tf 0.986 0 0 1 167.553 670.25 Tm [(,)-255(in)-255(J.)-255(Dongarra,)-255(K.)-255(Madsen,)-255(J.)-255(W)94(asniewski,)-256(editors,)-255(Pr)19(oceedings)]TJ 0.98 0 0 1 121.474 658.295 Tm [(of)-252(P)94(ARA)-251(04)-252(W)94(orkshop)-251(on)-252(State)-251(of)-252(the)-251(Art)-252(in)-251(Scienti\002c)-251(Computing,)-253(pp.)-251(546\226)]TJ 1 0 0 1 121.474 646.34 Tm [(553,)-250(Lectur)18(e)-250(Notes)-250(in)-250(Computer)-250(Science,)-250(Springer)74(,)-250(2005.)]TJ 0 g 0 G + -16.597 -21.736 Td [([2])]TJ 0 g 0 G - 0 -19.925 Td [(prec)]TJ + 1.02 0 0 1 121.474 624.604 Tm [(D.)-322(Bertaccini)-322(and)-322(S.)-322(Filippone,)]TJ/F78 9.9626 Tf 1.02 0 0 1 263.46 624.604 Tm [(Sparse)-322(appr)18(oximate)-322(inverse)-322(pr)17(econditioners)]TJ 1.006 0 0 1 121.474 612.649 Tm [(on)-249(high)-250(performance)-249(GPU)-249(platforms)]TJ/F84 9.9626 Tf 1.006 0 0 1 268.626 612.649 Tm [(,)-250(Comput.)-249(Math.)-249(Appl.,)-250(71,)-249(\0502016\051,)-250(no.)-249(3,)]TJ 1 0 0 1 121.474 600.694 Tm [(693\226711.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 117.836 0 Td [(local)]TJ/F54 9.9626 Tf -117.278 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ 
-0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 337.631 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 340.77 577.576 Td [(prec)]TJ -ET -q -1 0 0 1 362.319 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 365.457 577.576 Td [(type)]TJ + -16.597 -21.736 Td [([3])]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + 0.989 0 0 1 121.474 578.958 Tm [(A.)-252(Buttari,)-252(D.)-252(di)-252(Sera\002no,)-252(P)130(.)-252(D'Ambra,)-252(S.)-252(Filippone,)-111(2LEV)75(-D2P4:)-313(a)-252(package)]TJ 1.003 0 0 1 121.474 567.003 Tm [(of)-251(high-performance)-251(pr)17(ec)1(onditioners,)-115(A)1(pplicable)-252(Algebra)-251(in)-251(Engineering,)]TJ 1.02 0 0 1 121.474 555.048 Tm [(Communications)-246(and)-246(Computing,)-247(V)109(olume)-246(18,)-247(Number)-246(3,)-247(May)109(,)-246(2007,)-247(pp.)]TJ 1 0 0 1 121.474 543.093 Tm [(223-239)]TJ 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(iout)]TJ + -16.597 -21.736 Td [([4])]TJ 0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(output)-250(unit.)-310(Scope:)]TJ/F51 9.9626 Tf 87.391 0 Td [(local)]TJ/F54 9.9626 Tf -85.727 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(number)74(.)-310(Default:)-310(default)-250(output)-250(unit.)]TJ + 1.005 0 0 1 121.474 521.357 Tm [(P)128(.)-248(D'Ambra,)-248(S.)-249(Filippone,)-248(D.)-248(Di)-249(Sera\002)1(no)-109(On)-249(the)-248(Development)-248(of)-248(PSBLAS-)]TJ 1.02 0 0 1 121.474 509.402 Tm [(based)-465(Parallel)-464(T)88(wo-level)-464(Schwarz)-465(Pr)18(econditioners)-1055(Applied)-465(Numeri-)]TJ 1.02 0 0 1 121.474 497.447 Tm [(cal)-323(Mathematics,)-343(Elsevier)-323(Science,)-342(V)109(olume)-323(57,)-343(Issues)-323(11-12,)-343(November)18(-)]TJ 1 0 0 1 121.474 485.492 Tm [(December)-250(2007,)-250(Pages)-250(1181-1196.)]TJ 0 g 0 G 
-/F51 9.9626 Tf -24.907 -19.925 Td [(root)]TJ + -16.597 -21.736 Td [([5])]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(fr)18(om)-250(which)-250(to)-250(print)-250(Scope:)]TJ/F51 9.9626 Tf 155.834 0 Td [(local)]TJ/F54 9.9626 Tf -154.18 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-387(as:)-585(an)-387(integer)-387(number)-388(between)-387(0)-387(and)]TJ/F52 9.9626 Tf 220.442 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 14.01 0 Td [(\000)]TJ/F54 9.9626 Tf 10.638 0 Td [(1,)-422(in)-387(which)-387(case)]TJ -245.09 -11.955 Td [(the)-314(speci\002e)1(d)-314(pr)18(ocess)-314(will)-313(print)-314(the)-313(description,)-330(or)]TJ/F83 10.3811 Tf 225.38 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1,)-329(in)-314(which)-314(case)-313(all)]TJ -233.574 -11.955 Td [(pr)18(ocesses)-250(will)-250(print.)-310(Default:)-310(0.)]TJ + 1.006 0 0 1 121.474 463.756 Tm [(Dongarra,)-250(J.)-249(J.,)-250(DuCr)18(oz,)-250(J)1(.,)-250(Hammarling,)-250(S.)-249(and)-250(Hanson,)-249(R.,)-250(An)-249(Extended)]TJ 0.98 0 0 1 121.474 451.801 Tm [(Set)-234(of)-234(Fortran)-233(Basic)-234(Linear)-234(Algebra)-233(Subpr)18(ograms,)-238(ACM)-234(T)92(rans.)-234(Math.)-233(Softw)93(.)]TJ 1 0 0 1 121.195 439.846 Tm [(vol.)-250(14,)-250(1\22617,)-250(1988.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ + -16.318 -21.735 Td [([6])]TJ 0 g 0 G + 0.983 0 0 1 121.474 418.111 Tm [(Dongarra,)-255(J.,)-254(D)-1(u)1(Cr)18(oz,)-255(J.,)-255(Hammarling,)-254(S.)-255(and)-255(Duf)19(f)-1(,)-254(I.,)-255(A)-255(Set)-254(of)-255(level)-255(3)-254(Basic)]TJ 0.985 0 0 1 121.474 406.156 Tm [(Linear)-254(Algebra)-254(Subpr)19(ograms,)-254(ACM)-254(T)91(rans.)-253(Math.)-254(Softw)93(.)-253(vol.)-254(16,)-254(1\22617,)-254(1990.)]TJ 0 g 0 G - 0 -19.925 Td [(info)]TJ + 1 0 0 1 104.877 384.42 Tm [([7])]TJ +0 g 0 G + 0.993 0 0 1 121.474 
384.42 Tm [(J.)-252(J.)-252(Dongarra)-252(and)-252(R.)-253(C.)-252(Whaley)112(,)]TJ/F78 9.9626 Tf 0.993 0 0 1 262.893 384.42 Tm [(A)-252(User)-37(')55(s)-252(Guide)-252(to)-252(the)-252(BLACS)-252(v)55(.)-252(1.1)]TJ/F84 9.9626 Tf 0.993 0 0 1 407.142 384.42 Tm [(,)-252(Lapack)]TJ 0.98 0 0 1 120.976 372.465 Tm [(W)94(orking)-229(Note)-230(94,)-234(T)94(ech.)-230(Rep.)-229(UT)56(-CS-95-281,)-234(University)-230(of)-229(T)94(ennessee,)-234(Mar)18(ch)]TJ 1 0 0 1 120.976 360.51 Tm [(1995)-250(\050updated)-250(May)-250(1997\051.)]TJ +0 g 0 G + -16.099 -21.736 Td [([8])]TJ +0 g 0 G + 1.004 0 0 1 121.474 338.774 Tm [(I.)-250(Duf)18(f,)-250(M.)-250(Marr)18(one,)-250(G.)-250(Radicati)-250(and)-250(C.)-250(V)55(ittoli,)]TJ/F78 9.9626 Tf 1.004 0 0 1 328.201 338.774 Tm [(Level)-250(3)-250(Basic)-250(Linear)-250(Algebra)]TJ 0.982 0 0 1 121.195 326.819 Tm [(Subpr)18(ograms)-253(for)-254(Sparse)-254(Matrices:)-315(a)-254(User)-254(Level)-254(Interface)]TJ/F84 9.9626 Tf 0.982 0 0 1 344.719 326.819 Tm [(,)-254(ACM)-254(T)92(ransactions)-254(on)]TJ 1 0 0 1 121.474 314.864 Tm [(Mathematical)-250(Softwar)18(e,)-250(23\0503\051,)-250(pp.)-250(379\226401,)-250(1997.)]TJ +0 g 0 G + -16.597 -21.736 Td [([9])]TJ +0 g 0 G + 1.02 0 0 1 121.474 293.128 Tm [(I.)-354(Duf)18(f,)-381(M.)-354(Her)17(oux)-354(and)-354(R.)-354(Pozo,)]TJ/F78 9.9626 Tf 1.02 0 0 1 273.913 293.128 Tm [(An)-354(Overview)-354(of)-354(the)-354(Sparse)-354(Basic)-354(Linear)]TJ 0.982 0 0 1 121.116 281.173 Tm [(Algebra)-254(Subpr)18(ograms:)-316(the)-254(New)-254(Standard)-255(fr)19(om)-255(the)-254(BLAS)-255(T)113(echnical)-254(Forum)]TJ/F84 9.9626 Tf 0.982 0 0 1 414.869 281.173 Tm [(,)-254(ACM)]TJ 1 0 0 1 121.166 269.218 Tm [(T)90(ransactions)-250(on)-250(Mathematical)-250(Softwar)18(e,)-250(28\0502\051,)-250(pp.)-250(239\226267,)-250(2002.)]TJ +0 g 0 G + -21.271 -21.735 Td [([10])]TJ +0 g 0 G + 1.02 0 0 1 121.474 247.483 Tm [(S.)-256(Filippone)-256(and)-256(M.)-255(Colajanni,)]TJ/F78 9.9626 Tf 1.02 0 0 1 260.376 247.483 Tm 
[(PSBLAS:)-256(A)-256(Library)-255(for)-256(Parallel)-256(Linear)-256(Alge-)]TJ 1.02 0 0 1 121.474 235.528 Tm [(bra)-278(Computation)-278(on)-278(Sparse)-278(Matrices)]TJ/F84 9.9626 Tf 1.02 0 0 1 273.712 235.528 Tm [(,)-467(ACM)-278(T)88(ransactions)-278(on)-278(Mathematical)]TJ 1 0 0 1 121.474 223.572 Tm [(Softwar)18(e,)-250(26\0504\051,)-250(pp.)-250(527\226550,)-250(2000.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + -21.579 -21.735 Td [([11])]TJ 0 g 0 G - 139.477 -263.975 Td [(155)]TJ + 1.02 0 0 1 121.474 201.837 Tm [(S.)-328(Filippone)-329(and)-328(A.)-329(Bu)1(ttari,)]TJ/F78 9.9626 Tf 1.02 0 0 1 250.516 201.837 Tm [(Object-Oriented)-328(T)109(echniques)-329(for)-328(Sparse)-328(Matrix)]TJ 0.98 0 0 1 121.146 189.882 Tm [(Computations)-227(in)-228(Fortran)-227(2003)]TJ/F84 9.9626 Tf 0.98 0 0 1 241.714 189.882 Tm [(,)-313(ACM)-227(T)92(ransactions)-228(on)-227(Mathematical)-228(Softwar)19(e,)]TJ 1 0 0 1 121.474 177.927 Tm [(38\0504\051,)-250(2012.)]TJ +0 g 0 G + -21.579 -21.736 Td [([12])]TJ +0 g 0 G + 1.02 0 0 1 121.474 156.191 Tm [(S.)-284(Filippone,)-294(P)127(.)-284(D'Ambra,)-294(M.)-284(Colajanni,)]TJ/F78 9.9626 Tf 1.02 0 0 1 302.518 156.191 Tm [(Using)-284(a)-284(Parallel)-284(Library)-284(of)-284(Sparse)]TJ 1.02 0 0 1 121.195 144.236 Tm [(Linear)-296(Algebra)-296(in)-295(a)-296(Fluid)-296(Dynamics)-296(Applications)-296(Code)-295(on)-296(Linux)-296(Clusters)]TJ/F84 9.9626 Tf 1.02 0 0 1 429.06 144.236 Tm [(,)-308(in)]TJ 0.99 0 0 1 121.474 132.281 Tm 
[(G.)-254(Joubert,)-254(A.)-254(Murli,)-254(F)93(.)-254(Peters,)-254(M.)-254(V)93(anneschi,)-255(editors,)-254(Parallel)-254(Computing)-253(-)]TJ 1 0 0 1 121.086 120.326 Tm [(Advances)-250(&)-250(Curr)18(ent)-250(Issues,)-250(pp.)-250(441\226448,)-250(Imperial)-250(College)-250(Pr)18(ess,)-250(2002.)]TJ +0 g 0 G + 143.193 -29.888 Td [(183)]TJ 0 g 0 G ET endstream endobj -1790 0 obj +2174 0 obj << /Type /ObjStm /N 100 -/First 967 -/Length 10102 ->> -stream -1786 0 501 58 1787 115 1788 172 1783 230 1792 311 1794 429 505 488 1795 546 1796 605 -1791 664 1798 745 1800 863 509 921 1801 978 1802 1035 1797 1093 1804 1187 1806 1305 513 1364 -1803 1422 1809 1516 1807 1655 1811 1800 517 1858 1812 1915 1813 1973 1808 2031 1816 2125 1814 2264 -1818 2409 521 2468 1819 2526 1820 2585 1815 2644 1823 2738 1821 2877 1825 3022 525 3080 1826 3137 -1827 3194 1822 3252 1831 3346 1829 3485 1833 3628 529 3687 1834 3745 1835 3803 1830 3862 1838 3956 -1836 4095 1840 4240 533 4298 1841 4355 1842 4413 1837 4471 1845 4591 1843 4730 1847 4872 537 4931 -1848 4989 1849 5048 1844 5107 1851 5227 1853 5345 1850 5403 1855 5484 1857 5602 541 5661 1854 5719 -1861 5813 1858 5961 1859 6108 1863 6256 545 6314 1864 6371 1860 6429 1871 6549 1866 6715 1867 6860 -1868 7006 1869 7153 1873 7300 549 7359 1870 7417 1875 7511 1877 7629 1874 7687 1883 7781 1878 7947 -1879 8092 1880 8235 1881 8381 1885 8525 553 8584 1882 8642 1888 8736 1886 8875 1890 9020 557 9078 -% 1786 0 obj -<< -/D [1784 0 R /XYZ 98.895 753.953 null] ->> -% 501 0 obj -<< -/D [1784 0 R /XYZ 99.895 716.092 null] ->> -% 1787 0 obj -<< -/D [1784 0 R /XYZ 99.895 678.98 null] ->> -% 1788 0 obj -<< -/D [1784 0 R /XYZ 99.895 679.195 null] ->> -% 1783 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1792 0 obj -<< -/Type /Page -/Contents 1793 0 R -/Resources 1791 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R ->> -% 1794 0 obj -<< -/D [1792 0 R /XYZ 149.705 753.953 null] +/First 986 +/Length 11860 >> -% 505 0 obj 
-<< -/D [1792 0 R /XYZ 150.705 716.092 null] ->> -% 1795 0 obj -<< -/D [1792 0 R /XYZ 150.705 689.963 null] ->> -% 1796 0 obj -<< -/D [1792 0 R /XYZ 150.705 693.143 null] ->> -% 1791 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1798 0 obj -<< -/Type /Page -/Contents 1799 0 R -/Resources 1797 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R ->> -% 1800 0 obj -<< -/D [1798 0 R /XYZ 98.895 753.953 null] ->> -% 509 0 obj -<< -/D [1798 0 R /XYZ 99.895 716.092 null] ->> -% 1801 0 obj -<< -/D [1798 0 R /XYZ 99.895 678.98 null] ->> -% 1802 0 obj -<< -/D [1798 0 R /XYZ 99.895 679.195 null] ->> -% 1797 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1804 0 obj -<< -/Type /Page -/Contents 1805 0 R -/Resources 1803 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R ->> -% 1806 0 obj -<< -/D [1804 0 R /XYZ 149.705 753.953 null] +stream +2172 0 2173 205 2175 494 2177 715 2134 928 2159 1074 2144 1132 2160 1191 2161 1250 2162 1309 +2163 1368 2164 1427 2165 1486 2166 1545 2167 1603 2155 1662 2168 1721 2156 1779 2186 1974 2191 2131 +2192 2311 2193 2354 2194 2657 2195 2862 2197 3075 2182 3296 2183 3443 2184 3590 2188 3737 2189 3796 +2190 3854 2185 3913 2206 4123 2212 4262 2213 4442 2214 4485 2215 4686 2216 4981 2218 5202 2204 5415 +2208 5561 2201 5619 2209 5678 2202 5736 2210 5795 2205 5853 2225 6007 2229 6146 2230 6326 2232 6369 +2233 6576 2234 6877 2236 7098 2223 7311 2227 7458 2228 7517 2224 7576 2241 7758 2243 7876 2240 7934 +2245 8017 2247 8135 614 8194 2244 8252 2249 8375 2251 8493 618 8551 622 8607 2252 8664 626 8722 +2248 8779 2254 8874 2256 8992 630 9051 634 9109 638 9167 642 9225 646 9283 650 9341 654 9399 +658 9457 2253 9515 2258 9610 2260 9728 662 9786 666 9843 670 9900 674 9957 2257 10014 2263 10109 +2265 10227 2262 10286 2267 10354 2269 10472 2270 10530 2271 10588 2012 10646 958 10703 957 10760 916 10817 +% 2172 0 obj +<< +/BaseFont /YAZDUX+TimesNewRomanPSMT +/FontDescriptor 2175 0 
R +/ToUnicode 2176 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 0 0 500 500 500] +/Subtype /TrueType >> -% 513 0 obj +% 2173 0 obj << -/D [1804 0 R /XYZ 150.705 716.092 null] +/BaseFont /NDNSMY+FreeSerif +/FontDescriptor 2177 0 R +/ToUnicode 2178 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 0 0 0 0 0 0 0 385 0 0 0 0 0 0 0 667 529 0 0 0 0 0 701] +/Subtype /TrueType >> -% 1803 0 obj +% 2175 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /FontDescriptor +/FontName /YAZDUX+TimesNewRomanPSMT +/FontBBox [ 15 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2179 0 R >> -% 1809 0 obj +% 2177 0 obj << -/Type /Page -/Contents 1810 0 R -/Resources 1808 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R -/Annots [ 1807 0 R ] +/Type /FontDescriptor +/FontName /NDNSMY+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2180 0 R >> -% 1807 0 obj +% 2134 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 438.278 369.462 450.338] -/A << /S /GoTo /D (spdata) >> ->> -% 1811 0 obj -<< -/D [1809 0 R /XYZ 98.895 753.953 null] ->> -% 517 0 obj -<< -/D [1809 0 R /XYZ 99.895 716.092 null] ->> -% 1812 0 obj -<< -/D [1809 0 R /XYZ 99.895 676.015 null] ->> -% 1813 0 obj -<< -/D [1809 0 R /XYZ 99.895 679.195 null] +/Rect [384.587 583.767 391.56 595.827] +/A << /S /GoTo /D (figure.5) >> >> -% 1808 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1816 0 obj -<< -/Type /Page -/Contents 1817 0 R -/Resources 1815 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R -/Annots [ 1814 0 R ] ->> -% 1814 0 obj +% 2159 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] 
-/Rect [342.753 571.778 420.271 583.837] -/A << /S /GoTo /D (spdata) >> +/D [2157 0 R /XYZ 98.895 753.953 null] >> -% 1818 0 obj +% 2144 0 obj << -/D [1816 0 R /XYZ 149.705 753.953 null] +/D [2157 0 R /XYZ 389.217 621.446 null] >> -% 521 0 obj +% 2160 0 obj << -/D [1816 0 R /XYZ 150.705 716.092 null] +/D [2157 0 R /XYZ 114.839 563.747 null] >> -% 1819 0 obj +% 2161 0 obj << -/D [1816 0 R /XYZ 150.705 676.015 null] +/D [2157 0 R /XYZ 114.839 567.034 null] >> -% 1820 0 obj +% 2162 0 obj << -/D [1816 0 R /XYZ 150.705 679.195 null] +/D [2157 0 R /XYZ 114.839 556.075 null] >> -% 1815 0 obj +% 2163 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [2157 0 R /XYZ 114.839 545.116 null] >> -% 1823 0 obj +% 2164 0 obj << -/Type /Page -/Contents 1824 0 R -/Resources 1822 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1821 0 R ] +/D [2157 0 R /XYZ 114.839 534.158 null] >> -% 1821 0 obj +% 2165 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 438.278 369.462 450.338] -/A << /S /GoTo /D (spdata) >> +/D [2157 0 R /XYZ 114.839 523.199 null] >> -% 1825 0 obj +% 2166 0 obj << -/D [1823 0 R /XYZ 98.895 753.953 null] +/D [2157 0 R /XYZ 114.839 512.24 null] >> -% 525 0 obj +% 2167 0 obj << -/D [1823 0 R /XYZ 99.895 716.092 null] +/D [2157 0 R /XYZ 114.839 501.281 null] >> -% 1826 0 obj +% 2155 0 obj << -/D [1823 0 R /XYZ 99.895 678.98 null] +/D [2157 0 R /XYZ 114.839 481.057 null] >> -% 1827 0 obj +% 2168 0 obj << -/D [1823 0 R /XYZ 99.895 679.195 null] +/D [2157 0 R /XYZ 99.895 294.895 null] >> -% 1822 0 obj +% 2156 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F84 687 0 R /F75 685 0 R /F233 1044 0 R /F279 1813 0 R /F78 686 0 R /F192 942 0 R /F190 941 0 R /F145 940 0 R >> +/XObject << /Im7 2133 0 R >> /ProcSet [ /PDF /Text ] >> -% 1831 0 obj +% 2186 0 obj << /Type /Page -/Contents 1832 0 R -/Resources 1830 0 R +/Contents 2187 0 R +/Resources 2185 0 R /MediaBox [0 0 
595.276 841.89] -/Parent 1828 0 R -/Annots [ 1829 0 R ] +/Parent 2115 0 R +/Annots [ 2182 0 R 2183 0 R 2184 0 R ] >> -% 1829 0 obj +% 2191 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [352.526 426.323 428.58 438.383] -/A << /S /GoTo /D (vdata) >> +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133928+01'00') +/ModDate (D:20140329133928+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 1833 0 obj +% 2192 0 obj << -/D [1831 0 R /XYZ 149.705 753.953 null] +/Type /ExtGState +/OPM 1 >> -% 529 0 obj +% 2193 0 obj << -/D [1831 0 R /XYZ 150.705 716.092 null] +/BaseFont /MCSFLP+FreeSerif +/FontDescriptor 2195 0 R +/ToUnicode 2196 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 670 0 610 564 0 714 0 385 709 0 0 0 724 0 0 667 529 606 0 0 0 0 701] +/Subtype /TrueType >> -% 1834 0 obj +% 2194 0 obj << -/D [1831 0 R /XYZ 150.705 678.98 null] +/BaseFont /AJRCAD+TimesNewRomanPSMT +/FontDescriptor 2197 0 R +/ToUnicode 2198 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 0 0 500 500 500] +/Subtype /TrueType >> -% 1835 0 obj +% 2195 0 obj << -/D [1831 0 R /XYZ 150.705 679.195 null] +/Type /FontDescriptor +/FontName /MCSFLP+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2199 0 R >> -% 1830 0 obj +% 2197 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /FontDescriptor +/FontName /AJRCAD+TimesNewRomanPSMT +/FontBBox [ 15 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2200 0 R >> -% 1838 0 obj +% 2182 0 obj << -/Type /Page -/Contents 1839 0 R -/Resources 1837 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1836 0 R ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] 
+/Rect [453.687 426.731 460.661 438.791] +/A << /S /GoTo /D (figure.5) >> >> -% 1836 0 obj +% 2183 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 574.778 369.462 586.838] -/A << /S /GoTo /D (spdata) >> +/Rect [354.541 176.362 361.575 188.421] +/A << /S /GoTo /D (figure.8) >> >> -% 1840 0 obj +% 2184 0 obj << -/D [1838 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [487.118 128.475 494.142 140.535] +/A << /S /GoTo /D (algocf.2) >> >> -% 533 0 obj +% 2188 0 obj << -/D [1838 0 R /XYZ 99.895 716.092 null] +/D [2186 0 R /XYZ 149.705 753.953 null] >> -% 1841 0 obj +% 2189 0 obj << -/D [1838 0 R /XYZ 99.895 679.441 null] +/D [2186 0 R /XYZ 447.542 464.41 null] >> -% 1842 0 obj +% 2190 0 obj << -/D [1838 0 R /XYZ 99.895 679.657 null] +/D [2186 0 R /XYZ 150.705 211.214 null] >> -% 1837 0 obj +% 2185 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F94 915 0 R >> +/Font << /F84 687 0 R /F78 686 0 R /F192 942 0 R /F145 940 0 R /F233 1044 0 R /F279 1813 0 R /F148 1490 0 R /F190 941 0 R /F75 685 0 R >> +/XObject << /Im8 2181 0 R >> /ProcSet [ /PDF /Text ] >> -% 1845 0 obj +% 2206 0 obj << /Type /Page -/Contents 1846 0 R -/Resources 1844 0 R +/Contents 2207 0 R +/Resources 2205 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1843 0 R ] +/Parent 2211 0 R +/Annots [ 2204 0 R ] >> -% 1843 0 obj +% 2212 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [452.361 574.59 528.415 586.65] -/A << /S /GoTo /D (vdata) >> +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133927+01'00') +/ModDate (D:20140329133927+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 1847 0 obj +% 2213 0 obj << -/D [1845 0 R /XYZ 149.705 753.953 null] +/Type /ExtGState +/OPM 1 >> -% 537 0 obj +% 2214 0 obj << -/D [1845 0 R /XYZ 150.705 716.092 null] +/BaseFont /BNRUAU+TimesNewRomanPSMT +/FontDescriptor 2216 0 R +/ToUnicode 2217 0 R +/Type 
/Font +/FirstChar 45 +/LastChar 55 +/Widths [ 333 0 0 500 500 500 0 0 0 0 500] +/Subtype /TrueType >> -% 1848 0 obj +% 2215 0 obj << -/D [1845 0 R /XYZ 150.705 679.413 null] +/BaseFont /ICFUKB+FreeSerif +/FontDescriptor 2218 0 R +/ToUnicode 2219 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 0 0 610 564 0 0 0 0 0 0 0 0 724 0 0 667 529 606 0 0 0 0 701] +/Subtype /TrueType >> -% 1849 0 obj +% 2216 0 obj << -/D [1845 0 R /XYZ 150.705 679.628 null] +/Type /FontDescriptor +/FontName /BNRUAU+TimesNewRomanPSMT +/FontBBox [ 21 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2220 0 R >> -% 1844 0 obj +% 2218 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F94 915 0 R >> -/ProcSet [ /PDF /Text ] +/Type /FontDescriptor +/FontName /ICFUKB+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2221 0 R >> -% 1851 0 obj +% 2204 0 obj << -/Type /Page -/Contents 1852 0 R -/Resources 1850 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.292 537.01 378.265 549.069] +/A << /S /GoTo /D (figure.5) >> >> -% 1853 0 obj +% 2208 0 obj << -/D [1851 0 R /XYZ 98.895 753.953 null] +/D [2206 0 R /XYZ 98.895 753.953 null] >> -% 1850 0 obj +% 2201 0 obj << -/Font << /F94 915 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [2206 0 R /XYZ 396.732 574.688 null] >> -% 1855 0 obj +% 2209 0 obj << -/Type /Page -/Contents 1856 0 R -/Resources 1854 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R +/D [2206 0 R /XYZ 99.895 526.012 null] >> -% 1857 0 obj +% 2202 0 obj << -/D [1855 0 R /XYZ 149.705 753.953 null] +/D [2206 0 R /XYZ 114.839 383.469 null] >> -% 541 0 obj +% 2210 0 obj << -/D [1855 0 R /XYZ 150.705 716.092 null] +/D [2206 0 R /XYZ 
99.895 187.185 null] >> -% 1854 0 obj +% 2205 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F84 687 0 R /F145 940 0 R /F233 1044 0 R /F75 685 0 R /F279 1813 0 R >> +/XObject << /Im9 2203 0 R >> /ProcSet [ /PDF /Text ] >> -% 1861 0 obj +% 2225 0 obj << /Type /Page -/Contents 1862 0 R -/Resources 1860 0 R +/Contents 2226 0 R +/Resources 2224 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1858 0 R 1859 0 R ] ->> -% 1858 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 442.264 387.374 454.323] -/A << /S /GoTo /D (precdata) >> +/Parent 2211 0 R +/Annots [ 2223 0 R ] >> -% 1859 0 obj +% 2229 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [171.257 352.6 177.533 366.303] -/A << /S /GoTo /D (Hfootnote.4) >> +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133928+01'00') +/ModDate (D:20140329133928+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 1863 0 obj +% 2230 0 obj << -/D [1861 0 R /XYZ 98.895 753.953 null] +/Type /ExtGState +/OPM 1 >> -% 545 0 obj +% 2232 0 obj << -/D [1861 0 R /XYZ 99.895 716.092 null] +/BaseFont /PBIKKX+TimesNewRomanPSMT +/FontDescriptor 2234 0 R +/ToUnicode 2235 0 R +/Type /Font +/FirstChar 45 +/LastChar 55 +/Widths [ 333 0 0 500 500 0 500 500 500 500 500] +/Subtype /TrueType >> -% 1864 0 obj +% 2233 0 obj << -/D [1861 0 R /XYZ 114.242 129.79 null] +/BaseFont /ZBHFTP+FreeSerif +/FontDescriptor 2236 0 R +/ToUnicode 2237 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 670 0 610 564 0 714 0 0 709 0 0 0 724 0 0 667 529 606 0 0 0 0 701] +/Subtype /TrueType >> -% 1860 0 obj +% 2234 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/Type /FontDescriptor +/FontName /PBIKKX+TimesNewRomanPSMT +/FontBBox [ 15 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 
+/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2238 0 R >> -% 1871 0 obj +% 2236 0 obj << -/Type /Page -/Contents 1872 0 R -/Resources 1870 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1866 0 R 1867 0 R 1868 0 R 1869 0 R ] +/Type /FontDescriptor +/FontName /ZBHFTP+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2239 0 R >> -% 1866 0 obj +% 2223 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [365.487 576.377 443.006 588.437] -/A << /S /GoTo /D (spdata) >> +/Rect [440.392 344.666 447.366 356.726] +/A << /S /GoTo /D (figure.5) >> >> -% 1867 0 obj +% 2227 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [451.902 505.741 518.96 517.801] -/A << /S /GoTo /D (precdata) >> +/D [2225 0 R /XYZ 149.705 753.953 null] >> -% 1868 0 obj +% 2228 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [422.264 435.105 489.322 447.165] -/A << /S /GoTo /D (descdata) >> +/D [2225 0 R /XYZ 447.542 382.345 null] >> -% 1869 0 obj +% 2224 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.227 128.475 435.285 140.535] -/A << /S /GoTo /D (precdata) >> +/Font << /F84 687 0 R /F145 940 0 R /F78 686 0 R /F75 685 0 R /F192 942 0 R /F233 1044 0 R /F279 1813 0 R >> +/XObject << /Im10 2222 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1873 0 obj +% 2241 0 obj << -/D [1871 0 R /XYZ 149.705 753.953 null] +/Type /Page +/Contents 2242 0 R +/Resources 2240 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2211 0 R >> -% 549 0 obj +% 2243 0 obj << -/D [1871 0 R /XYZ 150.705 716.092 null] +/D [2241 0 R /XYZ 98.895 753.953 null] >> -% 1870 0 obj +% 2240 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F233 1044 0 R /F84 687 0 R >> /ProcSet [ /PDF /Text ] >> -% 1875 0 obj +% 2245 0 obj << /Type /Page -/Contents 1876 0 R -/Resources 1874 0 R +/Contents 2246 0 R 
+/Resources 2244 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R +/Parent 2211 0 R >> -% 1877 0 obj +% 2247 0 obj << -/D [1875 0 R /XYZ 98.895 753.953 null] +/D [2245 0 R /XYZ 149.705 753.953 null] >> -% 1874 0 obj +% 614 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/D [2245 0 R /XYZ 150.705 716.092 null] +>> +% 2244 0 obj +<< +/Font << /F75 685 0 R /F84 687 0 R /F233 1044 0 R /F145 940 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1883 0 obj +% 2249 0 obj << /Type /Page -/Contents 1884 0 R -/Resources 1882 0 R +/Contents 2250 0 R +/Resources 2248 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1878 0 R 1879 0 R 1880 0 R 1881 0 R ] ->> -% 1878 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [371.126 573.77 438.184 585.83] -/A << /S /GoTo /D (precdata) >> +/Parent 2211 0 R >> -% 1879 0 obj +% 2251 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 517.98 469.357 530.039] -/A << /S /GoTo /D (vdata) >> +/D [2249 0 R /XYZ 98.895 753.953 null] >> -% 1880 0 obj +% 618 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [374.822 462.189 441.88 474.248] -/A << /S /GoTo /D (descdata) >> +/D [2249 0 R /XYZ 99.895 554.06 null] >> -% 1881 0 obj +% 622 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 272.899 469.357 284.958] -/A << /S /GoTo /D (vdata) >> +/D [2249 0 R /XYZ 99.895 527.944 null] >> -% 1885 0 obj +% 2252 0 obj << -/D [1883 0 R /XYZ 149.705 753.953 null] +/D [2249 0 R /XYZ 99.895 315.727 null] >> -% 553 0 obj +% 626 0 obj << -/D [1883 0 R /XYZ 150.705 716.092 null] +/D [2249 0 R /XYZ 99.895 284.171 null] >> -% 1882 0 obj +% 2248 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1888 0 obj +% 2254 0 obj << /Type /Page -/Contents 1889 0 R -/Resources 1887 0 R +/Contents 2255 0 R +/Resources 2253 0 R /MediaBox [0 0 
595.276 841.89] -/Parent 1865 0 R -/Annots [ 1886 0 R ] +/Parent 2211 0 R >> -% 1886 0 obj +% 2256 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 573.77 387.374 585.83] -/A << /S /GoTo /D (precdata) >> +/D [2254 0 R /XYZ 149.705 753.953 null] >> -% 1890 0 obj +% 630 0 obj << -/D [1888 0 R /XYZ 98.895 753.953 null] +/D [2254 0 R /XYZ 150.705 720.077 null] >> -% 557 0 obj +% 634 0 obj << -/D [1888 0 R /XYZ 99.895 716.092 null] +/D [2254 0 R /XYZ 150.705 633.991 null] >> - -endstream -endobj -1894 0 obj +% 638 0 obj << -/Length 973 +/D [2254 0 R /XYZ 150.705 559.861 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(10.5)-1000(clone)-250(\227)-250(clone)-250(current)-250(preconditioner)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-1050(prec%clone\050precout,info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -77.917 -33.873 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(precout)]TJ -0 g 0 G -/F54 9.9626 Tf 39.292 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ -0 g 0 G -/F51 9.9626 Tf -39.292 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ -0 g 0 G - 140.583 -449.28 Td [(156)]TJ -0 g 0 G -ET - -endstream -endobj -1900 0 obj +% 642 0 obj << -/Length 2703 +/D [2254 0 R /XYZ 150.705 485.732 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(10.6)-1000(free)-250(\227)-250(Free)-250(a)-250(preconditioner)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%free\050info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td 
[(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 337.631 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 340.77 577.576 Td [(prec)]TJ -ET -q -1 0 0 1 362.319 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 365.457 577.576 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(On)-250(Exit)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -30.874 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 337.631 502.059 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 340.77 501.86 Td [(prec)]TJ -ET -q -1 0 0 1 362.319 502.059 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 365.457 501.86 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F51 9.9626 Tf 
31.432 0 Td [(global)]TJ/F54 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ/F54 9.9626 Tf 34.363 0 Td [(Releases)-250(all)-250(internal)-250(storage.)]TJ -0 g 0 G - 130.021 -333.713 Td [(157)]TJ -0 g 0 G -ET - -endstream -endobj -1905 0 obj +% 646 0 obj +<< +/D [2254 0 R /XYZ 150.705 411.602 null] +>> +% 650 0 obj +<< +/D [2254 0 R /XYZ 150.705 325.516 null] +>> +% 654 0 obj +<< +/D [2254 0 R /XYZ 150.705 251.386 null] +>> +% 658 0 obj +<< +/D [2254 0 R /XYZ 150.705 177.256 null] +>> +% 2253 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 2258 0 obj +<< +/Type /Page +/Contents 2259 0 R +/Resources 2257 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2261 0 R +>> +% 2260 0 obj << -/Length 548 +/D [2258 0 R /XYZ 98.895 753.953 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td [(11)-1000(Iterative)-250(Methods)]TJ/F54 9.9626 Tf 0 -22.702 Td [(In)-402(this)-403(chapter)-402(we)-403(pr)18(ovide)-402(r)18(outines)-403(for)-402(pr)18(econditioners)-402(and)-403(iterative)-402(meth-)]TJ 0 -11.955 Td [(ods.)-472(The)-304(interfaces)-304(for)-304(Krylov)-304(subspace)-303(methods)-304(ar)18(e)-304(available)-304(in)-304(the)-304(module)]TJ/F59 9.9626 Tf 0 -11.955 Td [(psb_krylov_mod)]TJ/F54 9.9626 Tf 73.225 0 Td [(.)]TJ -0 g 0 G - 91.158 -568.734 Td [(158)]TJ -0 g 0 G -ET - -endstream -endobj -1912 0 obj +% 662 0 obj << -/Length 8246 +/D [2258 0 R /XYZ 99.895 720.077 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(11.1)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td 
[(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-266(subr)18(outine)-266(is)-267(a)-266(driver)-266(that)-267(p)1(r)18(ovides)-267(a)-266(general)-266(interface)-266(for)-267(all)-266(the)-266(Krylov-)]TJ 0 -11.955 Td [(Subspace)-250(family)-250(methods)-250(implemented)-250(in)-250(PSBLAS)-250(version)-250(2.)]TJ 14.944 -11.955 Td [(The)-250(stopping)-250(criterion)-250(can)-250(take)-250(the)-250(following)-250(values:)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -18.774 Td [(1)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(normwise)-222(backwar)18(d)-221(err)18(or)-222(in)-221(the)-222(in\002nity)-222(norm;)-231(the)-221(iteration)-222(is)-222(stopped)-221(when)]TJ/F52 9.9626 Tf 109.036 -26.864 Td [(e)-15(r)-25(r)]TJ/F85 10.3811 Tf 15.14 0 Td [(=)]TJ/F83 10.3811 Tf 40.62 6.745 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F52 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ -ET -q -1 0 0 1 246.191 620.108 cm -[]0 d 0 J 0.398 w 0 0 m 74.372 0 l S -Q -BT -/F85 10.3811 Tf 246.316 610.783 Td [(\050)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F52 9.9626 Tf 5.938 0 Td [(A)]TJ/F83 10.3811 Tf 7.442 0 Td [(k)-24(k)]TJ/F52 9.9626 Tf 11.048 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(k)]TJ/F85 10.3811 Tf 7.376 0 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(k)]TJ/F52 9.9626 Tf 5.44 0 Td [(b)]TJ/F83 10.3811 Tf 4.861 0 Td [(k)]TJ/F85 10.3811 Tf 5.44 0 Td [(\051)]TJ/F61 10.3811 Tf 8.236 6.834 Td [(<)]TJ/F52 9.9626 Tf 11.087 0 Td [(e)-80(p)-25(s)]TJ -0 g 0 G -/F51 9.9626 Tf -235.842 -29.908 Td [(2)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F52 9.9626 Tf 136.209 -26.865 Td [(e)-15(r)-25(r)]TJ/F85 10.3811 Tf 15.141 0 Td [(=)]TJ/F83 10.3811 Tf 13.446 6.745 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F52 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ 
-ET -q -1 0 0 1 273.365 563.335 cm -[]0 d 0 J 0.398 w 0 0 m 20.025 0 l S -Q -BT -/F83 10.3811 Tf 273.49 554.01 Td [(k)]TJ/F52 9.9626 Tf 5.439 0 Td [(b)]TJ/F83 10.3811 Tf 4.862 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F61 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F52 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ -0 g 0 G -/F51 9.9626 Tf -208.668 -29.848 Td [(3)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F52 9.9626 Tf 134.486 -26.865 Td [(e)-15(r)-25(r)]TJ/F85 10.3811 Tf 15.14 0 Td [(=)]TJ/F83 10.3811 Tf 15.17 6.745 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F52 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ -ET -q -1 0 0 1 271.641 506.622 cm -[]0 d 0 J 0.398 w 0 0 m 23.472 0 l S -Q -BT -/F83 10.3811 Tf 271.766 497.297 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F54 7.5716 Tf 4 -1.744 Td [(0)]TJ/F83 10.3811 Tf 4.408 1.744 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F61 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F52 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ/F54 9.9626 Tf -210.391 -29.848 Td [(The)-248(behaviour)-248(is)-248(contr)18(olled)-249(by)-248(the)-248(istop)-248(ar)18(gument)-248(\050see)-248(later\051.)-310(In)-248(the)-248(above)-248(for)18(-)]TJ 0 -11.956 Td [(mulae,)]TJ/F52 9.9626 Tf 32.81 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F54 9.9626 Tf 5.303 1.96 Td [(is)-256(the)-256(tentative)-257(soluti)1(on)-257(and)]TJ/F52 9.9626 Tf 125.144 0 Td [(r)]TJ/F52 7.5716 Tf 4.042 -1.96 Td [(i)]TJ/F85 10.3811 Tf 5.757 1.96 Td [(=)]TJ/F52 9.9626 Tf 11.2 0 Td [(b)]TJ/F83 10.3811 Tf 6.822 0 Td [(\000)]TJ/F52 9.9626 Tf 10.777 0 Td [(A)-42(x)]TJ/F52 7.5716 Tf 12.759 -1.96 Td [(i)]TJ/F54 9.9626 Tf 5.303 1.96 Td [(the)-256(corr)18(esponding)-256(r)18(esidual)]TJ -225.064 -11.955 Td [(at)-250(the)]TJ/F52 9.9626 Tf 27.083 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(-th)-250(iteration.)]TJ -28.305 -17.357 Td 
[(c)-175(a)-175(l)-174(l)-880(p)-105(s)-105(b)]TJ -ET -q -1 0 0 1 150.28 433.215 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 154.313 433.015 Td [(k)-105(r)-105(y)-104(l)-105(o)-105(v)-238(\050)-156(m)-21(e)-22(t)-21(h)-22(o)-22(d)-218(,)-208(a)-242(,)-255(p)-80(r)-81(e)-80(c)-335(,)-191(b)-206(,)-203(x)-231(,)-234(e)-60(p)-59(s)-293(,)-273(d)-98(e)-97(s)-98(c)]TJ -ET -q -1 0 0 1 352.02 433.215 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 355.983 433.015 Td [(a)-370(,)-283(i)-108(n)-108(f)-108(o)-274(,)-57(&)]TJ -227.086 -11.955 Td [(&)-580(i)-69(t)-69(m)-70(a)-69(x)-313(,)-327(i)-151(t)-152(e)-151(r)-478(,)-281(e)-107(r)-106(r)-387(,)-321(i)-145(t)-146(r)-146(a)-145(c)-146(e)-466(,)-336(i)-161(r)-160(s)-161(t)-496(,)-291(i)-116(s)-116(t)-116(o)-116(p)-407(,)-219(c)-43(o)-43(n)-44(d)-177(\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -29.002 -25.88 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.349 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.349 Td [(method)]TJ -0 g 0 G -/F54 9.9626 Tf 39.851 0 Td [(a)-193(string)-194(that)-193(de\002nes)-194(the)-194(it)1(erative)-194(method)-193(to)-194(be)-194(use)1(d.)-292(Supported)-193(values)]TJ -14.944 -11.956 Td [(ar)18(e:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.349 Td [(CG:)]TJ -0 g 0 G -/F54 9.9626 Tf 22.964 0 Td [(the)-250(Conjugate)-250(Gradient)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -22.964 -15.364 Td [(CGS:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.051 0 Td [(the)-250(Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -29.051 -15.365 Td [(GCR:)]TJ -0 g 0 G -/F54 9.9626 Tf 30.157 0 Td [(the)-250(Generalized)-250(Conjugate)-250(Residual)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -30.157 -15.364 Td [(FCG:)]TJ -0 g 0 G -/F54 9.9626 Tf 28.503 0 Td [(the)-250(Flexible)-250(Conjugate)-250(Gradient)-250(method)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 176.854 3.616 Td [(5)]TJ -0 g 0 G -/F54 9.9626 
Tf 4.284 -3.616 Td [(;)]TJ -0 g 0 G -/F51 9.9626 Tf -209.641 -15.364 Td [(BICG:)]TJ -0 g 0 G -/F54 9.9626 Tf 33.484 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -33.484 -15.365 Td [(BICGST)92(AB:)]TJ -0 g 0 G -/F54 9.9626 Tf 59.696 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -59.696 -15.364 Td [(BICGST)92(ABL:)]TJ -0 g 0 G -/F54 9.9626 Tf 65.783 0 Td [(the)-218(Bi-Conjugate)-217(Gradient)-218(Stabilized)-218(method)-217(with)-218(r)18(estart-)]TJ -43.865 -11.955 Td [(ing;)]TJ -0 g 0 G -/F51 9.9626 Tf -21.918 -15.365 Td [(RGMRES:)]TJ -0 g 0 G -/F54 9.9626 Tf 52.294 0 Td [(the)-250(Generalized)-250(Minimal)-250(Residual)-250(method)-250(with)-250(r)18(estarting.)]TJ -0 g 0 G -/F51 9.9626 Tf -77.201 -19.349 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 178.414 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -170.787 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 138.701 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 138.501 Td [(Tspmat)]TJ -ET -q -1 0 0 1 344.406 138.701 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 347.544 138.501 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -ET -q -1 0 0 1 99.895 130.091 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S -Q -BT -/F54 5.9776 Tf 110.755 123.219 Td [(5)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(Note:)-310(the)-250(implementation)-250(is)-250(for)]TJ/F52 7.9701 Tf 113.297 0 Td 
[(F)-31(C)-45(G)]TJ/F85 8.3049 Tf 16.387 0 Td [(\050)]TJ/F54 7.9701 Tf 3.319 0 Td [(1)]TJ/F85 8.3049 Tf 4.085 0 Td [(\051)]TJ/F54 7.9701 Tf 3.32 0 Td [(.)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 9.629 -29.888 Td [(159)]TJ -0 g 0 G -ET - -endstream -endobj -1923 0 obj +% 666 0 obj << -/Length 7054 +/D [2258 0 R /XYZ 99.895 650.062 null] >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(The)-250(data)-250(str)8(uctur)18(e)-250(containing)-250(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 658.308 Td [(prec)]TJ -ET -q -1 0 0 1 384.755 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 658.308 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -22.202 Td [(b)]TJ -0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(The)-250(RHS)-250(vector)74(.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ -ET -q -1 0 0 1 410.618 588.484 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 413.756 588.285 Td [(T)]TJ -ET 
-q -1 0 0 1 419.614 588.484 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.752 588.285 Td [(vect)]TJ -ET -q -1 0 0 1 444.301 588.484 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 447.439 588.285 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -317.656 -22.202 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(initial)-250(guess.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.687 0 Td [(psb)]TJ -ET -q -1 0 0 1 410.618 518.461 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 413.756 518.262 Td [(T)]TJ -ET -q -1 0 0 1 419.614 518.461 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.752 518.262 Td [(vect)]TJ -ET -q -1 0 0 1 444.301 518.461 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 447.439 518.262 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -317.656 -22.203 Td [(eps)]TJ -0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(The)-250(stopping)-250(tolerance.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -22.203 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 426.236 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 426.036 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td 
[(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 378.415 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 378.216 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 378.415 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 378.216 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -22.203 Td [(itmax)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(The)-250(maximum)-250(number)-250(of)-250(iterations)-250(to)-250(perform.)]TJ -5.529 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.569 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F85 10.3811 Tf 27.744 0 Td [(=)]TJ/F54 9.9626 Tf 10.961 0 Td [(1000.)]TJ -77.274 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F52 9.9626 Tf 142.349 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F83 10.3811 Tf 27.743 0 Td [(\025)]TJ/F54 9.9626 Tf 10.962 0 Td [(1.)]TJ -0 g 0 G -/F51 9.9626 Tf -205.961 -22.202 Td [(itrace)]TJ -0 g 0 G -/F54 9.9626 Tf 29.878 0 Td [(If)]TJ/F61 10.3811 Tf 11.007 0 Td [(>)]TJ/F54 9.9626 Tf 14.142 0 Td [(0)-422(print)-423(out)-422(an)-422(informational)-423(message)-422(about)-422(conver)18(gence)-423(every)]TJ/F52 9.9626 Tf -30.066 -11.955 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F54 9.9626 Tf 
26.396 0 Td [(iterations.)-310(If)]TJ/F85 10.3811 Tf 56.313 0 Td [(=)]TJ/F54 9.9626 Tf 10.961 0 Td [(0)-250(print)-250(a)-250(message)-250(in)-250(case)-250(of)-250(conver)18(gence)-250(failur)18(e.)]TJ -93.724 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.57 0 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F85 10.3811 Tf 26.796 0 Td [(=)]TJ/F83 10.3811 Tf 11.086 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1.)]TJ -0 g 0 G -/F51 9.9626 Tf -109.553 -34.158 Td [(irst)]TJ -0 g 0 G -/F54 9.9626 Tf 19.915 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(r)18(estart)-250(parameter)74(.)]TJ 4.992 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)]TJ/F52 9.9626 Tf 34.613 0 Td [(i)-22(r)-35(s)-25(t)]TJ/F61 10.3811 Tf 17.671 0 Td [(>)]TJ/F54 9.9626 Tf 10.961 0 Td [(0.)-298(This)-214(is)-213(employed)-214(for)-213(the)-214(BiCGST)74(ABL)-214(or)-213(RGMRES)-214(meth-)]TJ -63.245 -11.955 Td [(ods,)-250(otherwise)-250(it)-250(is)-250(ignor)18(ed.)]TJ -0 g 0 G - 139.477 -29.888 Td [(160)]TJ -0 g 0 G -ET - -endstream -endobj -1928 0 obj +% 670 0 obj << -/Length 4489 +/D [2258 0 R /XYZ 99.895 580.047 null] >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(istop)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(stopping)-250(criterion.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td 
[(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)-351(1:)-351(use)-271(the)-270(normwise)-271(backwar)18(d)-270(err)18(or)74(,)-276(2:)-351(use)-271(the)-270(scaled)-271(2-norm)-270(of)]TJ 0 -11.955 Td [(the)-250(r)18(esidual,)-250(3:)-310(use)-250(the)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm.)-310(Default:)-310(2.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(computed)-250(solution.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ -ET -q -1 0 0 1 359.808 558.881 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 362.947 558.682 Td [(T)]TJ -ET -q -1 0 0 1 368.804 558.881 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 371.943 558.682 Td [(vect)]TJ -ET -q -1 0 0 1 393.492 558.881 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 396.63 558.682 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -317.656 -19.925 Td [(iter)]TJ -0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(The)-250(number)-250(of)-250(iterations)-250(performed.)]TJ 4.434 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td 
[(Returned)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(err)]TJ -0 g 0 G -/F54 9.9626 Tf 17.714 0 Td [(The)-250(conver)18(gence)-250(estimate)-250(on)-250(exit.)]TJ 7.193 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(cond)]TJ -0 g 0 G -/F54 9.9626 Tf 27.119 0 Td [(An)-210(esti)1(mate)-210(of)-210(the)-209(condition)-210(number)-209(of)-210(matrix)]TJ/F52 9.9626 Tf 204.999 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(;)-223(only)-210(available)-209(with)-210(the)]TJ/F52 9.9626 Tf -214.444 -11.956 Td [(C)-45(G)]TJ/F54 9.9626 Tf 17.001 0 Td [(method)-250(on)-250(r)18(eal)-250(data.)]TJ -17.086 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-249(as:)-310(a)-249(r)18(eal)-249(number)74(.)-310(A)-249(corr)18(ect)-250(r)18(esult)-249(will)-249(be)-249(gr)18(eater)-250(than)-249(or)-249(equal)]TJ 0 -11.955 Td [(to)-403(one;)-480(if)-403(speci\002ed)-403(for)-403(non-r)18(eal)-403(data,)-441(or)-403(an)-403(err)18(or)-403(occurr)18(ed,)-441(zer)18(o)-403(is)-403(r)18(e-)]TJ 0 -11.956 Td [(turned.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td 
[(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 139.477 -161.394 Td [(161)]TJ -0 g 0 G -ET - -endstream -endobj -1932 0 obj +% 674 0 obj +<< +/D [2258 0 R /XYZ 99.895 510.033 null] +>> +% 2257 0 obj +<< +/Font << /F75 685 0 R /F145 940 0 R /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 2263 0 obj +<< +/Type /Page +/Contents 2264 0 R +/Resources 2262 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2261 0 R +>> +% 2265 0 obj +<< +/D [2263 0 R /XYZ 149.705 753.953 null] +>> +% 2262 0 obj +<< +/Font << /F84 687 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 2267 0 obj << -/Length 81 +/Type /Page +/Contents 2268 0 R +/Resources 2266 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2261 0 R +>> +% 2269 0 obj +<< +/D [2267 0 R /XYZ 98.895 753.953 null] +>> +% 2270 0 obj +<< +/D [2267 0 R /XYZ 99.895 723.717 null] +>> +% 2271 0 obj +<< +/D [2267 0 R /XYZ 99.895 698.792 null] +>> +% 2012 0 obj +<< +/D [2267 0 R /XYZ 99.895 638.64 null] +>> +% 958 0 obj +<< +/D [2267 0 R /XYZ 99.895 595.644 null] +>> +% 957 0 obj +<< +/D [2267 0 R /XYZ 99.895 538.043 null] +>> +% 916 0 obj +<< +/D [2267 0 R /XYZ 99.895 477.792 null] >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F54 9.9626 Tf 315.088 90.438 Td [(162)]TJ -0 g 0 G -ET endstream endobj -1936 0 obj +2277 0 obj << -/Length 6590 +/Length 6164 >> stream 0 g 0 G 0 g 0 G -BT -/F51 14.3462 Tf 99.895 705.784 Td [(References)]TJ -0 g 0 G -/F54 9.9626 Tf 4.982 -22.702 Td [([1])]TJ -0 g 0 G - [-500(D.)-314(Barbieri,)-330(V)129(.)-314(Car)18(dellini,)-330(S.)-314(Filippone)-314(and)-314(D.)-314(Rouson)]TJ/F52 9.9626 Tf 258.302 0 Td [(Design)-314(Patterns)-314(for)]TJ -241.705 -11.955 Td [(Scienti\002c)-224(Computations)-225(on)-224(Sparse)-224(Matrices)]TJ/F54 9.9626 Tf 175.255 0 Td [(,)-229(HPSS)-225(2011,)-229(Algorithms)-225(and)-224(Pr)18(o-)]TJ -175.255 -11.955 Td 
[(gramming)-396(T)92(ools)-395(for)-396(Next-Generation)-396(High-Performance)-396(Scie)1(nti\002c)-396(Soft-)]TJ 0 -11.955 Td [(war)18(e,)-250(Bor)18(deaux,)-250(Sep.)-250(2011)]TJ -0 g 0 G - -16.597 -19.642 Td [([2])]TJ -0 g 0 G - [-500(G.)-362(Bella,)-391(S.)-362(Filippone,)-390(A.)-363(De)-362(Maio)-362(and)-363(M.)-362(T)92(esta,)]TJ/F52 9.9626 Tf 239.345 0 Td [(A)-362(Simulation)-362(Model)-363(for)]TJ -222.748 -11.955 Td [(For)18(est)-346(Fir)18(es)]TJ/F54 9.9626 Tf 47.93 0 Td [(,)-370(in)-346(J.)-346(Dongarra,)-370(K.)-346(Madsen,)-370(J.)-346(W)92(asniewski,)-370(editors,)-370(Pr)18(oceed-)]TJ -47.93 -11.955 Td [(ings)-338(of)-337(P)92(ARA)-338(04)-338(W)92(orkshop)-337(on)-338(State)-338(of)-338(the)-337(Art)-338(in)-338(Scienti\002c)-337(Computing,)]TJ 0 -11.955 Td [(pp.)-250(546\226553,)-250(Lectur)18(e)-250(Notes)-250(in)-250(Computer)-250(Science,)-250(Springer)74(,)-250(2005.)]TJ -0 g 0 G - -16.597 -19.642 Td [([3])]TJ -0 g 0 G - [-500(A.)-216(Buttari,)-223(D.)-216(di)-217(Sera\002no,)-223(P)129(.)-216(D'Ambra,)-223(S.)-216(Filippone,)-71(2LEV)74(-D2P4:)-293(a)-216(package)]TJ 16.597 -11.955 Td [(of)-247(high-performance)-247(pr)18(econditioners,)-107(Applicable)-247(Algebra)-247(in)-247(Engineering,)]TJ 0 -11.955 Td [(Communications)-292(and)-293(Computing,)-303(V)111(olume)-292(18,)-303(Number)-292(3,)-303(May)111(,)-303(2007,)-303(pp.)]TJ 0 -11.955 Td [(223-239)]TJ -0 g 0 G - -16.597 -19.642 Td [([4])]TJ -0 g 0 G - [-500(P)129(.)-248(D'Ambra,)-248(S.)-248(Filippone,)-249(D.)-248(Di)-248(Sera\002no)-107(On)-248(the)-248(Development)-248(of)-248(PSBLAS-)]TJ 16.597 -11.955 Td [(based)-523(Parallel)-524(T)90(wo-level)-523(Schwarz)-523(Pr)18(econditioners)-1235(Applied)-523(Numeri-)]TJ 0 -11.955 Td [(cal)-376(Mathematics,)-408(Elsevier)-376(Science,)-408(V)111(olume)-376(57,)-408(Issues)-376(11-12,)-408(November)18(-)]TJ 0 -11.955 Td [(December)-250(2007,)-250(Pages)-250(1181-1196.)]TJ -0 g 0 G - -16.597 -19.642 Td [([5])]TJ -0 g 0 G - 
[-500(Dongarra,)-268(J.)-265(J.,)-269(DuCr)18(oz,)-268(J.,)-269(Hammarli)1(ng,)-269(S.)-265(and)-264(Hanson,)-269(R.,)-268(An)-265(Extended)]TJ 16.597 -11.955 Td [(Set)-463(of)-463(Fortran)-463(Basic)-464(Linear)-463(Algebra)-463(Subpr)18(ograms,)-516(ACM)-463(T)90(rans.)-463(Math.)]TJ 0 -11.955 Td [(Softw)92(.)-250(vol.)-250(14,)-250(1\22617,)-250(1988.)]TJ -0 g 0 G - -16.597 -19.642 Td [([6])]TJ -0 g 0 G - [-500(Dongarra,)-422(J.,)-421(DuCr)18(oz,)-422(J.,)-422(Hammarling,)-422(S.)-387(and)-387(Duf)18(f,)-422(I.,)-422(A)-387(Set)-387(of)-388(level)-387(3)]TJ 16.597 -11.955 Td [(Basic)-308(Linear)-307(Algebra)-308(Subpr)18(ograms,)-322(ACM)-308(T)90(rans.)-308(Math.)-308(Soft)1(w)91(.)-307(vol.)-308(16,)-322(1\226)]TJ 0 -11.955 Td [(17,)-250(1990.)]TJ -0 g 0 G - -16.597 -19.642 Td [([7])]TJ -0 g 0 G - [-500(J.)-349(J.)-350(Dongarra)-349(and)-350(R.)-350(C.)-349(Whaley)111(,)]TJ/F52 9.9626 Tf 166.061 0 Td [(A)-349(User)-37(')55(s)-350(Guide)-349(to)-350(the)-349(BLACS)-350(v)55(.)-349(1.1)]TJ/F54 9.9626 Tf 152.062 0 Td [(,)-374(La-)]TJ -301.526 -11.955 Td [(pack)-234(W)92(orking)-233(Note)-234(94,)-237(T)92(ech.)-233(Rep.)-234(UT)55(-CS-95-281,)-237(University)-234(of)-233(T)92(ennessee,)]TJ 0 -11.955 Td [(Mar)18(ch)-250(1995)-250(\050updated)-250(May)-250(1997\051.)]TJ -0 g 0 G - -16.597 -19.642 Td [([8])]TJ -0 g 0 G - [-500(I.)-259(Duf)18(f,)-262(M.)-260(Marr)18(one,)-261(G.)-260(Radicati)-259(and)-259(C.)-260(V)55(ittoli,)]TJ/F52 9.9626 Tf 223.407 0 Td [(Level)-259(3)-260(Basic)-259(Linear)-260(Algebra)]TJ -206.81 -11.955 Td [(Subpr)18(ograms)-328(f)1(or)-328(Sparse)-327(Matrices:)-465(a)-328(User)-327(Level)-328(Interface)]TJ/F54 9.9626 Tf 233.522 0 Td [(,)-347(ACM)-327(T)90(ransactions)]TJ -233.522 -11.955 Td [(on)-250(Mathematical)-250(Softwar)18(e,)-250(23\0503\051,)-250(pp.)-250(379\226401,)-250(1997.)]TJ -0 g 0 G - -16.597 -19.641 Td [([9])]TJ 0 g 0 G - [-500(I.)-282(Duf)18(f,)-289(M.)-282(Her)18(oux)-281(and)-282(R.)-281(Pozo,)]TJ/F52 9.9626 Tf 160.607 0 
Td [(An)-282(Overview)-281(of)-282(the)-281(Sparse)-282(Basic)-282(Linear)-281(Al-)]TJ -144.01 -11.956 Td [(gebra)-287(Subpr)18(ograms:)-383(the)-287(New)-287(Standard)-286(fr)18(om)-287(the)-287(BLAS)-287(T)111(echnical)-286(Forum)]TJ/F54 9.9626 Tf 292.455 0 Td [(,)-296(ACM)]TJ -292.455 -11.955 Td [(T)90(ransactions)-250(on)-250(Mathematical)-250(Softwar)18(e,)-250(28\0502\051,)-250(pp.)-250(239\226267,)-250(2002.)]TJ -0 g 0 G - -21.579 -19.641 Td [([10])]TJ +BT +/F84 9.9626 Tf 150.705 706.129 Td [([13])]TJ 0 g 0 G - [-500(S.)-298(Filippone)-298(and)-298(M.)-297(Colajanni,)]TJ/F52 9.9626 Tf 159.942 0 Td [(PSBLAS:)-298(A)-298(Library)-297(for)-298(Parallel)-298(Linear)-298(Alge-)]TJ -138.363 -11.955 Td [(bra)-340(Computation)-340(on)-340(Sparse)-340(Matrices)]TJ/F54 9.9626 Tf 151.715 0 Td [(,)-670(ACM)-340(T)90(ransactions)-340(on)-340(Mathematical)]TJ -151.715 -11.956 Td [(Softwar)18(e,)-250(26\0504\051,)-250(pp.)-250(527\226550,)-250(2000.)]TJ + 0.984 0 0 1 172.284 706.129 Tm [(Gamma,)-254(E.,)-509(Helm,)-254(R.,)-508(Johnson,)-255(R.,)-508(and)-508(Vlissides,)-255(J.)-254(1995.)]TJ/F78 9.9626 Tf 0.984 0 0 1 428.128 706.129 Tm [(Design)-254(Patterns:)]TJ 1 0 0 1 171.975 694.174 Tm [(Elements)-250(of)-250(Reusable)-250(Object-Oriented)-250(Softwar)18(e)]TJ/F84 9.9626 Tf 190.781 0 Td [(.)-250(Addison-W)92(esley)111(.)]TJ 0 g 0 G - -21.579 -19.641 Td [([11])]TJ + -212.051 -19.926 Td [([14])]TJ 0 g 0 G - [-500(S.)-397(Filippone)-398(and)-397(A.)-397(Buttari,)]TJ/F52 9.9626 Tf 151.683 0 Td [(Object-Oriented)-397(T)111(echniques)-397(for)-398(Sparse)-397(Matrix)]TJ -130.104 -11.955 Td [(Computations)-353(in)-353(Fortran)-354(2003)]TJ/F54 9.9626 Tf 126.789 0 Td [(,)-716(ACM)-353(T)90(ransactions)-353(on)-354(Mathemati)1(cal)-354(Soft-)]TJ -126.789 -11.955 Td [(war)18(e,)-250(38\0504\051,)-250(2012.)]TJ + 1.02 0 0 1 172.284 674.248 Tm [(Karypis,)-341(G.)-322(and)-322(Kumar)73(,)-341(V)126(.,)]TJ/F78 9.9626 Tf 1.02 0 0 1 299.262 674.248 Tm 
[(METIS:)-322(Unstructur)18(ed)-322(Graph)-322(Partitioning)-322(and)]TJ 1.02 0 0 1 172.005 662.293 Tm [(Sparse)-386(Matrix)-386(Ordering)-386(System)]TJ/F84 9.9626 Tf 1.02 0 0 1 308.549 662.293 Tm [(.)-386(Minneapolis,)-421(MN)-386(55455:)-586(University)-386(of)]TJ 1.02 0 0 1 172.284 650.338 Tm [(Minnesota,)-447(Department)-406(of)-407(Computer)-406(Science,)-447(1995.)-407(Internet)-406(Addr)17(ess:)]TJ/F145 9.9626 Tf 1 0 0 1 172.284 638.383 Tm [(http://www.cs.umn.edu/~karypis)]TJ/F84 9.9626 Tf 156.91 0 Td [(.)]TJ 0 g 0 G - -21.579 -19.642 Td [([12])]TJ + -178.489 -19.925 Td [([15])]TJ 0 g 0 G - [-500(S.)-339(Filippone,)-360(P)129(.)-339(D'Ambra,)-360(M.)-339(Colajanni,)]TJ/F52 9.9626 Tf 202.675 0 Td [(Using)-339(a)-338(Parallel)-339(Library)-338(of)-339(Sparse)]TJ -181.096 -11.955 Td [(Linear)-350(Algebra)-350(in)-350(a)-349(Fluid)-350(Dynamics)-350(Applications)-350(Code)-350(on)-350(Linux)-349(Clusters)]TJ/F54 9.9626 Tf 307.21 0 Td [(,)-375(in)]TJ -307.21 -11.955 Td [(G.)-262(Joubert,)-264(A.)-262(Murli,)-264(F)92(.)-262(Peters,)-265(M.)-261(V)92(anneschi,)-265(editors,)-265(Parallel)-261(Computing)]TJ 0 -11.955 Td [(-)-250(Advances)-250(&)-250(Curr)18(ent)-250(Issues,)-250(pp.)-250(441\226448,)-250(Imperial)-250(College)-250(Pr)18(ess,)-250(2002.)]TJ + 1.02 0 0 1 172.284 618.458 Tm [(Lawson,)-252(C.,)-252(Hanson,)-252(R.,)-251(Kincaid,)-252(D.)-251(and)-250(Kr)17(ogh,)-251(F)90(.,)-252(Basic)-251(Linear)-250(Algebra)]TJ 1.018 0 0 1 172.284 606.502 Tm [(Subpr)18(ograms)-245(for)-245(Fortran)-244(usage,)-245(ACM)-245(T)88(rans.)-244(Math.)-245(Softw)90(.)-245(vo)1(l.)-245(5,)-245(38\226329,)]TJ 1 0 0 1 171.786 594.547 Tm [(1979.)]TJ 0 g 0 G - 142.805 -29.888 Td [(163)]TJ + -21.081 -19.925 Td [([16])]TJ 0 g 0 G -ET - -endstream -endobj -1945 0 obj -<< -/Length 3007 ->> -stream + 1.002 0 0 1 172.284 574.622 Tm [(Machiels,)-248(L.)-249(and)-248(Deville,)-248(M.)]TJ/F78 9.9626 Tf 1.002 0 0 1 298.249 574.622 Tm 
[(Fortran)-248(90:)-309(An)-248(entry)-248(to)-249(object-or)1(iented)-249(pr)18(ogram-)]TJ 0.991 0 0 1 172.284 562.667 Tm [(ming)-253(for)-254(the)-253(solution)-254(of)-253(partial)-254(differ)18(ential)-253(equations.)]TJ/F84 9.9626 Tf 0.991 0 0 1 385.663 562.667 Tm [(ACM)-254(T)91(rans.)-253(Math.)-254(Softw)93(.)]TJ 1 0 0 1 172.005 550.712 Tm [(vol.)-250(23,)-250(32\22649.)]TJ 0 g 0 G + -21.3 -19.926 Td [([17])]TJ 0 g 0 G + 0.986 0 0 1 172.284 530.786 Tm [(Metcalf,)-253(M.,)-252(Reid,)-253(J.,)-252(Cohen,)-253(M.,)-253(Bader)76(,)-253(R.)]TJ/F78 9.9626 Tf 0.986 0 0 1 353.525 530.786 Tm [(Modern)-252(Fortran)-253(explained.)]TJ/F84 9.9626 Tf 0.986 0 0 1 463.289 530.786 Tm [(Oxfor)18(d)]TJ 1 0 0 1 172.284 518.831 Tm [(University)-250(Pr)18(ess,)-250(2024.)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 150.705 706.129 Td [([13])]TJ + -21.579 -19.925 Td [([18])]TJ 0 g 0 G - [-500(Gamma,)-217(E.,)-434(Helm,)-216(R.,)-434(Johnson,)-217(R.,)-434(and)-417(Vlissides,)-216(J.)-209(1995.)]TJ/F52 9.9626 Tf 276.04 0 Td [(Design)-209(Patterns)1(:)]TJ -254.461 -11.955 Td [(Elements)-250(of)-250(Reusable)-250(Object-Oriented)-250(Softwar)18(e)]TJ/F54 9.9626 Tf 190.781 0 Td [(.)-250(Addison-W)92(esley)111(.)]TJ + 1.008 0 0 1 172.284 498.906 Tm [(Metcalf,)-249(M.,)-249(Reid,)-249(J.)-249(and)-249(Cohen,)-249(M.)]TJ/F78 9.9626 Tf 1.008 0 0 1 329.715 498.906 Tm [(Modern)-249(Fortran)-249(explained.)]TJ/F84 9.9626 Tf 1.008 0 0 1 441.822 498.906 Tm [(Oxfor)18(d)-249(Uni-)]TJ 1 0 0 1 172.005 486.951 Tm [(versity)-250(Pr)18(ess,)-250(2011.)]TJ 0 g 0 G - -212.36 -19.926 Td [([14])]TJ + -21.3 -19.925 Td [([19])]TJ 0 g 0 G - [-500(Karypis,)-422(G.)-388(and)-387(Kumar)74(,)-422(V)129(.,)]TJ/F52 9.9626 Tf 149.763 0 Td [(METIS:)-388(Unstructur)18(ed)-387(Graph)-388(Partitioning)-388(and)]TJ -128.184 -11.955 Td [(Sparse)-452(Matrix)-452(Ordering)-452(System)]TJ/F54 9.9626 Tf 135.842 0 Td [(.)-452(Minneapolis,)-503(MN)-452(55455:)-714(University)-452(of)]TJ -135.842 -11.955 Td 
[(Minnesota,)-531(Department)-475(of)-476(Comp)1(uter)-476(Science,)-531(1995.)-475(Internet)-475(Addr)18(ess:)]TJ/F59 9.9626 Tf 0 -11.955 Td [(http://www.cs.umn.edu/~karypis)]TJ/F54 9.9626 Tf 156.91 0 Td [(.)]TJ + 1.02 0 0 1 172.284 467.026 Tm [(Rouson,)-371(D.W)90(.I.,)-371(Xia,)-371(J.,)-371(Xu,)-371(X.:)-505(Scienti\002c)-346(Softwar)18(e)-346(Design:)-505(The)-346(Object-)]TJ 1 0 0 1 172.284 455.07 Tm [(Oriented)-250(W)92(ay.)-250(Cambridge)-250(University)-250(Pr)18(ess)-250(\0502011\051)]TJ 0 g 0 G - -178.489 -19.925 Td [([15])]TJ + -21.579 -19.925 Td [([20])]TJ 0 g 0 G - [-500(Lawson,)-314(C.,)-314(Hanson,)-314(R.,)-313(Kincaid,)-314(D.)-301(and)-301(Kr)18(ogh,)-314(F)92(.,)-314(Basic)-301(Linear)-301(Algebra)]TJ 21.579 -11.956 Td [(Subpr)18(ograms)-288(for)-288(Fortran)-288(usage,)-298(ACM)-288(T)90(rans.)-288(Math.)-288(Softw)92(.)-288(vol.)-288(5,)-298(38\226329,)]TJ 0 -11.955 Td [(1979.)]TJ + 1.014 0 0 1 172.284 435.145 Tm [(M.)-247(Snir)73(,)-247(S.)-247(Otto,)-248(S.)-247(Huss-Lederman,)-247(D.)-247(W)91(alker)-247(and)-247(J.)-247(Dongarra,)]TJ/F78 9.9626 Tf 1.014 0 0 1 455.169 435.145 Tm [(MPI:)-247(The)]TJ 1.02 0 0 1 171.955 423.19 Tm [(Complete)-327(Refer)18(ence.)-327(V)72(olume)-327(1)-327(-)-327(The)-327(MPI)-327(Cor)18(e)]TJ/F84 9.9626 Tf 1.02 0 0 1 368.48 423.19 Tm [(,)-348(second)-327(edition,)-347(MIT)-327(Pr)17(ess,)]TJ 1 0 0 1 171.786 411.235 Tm [(1998.)]TJ 0 g 0 G - -21.579 -19.925 Td [([16])]TJ + -21.081 -19.926 Td [([21])]TJ 0 g 0 G - [-500(Machiels,)-240(L.)-239(and)-238(Deville,)-240(M.)]TJ/F52 9.9626 Tf 146.833 0 Td [(Fortran)-238(90:)-304(An)-238(entry)-239(to)-238(object-oriented)-238(pr)18(ogram-)]TJ -125.254 -11.955 Td [(ming)-211(for)-210(the)-211(solution)-210(of)-211(partial)-210(differ)18(ential)-211(equations.)]TJ/F54 9.9626 Tf 211.899 0 Td [(ACM)-211(T)90(rans.)-210(Math.)-211(Softw)92(.)]TJ -211.899 -11.955 Td [(vol.)-250(23,)-250(32\22649.)]TJ + 1.02 0 0 1 172.284 391.309 Tm 
[(D.)-389(Barbieri,)-425(V)127(.)-389(Car)18(dellini,)-425(S.)-389(Filippone)-389(and)-389(D.)-388(Rouson)]TJ/F78 9.9626 Tf 1.02 0 0 1 426.1 391.309 Tm [(Design)-389(Patterns)]TJ 1.018 0 0 1 172.284 379.354 Tm [(for)-245(Scienti\002c)-245(Computations)-245(on)-246(S)1(p)-1(a)1(rse)-246(Matrices)]TJ/F84 9.9626 Tf 1.018 0 0 1 365.291 379.354 Tm [(,)-245(HPSS)-245(2011,)-246(Algorithms)-245(and)]TJ 0.985 0 0 1 172.284 367.399 Tm [(Pr)18(ogramming)-251(T)93(ools)-252(for)-251(Next-Generation)-252(High-Performance)-251(Scienti\002c)-252(Soft-)]TJ 1 0 0 1 171.865 355.444 Tm [(war)18(e,)-250(Bor)18(deaux,)-250(Sep.)-250(2011)]TJ 0 g 0 G - -21.579 -19.926 Td [([17])]TJ + -21.16 -19.925 Td [([22])]TJ 0 g 0 G - [-500(Metcalf,)-251(M.,)-250(Reid,)-251(J.)-250(and)-251(Cohen,)-251(M.)]TJ/F52 9.9626 Tf 177.874 0 Td [(Fortran)-250(95/2003)-251(explained.)]TJ/F54 9.9626 Tf 112.004 0 Td [(Oxfor)18(d)-251(Uni-)]TJ -268.299 -11.955 Td [(versity)-250(Pr)18(ess,)-250(2004.)]TJ + 1.02 0 0 1 172.284 335.519 Tm [(Car)18(dellini,)-300(V)127(.,)-600(Filippone,)-299(S.,)-599(and)-578(Rouson,)-299(D.)-289(2014,)-299(Design)-289(patterns)-289(for)]TJ 1.02 0 0 1 172.284 323.563 Tm [(sparse-matrix)-381(computations)-382(on)-381(hybrid)-381(CPU/GPU)-382(platforms,)]TJ/F78 9.9626 Tf 1.02 0 0 1 456.492 323.563 Tm [(Scienti\002c)]TJ 1 0 0 1 171.975 311.608 Tm [(Pr)18(ogramming)-250(22,)]TJ/F84 9.9626 Tf 73.713 0 Td [(1,)-250(1\22619.)]TJ 0 g 0 G - -21.579 -19.925 Td [([18])]TJ + -94.983 -19.925 Td [([23])]TJ 0 g 0 G - [-500(Rouson,)-416(D.W)92(.I.,)-415(Xia,)-416(J.,)-416(Xu,)-415(X.:)-575(Scienti\002c)-383(Softwar)18(e)-383(Design:)-575(The)-382(Object-)]TJ 21.579 -11.955 Td [(Oriented)-250(W)92(ay.)-250(Cambridge)-250(University)-250(Pr)18(ess)-250(\0502011\051)]TJ + 0.985 0 0 1 172.284 291.683 Tm [(D.)-254(Barbieri,)-253(V)131(.)-254(Car)18(dellini,)-253(A.)-254(Fanfarillo,)-253(S.)-254(Filippone,)-254(Thr)19(ee)-254(storage)-254(formats)]TJ 1.02 0 0 1 172.284 279.728 Tm 
[(for)-257(sparse)-257(matrices)-257(on)-257(GPGPUs,)-260(T)91(ech.)-257(Rep.)-257(DICII)-257(RR-15.6,)-260(Universit)]TJ 1 0 0 1 478.39 279.753 Tm [(\036)]TJ 1.02 0 0 1 477.558 279.728 Tm [(a)-257(di)]TJ 1 0 0 1 172.284 267.773 Tm [(Roma)-250(T)92(or)-250(V)111(er)18(gata)-250(\050Febr)8(uary)-250(2015\051.)]TJ 0 g 0 G - -21.579 -19.926 Td [([19])]TJ + -21.579 -19.926 Td [([24])]TJ 0 g 0 G - [-500(M.)-282(Snir)74(,)-290(S.)-282(Otto,)-289(S.)-282(Huss-Lederman,)-290(D.)-282(W)92(alker)-282(and)-282(J.)-281(Dongarra,)]TJ/F52 9.9626 Tf 304.659 0 Td [(MPI:)-282(The)]TJ -283.08 -11.955 Td [(Complete)-369(Refer)18(ence.)-369(V)74(olume)-369(1)-370(-)-369(The)-369(MPI)-369(Cor)18(e)]TJ/F54 9.9626 Tf 195.586 0 Td [(,)-399(second)-369(edition,)-399(MIT)-369(Pr)18(ess,)]TJ -195.586 -11.955 Td [(1998.)]TJ + 1.02 0 0 1 172.284 247.847 Tm [(S.)-284(Filippone,)-294(V)127(.)-284(Car)17(dellini,)-293(D.)-284(Barbieri,)-294(and)-284(A.)-284(Fanfarillo.)-284(Sparse)-284(matrix-)]TJ 1.02 0 0 1 172.005 235.892 Tm [(vector)-342(multiplication)-342(on)-342(GPGPUs.)]TJ/F78 9.9626 Tf 1.02 0 0 1 331.577 235.892 Tm [(ACM)-342(T)109(rans.)-342(Math.)-342(Softw)54(.)]TJ/F84 9.9626 Tf 1.02 0 0 1 442.232 235.892 Tm [(,)-366(43\0504\051:30:1\226)]TJ 1 0 0 1 172.284 223.937 Tm [(30:49,)-250(2017.)]TJ 0 g 0 G - 142.804 -352.677 Td [(164)]TJ + 142.804 -133.499 Td [(184)]TJ 0 g 0 G ET endstream endobj -1962 0 obj +2295 0 obj << /Length1 1383 /Length2 5908 @@ -26462,12 +33361,12 @@ W ò6S&ªå_!“½SÎ|esU›FÌR™y† ¢Y‹¥ýžï­§N endstream endobj -1964 0 obj +2297 0 obj << -/Length1 1537 -/Length2 2827 +/Length1 1956 +/Length2 7144 /Length3 0 -/Length 4364 +/Length 9100 >> stream %!PS-AdobeFont-1.0: CMITT10 003.002 @@ -26487,7 +33386,7 @@ FontDirectory/CMITT10 known{/CMITT10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /MPVPBL+CMITT10 def +/FontName /SFGIZH+CMITT10 def /FontBBox {11 -233 669 696 }readonly def /PaintType 0 def /FontInfo 
9 dict dup begin @@ -26503,44 +33402,88 @@ FontDirectory/CMITT10 known{/CMITT10 findfont dup/UniqueID known{dup end readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for +dup 65 /A put +dup 67 /C put dup 68 /D put +dup 69 /E put +dup 72 /H put +dup 73 /I put +dup 75 /K put +dup 76 /L put +dup 77 /M put +dup 80 /P put +dup 84 /T put +dup 86 /V put dup 97 /a put dup 99 /c put +dup 44 /comma put dup 100 /d put dup 101 /e put dup 33 /exclam put +dup 102 /f put +dup 103 /g put +dup 104 /h put +dup 45 /hyphen put +dup 105 /i put +dup 107 /k put +dup 109 /m put dup 110 /n put dup 111 /o put +dup 112 /p put +dup 40 /parenleft put +dup 41 /parenright put dup 46 /period put +dup 114 /r put dup 115 /s put +dup 47 /slash put dup 116 /t put +dup 119 /w put +dup 120 /x put +dup 121 /y put readonly def currentdict end currentfile eexec ÙÖoc;„j²„¼ø°Aw-åÎ3Ã6Uöÿu4 lã.ÂNk©4¡õ8•DPËh>®[M†E7wk9B½.…H‰ì.³CÍS†oøZçèqO«£­ OèÖ)9ú‚îOHl~Q:ÔɾD0j‚‡— J¿¶Ñù'JZ ¶ì÷­½`ÕÖÄB 5Ô†G -t²ðbY©7:¾Í¿2úh½K@¾À,¥Š 'çó,¥É÷ÙL’~## ê¥gˆyëMŽ¢iMæç?à úi8ðZ¼‡ý+ÐÕ’~Ëó[\3©èMn‚Àû=r=_­VxÎgpÄÁ÷¢%vïbÒ¨ˆSr\ËÆ©ˆµÉ*ó£'"$¶Æ9iVˆ«ð¬2Ý;äxÕ¿£ü§`e'ËôóÄ«fz•+‹;¥æ€îZÑ)¢Ëÿ˜ýæÓ£ÿ°¸¡4š¸ÍêYŽ,Œ1i$ÂWšºVÁC™oì _±S*`6 »Ij$¨I‹Ý/}„½!9'n0%3SV´ÝP}vÖÂqôsF}¶OTÓŒ+¼œO*½6ž½¸’I¤é*\-ÌRÚ3ª°ÚNÙœ:gd¿;p/V;aZþÀ6è©¡Ü:£“s–wQu+MÉô…àåF€Ãœ•Õ¡n>s ¯­›”Éå*ŸÁHPëix€†Î¡\x6²¥u±»j^tüâTg'Dà¦i&kò|í5Üé.Æ+ E¦0E,¼€ýâì»TÙ™~Ìçô#N™ƒa½¡X®Ï–Ý.-ÁŒüEæJ:êcx¶éΖçxh¯¤N ýT#O½£¤ #k]ºFÖ¼‚…XNÇw³iD—>?„Ö·Ç×—ßÁð”ÞÌÝ5vîY3ϳ7”@)‘r¬:J?ÅlM{OÞ{s…§ªø,¯ïܽóÿ^…$2œG £›Žl’G¿E¶Û¼Šì±@½ÝD²ŒÎu°ŽÈ]÷í–ú >2ëw‘3àêYq÷ÕÖ­±³¶Ã’¯ØP[Ë0q;˜‰HzÎ -¯'.«5¾"æmÙ«úN4Kª5ËàRcÄ8jÓLÛþY·Lö мÛâÕoÔ Œï­À32p!ûqb‰1Ù6#&9èŠV _ÌZ‡­wBsIéƒ$PDdÿc‰›³dîˆG°3Õ¡,­¼E šð»~ÍëÛ× -°Dt «J{\«©]Êô/yõ~UVV$-äIfygñEŸYŽ<ºáÝ Q«J\Z¿" -¦Hõàä1ÓRzìÛUS -ÕªµÏQÛhÏM°hËYF² -(ûGØÅÒ­û`÷D áç -Ž%=ÀýÇ Œû¨t’ ¯¸Ì ˆP›’JvŠÀï… ä‘¸,q­“™:ÿ7çÀ¹JUBRºgs¹PóÆ–b,-’>·½µeÁ¥|»ÉgEz´P5¤—õ2öá~ û'ÅÚñ®ØÖ#œ÷E’ä‰5kb†¼ÃNú&YÍJ†·ïG¾7ºV8#Â3ú²Ž0gVà|¶(抉˜ âBHÞàÂ4ÊÚÒ>¶__W!ýr£©N—(ÈÌክ\,¨!Ì¥ÃaåÃÄE›Žbã÷ô$·@ÂSìè*sOˤÌPøWógæÂµ½4ouçnHŠÊšOq%ÍØxh6`t%b)K0ñV•Òm¬³kqèWØQºcÕM 
-cPJ‰]¨6›œÿïµÇ¢`³óHÄG4ê¼ÅuO§d“áœ)u_è^Ö~µŸÅF]Î/ Ç€ ½ ë4lØ{kNæõ˜åíÉðÑ»!t¸=m~æ®ìø®"v_бdBtúýÞ¸±ÛRSª‡ É<¹øœ¸K-Pö\kìˆô®¸ ˜ÐÄo×ý¿DCîY¥J E!ûnÞ™ÿÉ -Ch¹gˆwý‘¢}ùÅ¡fuhk~7‹\ñøhi ¢ÜH˜gÝGv&›€ Û'kÜD®²šàWÚ©îÉÄ鯤Ä^3íÃé:o"éugŒ˜ìŠløæOÈ-È`á˜MjŠãj§ëb¦íÛ×?'Ѥ”п6žûŸ8ùÐ08Tâg$Þ(R5PP½;u¥4êôÓ© ˆD]o.ì—þÆý… Ôª²×³é<è›,oIÂÞ§ò ¤GÀvO¼7Iö OƒA—TWyí¸›¬{ &š¬Šž²R ”ÄXWAN8ijŸ³ùHUÖ`h"a@Qî=µC/>±è?I¾ÈãÊ©é",F°óyeŠ5èh<5¦‹¶'gç¬éŠ+Ïï7-Ç^fëí´8Í«±lUŸ?K°’0>Ú§cDœX,y±‚Éâa=Ä'ÿ·¾K)pw£ -ˆ‹NèšlU§¨·¾Š›”€dcÚ¸ZUµf Kê#v˜G¸ø@Hw¶²sPfC@µ¥™zS J[p—éÌ -þG?Oÿ'6ÄMpxX¹†n÷o“ó¨E*ØÔ5^ ÇÇ|@~¬ßðdˆ@¼M8É€DgäÒLžKý:ñâO:ι®ÿìÞ¯¾€e§£EzEX,¸u¢8ŒáWÔ¸Õ»PŽb'¯±Ôés¸-'†3¢ÀioCÈy z,Lf¶±p@•LéñK{ï¸ÃDhþ—[,s§œîæ£Y• ŒèG:œ¾{Ò¨ØU¿™5Fd‘vJ1Ã&!Ùƒ»ÿÓØî _uºYÿ²þß´Á,©„u8´(š¹õ³íK,Í{Íf1;Ö¼Re!X¨†–Äv„jÃÁy¦pßÖ–„×¹ŽFðÙy û`'¡ÿB`XÈŽCóÝXzï`´Aã¢îÀ<Üìk5(PU m–IzGÂùJþ.Èê=¤[¯!5q°Oëˆi³·ò‘ÆCÑu,)Ÿ`–÷„q:ì×@h«]â±Ù¼º…mž’gž“!Ÿ„ßh¼Ô“#Æs–1ªÐÞºŒsÜÓî~äU5Œu] ÍÐBO8V™D”EƒbØ/¤ž -IªŸXå£Õ§„ -¢²Ç¸jfÒ&ËÌô¡NÖ^k¡?h·&ÈÒ #РH}JgC ó3•ó¼>À•ƒÇ…ÚKЬűµ—&>-áéÄÛ#ƒ°kÜ¡T‚‡ó²Úã -vÖð4-tœÃ’»u•{ÌÞ½ifMVZÂgG4ÄHb©éY¿Å\þe# ÑåÒ«ÀDüͱU•œR“ Ú—JRw¥¾‚G*“ìõÊ"yh„ ”qCÄÊií:àË“(’RÚg³gü¼¢X_žøÑ/ÿÌ÷ wR}–,.}Ch›7´\å[Þ"Û“B¶+cË»~ï̓p הؓ–l†DZâN]‡Á„ð­³ü¥ &×¡ŽšZ˜+ªÅrçø’Øv)^º¥aÓªÛ»öyÏ¢6¹cøÀÓDøVÖF“ŠÌw$Šíyâ ¥( H:º€=.¬›1eëÜPä12)g¿¡<8üŽ Õ ³&×o]ï7»ªoµüϾ-/}tõN5ÒY#£˜,¨!Ó:oãIÙL}ªeÝëbŠq€z¶Ï‹Û5†€9ÒÆïC• -{#¬°ÈÚ| -endstream -endobj -1966 0 obj -<< -/Length1 1416 -/Length2 6052 +t²ðbY©7:¾Í¿2úh½K@¾À,¥Š 'çó,¥É÷ÙL’~## ê¥gˆyëMŽ¢iMæç?à úi8ðZ¼‡ý+ÐÕ’~Ëó[\3©èMn‚Àû=r=_­VxÎgpÄÁ÷¢%vïbÒ¨ˆSr\ËÆ©ˆµÉ*ó£'"$¶Æ9iVˆ«ð¬2Ý;äxÕ¿£ü§`e'ËôóÄ«fz•+‹;¥æ€îZÑ)¢Ëÿ˜ýæÓ£ÿ°¸¡4š¸ÍêYŽ,Œ1i$ÂWšºVÁC™oì _±S*`6 »Ij$¨I‹Ý/}„½!9'n0%3SV´ÝP}vÖÂqôsF}¶OTÓŒ+¼œO*½6ž½¸’I¤é*\-ÌRÚ3ª°ÚNÙœ:gd¿;p/V;aZþÀ6è©¡Ü:£“s–wQu+MÉô…àåF€Ãœ•Õ¡n>s ¯­›”Éå*ŸÁHPëix€†Î¡\x6²¥u±»j^tüâTg'Dà¦i&kò|í5Üé.Æ+ E¦0E,¼€ýâì»TÙ™~Ìçô#N™ƒa½¡X®Ï–Ý.-ÁŒüEæJ:êcx¶éΖçxh¯¤N ýT#O½£¤ #k]ºFÖ¼‚…XNÉá±D –Aj†ùvÕÊóìfú rÆ®–/*{âŸ,Ed),É# òƒ¶¿ñvihÓÓÍøÊ@ÂŽ áçÌ`ܲ ~êy)öΚgÙ‰Š•ïÇZln÷ñÙüÖv»´ªø×é̉˭0È2L7ЇÏçPkg÷¦ûøcWÇïãÏ%» Nç «|]§É¼Üûgñ¶n– ÉL¢è³>DšõC¶çj}§x›t¨7$ËÀ 1€Œìµ—ÏõTõ +"ç*v’OBdÌNÓÇ#™/Ãõ_,N¿Ð f/’.˜1fuU¤oëÄÝ«ÿøcýtÜݶ.±¹ÆãaÂÿLœEE¬Ô O´¾¼8NFôm|@*7Ȫë–q—–EïIu£eXÊŠ Hm{+¹œRZ¹¾“\ 
Düç¤o þºŠ6Ã;‚%GÉí÷ã‡þC`•]m%4B­Hi*Gßö§Fn‡Ó²Ý/‰Á„ðÕ.'¶…–1#F\ÚŒ£SÆÈàÅÖ†Ek˜Sò*KbÒ¯ix VA‹yÍqÊÎÜøô‹·iú,ki”X—¡¹áÕWø&ô`BAâ…:[2« ôºGÖ’—LQ2”}—O6É%ù™Ú‘ÊšX;îåäñýóa~ŠS?øé›‚KoÚ»6lðúTyª6D¼È±óF‡D“ðï×¥À±±W‰Ó\g?¢""Ñv™|]íJÊz`¡“%i~üH £õ‘ Ä69Ûtö @Y¦ P˜a¾÷Ъ' ¸é› $|,ú­ŒÐúÏký¿©ß’2Ç{fÉåC,ý1ŸšiJŸ•ݹ`zÑ$ïR·I½)ôO!À¢lG{žINÇüNù~zj"(¢:ÑË.AòØoÓnãÑߦvSÃw_#a\ ÏöQ½THÁÌ5̯EUSûOj޾Dò¦1ì7£Ø³BKÿ­TÖ*u¢»ôNºEh†ZÝwïÐ…Â?’rÄ81…¶ÙÔÅ:›ËðAíðJОö»Qú¨÷[»“‘Í¡`첈ã~lÙ÷&P}Ǹ­™?p™µ?{ªêÛF§.§t@…í_ÌbÃïkîÏçqµÁí‚ÓÝ ¤¥·ü 7/# àio »jihŠ¿÷'`±“7ƒ]7DÏço·™¶VÍKN4Ÿµe˜ïÍîí©œu +Ÿrp²Úáòùnó×¼‘ñÁ!7PѓМ€šžwe¡®ŸÇÁ(ùíB +ßþ•É ³€Õg‰»¶ì­ D¹ÂÄ +þߦ¹Ñ‡çPqË2t…•ξXm7š+‹tº?a’ä­å}“iÒ/Êk±5Ýiªœð:ˆS`ƒ$‰¼¢u²”5³n2ù/Ä¿K‡ô #é&}ùî•Ø 6ªÓÑf +{a$ò6šÊŸwºAòKY¤ó0×tÒ…dšïž³Ât¸¤ åIÀZ´IÍýù+ñàcÄC­Öð'àÕ0ë@-hœ¢q—m +–÷™ÅæyšóÏ3ÒP^Ù>]¥©à†A•ÔÓ{G +ÚI7®Ðè{¹ðp™ìõrÈ œÝµóÕ(órècÐ(j|/ Gç‚ì%! ›.åÍ +.I›ãö~NžKõGd}ý ]l5Ï, j]~ "¨´CçxûÊôóCHt¥ x@¶x /ÛÄ[ìo=Î~¶‡h7 /°æOawÒ÷Ü>0xd);?uš^q#+ˆuI£Ö¦| +ĽT,ïG…f¬Ku(˜‘ÊU ’êcÉ]~n8:>NA ôn›”åíc´wŠvZ÷ý¼B3]%ãö#q×c)¸Á„!ñÿ56#ý¹D!Ø¡oC¨>ÎFb£™2§y° X˜pätl‘ÿ0;µ¸4bO¿Á˜÷³NÌœÄmo OöAgn£¨]¤µumÇíiMC]GE½ŠØýoé±nÝõÞX5ÀcI+Q] +Áqe@E¥±„•ç…+­LíNv{*Hï‘!¥‚AǯÛÈ™ð·÷å?=A%;W xBÂ6JŠ&@;éì§l ƽƒ&=È˜Š˜eÌi±™æyΧ®ðñ‹ +.ü} ì Ê-˘ñ½xÝVJÜï{npJ¬F岨ÇuÝ:õJA/‡õ%È›šý ØVဿOÄb0¸SK¿Ê”‚ëÁ_äœõñ4C©ß–J`ÔìèÐ@©`ÅÇŽƒ%* M º©^ÃA{y뿱)æ—šØÄû’ˆ³ÿ’dô«=§UÁ'oXRxº/8ôÊ–Ý~TM> žÙ¢Lêi;g}c[Í«.òàìlýv—ÿœj˜g‡)G£©66„û[þ>kxÙÄ-´áöSð´Ã¿³ë,M®/sVðiæ«êÍúŽ‹‘ÌÒj£³jü]’wɶÿ“u/¢Ûº*º,y¸©LðC÷¡|ŽQÔø±qSA†ú™pFî~_U³—ó¯ÙÅ0øˆ«|6¹;è"å–s;1Î(—V¥Ã$ŸÅ +YGr®àÄ“çÊă‹ÄoX¬r6¡0ź3jî‚wÁ…8«6*ˆ¬Ì£ÁyáVK£­¡evëG=Œú€¢>‚N£lÖðÑNÊ +…Ñæ¬Æ£ñ+ª@ðËñòÞ²¦î–g5ƒ¿:ƒ0_D¡‚’*Ñ… açõÈ·½»7¦6¨å¾M·ÿxv.Tï­RG†»&µÚßÛ!'PM7>ˆ‡z-™ˆ~‡±¿}Iì”EH}YN8‘Ùlj"ÞS7n£˜£yN»½§£R¦á_c–êá½…›äÿý„£+˜ñ¤ÅgfQÉ›ƒì½æayî³áêø ÿâô@< zß~ÕCÍÐÞ̃ùú´D:®`12Y¹åzåÿÌ—›­ÀçK€D»|‰ænd­5Ç„à?MµÇ GÄ&¿Š”ì‡û¬³[®;ʱÍÖþ G­ŽxIÅd§‡°êXhX7 ¿`⟵ÄmR¬IŒhölüöQ¹ØÝó”:CÍ–õ‰¨æ;;EÞWrU–k&´‰8Óþ.®[Ôh% fäYBÅ‹èÕ5Q —Á¡2m'c ¢F #Êׇ žeDõ»”ÂFJ0î7Dņ¦¡¸󩔪°é\g¡W}Lj̉Xÿ€ÔíÎéwFuæè§Ëœº¼¬³ž¡T0C"N¹^c*´<À¹’wÛ3|‹ñ™B<¢ÁaYˆ:…»¢¢ÉŇ…99tÎ8IÚEäL}w½…mxPiòV§<4àÆYzänOZ×aH8x¦m¥ùºQ¬—Í¡9EëË‚sr6íàï‰öþìTü?~N{3iÄ Lh±Å.*s®Úai9J1?Ç-B[\†SOSÞ[ Ø÷h‡ ÈŸ bïug2ã£JUlÿkí›vgÇUÚû X»=ç>CxíYEk4è Ø`½UçV©ìèˆ0N 
sÔö–,—KU þŒ–!ä¥ú„lÞj =òÊ”}ÝwfXƒ±"uŸ'ªÆ”BºéŠsŸµ]P{­b‚×ÿð­aáܱl7r^c¥JÝY% x~Ú@¦£„ŸtýÖ åN¿¨%½õ†=œßb½Wa[´¶pT7·°29w¦¡¦"ƒ£6œ3M‚"‹¬(žMÓÊlàúY¹Äl U½&F†ÙFãJ®å«‹™‡Çj#™È6åE¼Šú¾ù¼éÒæäMkðwæÃÑ-hÕ£lµ½%s󲘮K6‚}žu‰nYA(uƒ–nrïÎ]‚Û ŒæRø™Wù?¦8^•Áv_Ö%4ïÜÄ5…]Z÷oH0ò¢gÜ?ãÛˆ®¯½Å2f!‚¯zËq³*. Ós¨¦¿f'Üö:"¹ë5aãMóÔ“6Ù·Lcùø$¸KØtÎÌÈ…¯¶•Ar’SÞ'\sW°F¨•™6 û &™µoðíß›L‹ y¶JYÿRVRnðØZïT 8žÛ7´=ù”n4P’ëärŽZ\JÌ¢t¿7ô¦v½h¸˜P¤;?'’Á\©z¬òÀ*ÚÛÆ™ õmÕ⌟)ÿ‰õìÈØVÕlÆÅ\‹…IG/r›+sã÷íZ|Ÿâªá§ººÍ5Ë®­ÜDªàCt C ˜ @'h ézHê]/mÛ)”µzýk¿ÃtàÄ[寴ÉE {êä¥NX¥cЍKyÀX—ÉþG¦R |ÿÎûØA=1WÓÈ‘EJŒÉ½<¸ºnYr[ÕTƒ ‰asÒϼ=£1Šá=‹r%SíÒ¤ÔZ6T’’Wa9¾Ý KÆK­ã®0ÅýWŸ1"mƒNÂü6d]w#t>6ç¬ïÏ‘*½^œvú´[¸S÷JÈàž†÷$ +úÝ`D/Ì>¼w³­°jêEÕ*¿*ᜠXú(H“‘ö ò¦|¬ r¤=ÌŸòéƒz¸#-1Õ_1¬kmàøG\Ëeò+Æ@T‚o3©'P’îjÞ~-cÎv~û H3Z¸êW»-Z×ëzÔ"d¼mŽ+:éäšZoǹ Oì@aØ[C¹*DôÈ3* fT,í¤1´‡ˆÑñœQÐÙĤÓjôçJ àzÔ6þh¿[Ù+J£ÍJ6L|Þö"?ŽÀ°äN|¼ûieÝÉÔsï{ïÝŒÚ +A¹ehX‘ª'v2d*U1‡¶ã{®¶¬d^ +_g\e+J`Ú¼uÜ4ã¤Éî– +þšÅˆÈÁß"þr?ÿ±¡iœ„'5´DÇ*,uƒ¯Îêžö àeòH%. $åÇfØß'ÛT¡Õìx„ ÂýÉ®3aâÐ\©<ò;3&“à.8‚±Ìoé aè­?shp/Ú`ȶB¨F–{ˆ·D1U#¡¢¦†ùžË'õ@/ÒwNwú¿öæ½O{WC°Sž2ºtP=‹'B—\n’ ‹m‹…$`ˆ¼Õ≲Xÿv¸G^¹Hf0@Ǽ„8Dz”gˆðtÛ…ôÕ×-4ŽÌ}ôýGW +Ú ýó£XêYÖ¾q¥7¾D(m‘Dç¶ ³¢‰"ögFA­uÏÁÛshB…!&«s–X¤Ó’ü}¼W·üTÅÃZÈsµæc@ŽŠÙ‰úø`9’ÈîzmÚ§ÊuËÀxç“leà :»wæšýÞn§­Êa`uú‚•ÑÃÏ©n¡ëtÕvóí87”JMS”rrdI8üMïÔƒi¯6ß?é.ôÎÃ"{O-|ä‘lòÕÊð ‘vÐ~6|¬gªÇ|P.³¾SD~nLjàyDZŠãÓÛÒ®o‹äåW™$ÃQp$³äÿ,;øøCÀ꺵nÝÆ­ó|ƶ法dÓ…ëw€ÝPÂw?WL²Iqß>\h(óÉFÚìÍS*Ñ—Qs¤ä™“Á3 +ŸÚª´ ®N«×À@I€ß;ƒi±eg¤Ü|F\•ÿ‘;;MÌZk±_çä`÷½£ý:BçY–±9 »LxqyKÕN,H`#øWè$ãV=U‘DxkU.¬Þ Ò"*4è´xSR‹è¹?õ €±ü¡§PrP#‚ä½ü”Ýý¤®>³½iª$!¨E–Ö°sG&làzÒ¿g$|oüëjÚ0ÑPˆëËsÕëffÊÄXú ô_ÏÉèÏç°»ÿ§‚ñ£^ û颤Ñþa9ïMh- téŸÏÁ¸\œZéZ³çŸ ™¯ßHzÅŒ°)XHm]cÕ&Aƒsl†¾ÛY³² x=VZ]wq½¢³ Ù¾fp™n‘v%\Ü*iš;ü2ýf<¸¢ž“ŒÙªÿÿcZ}‡Z‚;“É{,X¸]“T´ûÔ]ÐÙ4~jýr²d×.1º²…ò=ºâx%LE(‘ö“C_ÇïLлF»|zyå8ÓÅ‘;èênö—F+G'jDWTÓ2-ÚÁ–ÆhZîVÛ‡ôÐ »öcAÔA­^dÃùE¹É_{0âŸvÇÈŸ)ЇË7Þèyëß΂€Lhé ³á§9‘SFúsþù¨Úàf†®³_-vAYµó}Ö1ï)GMÚoÑ\‘<¬Ÿ+Â’‘Xc?«[´ªÖl&/”ÿ’øÇ_®ï¤ÐÝiËÿºâÇœÓT%˜jéü¡Ì.2‚Õô­ÌF"C"Ç#«à¥UѸ 4?“h)DÞnkcœ7@ñÊ*üãY­ÇÄÌ7sáîùÕݹõ=°žœRµ½ï»9YçBÝ…TZÞGýàkÇJý‡`íÊž`­ \þj+@{“kF]ÆÓ Oì­ç‰`èwŸqŽrÇÁ¿1-—F8©ÿA(èpóÔžèÆ@f]‹ Å»ƒ#xq"ï»Tïã›À× +*!Ñî¿/а¾µ§ãñ’û½š®K¹ÝBdî$¶ɉڊêý`²ÀÅiõç‘ÉçÝjÍMz³Jü7âä]Äjœ[¹‡¼JXö¶p 
h¬핤>ðÉÃ’mþ@¿ž4G«ëÐ׆‡õÁ®7¤ +¥ý2‚˜3>nå–“ éjVwYˆ³»2¥8Qד%ÕÊuÍh@}y‚S€._6dž‚òw…Al‚Ÿé¥Œ?Ê¡- +4ö8ɹ ˆÁktÃ’ë:öÿרý]XŒû¼|jÄ9ž' 7ˆ,¶} +öR±{>þ¸gºõ7,}ù™¬‰ÃP+Þzm5ÞÇ˺ÀHõÿ‚èU?iSÂr,+ +F½Aa´´ÿ.P’gvŸþì˜Çaósl ø¯ZBh·õ´gæÇ –Wµ Û-‹BѶ”N˜°ŠÜhú`˨[ÉW÷jßí)qˆ(µÎãÓÌý÷ÍŒþð.wYÕæ‹$ÞQ¬ÿ0±€®Ñ]Šü +endstream +endobj +2299 0 obj +<< +/Length1 1442 +/Length2 6151 /Length3 0 -/Length 7468 +/Length 7593 >> stream %!PS-AdobeFont-1.0: CMMI10 003.002 @@ -26560,7 +33503,7 @@ FontDirectory/CMMI10 known{/CMMI10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /SYFPBV+CMMI10 def +/FontName /TPELEW+CMMI10 def /FontBBox {-32 -250 1048 750 }readonly def /PaintType 0 def /FontInfo 10 dict dup begin @@ -26577,6 +33520,7 @@ FontDirectory/CMMI10 known{/CMMI10 findfont dup/UniqueID known{dup end readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for +dup 44 /arrowhookleft put dup 62 /greater put dup 60 /less put readonly def @@ -26607,18 +33551,17 @@ D jà…О¤á”Á ˜|-°cL„ô´TŸîz~ÈœÿLú†7Ô¥­G+ZÀËŸBŒ±ßŠ¿ê=¸*&ÌT7«8~‡#˜ÒàÆ¿l•Â8¶²Å½¸hJû·¦Á ¢(jÿbå*=|tŒ,cÒ“0Å0l·ï°´ÍsŒ‰‡K› @û3Õ\îSN:8¡£'gé¦ßU?Þ)äÔRb%ÑûÚoŒ·€˜è>Š6 ãÄÇ~)˜ O’ «©ÇXw5Í/"Ëá|k™¨(e$/ÞJ«¾G ¸à“ÇõÁž0Mõo#hpÑTé%Äòå-R (>¥*Îú Ò¨ìÈ25ˆh¼èïº|I.5uÁ`QP£÷Ö‚)`ñ©—QQǶé(ü÷4“5•³^§ƒÞ„‚M߯oˆ5G*`?ÍRFM¨ àÀþÕÌgãŽêpÀfÚßnþ¾5Æ1d¬¤¹‘íKÄë[L y¸q ´BÀ¶$Ã@'ˆÊ6é!Wëà -ÎúE¶ÆI¹ï6ø,ûITÁ$‡þÎy›Ù‡î“H–pi¹æÿVchž]l&Ûß¡FÃÝ:¹ÂO¤é$#ÈŠ!D:¨‹…%)aã›Mx“ýÈÇ"_L—ú?Âd*ú:‡Ö0ï2]7x(ìº4p ®P8¼*.v5žsËb<õ޳]LQ†0ï*ø¶Kí•×+·@>e.-ÚoªÓèþC®–¡Æ€lÊïÆÑ\~R+$vÃo–D¥™Úg†ßþyùê èaˆ çDNnrè*ïÌö®z ®høƒÙø[&¦Å&‡ÀÿæÔüq.®‹Êø¿ÝÐ¥‡ÄÎ¥ëÇl”Vx3õTª ö%LbzbSzÓ´1þ¿m‚­ ÛËžpØïMc(áÐÀ»yÁJqØü^ó]{¤+ÝSŠŠ1¶Ý¶Q< ÁŠM~Ñ |[77{>º5ÔŸ5I ¥ò8ŒÄÔ”_ki®d¥‘€4ÀcÖ@NÏ!T‘m + +ÎúE¶ÆI¹ï6ø,ûITÁ$‡þÎy›Ù‡î“H–pi¹æÿVchž]l&Ûß¡FÃÝ:¹ÂO¤é$#ÈŠ!D:¨‹…%)aã›Mx“ýÈÇ"_L—ú?Âd*ú:‡Ö0ï2]7x(ìº4p ®P8¼*.v5žsËb<õ޳]LQ†0ï*ø¶Kí•×+·@>e.-ÚoªÓèþC®–¡Æ€lÊïÆÑ\~R+$vÃo–D¥™Úg†ßþyùê èaˆ çDNnrè*ïÌö®z ®høƒÙø[&¦Å&‡ÀÿæÔüq.®‹Êø¿ÝÐ¥‡ÄÎ¥ëÇl”Vx3õTª ö%LbzbSzÓ´Hv­÷šÒѦ·Sc)]M|Yœ÷IGC¼rç õrº²ô®ÍmGý­`—½2N×q$ž +n^Ø90ªI`V<_qŽnA¤©UÉ´Ý^°å"2ض>K;ägþæB +r sb¢VØb)l 
2ÅþSŠ„T£­H+PMzjÔîëzËiÍfí$ŽDáŒMkV©° Ã?ã\Ù¼Ä>ެ߈ƶa¹<¡ÈèEE£ÜL½¦D…¯6=t¯–EÒ÷µ€ðàÀ.…(Á%FoÜ~­´ô6€rý¦\l•ù;séñ'$Öµ…ïß>ž4¹ÿr$ï2¨DZý~*®\R ù·Yi$=Þ¡¥k‡å'¯Öå“™úý.m…6¯Â–„tY0’O$ð)ºQñ’ršÈUQ…»+¶ßªúÔ4(¨«,‡°Qb8¬—mÓ©qý±âÀÁÝHä°=‘œ,YŠ8i  ÿ‡½ª”SZ6Žöã½mØ@ˆ»`é7~â"L7\ã¹ßFdN#FþýHR´K´KHpb40 2ÂÚ~HÇKε֊úøX endstream endobj -1968 0 obj +2301 0 obj << -/Length1 1429 -/Length2 6269 +/Length1 1478 +/Length2 6403 /Length3 0 -/Length 7698 +/Length 7881 >> stream %!PS-AdobeFont-1.0: CMR10 003.002 @@ -26638,7 +33581,7 @@ FontDirectory/CMR10 known{/CMR10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /GIGFZE+CMR10 def +/FontName /SOSTRQ+CMR10 def /FontBBox {-40 -250 1009 750 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -26654,6 +33597,8 @@ FontDirectory/CMR10 known{/CMR10 findfont dup/UniqueID known{dup end readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for +dup 91 /bracketleft put +dup 93 /bracketright put dup 61 /equal put dup 40 /parenleft put dup 41 /parenright put @@ -26687,17 +33632,17 @@ f}cq ¶ØtíþþYá5`F~_›xÁƒUž|‹'X’µÇ‡„G@UÖ6I \™„çÜö>ü:ÉV_ûÉ>Õ¦9*I0}Óµ\"{Øï3UÚzÆñÉuäFœfÌÙ¤…õPߨÊ)Eˆ8‚üU¥œúw)®ðgŠËqÔ:àdj#¥¹¯ÜˆhqÇ ¥œ¼so -¹@’ KG7ã"­|QЇæªúå Zòzs|…‚}£ÄLÕ´SŠ¥:€fªÕm¼§ÂA3ïÖðS 2Ä=‘Ö[³d·Ø“üJœ\ܳ­òcìÚ€üù|Ët¾j1ÑÆ›AçrþŠ®ÀÑ®þÌJvÂ,K| ò´…Ï•VÞœ ˜ü›Q„´[ÜÞ<×nrWso}7÷œxrpn„mõãHpF%·EÛä¦ä!$š«ˆºâÎRŒJuçñíˆ:OgŠ7Ç)¢B›@OûdfzîÙG0ž%άá†u¦cs4²ï…C¢[+‡Ö ­cù½¬pÑ v¯sÛpwb»“ xr{¬"Op½ñ q!ëtXœÑ¯}ú´Ú­›%àjé:Ò_;¨´Œ¾[ ;*vU¶þ8´Cé@³e®]F‹ ÑãÛéA£EKS/Œˆzýÿ˜åKcÅ +OÞ,¨4¾•e|Ó{‹G»Êø#Rvðà©KÎýyÝ{ K1èEúŸÖýVËVw¯b´æûÙç6­«ÂªÎÖ=…»ºyeçŸÀeìÜ‘üÆ÷ uEØ>M%;,ˆš1–W}w ~hW¹ «—¦˜_èFÌâÑî9QcyƼ®E¸~ôÙf®áz\©a’:a<ÿÔ2eugg°SRŸ1ØEÜ’(=F™=Ž£ƒ;âq')^1>Çu6 +GÁ0ÿÑûòÿ¦œÿ+B°¸+›æ¸žb¥qGYÀðitâLxRIPv®Õ½·hÎÄÓÔ¥ºÒÏ‹^ž½:nwåòœÕp5¿Ð>^؉R¥Þä}ð4ü¸¿Œò™áùÞaŒgül}×^™(©t7ƒaÝÔ &ó½¶)±ZS«ˆ‘ ”¾8ÿn}@f0;­b.Û£ELì›7†%·¹›3Cª,ò@ä7¹Õï[I [1sI¬îÅXÚ<Æü[›fÜŒ¹¶Ýa¯:?yºÈç3"èu/'Ø\Ödñ ÁDÓ¹Ú=»qUü êškóκùH‚â$ y§ïQ7¼plŽ*žmhS]ADäÛäøù5Îã½üÆ«ü‹79µ endstream endobj -1970 0 obj +2303 0 obj << -/Length1 1641 -/Length2 8102 
+/Length1 1688 +/Length2 8444 /Length3 0 -/Length 9743 +/Length 10132 >> stream %!PS-AdobeFont-1.0: CMSY10 003.002 @@ -26717,7 +33662,7 @@ FontDirectory/CMSY10 known{/CMSY10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /DMJGRR+CMSY10 def +/FontName /VKSUEJ+CMSY10 def /FontBBox {-29 -960 1116 775 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -26737,6 +33682,8 @@ dup 66 /B put dup 72 /H put dup 73 /I put dup 32 /arrowleft put +dup 33 /arrowright put +dup 3 /asteriskmath put dup 106 /bar put dup 107 /bardbl put dup 102 /braceleft put @@ -26764,23 +33711,29 @@ A5 ŸÇÉ[ÓLŠhŸŽY³)Øo57kßþ“#%H’\,¬xÎ|ôs¯sxí¥HžÍ†|¶;`Å vû%øÚ¯T[ícÖ]ï‚eŠ"G—Ujß«„yŠðtvxÕ:udQ‘uZy_²Td"“ª£b+çÁO†]Øl¯xà'6wòã6X²C¿DKOÁ‡f›(g@w]f¦V™ËÂ[ê~ëŒ1Fùé^A3v¬ ÇâO›vÒ¯¶2v zúåT¸ÞúË0ÙÝb#ö›[›zyÏ’Õ$e1¬ÛkÕ5—dRAì`(Å…@{=yW>¿ðˆä>ú‘¯çy@æÅÿ¹Uáú;”œ±=ÃH;Çc{–ðÛ3û0´ü£^Õ®”)Õ™-{·O¹°OÖú2R¨Â´xÈèëˆQÅôwBgБâ¼ýSý»ž›"תl›bðöz;¹±˜IyÕ\E×ÁËê‰pr¨kM ·@@Âe&À 3•˜lÃʉtS÷æÄ%í)è/pRÇøÞÙÀª"2µu ˜Ee’5ø+ÄÙ°s§Zò'±¹§õ˜àçbˆQ²ôð°SlŸjÅö zvÞ€â~%.Kz\N¡Š„çüã÷Eê,ãeV2Ò=Mo)-JcÇVà¤:ÒUÙ2Ѹ>É1ˆ×È“"¿QWº ƒOýõÑ‘4*Ozûº.c¸ójc”«Y&üßÜØØÏìê=ë7”Y™´öøÈðê,A5nÊIßFŠ•/œ?üÌŠ™á ]as.´N/þCÕ»tÜýH…G50x}eb9—S&ùœ¬é0år¾à«J§H€‘ôN­\¿?^4'îœèb©É»ÏÀ':KÒ`wpcÏT–!ÜÛ_ÿ öH˜°øŠ¹ÝІ‘óþ{–ÅÚ}þH¿mZto”àÞê7Ê«¾ß‘ŸQžÇ·¾Ùbi>ÜœZO¤§ „Š“dºdFÕ¿cc -$"«˜ |ðhJQvB'\³Í%Ì6Âä²3è¯J)A‚g²+TÂò‰ªÀî§ø¼†ÊrnßãLì“O™ÁN”@µEzçHr$©d(|º÷Áµ*zˆÜtiF¾ Õˆ›º&«Öþ«Œ{Ó5rT[°)C÷ =t†<¢na‹´€ï3å—³b«m ~4¢@±Š¶xký‚surGàE½_à D#D¯°%Z‘ÿúR·ƒ”sqbª®ÝÆ:sÅBÛŸö‚šB6Õ©3bÝó7:GÆÛ mšÄ*‡^Ö¬ÇW B#?-Áö`]díNgåe§É ”â=Ñäž+J#Ÿñ|zµ +²”"¥^(}tÝŒ%ê…RdøŽ Ç?É̳sûßõhÓžPaLäUÍ ºs ç™ÈÏEAª«4c[ùŒÞþ‚墶Ûë§Üm%êq³ñ ]/`3"<fùµÕ²øÕ6hó :%~üÃ>ÓàCŠ¥³Çà™ÑÜÃåú"Ê dò$L8AðŒ€x„úÝqÐ0ø-ÈELÌæ’ßuÀ&yÅ2ÁrÛ2¸È‹z–^{4µžXúQ†¾Ö­Þs5\Ë¡ #Üç~Ç"eQ|7Åt)Ÿ½qv[_ÄQÀS‚5­‚Ü-1ߦ$F;‰Ã*yøXL™@`ÓƒMuûù¥$HCÑáB’®¼ºÂ3ãáU„phÒÌåZQ¬ é;H×vQva;—ϲE–c&µ098ì”)vC¶pzílpª@¡L|Né4fÒæÍS¾õ'/V9WŠØ -ƒŠš|N´”Áü¥À'IrÉYfŽ,¶YÍ:§ò|êûA£C­fªMúw ì6:9xõ'õøŒ&KkçOªšÀCqó:[ÚŠ¦¼3(Ðr™äÖ1Åà?ü™GÑ Õo˃ÜôÓ×;‘ÖxÿN˜.uœáŠYùÄN®¦~¹“qdx 
‘£N’oÖ†Ð̦îúˆÖTÃb£ÿ>Þë󡤶¬]2Àr×dëµ0D>šìµ¾‹¼2ÝØ$1u2Àkdï·²±Í¡£W8Ô æZβšI8ÜQ²ÕñÒ»î¢=)K82d 3tcÆÕÃT¶‹ÄÉN°UÞ'KMC¬*Ïû7BGrJ›!˜-2àJÃùœ™¸ÅåFØÕ*øõ• °«tÔß¾Ï\Ö˲ɑBÝðHé~¹Ò¥ÏVØÒä#½“Œ&Ôû|Ob°˜ðÆ2Zìšà$ç̘ÞRLµø0ü)²²´ªG|'é.äðšùÑu‚âc©ß=w¼;é—¯9`ëVÛ Þ_>}G²ï´>i„ã»:æm",&e1à\jÀ'ܹ ¥-Â*]Á™l€Ë ŸIcBÁœ0G)‘³¡¸Jõ!ŸDùžgr€AÊXÔj×dýCÂ۹uÀB˜„-Ç&n,Ñ @¼ô¥‰ë-Y†µ½,Yê“–ø ü8°¡á¼U>ç -¢wÏ–ÜZ÷ÅBû¡ìÍšÁ«e/s@Ê–v`œ6K¿J F-㌱БÑä åfä·‰× -äME7vÑIÉ—„Ý9KúCP^Ï9Ú³:`õO˦Šq» ³ «‡l›1p¸…ù*gs4¡ýPצ‹>±ÉO›ÿßÒI*ôXkW±ôžÃ}™bËó`ŸÔý^ Ò˜‘ø·rؽٷBމ6IA(q -¢£²È1=tåR´&×6‘𨑶~ͤúTªáQ¬PŽ!Øõüì¿Éeîìj kâîŒ>á}ÙËyFê.ßâ–Û2"`µÐ‡˜ÕHÄQ«Ê÷Šæ©æ™QT–ÁBÔ„¥¼Äê-¶î’×9ðf4¸±è×¹.ôÌC—AH¬+ûûîiuc=Ý!%Éæ²è‚ÖGÈx&ßÙ€߿㠻@à·ÏŽïmÏH:Ýᳯp‹zÂAsÉLG† -˜R£ïÞ,fúM#›†|¹ÿÕ’–k×–>‰yé1¨#póÅC^ÛÕr=éxaK_7mG¾È¿{·Ç=y¢¤­Ë㼟ªD‹ÄG§ãiSVWÞ4{,i"È“]üeÉr×4ŸxSôHó ô™ Ó…‹ßD…¡f‚†øHº|eÝ/Ó3Nýƒ?g™¹id“CžYd£ê€Ÿêš—²’†éMš5ä O8à*­_è÷0Ú,ØÙwæÛ=õÍ#®ö†šJ²†ÿ˜ÀÝqV+Çhô÷Eª÷ÐéBŠÍy;ˉð˜í}àº-Ä%i¿Øˆ9­ÒD¶A,’N­ô %E‹oËŒw»\”ó7Pm̵z¶²íµf8ËÙÂÕÀ¤ûÈ"ô"¹ÈHg»!ýŸ¸zbª¢ËŸ_d[ KXÉ;Ú -¨Ÿ§Õ‹HD¨ÞÔmÅCT>—,Ù§Ùò“ûS?þ..KΡ˜¤¡åˆ+ù¶HšÁ“Žä3›ßÁ -ŒdýgD§æ§`(C5¹¯Ž‡€Ÿ¶W·jokÙ˜*„Èóç)º9Âë J ±XÈÊHLAÒ±, ñéûŽÅ¢l’+Ô³Aø÷Íêʦ—8‚‡|éV¨%r½ÁÆ¢¶wéSVЃìÏö{'J×™N7ÔJ£éM6¦¼~4ªÕTLìœDX¤ÄKäÕÅr÷6çHÕÁ9¶o›=åùØ«ºL$5ç^«ow¡ùiØÝÊ?© o)¤QÅÑ|Ž*‹Ïç¶Å·ìît…±":JíÛ ÜSôc6Zsº~éã øCEÝñ¯ZF Ø'W±³ZÝÍ#¨KãEÄ%¨ÙÐw^ jÔ!|è»öà W*Œm9sC©¦=[Uõl¡œÁÌV W;Ú:Ë$º×†/z{œŽïj4ºJ}üì7Õø²:]ôD‚¯µî’ÀÕgVyUßRo–'É)ƒ ¼ ’úÿ¾,;-]÷jŠœ5ǯëÚn»@ñˆÞqŒ©Ó½[ªxì…ŽÛG[Gßv®ÖQ³XËÉ`s}«“¾¥Å^LgÎ’áæÕ†Ý¯È|X{`£‰Ä(+?wåNwÀ>y>M€ZvA  BÀ’&çaVd9»¾¹ÐÌ -^(w-Âè ÏìÔ +$"«˜ |ðhJQvB'\³Í%Ì6Âä²3è¯J)A‚g²+TÂò‰ªÀî§ø¼†ÊrnßãLì“O™ÁN”@µEzçHr$©d(|º÷Áµ*zˆÜtiF¾ Õˆ›º&«Öþ«Œ{Ó5rT[°)C÷ =t†<¢na‹´€ï3å—³b«m ~WåÒ¯Ò-ÿ¢-ã*_>Góe‡È¾ºJµö!De:ñ‘1˜´‹ 4Åæ*ørkd›«Tý^ï\ä”(^”å·|a›ÊuØÝj“íË•û‘ÇNø×–GÁÔß² ‚Óh$ èÇ™¼Op—H.'çHã •Æ"&+t +Õd[øÍZìÿÏ—ŒþEßÁ1Éøktê±èŸ/È”’Ë7ï\xßY +hÊ^IJvh>üã\/µ?1ªX½ ¶Ç½þnyWlÕ.ÀëŽýAñka«i»³Yè4!@¬ž.‰Sø§Eg_»ç’Ž;Ûâb$;ˆ ‡ëÂótO¯EN,×@ß}Ë€¡À°ù¢g~Ÿ ÕŒª ÿìÓf,ÚtäyÁÓô–¼yËV½Ðc) ¤ðävŠM‘/ã¶"{i è~?pÕ·uŠCùMãâ ÙD=Þϵ0íŽâsñίŸSÖ΄V"å'»„¬†B &ïä·!©U,)úÛö8®RîÞ’¼JRD0’õ˜]¨<±M£r|±98CÙ׫NnŸ3Œ®=Cøl·âÛ ¬¹?×NÛ©Y$£õçÌO6­¾f¥;çHÄ2¶h,°èbþ=&qfžN¢é]ŒÜŽbt¸íÑ)Œª~±ƒ2¾3ªØÃˆFG?‹ì¼ˆç¯[¡PöÝ«_²Nò2@´kÕ +ç=‘ûC:•æ¨ÞÛ‡uVÝÂ%Oÿ^øðí"¢….œ-ٛ冕ÅK 
=°‡¹G,üû[{uscˆœ–8`ÏÏ=ýŸ,†aÒ… 4` ÆÅ ú>,®Ò¶Ö!?îòy)ÃéFë°ËåðÜîá„ 6áò.sv¼¿×W«:iAaQ±Ãà²$gŒ§õp¬$~ÓɹPKx 8IöJ«cxîÈœ6ò‡Í¯MæHkbŒTþ':ËX¨ £¤ IÖ°(ý¥t#˜+Ëd‚Ô¶dó‘áÏS¿¨¸ëçTOöh†¯Ã¬tÙRŠ<%¢(•Ð^Ì0ƒ.9u¯»U€˜÷ÒŠo4jµѾ9fyï)œeYgSÆ¡’#åKçÅÚ)«šàAªÆ¾‰kW2«Ìê1#áÚE~Üì›—‡‘€"@q´¡Ñí ¼3ýÍõ•Ý|aTõçÑ ²õçýÜ=iï¬Péi~‡ô¶!zå ¿I·&;çXÃÔe”eg¥8:¨ÞæKäÓª¦Då‘K[h¾$Ñ)Zd‘7Ï’–íÁmQ¸õF*TÕ™<ƒÀ•çãDMI ÷%’ê¬w¨}Öå§ž!‰zQl#9Éñkãt²_©¤#åJèôn}ƒcy6PoÀ Óq_eÝŒñU¶]6-Q +±ÄÕÎr™}—G~D"ØiŸfì=¤öóåºÄzиƒ“ÒhΚ¿(±0ÐóøEÕ‘çë17MæÔã1Zͬå +è-óÔÓ“<q­œžOvÉ™/$«ú—¶#9æuÀÙ®ywÓ ”}w|ê&ŽÏÓúÇ{×?M + –±Ï~x3Ï L'-@Zºkv•hcîåQ Ñßž†JˆˆQbù<¿ +w‘­q¤žÈ$vßKGš¹è¨„¶ù4Ý[ ß´g´<ôçæáŸìa’éÒø4Ð}×D+VWFº].ÈÙÌY3‚8:Þ­*£VtE§;Ôa‚£Æzv âäÂŽwâþꕞúT¬¶ÕƒU·ðÊT³Çd¹,lW³Þêa|‹h6*¡úšÿíA»TÒfÎ’*”•*òþð!#ÞÔY‡Ñ¬¶ù} ™©爳ܒ@ Ë\ÖÛæ mÏÈ©õÜüÍ—Ðh\‚§ÅÃ0jÉïzôÑdA7y,¾Y‘‰õŽM5?̶ÀÒí„´™¼Äðï•òa õ1@Ñê5~#uñÔÖ ð'µœYá! +5 +¡Gí´ž ¶ò +êN¨ /?|$Ŧm§®µØ<£¾$(Ò [ÎýxxGµÊt„Që¨WâÖ˳uÜu±_.3YôNò&ÍŽ”£3ôëãÜ¥OÛL'ÔŒP??赨ðÕT!Ã¥¿ÿQž2ð¾B#7<.*zÿš¢¹ÎWSá®6zÂçjÃQyºtˆH h´t«p1ÓÇ-Y£¤`ÑÊßòaÕóñ9·³Ö<‡còÊ¸í±¦½lXôüT"t:ój˜Y|… +ùÉ,»oêã%þ¥Ý|å.' õ@qÞ8Š‹¥;ÁÏ;Oƒ˜"!L’LÄpëÚ42ð»ûÓºKG||mHƒvÚ®Vka›WŒ£Ìè{9 ¦nRgÏëaZ87͈ºŒ,ÒKØçÚÐê¯`˜vÕ+ѯÊZôvÈø–˜Å^(!ñ|øÛ¢ç~†}Mr(îéÊÙîY|ÙÅ~v,§ ƒO5í³¸¤·«ÓØ€@ïÝŒzªé ?|1òá?º§êŠRâT*kqMèÜ+pª^ñ\¿u PÍoCàŠ½zž¿$Åt@ò[àB-ïE,ì9g"Ù#£.º±:N¦ßú=¸Bœ*×ÝjyqëÀl6Þ]{j?Ê"AsŸNŒÜ9 +p¿3úù¶ô4}|0IÇ•çÜ›‰Ó±ŠõͯHöûê¥=ÍÝY©É§ÿÂ`?ö.¼5‡=¯ØP?Ÿ9<¯N›¥;Içq×ÈW²s1nÙ„&mèÛÁLÖ§_&ÿßczÙqâ9lÁÉ£Øg^[Uz(ß@ °ï™üî]øbžéõ½|ŠÝæx‰LB=öÇÙ”6Wé€ÀîûÍ*Gçt.P/6œn¼ª±*Ç|›Dï¼ëäÑ'+T%^–Hho'«‰ÝÄE¬ÁÖZÑü©xøT*!i¦…ÍZ±w2l°f7ƒ*VÄÈš8Ÿîá+ÌzìïÀÿxŸâ67HÿÆ0 +bŠ|Ü<dF.ÊÂÃOü¶½£æ[–ßÈÝv°ó¿ö±O¨hgÃy‡Qì®—»å¶ ‘«h›0}¦ª»½˜Üõ,ïåè:Õ(Ñ–~¨–À–’6uü‹mXóR·VÃ<‡­Ð³JVíM\Fw%T6VÍvýÑQnéϨÿ¦:Ï M¤\Ú…Uá-:φ57M}ÎÞ!ãZ‘D ês‰ ç {$Qû0Ý„në®êÓ Ê(ãN3°Þ[( ½¶?™ÓŒoÜî9T½”ÒuæÒ«6nÀu÷ðD!qáíZ”ÞU›·ÍÜT”!pÚ¹VØžïó‡/…È\ýh^‡:ñ'.AµEõ¾S‹6P*BËY‹LŸ endstream endobj -1972 0 obj +2305 0 obj << -/Length1 2519 -/Length2 17749 +/Length1 2571 +/Length2 18274 /Length3 0 -/Length 20268 +/Length 20845 >> stream %!PS-AdobeFont-1.0: CMTT10 003.002 @@ -26800,7 +33753,7 @@ FontDirectory/CMTT10 known{/CMTT10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 
0.001 0 0 ]readonly def -/FontName /UFPYIQ+CMTT10 def +/FontName /XIQVGP+CMTT10 def /FontBBox {-4 -233 537 696 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -26822,7 +33775,9 @@ dup 67 /C put dup 68 /D put dup 69 /E put dup 70 /F put +dup 72 /H put dup 73 /I put +dup 74 /J put dup 75 /K put dup 76 /L put dup 77 /M put @@ -26833,8 +33788,11 @@ dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put +dup 86 /V put dup 87 /W put +dup 88 /X put dup 89 /Y put +dup 90 /Z put dup 97 /a put dup 38 /ampersand put dup 126 /asciitilde put @@ -26848,7 +33806,6 @@ dup 58 /colon put dup 44 /comma put dup 100 /d put dup 101 /e put -dup 56 /eight put dup 61 /equal put dup 102 /f put dup 52 /four put @@ -26916,65 +33873,57 @@ Qx ŠÊ"„¸Óªï©á“a¦x;ÏY Ž`³m ÷±ÎÆeòïï©"bsàiq>,ÄZnÊè›3æÂŒeÐÌ(¥±gÆØoû¦¼ =$ìRù·ÿŸµþܬú¯Ÿ'âJ:cjª3¦‚f2 N’µ:3CC;OÊv"<ȳA?9=¿Ô‡a’ÓÈ{úúMË»Š¶ö&}Lænu¦¥4ÛŸV[Ìà+.¢_…bê¨$tö«1ê.¶}ÉÖÇÓÁcÑü¯{ä«<<›vì÷ܸßÌzÖô‡<ú Íñ–ÈУÝ9ÌrÞµ"œb‚t¶™Ê˜$yéЪ֡Vì ]W–ÂÖÒÔ>£Ýã0žõP¤B’·W*ZCÉÆ›ŠOžêS€ ë0³é€Õaº‚ÎÖÀºåS„±5Ε÷-}7‰‚ÔÆÙ-Á›*¸IC®{1ȹ†AŠ˜ßZųä®rO‘(G n˜6ã¼¢9iã5ßbDýN÷²'wL å,²j"•éWv³yMÎbfv›¹¤ù&,Õ†H®†ƒѶ¼G[‚f…íÄ&“©PÀx¸´&Iš™ÿë¤i=(Ë— èz:‚[} š$êú>ÖÑ]´¡çIlv®yPôÙüdŒÓ[‚tºzÑwä;Ñhc¥9–¯éX S8ì{‘ÕY¬J4ks¹ð'$r+›tšý‡æ7)„)ßm&‹LWÌQÔ ãL7“)­³gö€·†×ó‘Í‘¶".ˆÀ¼ÿf ˆE›ý*â °MÊö‚:7¯õjm›˜ ª!µ'¦¿3¹xÄ<[r îä«ënÝ^™sºÉ:Ÿ^—M{Ã9E“Å·ÑÌ8ÑÝBãt<ÚW#ë³WsÛ 3’}Âæ~]ÏNAýÑx!¦ íf”ð%þÒ™ÄÇø°wÂÙ €ìˆëÃ˳,Ø¿QÖ㌴W싪ËŸ«D®t2ïó̉BŽ›°¸JD_æ£ 9b ʃ>,Šw©v­¯Ëà0ÍüV\åaµÙ4ŸT2G+¨óÿFä]ôÍ,Ùšå]©z ~aŒ1›æ›CÑÈãÓJU­Ús/ 'Ú«À±ÂìÅ“k‡¿[ÇM#I8ߦò’)¨´óq±U‹ë$ÔràCO>Ű½âŒtMÖý>úIóªŠV­Ì&¥õµ“€Mi`ªo k¾Öà ÊPìÅ^\ âm"¹eð¬]V¯D‘¶ÄÛ7uë\£»»~ò&øbÉìÀýOŽÄñ4˜ÃtÞ¡–KÛLôÔ¢”¸‡N\™-ÎvúaK’í¾D¹­~2W^€"á‰Ã¶¨ 8Y´JBX5Ó"ݵżo¬v›¹Föv$=MÁ ù˜}( ˆ7@L»&5ƒ|®L£.`Aù@‹{åšßhrÖõ£Y´i¸(ƒÀn$›we‡Bp{o‚ÙǔԽxM}TàÛ}Qž¿4ΑÈq’vÛBäfSDžét•Q…ë±¥t»y%שÊÙå &[ßÓ€£¾˜çIB¿|ÂBî8™ÞLMGÊU "ç<ÿ¾B­tbv@¢1~8·„[&Þç>6 ¢F¦´#˜‰_•Õñƒ¹>,òomðq™À;¥Fo£ï½w圠¼ŠGÐÇ€G+WÐ¥á©øQ¿®„‹wÕ]é6ÆÀ®:l¹‡áÀD-K¿ ¨!”c‘z¾íÚsVriÔ)¢˜tå8lÆÑQ€ÒC·+>p—o"Ù•ÎaÛîwÁCN“n“>¾nÊ&ZÛ*ò~µá¬ Bb”„0mòUsQ_#25}!ò¬PršS­Uj®ISDòÜ®ià•edð÷;#ëà…ñÅ>¬PîUþ^2w7©y…vÉÏŒ›±!nÚ{Éa•|¦˃{ºŠöÁ/9L0’€P+%[sÖÖ­ 
#ƒÝSôÑ_!’—`%J/v8G˜¿X·v@ß;#b!O®­ßs~¿ªa45LÙ×9߈6ÎûuŸÿ»Ø&ÆîÒe˜h_GrèópÉdv´œçöÃÎ<‡™¯„ÿ¢J_?žëqÙ. z¾‡ý„™dyoŽ»¢(áw/EUÁ¼€ýäÊý‰**î-|Ç ýXL$ž—†íÔƒµÀõÔ»ÉÌ`z -†Å#ÃnåÔ y•JüÄÿKÀt7Š7Ó«ŒÂ6bk^1bÝç -Øy:šM_£Á­ìvÿ×BènÂÖÄð¸QVECÉ:‘÷u‚þ®ÙÕ®$PO.’%S¥î™Ô Õè¨kéÜì€ÀE&Hõ=´Œ<Æ­ª¶4i Ú­07L'죉úp;™í;IàªXbÁ[¿-DÞ£CFi¤ÂµÑX°”lhÓli÷,¹ÂãŒæ¼¹öƒº®-†¯S¢£CeÒæ`¦˜EÁ‚ûï…I`ÊÝqv‰˜å¸ÿRMáSÌ@Ñô+¨Ë'Ìí÷>ÿQäe—ÆòYh…™@ —w¡Çuæ¢J@Ù]ÐT¶ƒç„J¼ -WÊ€«N’qžáQiS}Q”§ÜrÖýµ¶2yÁ¨]X’aXÐNý™.ç‘8Õ¹ýÔË×Ãxäpw‹xñ鸵º…ÛÓ4‘ÛûªjO¶Y‡æ´÷žÅ Ô«DùŸwFÔ>¥ß¬ì¾¥Xó„Ü'±4µÕ¯Ö›B‘‹2®’‡ _n€¿™ŽŽÕ#¦.ç¶ÝEœA{Sÿ›…ð*µsßcÜS¯ -ˆ\mãY.n² ±š\B2‚áúûE†¯ðOð<´_½˜ø”&Ó6Š ÇCöN&Y‘Ï2·1h I¾’Ü-Ĭ±ÝDœõ§œ¸&q(ã7q{é?º:9jà„ÝA‡Ná+?>í~Âx­¾rÞlt(+—–ƒÅšÐ ”ˆ3ZÌ2 ÖJ¶ÍÆïv8ÌHÔšx‚ÔQš>w$ô“<èZn:¤Cb¡•à„ËȨx]ªøg¢¢EϸÁ_ןž©Ím„Ä(célšø0/¸µÜü*•âMžÐ|‚PF–ñ¨bK—K•1O¬–Õ.åµbyÔmñ2&Ö.@ µ_S»¨0-hB#Ã+½ýÛ¡f2œ™°=žœ$É‹¢Þ–KŸ_Æb`,¸Y–[å„©´l„(hPú\|ë] -ˆöÄÀ*øcN¤À3kéìÓžÇrzþ-¤µ¾\ÿWÜöh« Xa:-i~O#kyö®Gд•¨ -JØÖ@;hÉÅ-‚ /ý‹F!?*Eô~öh2zû3 -µ…*Oô3£a½Dôu°Y[ò‘ns± ì]CÝ)uVŒJI6U5º0í[ц§îډϙñSÙ}›oÁxv¯²I‘lñG)ͫݥWÆÓúz=UXÅíŠYÎk¹×_žÿ’6]ôäQò^bw8‰SŽægý!rõì¢ÅQRP?B¹}’} -U#Œ*÷í@6,­=£´.7ÔØMß–ö_ƒ¸^bVVÅuÚÆãšC‡¡Š©B¥J9œä¸q'CvÌ•ô$‡ž>?üp¾>ów¿žÂû¯÷\ Šs$röHêv,¡!1 ±ŠŽ$oQðRT¹'µ'½Üñ`WN7ºÛ§˜×8I$ÔLy èÈÚ¶Ûú¨ëTêIbSYâÝÅUÔh»ãô4÷¿‡Kk‰ˆ Ðiº³Æö®/]²"–gxMÑQïXÔpwâpXù4Å͉‡[ -üý|6Æ-’L›0Q?Èœ¦»Mn–<‡=xJKŸ«ŒÆpCð‹Öƒ§ï³=wdÒ‡SÆRU^ÒúG¶PtöÛofÕ48¥—IÓÊXL7ÍŽ™‹þfVãH -;‡½ÎUÄ:G25ËY‘u¼ón”ÓÞG³¢}?æÚƦh=$z~]YIÑGy4!¦WJ Ó6éì™HþzWöƒ°%œl{Ë»79èvSäØ5[ìÆè¿pWˆ+INV§‹Ü¹ €¤ƒ›Ú^”á¼0T*ìa±º”Ÿ³ìàKWÄ7_:þM²Ê&,~3x?O UYnÓÚ-¶Öé^Ÿ¥ Ê$m¹ß‚¿€ÎÓ}7`/ëA"ì„Ø wY¹9¶¤ÄÅ>‡ÀC‰QRëo¢}«ùÉį€ðeNÃ_#ñ l_GAŠíµ Û¨š… úœD…¤Ñš+Q6ôù}²((&¦Ëö–U_]zýWÓûLˆ{úìªv4XW[H¢ÐÜA~m[Ï’«Ë?òkõШ™Èw¡÷õ·¾‹òÎj—µû;]‹Ve§òˆFÕ0ÿ5ˆáqº*kpú–tùCçìz½KÍ/È÷pè ºíÏ q’¶1þ#ºƒ2•kØhA$yå´4>Ò[hÏÿYã8 'ÿ¼ã2Ï•®>×Ó·‹"Ü2(p³!æÊMºÈ˜ë@4ú9'M.2‰¯£˜t:+0 I.û -ï©Ú&5ßq¼|úWÓøî¯¨F†©¥¼‰+¯º…}sFaØCžTü¢‡ù” ž’xÔÌþo\£›{´`¤—íq=[f0‘‹cb%@ê¾NV!nñô“à·Ç½:jN-{ hî!ÖÀ-E5a?fÆÈ’XåáwJþQZÙaésTþ£:ï9ûõûŠ‹žµ.5Ë`"dè˜p´òŸ3R‹Ç߯µp~HLÀ«ÀõRhëŸvÈ6bQÚ´ö§?v*^¸ù.¸¥«…ÆxþA²Â¾–q‡?øÁ,1ç‹QùùÂóe­Smla·Šëãq‘ÕÈ*Ø¡õD`–쉃cµú•p¡~|- öÃ1}[ªÂ)þeÝë›ëµiI"•4^nÒٞݽ‡ÂŽå ãúSü§}D‘+eÞ³í¶Ü'¨Í`ßòó©Ô8=Ä>‰uL¼é”AÔ ¡:ÙMC4 ~üüZ#×ÓjWi 
”æHXê‹nDКnnÑcàÈ;q†aN0ÙÙ¿Û«H†¥MY&4!® -ÕÊù§:’ÕÛïqg¬í4ž E*u"jd›ní {ïD?wCp8ÎØ"'(î Sq¯%IÔò2œÑ7c½þÍøjY€rPaHǰ±''+½¤¸?gò®Bo< -FˆžúìÐäÔŽÉ—t~V¶aCqÝá˜ö*UOÑ;VLñtv€æHô±‘¾ˆÀ€¾ªŸ[nqðsÑÖi¿x¬8óê1@ûZ¡ÎËwS9.ÀÙ‰¡(á(Jo=Aƒ¢o㬦5 Àó£˜Äÿ´_câlã(©–‹Æh$÷¯ÅUqB'„"VL¡„ϱ‰+¢ZÀ ”ZC šáký¥“òœëô¸ÿÓè*£Ä f2AÌ3°$¶÷––yk& ´4C¤ÓëfÞ×Ù©@2!à]JX»;!ÛM•÷²,§Ý–yêÞÌÁ¿Øâg¼UQw1ÕMÝÉÌw¶ŸŽš#Sþß)vŠ™Ì?=l½úyZŽ -¨W¡!`™+Ô%!³@îUê>W¢©Ê1A7í¦8 –,ïü¬ð÷|âðƒ[œ§í%Û-íX7‘|*ç>åÛ³êØùå Mræ°öÈÖÃ3ÅF?ÀZ>ÃïÚ-ì“xñí76àÜ(Áë=Úµ'N>ü/® -3ÖqŠLT>ß6—Þ\]´ª³é1TÃØ g‰ÂZ—P$æs^îVUkb? ¤=ì€úš"ÀΦ™pü“”^b~ŽÉ¸ÛmwÖMu¸Ldͯâ§Îú:' áƒ5­Þ•+T¡“È6Æoñ;fI­W¼šúu€b=âJ`H‘;á—z–ît€žhþ Ç‹„É,.ñ†èl†‘6¶nÜ$y¦W¥J?"øš&j^d9yÌD¥£TŸ$ß´k\˜fŒ -nÝùGý¶¡&CO—V$ì -ÿªžQÅ”2QF{77`¼W§/o(âàã ¿ÌcfUTLaœðûqÊx!1 .ÀÛ SQ“Uô^È’³å‹d*ÊÝ(u} E&$ºíHŒRÈH[¦ -š̸ì+Û”Ô]Š–=Áï󈮾â¬ü«3d?äéá“„7×&Ÿp/jö¬Ýõ°ÜùÖÄ~– LQ4Å7C“Ïë!¾(^Ð- 8í<(âóSN7¼©Ð1që·`èÂ[ÿ à©Ïo,¸ÜþA -9¿%ãb.RUÙŽÓ„¾MúkéÑÓŠ¾16•FPoÃc|0Kò:E°W†óòÈ`•”ÖA@Âlºõå‹ /ê‡0;¸0B•ýB‚IçÀC©`v0\bÛr,öÁóIÁ›vÅð€”’h¥ð•Ýœ•ä邞ùê%U¾Yؘz󣟜3w[Ë驪Ï?%s}ôï”SËbÇù­Ø\W‡²,°OÚJ'cÍ'ýÞ©šƒú¼Aý -§Y³‡5*Òì”Ô„t·v”fè†MQÅZ”Ë:Hï%Ý:ÐÁx)F -^¸·1Fk5%´&ª›••ܘÇDX“µÒR½x{ù9å‰çÅá&¿^X2»P¨÷·hí¯÷cm„l%r?cõYV9íêBÊ”áÊdû¸×yËÔ r¦ËÎ ̯ýû7¾ —Â××XB¥Io«g™Þpí}"µ— ®ÍÈD⎻ÀÄ„1^ä+ȼÂe*åw; Æ¿§ƒ¡ÑsªüºçÀÁù›'Òæ#fúò‹@5>#Aû 9 -D+îÄÿžÞ|×ZñxjIo»ÞtÇå+ªMƒVBŒ7‡ý®‡N1j©P?ð”NV^èú»Dé³ã,ï~ú#¨ã<;:y~æÌ6¾â™-©aŽ¼ÞœÏ¢Bˆ¤œ¢;¨…}öèݶ?ä1î•Ö®$‰2äx')Üøm{»¶ƒ_ÍíûùDT&›%—óCÔž÷”neÇÐïöBÂÂOÆ7Ú3µ¦wðå^rg{©±³QÎç³Ðç=SD®Ùå0T=DQñBoÊæWí›"Í--ˆg¿ù·™í\ê Å ƒ4 £È»¿Å‡@¦röT«—ÄÁø[4ÿs™7_-yÍYŒ+&eäe¡ýÆ3“ôuØÈ|TfІÈÒ+%‹#mC t<¦™m¨ªš)Œß‘SuôF?Emc Cµ> jœ¡˜£ªuê1Eõj·£*)º¾Ö ɧ뗂::°Ë…oN¥ÅðÌL_±+bmõÈsÐ8iŸŒ¶yjOë»_þ¼_r(wæqË¥º”tÌh.#¾rPí2ù§^¹t5Ù­s”¶9{“¦ÿžŽ±¿ðA<1Ól~ë;̹ À¸<`ë¼ýI:Âìb"èê³LKâiÑ_Úïy²–_@³²»œ¾Å–^$n Ç@m;š®k2ÏØGY/Üh|°gås{\u½m[*¢¦å=œÛ[´:â…é ¡cc©¨<G'´ úÛ5¢KèO±§m kkí„sYdðsU±9ŠH Ø“’’Ÿ‡?D÷Ño¥Ç“S:;10ÿø™ÖxÑ(n¸Uœ¨úÔUƒY«S¹É${¢]”ÎõÅxàWó?_Àò»mrl>%J¢D‰€IßÖ3ˆáÝÔŠÓ9@¦‚½±TŸ[U_4O ¼ B»ÅWOÜæ, ³‹ÙCüé¹äÝ=\MÜÅ ðâï:"5±k­©ôö·'úšõãÁ"éñ/ÑÓW5sWxsvZ_fLQ Wbr¤“²(£ñ#¡¸‰Ñ%’õ úåRÒ” -+û ¶‰évß'ÒwHÀ$ƒQ)¼—4dHç¶ÊïöR4C_±|‘×έ1ðÁ®W¹à?“)ì3}iWô¹¡FÓY@:‚©k„“Ä{_ÏSƒéŽä{C–A«XzïÍ+Ö<ÿ„àÌ$ÀæLù1^émZûm¸ ¸‰frÒ0¨ 
o{Ã:>¾±¢Î`LáŽPøñZˆðH»z‰cˆ”$¤OƒÃ5Á¯ä¼BW“?˜o¥Dø‹3)è0 ¾"WVñ˜‡ƒÚ¥çå§oÝ«ý÷¿"Ùð®xnð4â•‹&Ðñêו/€d;Îꯆf3Õßó·­¥™A¹í¹ÚnI§H·¦O•¥‰‰K°ê áP1šŠ…-Ë2J[p™]îO¯µõüwÉR¨HŒ,]§¹‡2“* €ú>”î*{ ”¶Š­‡w¦zBç2=QN (;û"Á.·;Qð^?¹Q7… -‘í&5k¾¶š:_NŽqëqr4›óðŽ;0D 7eŒÒÁÒÕ¨*gjLÏ»éÁ#€%šfó0Z$1¦;!N" ‘¢sL‚úpÉñ&¡¬›_ƒ{P¡¼FUÕ`=´¶ù$üØ §Óñ±mé¾&Þ‰×ãåüVtmL÷ã©®¼VBÓåiÛOêÎ?†é¢[ZPFMv'ÿ›€ÚVZÎñœÑ•Ö¥™ÙÕÍaàIà,$i*øn|ÈE+N0VˆWük—„]âtwhp°Á´òç–íd[Žíš§hpSô–4¦ø_zW†ÏŽšaØÀZ{\·¯¢ž%tNVÊ€“Qüx¬W°I*i?ß­µÁýßÛÛL5W;“Ùò~6oVeÁ€Ù¡gdãcY†ùˆ¶—Úß`¥ý«)^]7ò·üžN$JCBÂu:³¥><ؾP(á-–«œqƒk2º àQ†Ñ‹ÎFzi…RòaN^¶#jŽ…äÄÔ9aÆ\ÿ,ÙªÍójåè#Zºÿ7Nø¥á„;¾7‘ó´Sm§G;œ©tËO¯Û¼ñ@§j.½R[ ³n4S6WXõ4æá„U7Û_€mW¹°žáм_óŒðÕtk¢Çt²Q0p‚‘+äöÔA·BÕhŽÑ÷ËL›âaÆÆïñ1ÌÏ4P}ÎnkÓ!ù³Ó´ZZåÊ‘sTÍ{Xž…¹Öâ…f†ê\ òÆÈºm4;½D=8ôî¸~¢K0çúfîv±Gù{”ˆQrC½ôbF¥ÁýZî4Ùìs&¨ú@L%Åp…ÒlÜSä`&;87ëz>6æ«§›T÷)F/[ŒÑÊÐP°Tv£(ë§ þ`¹¨¢49g@Åjö©òßñŠu>M&AØ«ùðqK1ŒÛ7¾f‰B 4#ïÓ4÷°ª€íÖ‚g±‘uÖgðs×=·#É'Â&WÛ^2,Ÿjªb$³¬ë› ùõ„v‹L¼ó”ÇÿêÐb’!¬-½]ù -ÏÅt¯œÞ^ÊKöuBvp ƒ`fácݪõô\9!¾{„þL ëIƒ14A€ð ÿ -6q!ܾö5Ï\Ž,éŠkÿš­ÿ~mWùGk±}!Ó¡¥ƒn-}6 Ð.z""XCð<ŸÔ±¹‹HÉq¦Ô4ÍÛ‰ÚÍ¿’ÿœÇñùñ:æ&d°±‚øUýŸ;æ .‰wv•%a¿Ø Å­¯»Š"ýÆ©‘ù”¿Êoᕹ‰ÝûÝ­Ò’R0âƒ{\Qf)òF<Žt7@²p™j c.UPXñuÏÓuMe/–¤žáÒÛ‡âlä²½üø½2cïòPÊcíb!&&˜¹?z;ËZt®°ëŒªíÅèØáMÜ„÷ø/UO$>‘«ªhÜãŠuλ<ïÊù‰óà}a¶”§š6Éø‚-‡Ž‚b£{O²TÄV¬à+í噿äDMõ_ܯ¡Ó݈àm’ € ëþÙö ‰¨ÑLlýÁòÕª™ [0/l|µDà…Ìò(¹ìáœï¥AtZ’ÑVÓÔgÁJüM‡ˆjk·7ñõøÚ˜¹Ñ=¹ÚKÈÛ®çÙ˜øl‚žš²­Î‹E‹=;aÇ}+£jX;ûô*´…Š0aÐ¥ââÂÄÃß~¾YDŠ ›x³A…S¼¸ÑÝò‰‡»4áúkôï­Ø¬K$¨Ý„â -4ðbú“»é].ÝÏ7ãÆc„H–Wòíbe =ÅíÁ ™1}û~£âgb÷œ·Ö5Bãcn"-£¨ymƒíðA“Ù¼¤j³óp@¯M¤Ü`ü/[ÃTÊIBŒæ’üøÇæ½Ì5x²áòr•‚:–;¥p -¡ËÄVmž?ÝöÀÆ^å-wЖÁ_SŒ‰#IÛé.œ›Ý½Î‘ð‡ ˆ™¦øZνÒë°Š·JOÄ­Ê8,¦ž„ö~Æ‚Gò˨µjŠË×£þ¨Eç”dvà1ûœŒ!Oè20̨ޥIÂ+L¾NŸxê‡:È ÍÝ Sübó‰8žk…Ä9g4&mXQíG lÅF+÷ÜýdÅ“C¡CÎVL÷Iv ƒ.rÝ÷¾ê/1d—Vv—¡‘£‚æaDp sòà®R_ ON1ãsb;¬ æýÛ7}N¹fÃÐ4J¦Dkg‚DÝGaaÆ1üù¦ˆµ$DY£ ÍÖè–Re»ËׂL.½Mýìsê{´eHÅ^g>(+1~žþÈ÷½(¿»­Ñ¾P¯¦êP†Ùpœmäw ã®ù¸(*ÏÅW9XÝmÅknr•æI9‰òõ 0º”mš»¦ÏLtû€×:ý=PSÌß³tʆæ/CÜÃ#1ÓÓÄ: -$äÕ›Ä(ŽÀ‹EW” ¬Äƒ\^ó½|`__ú*™±h¹j¬ \©¬/f©;&$'™h—,5gÁ4 GïzçiVWS#ó9ñhP¿a Y¬ «ìÍÈ.…¤ˆÆŽ –#eÂh¢öPqd('>öš±æ,}ä/h=NÔj°›æÿs­¶]Ÿ¸€DŽ¶Ð¶¨h–†p¹ðízk@hO°.䶃7-¹‘TAÐá&´:Å´fÌá ¯¹5‘ŸTþJÜÍ.îÏ1ôFB”ÌgdD¤$‡Ðt,ƒSÊŒ'®>«``ó|¨ß˜ŠÞÄ(ì /Æ~=wpsç¿å“¶qd«EiNüíŒg@å)5_¬Û 4Ú¥ÄN õ 
`ó*)†1ÐdÉvþ™}SÓ©TŒ·O!ßZ8¢ï €¸È¤OPý¼'~êSté~%>™¥§:Ó¥=wè}¼ S9l„VùñZÐâžå”¶HËQ,¼©Šˆš«·Aœ ©å^tú¡/ìÀÓn‰wsµN 9ûaÞIŸÿ™‚¢ ®Œg¢~Ú£$9pµì¡›ö 6QCñ—­J”Aá…ÓÉ¿ -ìÍëγÃE¾6=“?ÚÛÚ%È!U ƒhø;‹)`“t¤q|â§(Ë=M)Y»®FŸi¥ºü´ñ»¹ i{^™È[Å£N|.$@Ú9\P‹}¼š/¬¸DâEû°T é šŠ7&y˜0x›éÌÕÕ×½ p~D`[ -u€/3Ðå9:¡º”ü|z—€(i­ÅI"ÄQZ…—‚á•X"E¸bæÅlr¸†=qX¤HüP*¿UNäÇTA¯b¾YÃF[, -yæ ¬€Ê?0-š7oßPÿ°ßÞLá Á8º‘Ûh¥}ažÚ›ÏðÚUü¥£s¢"Ñ8FW_´â‰LÞ(®Mù1®ý4ãe6D¤:¡žÞþ]h¡tó,èò @`ˆ¶úœ'Î2³f‡‹¸Ç?²ùvn½º—¡Á6–½@P^˜Å@mÙßÂnê¦ôÖX$µ8I-ûKŽ·A6B–4ävg`“ØGlÇÚÛÚ½g;ÿcû€«¿<­k–'ÂQb#U, ŽÈI2ÆWg] õùé$ýâ½’DKa°‘lX:úZ¸àþ8yÔ¯þ„˜«–È-îKÔ$s؈B˜Ðks¡ù¯ è[ ß™xŽÙ4ðÝå%Ÿlyé LåLD -@¦Ÿ¬&†/ú–@3†%×¶9•£éÇì˜çûÙ Eÿ‚Œr’—ߨÆßKÍoÁU4 kó¿JŇ ’h-xh¯Õ?ZB1g&눬‰áÌ]‡óXÇØd0LÊÉ>v¿rËŽ¢¥É@b¾¦â¹].`xD$âYÇ_kµ,KÄaȶUŸó¯•¨Î[á£Áúb2¤¥©·ñjÁVUà£'b±Úûv~^zÃ|óqÆÓÿŸõeò½A)!Ü3= ÎGe{!£}ÔB÷J@vA_<6]LÓÂHÚ ^¹¥ÝJ޼Bø‡S[û`ƒ§4I<˜ÍÜQ×Ã;-ã7àh k h>6F”·OClå±nÄ/‚Ï3‡„è–Ó{Û~ßš®9lûìû\*o„ÃÇ‘ õ#Q¤© -õ`÷}ßy$êˆñ7Ÿ±Ã7Êì1Ž™­Ë׉H‚*)§0„ü8;¨ºñyé€è -HÓ/ƒþIÜ,%ÚY);Ÿøõ=ÇGñOOž#€p1);»‘ -Ø£®O_LJ»%²¥ûêg’åvKãý­t…Šfõ¬Ýj‡}殩Ã;TΠ½4§ÀËö~eP÷Æz«üÈ­} ; ¥È“j¬R£Z£¡)÷'¾uZÑœÅ3a€ÝövÚ¼˜c8'éãâV·*£á=‚³o”Ðõ&l)*ÿ­b¶S_ó“ÔaxçR·Ò~Á€D‰ñ•8ÔmX¹GEnßöëÄüv¾ -—ÎT‡¸ê[9Š4m®X’;³é!xŒ¨»cŸ^«}›Š™°·.™ -*/'%¨9„s…kÓËogž‚û»€DA(.†/9 %:Ø(P¼árÈP2‰(õå[Š5é°þ Òl¯<r2 –yË'/«¹Š=ä<Ä81Õ{ïƒWA‰À­Ãµ)žTBÌãç}ÒÎŽLÝ_cN?¯ku‘pcG<Èúbvt¾£`¦ ¤Ÿez_Ê5½§³?Û§í;)áa·SÛjÎÆý¢G_,“åÅx([v €’P’¶iw|ÒiaIRRâݳR©Xu%iò¿cFHˆGT€?.=TyØ"+$Wög½ŽB\ÇH™YP~æ¶!UØO–¥qä<ÄÂãhiI$È!ø½~h_® “ºÁs†Ó`r !ãÏQKÉ­cÿ4µºòáÐãvÙx?ο'0Øý4×t@£‡DXÎJÍ›‚ãüÊ‚uKyÞQZUTïêŸO‡Í¶Âf¾{k(ànL™2Íøàž‹ËŠµëqéÚô\ùc°T.ë TÂOP€ÓEfT×ò÷g>9•ÌÅí9÷%š;Y|ÔÉ>ÓQ¯\!ðc¨èæëSý®H1ë1ú4çÜ00 ö!·=ê$R'ø¦U?®p~†™m¡§ñ±'´v‹¦‡Ñ( ? 
]þÛÄѽHwp Y¯¦ÁGƒÅüÜ謮ëâ²Óôü -endstream -endobj -1974 0 obj -<< -/Length1 1494 -/Length2 2555 +Qš=>´óq±U‹ë$ÔràCO>Ű½âŒtMÖý>úIóªŠV­Ì&¥õµ“€Mi`ªo k¾Öà ÊPìÅ^\ âm"¹eð¬]V¯D‘¶ÄÛ7uë\£»»~ò&øbÉìÀýOŽÄñ4˜ÃtÞ¡–KÛLôÔ¢”¸‡N\™-ÎvúaK’í¾D¹­~2W^€"á‰Ã¶¨ 8Y´JBX2UË[Ø0½låÂq°‚߃ýãÑ‹ˆ>â›ÒKHäÑà>sxÀ[bÜÿ§Ü¸÷ÝœYS«ú8–‚¹ kÅ)àöÃ!~¢w·öÅeÒÚmGäà "1™y¼­¬°Eïj^Á?5…ä‚m”H謥kØôC; Šíjc¹àº¢j%™(V«½qŒÕÙÌë½}óŸ‰<ü‹Ì9m·h>þæqóU’⌽"ýœHXù|yЄ’¬ãª><žòê%© -Ÿ˜‰»_2†æ”0Ój]yçWñ +ó>ÿ!ÙÔÄ‘rì,ÝQ?z⺑{@Ù$í…dâÀôÞ}*ÙÊE§J%GÙÛ ×)>­]6—ù¹÷t_ÜoK§ÐXBè½i§ºÐþN·9޶T¥HOh}/99ÄçeéÅt¤sF¶|IìPq^‡ â¤Ï¨þêÐÓ›‚ø-­ÇfWÀîzžlìsÇ‚}–Ë/Q­ +ñ*=?áá¥ñ(m¹¦XÄiJj~†¹E§=*tÇeÐÆ˜!Ö7sàN•~Ìu ´9”#Âs ÌmAǃ 7Rà±a®yIB­uÏRÄ6Ë«Š{‹.z/xÞ"/oÕkýEßUw˜Éà5,ꯡàª÷ã|‹ÌÑ>d¥‚,ãwky ˆŽîJ‚d-0ö·â<äÐ:Œ'^-ÜD®: ;vn„Å›™ù.ñäp§ óê~AþÊì]±x 1y=ÃXŠ—œ“;Bën ›=D»þ¸#&J“TW òÓçߢTVET :@Á •ÿÿá:•)ô ÖÍ\Ê®vC¤@úÀŽjhXÔÉy~A·*óH}ìw™¯4lÛÜu‰Ê.’ß–øÊ™#2©rAO4óøS"Ø×ïéb—.ðnë›ÍµË,MiúÐU~ ~@TîØJ麜‡q"q„ìâÖ$ ¥î÷@jM¿ÛTºn[©‚ ‚ÏÊÁŒd¼J±Á„&Øê ò¥F¾mÑÕAEBôcò + +¨ ɹ)ßµ^ÅÊ>ÇS&Ž(Åq¹RÿÚ¨[ȹùmhÚ˜¹¿„²q_~_ÎÜ¥¨b±'cXC9íWÕ˜èz±!xmp¢i‘3jà0AH1˼D\K’Âòe¤»}ÿ&C¼kE:(1ÏrJ«fË…/3yñǺ¯›V:‰ª-Þª·“a®æÐõT˜ðRÀpï.©Úóev>IJárpô:½HÜÓCpwðr«ìIÈ»„×_!„Ç%>m(=A°“uhAðç9ôƒ)å~YWøö ^f©±Ôκîä»—©a/ÁÐÞÞY6BNÀŸê.„©®f{áÂ{H pDuf^ÿbÜi8û Gÿw•ÎxüúÈ— ¾ ·¼é,¡Hi © +M2C“ÉÎL™)Åy.ù)qõð¢?ÏÀ.Ø=û­L©ìN¶ÿè"»ü¯Ã=&g«þÕŽ–ñêÝ»,ðÝ©×CÄTß¿•¢ÐEºîâfôJë—/ž„7¼Ü¾À·øafÆé[=ãíN>OâIR&ÖÒ5)v}Ððȸê{ý 寖â‚ÉOlïKµýDüBD}û²VeNpü¢”læÿTœžæe–I'àÚ=´T±N:Èý-ÏŒp <‘ªQ€ŽCÍm‘ŒŒg2e†rožÊ\›«úÉ2³^Èz[jx  Žêu4³ì…Á¡¥Ç‘Àì¾ËÉýÏÞ·¯n^©¤yÒp\`V +¬‹†è°"699¸ê¿-p;ã‚1QÄ|íº®>ÇPúàÅÑáZ½¯à±ˆ^rƒy®ßL}Í ƒ¯Vè_Ÿ¼¤d·7ò ¹Îóh݃mâˈ= G¦K&=ûïšÍRu|2}¦`ÐîE8¦ Ä‡NHn`7³ÛÏ•Z¡Yúší}ÖŽo/Ñ@uäa_­­8÷lzÌæ£˜ÃËOn{[k;a&Y(ßI¤ ¿ ‰ðͤ#Ó]+Ñ rIÉSk£Ñ¢u)uçb<ÞðÏoÇ[J«Cm«_2Ñcc¥YEïe”æ2û.¼ ÝþeKšœ©± 2íçZ†IO˜¨2Š-šò½/(Ñ=3ˆÝé®!¯å¿KŠÐ¡ãÒÞœT˜DLJ¶©S>cÎ%øyBjë ㋞6Ï¡Ò\¦›YRKÃݽØLýpþvaÝõ¹Ó‘î,•…ÊÛùÞÝ ¥³žýãŠ}=2m¸­ÙeF£ÑÛxÁj=™/kKç–oÜ“ŒÊ0Ô(TCgg´ÐùTiƒT…œæRî+õÄc"¤P©O| ÖýNŸ;ðjÕ¬†¹Yb˜~nû=Ò¯m<É + ^[Uà¹ÐBÞ MHÓi£q +W Dà©®3öwÁ+»TÂCÊ àH“•lÝÝâUæe…v|³gÅ¥l3ŽÕýVœêD½ ™=ZBlÊÛ¥ìüŒõ/Ú•I|Üèõ‚m´‘…ZàÅ=™ºûÞ0>ŽCÛúå”@!µd.7 Ÿ»oo‹ÿÓàx€Ý +XèˆX¼?âsEï(ŠÇk_,6T¡‡MO·Xp‚jÈ”bOÂÛ@HB?ôÑ‘åH¶ 
y§wœpÇÛ‚ßêC‘(Uô¯íŠpWï^Ð[vjÛQAi1:–ð)d+w„™\ÑnEÕBûzc„ìnÕ¢CÉÔªlÃÿTª7/,DRmÎDä4Bú]õž:õ +ä®™MÂÑj°Ÿª¯ö¢´0lZçËA»Þ Ïj:6n¡¢°…6%¬åNBŽXèVX%¶©£ÍÝl?‰­1P²1܃ÞÃׯm£ìMõmâ®Ï!`f;É”‡NsØ'~ËEnr^•›( #¼½ÿ¦‘u¨÷$dµ*ûÇåçÍͲ¸y–ýéay΢sÊ»l»1G×l2†]`áq„u€ h4y€´åýV©ÓЭ$÷H­ cq³«½›T~ôø»@° —ãÇ(+Ã@ˆÄÓ{ùöì<Í \`…¤Gg“YbaîÿÍ$Ú¨ñ¿ÙÜxV’’+6îCüõµ*ÒÝ©˜ª6ðiZÞ†ŸíQw^ØçPã~Ô4¯`q}õÔÃá~>¿’O{DÒ‰4¾(pD1šüÆÄ£"Z‡²³–ØÙ!)hçÒ (X>¬TN!îo‹Î|Æc´ {Ҳ着3E—SËòlÙ6 n„Ló-·!µ¹ï(b~EyïÉ@‹×²D÷b#ÔTA÷Òé54Qœ+­½h”B°§öÈßU¥#ê|î´¬ÕÔf5J¢ñˆÙÅ®™œöË +EE÷PC)Ö¾O$l¡úõâÕdrw¹Îsùð²y_týæ ³cýÈRÌ COÚñ•ISýu‰„Õ¾˜ôâùä>O$Ú–|°2ÅŒÁwÞžåp)ª8ÄMº ËFµ‹ª‘Ø ú @úsÃûýÎ_è@¬b:k9êe‡µõÊ8¸¹’Äœ,¿ó,oG ñÙôÆÜ‘±˜Â”=ê‹5»px?«Õ|QÎ †Î4Ù§˜éWä&¥ßÑ.hO¢$F j¶ÞœšÞe„à1£ró³£1ÉÈšüÕѪ#-ø²2G3:šQË3Ù5$XÏÕ¹ÛlÙNK]õ‡ˆÞú¶Î8‹ië~–§ŽîÒj*ŸÌ®3’l¸ÈÕƒ^6¥oo¬ñÃVÐ×ÐŒî9#Rð€\îäe?Èh‘5»C"Vçq^´SoÍ«»TwØ¿BI¬ìË( 6`FKFÑžgŠ…‹sf13× ;¹hÚ‡ÂÝAwòAŸ“Áž`Àa4Ùó$Ì/ó0ª +#×rÒ¬|W¥`+ÕÝÀÏ:๑öìRqÉ"€ð-Uuüâ[! ãPõ ýèkewÔ@ÏìGêS8å+”³‚Uåg‚vÀÏõ·m‚Ä`G£™j äýÜÔÖTZ'l_äùgS‹ÖæO5Ã~Ñ‹€'us× 8}†³äº–Bâû«‘×Þ2¤—U¼šÁxI+<êÛÍÁüõ;€"'è(¼²eòឣ͠«+Î ò>©wçG{ú)œ«LKǾ™¹Ý›’š$2 ÏÑBGFÓOËýæR3I"^kŽˆÁ‰µýOÆ’W-³,Íÿk…ØúÀ ¼ox;Ø—¦º4s!‹nQø.B!üFTÇ$)W:™Ôø³ÖÃìh§TZÈ{½RÏSRèC¨ewòøüR8#Ñg¹Mzõ1#801ù…rÃ,•n}š ‹ßú †˜¸¦¬3¶4¾çuøwì›ôâÑYà +vÞ¸íª^âØá£ŠÖ÷ô$½™` |"ösS;$ðûIc¶$¯C¬W¥$„6à3WPcP^±·hw}U¼r-zdãMÄø¿!Ú‡y4²Rè¡ÀÖïç0éM´@âœÝÛË@¦A>˜f’¯5ž q“è$9Œ±3ï,§C¦¯Y ÖI,ź„úŸrþ`ŽÐ­˜éŽâôŸh²ÇÒ¿Wäê'†!8©-¼š§’÷ +«ÖÂ4MŠ2œ‰6—3ãDáÑÏ‹›€ kå>Tˆ©Þà=ú–UÎS¨þj1öt=F‰D€‰„O¨ùh³m‡ˆ½U|)ða•7RÊü_-nNÁÙ®IÈvNè™m¼—™±¶Ò ÇìJóëüßšùô#ªŒëfÒ{•¹þ«pÓZIyì<ötDlÌ;kfßNÅ´!‡/’Í;0—ÍLlïå! 
y¡ª]Õ+kÂ@)ó´wDmZ·[A’çÝSäð4ð_ Že¤gËhôÍ`¢o})©è²5E1ð§I!)½NWxøŸå=µ¹sË•ëµYRcû %ë»ÉÙ@ +¿“Ú )òž«¾úÍì-¦¢1q´QWd€˜7Uÿ0Ã{/P/™LT–¤ac§¡X6ÁÇ“h@álo‹ŒÕªüBNDj„á£A*¾ŸëV1µ7§”9µè/ˆÅÒˆÿ‰ž0øDøªa­ùæðt­µÿÍïUlí;éU…<ùµ’¾´bë;¹o£©Q¦H›l —,¼l{µAk¢Ò²ÍþC KUæE¨ÔÓÎF0žÄ¤3þ^&á˜0ï?9¡=³q>-Tb‘bB²$ ¼ǹÄËh‘vPŠBháÕ %Æ ÙdžÙ­~ «,¹ 2ãy!¿kë F7¡3ذ„/¤¥Õ¢©°ú™e¹]©pd5óïV|£Rõ)áG»&Ïþ9GŨ¥i6^[›³ö¬”ï1ØÓÿ•Y*#õ™4Iyý‰”56«²ä^ô +v_xð®ëж<æºbt~ù+íýnH;¦ ÐìñŽËv)r'øømmNç> îš“[6Æô™ÛO“k\¦ø’_ Ñ¯”íÓxü#ï b-èK…Mázï×~Âá+Œ oÑÜ­`ÌÂà—ýää#0×ʲûË#ÜØšÿåz›ñ-±©‚%Ô$GŠÕl†\ô‡“=zè® ã@•ûÀÓèÿË×Ìûó +nÕì’EØ«ÑâoûáwñÍ"íâïÝNŒô—î0œŸ¸Ø% +¸BîØ|’¸¨|yíGª¼ºª:ñÞ-ò1¥gø´ƒÃgPz§\ÉøÇ:ºˆ;*—Ó°ÞõÉÓ©—â”Á*ðÛHéçà—¾µ÷Ù-A‡Rš¼àÿNL0©ÝÿûÚhë"…²‡G„_-‰^(—r'¯É+¤3‰üÂu`_¿|£½rOk›”|U'ûø×²Ó6ämt°U~¬ÊufÙ½©· ⹤•ôìšeèÕtâ¢{ÍXt€×ÔK.!º¬‰|ùöm)ä½—@¦§¶Òiÿ«_BÛP•S¼!ÈŠŽŠØQ.2A]cnlö¬?D2›_o›c"·ùMË#@;82Þûë˜ÜžØ8ïÙåc"—ºƒ",i[Å \HÔ£PåÞ·GSý7 +°Ç÷7.÷œ¿4ßƒŠ™2N*x¨âÜ-$#¨ÿ ãhF|þ-¤µ¾\ÿWÓ›€%Ó©I°ëmpm ÊMeº ÆÃ¨Æ÷2Ï?A ¡gmZ©:6«‚¨çù¥`B5ßY€¹ß¨6ZïÿöÉáŒ2‹âÅàòIðź•¼O%¥‹‡®p½ÂÓ¯÷&ïÝÇaóWú¬÷)k0hïéÎ2ìUè@Å–*žÛ°€T0"QoÂ"§’q°ÉVFœLœqGÝÞôó~$Ì|òIü‚u¥®t²­¬DwC›ò±Ï†ômåHý¬=iáòø¨Þ‚åkðªùrRQ‘<¶Øq+®%áÿêóvSÉys ‚8:ÖËT¢ª§dým³fè8ñ^ Â4ïi]ˆ$ÅÌ?@ßéD|ƒ-Ë5ÛÊÊᚘo9¶¡¨bŸÄ÷ö5¼^É#ˆrƒöþn¿rv¹÷5Óö⻪iús£x³Á‘@Z%) Z§(¸ÌBÐØ įÌ+úvËS‘j}a<¬âŽuHS­I’84Ò#­¤ø§š™aûø<³§‡¢©ýš–ö>%•nZQë(T ù­·Ð÷§£ÚŠžàKœlWÃweÁ6_£zy!ظ`ñ³ ÑZ ·å¹P¯¸Jþš?YD ¤#þ«^ÉÂ?œü>MøC^dÊš.÷•îy‡÷“-ròò¶ ×4O”Gé×Á÷ç$:½`|Χèøû)â},S¨Ä‚ÚHŠ[¨ò€ê‡ãan (xµ/Ø  ›6‡êç8¢JÍes6ÉÉÜ).>i¥06k!ï8G ¬å§2SáÌ<}®mÿÇ»· (C *¸¾ûžm"§ãU©4äuîµl^“ðåÈî¶ ê×9s+ÌïÃ!Œ&klóD Y±zFrÑ¿ö7÷€{ÔÝÊ[¤B¡®hœèžb-©¨|þMŒ°9‚ŠøyZ>%Ï Ê‹4ÉtâШ¦«üœœ5¦Õƒ‰ì uDW«+bÃÞ òýphMŽ×èõ—zÄ‘e^óe¹µH­o‘L®YÝð'$$²A ÷©óÄý³·t~7…¨7 +Vþ ÁÄõ£g"K¶e `ªV4n©)”ÖêMþ¾ûKô<ðçK· ¬µÆ !¸Àžíž‘¯"D+[G½/ÒŒ棻j–ùÕ!Deƒ-t¤|LƒÏ³;µ€ÏÁÞêd‰…ªÏ<Ža;—²A­{¹æ†…½kGrU .ß‘q"”Êg îÁ¡ä2ÀnÚX·”—RFÁÅ^N€H‘´ÖÊó[™™‘UÎ:Ûo2ÁW˜@Øã„Éû‚hUôå5ë¿›b„½­Wzm†ç/°WÓ^ì1_óÙ–¦¥}3¦†fœd奞/9­é§ÑtÖËè¼ìšQö¦&^*¹=ßÁ\; [/-}+ëBà‹MgRºZ'£°¿¬Ì¼€'’@Õ ¨ÊŸôqˆ nW´V èOá¨,Öèq\¹°…ÏYúg¤Ñ§ +`uo,´u#çuæž4N¸wÄU"šJÁˆ±_”TÔº]œI'³§´×(—LsÑ1@NòI–O¯2kêïþ{#bK×Ä×|ý¼AÍmZQ)ò©´Œ–¬:øAȇr–¼hh—¾`Ùßçá®á—L†g5Ž_úˆ¦&ÉÝf‚ÑV¿,bż ðDظċ ýŠf'7X¤l3sßÓXÐëSG"”[ÈV°ª†Û<‹ío޾q¤vá@ûæ”ÈÀGº; 
‹&Ñj6Dö¡õØduýÍŘ3:½`<¸=)ÂÁ7ðû¡|œv=(â̺^“.áD»óè- ”¥ì, +ƒïp­‘àš:'%`¨ÎþNËúåŒÉLüä8NÄ >Àé_ ™Ì:Nú Ï’DtÈnXFçñCK& †ïAÐÃŒúM-;b€Ïj:‚bîJ„êì&XåÙ^Ä +‹šf¶\Lß >xoem¤ÀÚháx]€ø_¹¶’ ¢’ÜséË} ¥œÎ£>5ò¯ê"#+þiÉ ÀðcQ øÇ•ñüSÊ÷¾åfƒ®9Hhî[7 +•±·›{Û©ðz…>©"TPxõ *ŸDs6VA'_zuJ®+iU$¿ßðÆ44®-6€€þnO3ÚN_Á‹D~5—8ñFaK)%ε0¼ç+ö×÷#zÈe˜^9 ÃB2¹yå€*gf#Œœ*ŸËqœýûß +ú<}mæ·…I``ýt›Ä¸Î+ÿ/FÛÇRéé†Iµ*J(ö‹Æ¼þtŠèáCÏ@Lž¹$Z±£&#ÏAI9á#Eò;rŽègÂ;âH7 +z™m;Á8'íäº7Ž•éÎö.:fÝoì%`^ùyzRf:5±õ|ý÷ÖÏåŠ9^¼ì·þh{ìׄõôÀ‚¾ÿ˜È-MíèàKh²\‘Õšlrƒ} ص*Ø›Å/ “i„Y•zF‘­ÄP¶ÀM*$< KGÌòõ sHï-ðìYy¶Æ=Š^ÊÎo"Œ®öV‡‚&q¸*½+@Ñ^pñ«ý¥½›Ü€ßpãÁ?Qc5_!¿ö¡¨_z¤êèRž/³£T«ãá6Š6h)³—bb dY~åM5ËöOé8SU?cÿ({‚ÑÕ-QgפÇÄOÈcëhøò–Ìc-¾7×LÑ|Ë^æqâQ_¸ë´„pß.ÙÝ>'ÆìôI–dqEëˆÉÁ²€ +üS¬ÌûÆ9ÔW¾Õ", Ÿp:Š^5c,ƒ®ékã¯_R,â˜Aà§7‚÷²’ oʃEHzC<× Aª²×®¡%¤7“.½l†@7ŠZ})û.¯åƒõ/HšeQ»Î›éFAêhLpfŠwú"Åcw°Æƒâ.WˆBü:®¨ö¬iù ˜âÌ×Z&d“‘Ž¢j?uÝTd¡.ȧW†Fîo°mz ŒCd‚cØÞl„66ÊrÀÅvˆF§!@´|H<ë”Û—…zñ€$N:^óVH'Ž>$øc´Ä²#Ôí@ß.«û¿ž(Ÿ`ùfîZS1ñÝÀ 7DÈjå_ì@+["iJ©&RcéAZˆ{4;€ëéG$ 8 ÛO[…¬P ¼+?#2NXêü´±íqa1Œg‚Ì^fa…¢CáOùj 1ÖGmgcs\­}¬k(ZL°¾a˜ ¼|?O’Щ\bjUŸÅ·ˆ®l#ç¶Ó¼³xDY²9@)q3‘!Œ"9èCM(ððQÓ·$îÍÎù߆5Ø,*Sôè=YñïÝŸ]By·FGXä"G»úŒ‹úT—Œ.¢þ‹¾•Öú\\AÊáoWéf廵_=ÛFIÏ]U®xï·å>ß o¬B¿9%È@‰V¼àÞßefp{î_{ªaìá>R¼y½ ãˆô¸Ù‡'VŽ'šb®ÁÏù%÷®‡Ýsê½É¡¨uÕ,½d¡‚ͪ¦ãH&hD‘`–I2™%˜ 1ÛOxOª"AòÐ#O7K<Ò:5" +æÎ÷ÁN2ሺ°Ñ\¥Ã’C3ÎþÝ/ +>YÄ +…Cä+à)‚ÊV«˜\J]÷îFñµÂÍ.cÏ¢¿ê¸¯¦}Ͷ?Œ[Ü«_/]êßæóÓòñc…a-ΈbÛ/5Ñu7ûÅð0l~~ ûâ”å™*Ÿà|â‡îò§¥U_ȸ›þ ™3¸N¢ÕÖ¥Dô,ˆ7› Ü%hQ½ªj*´jür”ƒ“•~õ[—ˆy~&qÜ®R5!嵊|ç°/¼odÇ€ƒµ+Ž E0Å-™ÎLn'VÓÒ±{£ùDök‰a"îר«ýE9ÁKBä´-†Ìùò*¢Šô!®Ài騡t Þûx¡m&{„ \ýÕl©y~;Õs¨‰¼’cÑ®„æx- añmˆyl‚r½Gk™Uxƒ'…ÿÇ=B‰ÛUf1ì›C1à,%ôñêÏö6až†»—â±T#¡zŽf‚K`À?ÑÄc/|VŽRY$8·ÁF5%½ìãÈØ¹rQ§§¼óõs’.åcç?*Lóû”™É¸®RwFñ¦v΄ìk”3øM¯*–ˆëäa5Åóö>tbTF &ðåöV?|ì&bÄüƒ´†Ø9“=ƒ ½»YbñÕCŒ²ío Ù–iô†ÒE7Ðê!€Ð`>–àøþfÉx,¬À*¸¡@«@Õ/Ühz'¤ÿv•[dFé`ûPaI9+boz²„ãR²‡Š +ÚHEÊ:üüö—ÜÈ€)²­åƒÛXøt½A ÞþkˆnµUŠ‚\D¥j Ö™sלìº$µÎ“Ä‹9/'Çp<2—F¸s9;•£ëò²½éþL„¨_–†ÚÉ묪`‡|8ïj»?#ã3¾±î¶©Vå¡ðhJW8ÛÆéÇ.Ê­h¹yáŠ^ƒÑ4HÊ6Âòñh8ê»læ;ûÌ&N=®B­8=WuIeRè›@Î['XíºrRˆ­dØ¡>6ý?T‹¥Väq­–2dÝ#F-ç;È—¹xàF¾HÑé\Úî:C_IÙé¸Ê“×—±Ò‘\×iiå¼¢E¤?ÄÐÚÆúj7=ßÇ ¶¦¬X‹~ÿÍÊ>ß ÅgT·3Ý™ ÿ±ï¼;?ú`P¹3+e:öWS7Jã=¥(RM18™ß¡²Ï=rɶ¾Û)$4y6ºLÕ¼ÛbÜ!´Õü+š!Êó‚Á¾†ºt‹ÿfÒ-W«_$©bµÛïÇô-ˆ 
ÿ`Èpºœ³¼Üe÷µ¤¼¡–yl`T"Ð)ÌîI.<Ÿ$‘º×½,×íÀÚ¥;Ž Jj¬ãfiØ:j‹4°ApÇAâ­‰öDÁ³c‡"7Ä“Ä[õ+[@&Up4•[Éâë7v‚€fè‡þ²ë* }P^mÒ6Â:½6þ¿\{a ʴE-Ö6ˆðµ»ôcjÝ(Dz¸–IÕïÅô¢EÞ9ëLð€ÓpÉ@« äsA¤†§DÐ̈́˼ü·›zŒ! ̶`ýWÎw¢q7™(˜’'kƒÃ¸{‚ÊžY Ùh ¤¼†íwu³¢Í7Š_Í•ÒÇ-±›1ˆuñ ¨¡ãTKÀà¯÷ÏØ„c£å8“pAEk‹3Ò¿ ôVaZ ®g·Š±üLÂ(,$^¥ŠhbjçÌfF‰;×Z ÆÃ±­Úez#t‘£&E*P*¶.߶İÙaê§Ý[ dó®Õ*]}0à[IXU‚¤a¥o;¼æ€„`%£*TèlD^¢ß±,C›]}aq–5uo¹í%§‚ îmâeÌŸÿ £ýØŽT`…ܹ:ÎñB¼WíýDâ‹SÍ#[0@LÈ»“ßò߉ӆ..FÀv‚<‡ê^àGwÒëÊù"f,Y ïÎìöuÒ¿ÑÆ~„;5ô0Œk…žjZÌï 5—§=üÓ½¯qq¤Ç­§Ãªæ¶´÷¼¨æ± õ¶qýÈ%€¹‰wÐ3.n#Y–¹SsÔbÞÝÿµDKù©@>“[öÑèÈÎ)½ô£àäjPÓÔ¯g7…*!RV—˜:ÜÃ+½£¼±îîªF¬´l΄‡u8C3K*‘Ž3|›<˰zÕ‡«5 »xºL%ƒ—Í¢•~ÞzÍ¥7Ò6ôÓõ¬ß ¯–VBÂXh=Ð\„úM$?7¦›íÒëì'©ž’I€ƒÖ›r‚Ìźdžmˆdœ_œrªéõeúŒ= Ÿ¯±‰°ðq‰öbú'›«¨ñ»Ó`€E=­vaë[ù»­àX~ÂUúEóŽH”ªÅê[á•.9Ù¤ ®õîk½ á•”ÊD_;%ŠS‘Q·„¤¦qhô…Ñ9íã±EµÅ±®Ãpï§|SkÚ à¶ù-%´Ö¶ }ƒ¼äa|K3¤¡_ç‚à<Ò²–Ù­Hð²û{n*,TŒÂ'ñÎL°—nC3k fy™ÎÈ1ºm†àš^q»nâäZÝ·)w=IŒ}|w^ž¬™'v, +Ò¾ë$&nœS«^ÄN"ÁqñÐÌDž"6®ñAœ½4é¶ÙiûæRQ41ì™ñdý¤8iä97¾fNçEdcœÝŠ&Ä=dÊ|bêÌCðĹ:@"Štx·µÝ"¦9›ÕòÃPKr6O­Žy’û‰÷ˆ1„(g{zùÆŽ`Õ¥LÙ3ò Üâci¢öÈJ49%¾Up¨÷Kî¤à҉Άâå}ÚF¡–²—Çõ;_ô(1<­Tž<÷kÚˆY;Ä•ëÕPRóÃX–ïñ€½ïÕB@Ô3\V9º„–­mö œ÷Y‹ +~à™}kÅC÷\Ôø*   UúÚ¦ÄõœþaÖ+7¡Xš”Ë"*·šÐø„æZò襓ÏAûS€­õlÔ¶ÿv—zªâáæhnfd£Ò{½z°Ý°^,-Ø­YH6sò•ÎFbAz2mì`¨]øFûÀ?ï…‰Šj§ãî¡ŠÚ Äã§É2 t8ÛŒ=ŽœdˆVY™:ïµZ¥jþðµ®Ü1Ã0þ·¯èÔ5(Ü áécƒüäNæ…rˆ˜JMˆü7|Ò6Ñ3>„‹cþTTVÝf,¸$â&ºsÑ5dÛ—^oöZy$3ˆºa ¢­µ!P»I,½{mŠz­Ej R‹¦'‚ ã~†eõ§>ÄZº]ú²>r ÚØ´jxI”¨š’5íhÝ){(A ð•[ðQ(&å+A¨Áü­‹Íx}ºÍ0(È•}•rÇRˆÐw£ „ñ¯¹~DÆ04_èxü9ê¯ü±Ö  » eÇä «-rŒÜ'¸PÏ[õkŸ Ê:òIÏOt!¦© PËœû£êà‘L6ʉíã]GŸºd9¹Ð + ´¼»é"–õü© ‰;¨>–uÐÊ]Æ]½´~%ñ2Eد'!L ™Œ¼‘C”¨x¤7—ÞìÛ”Þ²Vr±z+zzNaif*Tv¶©-W‡;3ºWa/7›}Rñ +»]\÷¶£Ö>°‰\ê76ÈŒŒ Bø«Œ|nCYx=ë¥IT ·Î7Jî)Áÿ£'{l0- Ì%ÕaF”…·…yIB…ò'Â(jÆ»ždœ÷ô%ñmÍb S£z$GÌá•5ÿG®ºˆ‡E9Üîoþ7‰ÈhlõºÏ³Ñm¥uI͉ľX\X°ÃU8Y¬µ£w½ø´xÏe¹ø œâb ƒ#Yo¹äV™uš#Ès1ÉW›Ÿ +E¥CJ›,í;–÷±ô²×õÛ‡´¥ò\õlë€{=Èm-ï´Í ÑW\=éOK@ñ©‘Pdçîi5ŸúèÄí!u¨Ó±vÕÿ^Ú!Þ5 ¤1s7Lîlüª +Í›üŒ§7q¼+³Œƒt0Ñghäœx¸ÌVE¯{goìÐX´ò[&bÚÇå—¿e‘¯eH*i'òò‚—1(í^ÏÀ~|G+‘XÚ¦xCºp:æyîs©ùë±›™[å®óc6Ê» ‡AúÔ˜°ßBVG4rémýuhjM)®CylÔ°<•‹–r°8¨/YýÐ +bvTÉNá_Ãrî†g‰6ò•µíÛ" Ç–ß9Cô‘À‚Ó +­¬Ì$LŽ`u\$ÿyýƒ îË63rŒa ù*}L¬ÿOƒþ­Nûšâ°Vz`NÁ~©âÖüËïþD|°ˆÏ!5…çÆkW· Îï¯ËÅ[uõ 1HÉ6ÅGÍ 
y‚ÀDÐûW…ÔC˜¿•%‹Ò¸Vî96ºkœ>lLtÆgÑ)d:ã|Ù\Ž,?†ÅÙ¾¦d”5·ÖŒ‘ì¡âØ#2}‰Äñ‘«Ã™ >>©™–Ay÷r5dý>>Ç:Xqa¿ÓÙ x^åJÓ^a”Õ ÀÝ2PA¡½Îk@¨AP£íÓ~<¢GÇ€w*ßjRr… (Þbí]4²2æa´<¡ï‰ö‚kdù©#YtuÎdW'*Õ³C+%˜ 9vȽâðq +endstream +endobj +2307 0 obj +<< +/Length1 1509 +/Length2 2696 /Length3 0 -/Length 4049 +/Length 4205 >> stream %!PS-AdobeFont-1.0: CMTT8 003.002 @@ -26994,7 +33943,7 @@ FontDirectory/CMTT8 known{/CMTT8 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /HZGQIC+CMTT8 def +/FontName /KPZRIA+CMTT8 def /FontBBox {-5 -232 545 699 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -27012,6 +33961,7 @@ end readonly def 0 1 255 {1 index exch /.notdef put} for dup 98 /b put dup 99 /c put +dup 100 /d put dup 101 /e put dup 105 /i put dup 108 /l put @@ -27025,20 +33975,17 @@ currentdict end currentfile eexec ÙÖoc;„j²„¼ø°Aw-åÎ24 ÆòŠôWäEvçQ‚C<ùó3£‹ØAÀÔæ‹ùàë2¨ÿ·kX0k^ß|™‹:Ù´¼fbã,|Ð=úësLu2æK¿¿Z`3ndgï¸RÈwô@Ó),qñåÕœéG<&¸®÷­hï'¶ì. ΋883 „½Aœ½®B±AÓÔ¾I$sò@Îí“Féù™ÅË25âÆÚª,iá™êê pKôœê>˜èÂ6K`Ð Ó%äÂEû(‚ô¼„(bsÌP’ù,‰‚…³âW‹K¼T"0KDǯæ¯Ò–Š›¤‹kIU–ܨf/+WN/ƒÐr’±Ú4÷¨½qðÕW@À¹d³­ÌO ŽÅá¦G|Babhyè52+ÙÌ2›¿up·a²,-ôIÅ"‰³ðü±ŸMC㉮à0×’v\’›ÝàäG’‰\¯% ØŠZ\+;í7t›‹ÊaßmŒJsX÷€Œ´Pcû0ÛNqËL¬ÌÙÄì%®‚tãW‡tåį,ëœ×Çñ ‹);âbÈë»õõ&L¸î}NœÖ²òGÒi$ß²:[´<+s\6qõG•úq° Ô±%E~@‹kÖ´ðøÉÖÎþ„÷[iYKÆq[Ž‡× Pvø£brÒÔ~ÍÇã™Æà1z޼6‚ð>)廄¾\û¯o¶xob÷Bi€n\!¸[ï(;¼"0¸¾ª:È÷7w…Ð yqMom¥D‰$cºqÎKRXr šÿÁc¶À3–m¢½ƒ‘œx]Ï€¦òï†â‚F†Ð…»í4_~nµ} VìF¤çuM;¹²[¹©)(6!^c«0Ü?;´´ïÜmÖXþ¯æBö󈊄±èÐ<ür#£ƒæ5ªö\ÇÖýAµÎçþÝzñ­‰¢ÄÝ!»±C`0î~°îȾÆUt[¡ijÛÏÖAsÞÅТ3X¥ñº´‘ûh‹ ŸQ‹;l:²´†hAßyÑPñw¢Š_R˜4žEwv½a¿XåäI› Íf£ö”JÀ{‰ï“ùþc ©šû“ÓTƒXЂ¯?6ß_×ãe—–?[=‰Ük³§ºDÅC³ÌàDר-'íÜþ¶Þ¥²_:mȹw‡à¬i)EÝ -É!Ã.¬â(’4G¶Å×gÿÐMwùÀÜŠeŸªãoðtn“¹Eí§Êy¡xÕ<ý¼PqÝ;AÌ÷Üâê >OÍÈ8‚}y•€f€2r¹ÏÇD§˜Õ’ù%Æs¾}w`À ÛÓ°ß’Ö/3x”Œ®‡Á6& ¨?ügçÙ‰¦óÊ.Ö\YEIIö~Å:Iß`àÔ‰ôŽž—Òµläæ±WI1‚{6oy"˜®Þ¤ûüW[ Í—¢#@xsä*¨·å®“dæÎ4]'oK·è›%~%j¼Ã{ž|FA1¨àà´@Æ*z}òÔ\1±ì3©¤%ÑÍuÖ}jyÌ<=í/Þ ê6l§@CC³À \+ýÄFè!} †kåŠ9ÓI ”F _’›îW˜ùÚª+Ìš~Ø"'K²È["ŞР˜öººß4§³±]#)Ÿ±¥»ý l'ª¹iÓ¥Úì¡åj§I:]Ïe± '®{—°½åͲ™}ÃÏÙÀŸïü¶Ù|€+¾ÁÙn­ 
Ð³Î™Yçòé0ܬªs¦d¨u¦;?äDz~Å!À3È·gÒÎǯ^@œ³¼Üe¥Ã×xƒ¯òÔVa؉ýåH±aÄŽqO*K{ŸÚà¼Ú‰¼ó Ð%½8;'FJŠ`‰K¼»!-%l•ëR,áܽ†x!öòÅz¤÷á ‡óóCêB!ê1¾ß@÷¾ãùÍ K¾=\ßߪðb•(m¿Å4ôwÇPòZh6?@„L½C@²äÊ8uwç1ÆrÇ¿è¶ù©Ì$ïÊBóKÅ„ÂØ -&Ì¡‰ç·Y5<1B îdlaà;À«9^¿'l„Hn,³ÛúºW˜÷ƒ{ÐÊY³µ¬Ô[œvñ +É!Ã.¬â(’4G¶Å×gÿÐMwùÀÜòlö¤Ÿ—t@ +ïÊÝ×a{ŠVÇÝñÕ <{Á xúœÊÐlU¨ÄÞ Úq’…IGS2½º£Fx(–È$¬°§Óÿ»³MqmË‚Œ÷°ŠHTÆw‘…ß›wC9Š¢»U¹IK}œ]ô8Ð ]ßN„¯ù,Õ¾¿Á_3­Ò£D4?)Pyn‰³5^×òÙⲬk¤©1Q!Û3½‡Q5Žüç>J!‘ž¬PùòÜÙ>|J]-êüP’ÐïT'ü»Ì&Q‘ÝlÌ ~ãøÑR($k$:áåzïšt~Ç>fÊó*ÛѶg'g?Rñ É wåܲæh»fL޹ÿ]Á*ÇRyÈiuK&äáõõÝŠNÇ&O®´Ï`šÇ²æ\}×°K¹´ u]“8oŸGÅ53Ta™ÙîÉg ²@ÿ6ï¶Ú£¦¡M¡&Ž‚ènQI¥Npãìù¶|ÏÜÝQ¸Ì¢ìÍuOVfhj³?ÈÈ`îÞN;•Mþ³äà?~ÍùÚ;Çž²}58Q8¤ß4r-uœ¿u>äÓD}¾|z®·ðœ³ˆ¿cW#6/ü#ñÄD{(шPíôv8ܽðš/¢¹¤Ü>*£O$jëFã¸çB×b¸¤FFËÐ!ñ÷§ÚžóROìèg†/k“³ÌOœÔMâU]ôÔ„,'C8—N„ç$¥…°Õ ‹£—4Ô7›YÕì»9ˆºØ!ñêˆ:y`e“(2³ñ¿= ªjñ±D´0ÖÅè.ÑrPÑXÁ‡ØJ}ySnõ ø9%m¨Ô#,úwžM=°¸£)1ŒdÒHÕV,0åBÁ £ˆa²„÷ž±8\‡QòóÈ›ÁŸä°xï+îÂåJó…9=Ù(ÓjàKqp%ÛÇ07Løœ¬fUôàoÌÎÃôÚÚ –– d}p5¼ ££Ýj›‚^±ÒN^÷LÝTÕ3+£g~áûœ¹í«ïwð:µAn¾U*úÙå‹÷ \AE£é†=¤ø@ :‹žì,ݤîkÛ6ÆTÛgp‚Ɉ#7P6Ëd”¯g‰©µ«^ÏÜ£d^—N^#höWy¹ Ø÷ðUI•O)™C#§”““ظ9¸|¢ž”¶Tm§À„Ò"[~±X’´تWêáªgb8ærÉñœ"”z-¢¼W_/®ÀJ)Yã]þõ¨ô悚eð.¿Õ(ìL´Ÿœs/„O®¯X‡äŸo«ºa|±·JØoå$Rù›*2âŠDRQ£Ó4ÁT¿ZÊéº%¦^„“qú»O5è5>°’ét&ö&-x»¢‘ô bKA8|éX W/­ ¶“°¤Û]ܤS8^Âÿ4xd)JÓ=´··‰,»+»+†A-Ho&µ7f-SKãÑæâV›½`¼-ù”hÃP³Jç×Ú‹ ¾öU%qê<< k°&Cõo¶~]^oè‰þÉiWµ3YuXõÖÐN7YJž7g÷@âÆÄ¹„Ï«’À)òbì ½Ü žÎ½G¨D kE÷ ý°34ÄÐ!ô¢éœ£m4M{ø²Ññ…þ²xm0ž^„G¹1@rP_Vœà]b+g¨„ú­lÁƒ*´°ÉùP´ zShŠL{%KžÖŠ„©–_“*©só´¼+‚ÕC»˜}¹Ö¡‚¡RÛ>O«òÂxdKV5ƒÁ—°Xññ^ðÒæsø™p–_®ˆ +¢½›ß½€eдË*›¹¡Å×KÀËHªX3.Óg 2L9tžèbMµ$ÿx*Ý™4$R=å Þ+JUùCwŠe–䡞±…›-%þe)ê³Óƒ¼Ö\™MS{Æ•Këž³.œ endstream endobj -1976 0 obj +2309 0 obj << -/Length1 2068 -/Length2 12106 +/Length1 2495 +/Length2 16116 /Length3 0 -/Length 14174 +/Length 18611 >> stream %!PS-AdobeFont-1.0: CMTT9 003.002 @@ -27058,7 +34005,7 @@ FontDirectory/CMTT9 known{/CMTT9 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /BQXTWV+CMTT9 def +/FontName /FYMOSO+CMTT9 def /FontBBox {-6 -233 542 698 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -27074,16 +34021,32 
@@ FontDirectory/CMTT9 known{/CMTT9 findfont dup/UniqueID known{dup end readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for +dup 65 /A put +dup 66 /B put +dup 67 /C put dup 68 /D put dup 69 /E put +dup 70 /F put +dup 71 /G put +dup 72 /H put dup 73 /I put dup 75 /K put +dup 76 /L put +dup 77 /M put dup 78 /N put +dup 79 /O put dup 80 /P put +dup 82 /R put +dup 83 /S put dup 84 /T put +dup 85 /U put +dup 86 /V put +dup 87 /W put +dup 88 /X put dup 89 /Y put dup 97 /a put dup 38 /ampersand put +dup 42 /asterisk put dup 98 /b put dup 99 /c put dup 58 /colon put @@ -27092,24 +34055,34 @@ dup 100 /d put dup 101 /e put dup 61 /equal put dup 102 /f put +dup 52 /four put dup 103 /g put dup 62 /greater put dup 104 /h put +dup 45 /hyphen put dup 105 /i put +dup 106 /j put dup 107 /k put dup 108 /l put dup 60 /less put dup 109 /m put dup 110 /n put +dup 57 /nine put dup 111 /o put +dup 49 /one put dup 112 /p put dup 40 /parenleft put dup 41 /parenright put +dup 37 /percent put dup 46 /period put +dup 43 /plus put dup 113 /q put +dup 13 /quotesingle put dup 114 /r put dup 115 /s put dup 59 /semicolon put +dup 54 /six put +dup 47 /slash put dup 116 /t put dup 50 /two put dup 117 /u put @@ -27119,6 +34092,7 @@ dup 119 /w put dup 120 /x put dup 121 /y put dup 122 /z put +dup 48 /zero put readonly def currentdict end currentfile eexec @@ -27152,23 +34126,48 @@ T ÖÜ­“a’èŒîjfnÌUØÛ™ŒÃCž›¹a£t·/ŒÜçÄæ½òvÛÜ×ßÚPqE$NP^_Ù…‚dùèv±¸ö§"TŸC[ þ¿|¨½¦wÒ1|7ô—B%Óÿ¶èf‚£W­"dÀá6QIa½÷Ò†ÅE.á<¾SiœùÁ—Õ™)Õ+µž Ì%ÐÅY`; —Š®5!‡KoLS»—NÕ’öhPhäDðÚ¶R‘™-¾„®¹Sòéæ‹„ç»? ò¦åÕ¤S8ûŠ!ª—,ÊÏ „«vÏN²[¼ `É"Baæ’Â5ÖÂ#ŽdwºéW%Yl‰¾~:/´‚>®Z¨;eÔFÁ{N# {Òí}Ê?7yøg ˜hS·…¢ à`tZ¦" *èQC&KsulÊÔJ·š|6×iÞ$rˆEÖFXggξß&vÊÈH_I«¦)³ÇÝLRá*ޱ·QÂgÉX¯ušV,Ð’"ª%_©L΄¤"4vܹ¥ÂÙ©v€Ã»ZÇõ! 
åÕÌîûßY@øLЉ{Ìvߺ‘+‹ÎN*Y“Ë+qi]øî/^L)ãŠ´Š’myß1=óˆ‡K굎 ™¯~{iµt{ÒnÚ?'†‡2²d,µä½›ÎÐrˆît‘ß³É÷”ö-¼FÜ…­¦ФˆFl L y‰fÆæF?Mè€_¢‚Ý*aÁ–Œ¹ØÄÁ5ÕºäÓ½•ÂŒ!»¬‰£d§ 2‹ãΛiú¨w/"Ï;qfWò"Ž—^®ã0$go„ï¯aWó¡vêm.üÍ¢BÛÀ/c@ž{:¿d"€Aú·vÏø]P}Cð*&kxÊ;ô¡k€6܆ÒxɃ%ºÕ ^X2ÑLŒg]¡’øó…ÌVm¯·­¦Mhx÷äŸP&ï(Ð{Ÿæò‚gû¢Ð^Ö£Èý ĤԒË|í±º(ìdkÈå¿ì/¡ÎܺH€Ô”qÏJ'w¬Bî£t®½§€—žgOÝ`O± ¥ßcÔô¹“ ®\œK*ó/˜Íû´®¸™¼g²9Zö<0väB‘·ôWK=—äQ‘]a9<ÏÀV<-­Meb^|Þ€£q£À"Ä^ÏÌ×&l |ésêgiÿÑQ=¶ÀéõrÓå m[Ò:rL(Ó¸ó…„aXœšÃIlµ«eŠøÓý€wƽÑ?Z®Nm‚CU†´úØ ü‘Ö•;l€Àc;^=§ª÷¼/ÊE#ëà…q$SpÕ TÀð /.qKAEÍ æ6–ñ­¦ËêÃ`ÍQDAϪÐ'Ön¦6O¨ýGÒã$áDÍ’ô[Q»ç8s X¼¸9ÓÏ'¬qQ@µÀ{ÄŸ§ðn™ù઴œÎ¶Þ×Z'v³4쓪ӕDºúîÙŽ»-GºîÑáªJÀ8šÊáðY+:âóâA<ÇôF&úÜÑ3'Ë]˶qȧ–·¸òÁ·¤äÕù 7׬ÆLà”ÿ O|òg¥dÑ–ÓÏè6hç0ÿ£Ú%ÎPL•ðƒSÂñañ8ž R>?ü6æq¯Â…$¤‘ø‹Qø8sZx5í¤TÒxo†$3ÂãØ^5ê t·§áhœÚ¾Ä›èÒ~Uƒ¥f”vRÊq{oíY“—00¿4¶W2ºö‡n­D׀Ƹz¾ã§[›žlÔÜ´ûÑ;qÛ£ßóâf…}å²°¤$ñçÄ0¸}Œ¢PgR¸ %Bîu0³Óí”Þ»i[(wí4À’(G§¥"O#õÅ-_ª‹jó5E]WÊÍhíˆÒd]ý#ñÓå‰_óúȋ˘†ƒÿ"ƒ€Œ·´äëõ,ªª Üí…ÑbTlyu~‰ôCT…C¸þî•Qªyb9›6›jÍQÉÍ$1†Šq¿Jƒzծаe4¦ëÝvþ†ýõµÈ§ƒG%Õ[)”è¤}›º@ìh],l…u2 ‘—"t%åãE=3jI3Ï?èÌÕ·8‚›G[Ñsv›ä³hÊvì!RÔf1~ÂãYA Q Œ77Œ½ú²5tÇc¸Ó.,ë {ºr]‹,ú’“õxôæ›wÈ€„¢X.ð}Ãuç= Q°-‰–U>s/Þ‘˜/;v†e)Q/ñÀ?§üê…ùÎuRT -«ç>¾€dýkWÇþB¯øë¾&¢’ÃèÚR|S }Ob¹Öƒ˜mLjH'ØéhsO’gëw¡7ÂAeìÉ Ì>M«¡©w§ú–_bó'(>+¶ú˜:ù¢hÀÅ*þÛYÛ&Ã쌢côômØXá݆ŠÇ@åˆ>ÓêÖŒoÕ*‰ˆQ F¤µÒC†'L ¤Ò쪕Ën2¶s)|LêÊ"Ëóë–²ÀöTE®°=1IÄ(j臗œ/ ªLƒè@„Ò„ÙVŒ §Ïù$½'‰IV¬­–މÉ_ø:ÐÚf;z8˜®cë½üÕ›Xè^§@±nÓp60×Ü8׊…iÒ1”4®Áæ±Á…ýÎçZä…×µö>8¤ÿKò]•ÐAÖ¸û?HOEW/²‹Q´æû5NÒPŸÄo;&[ -±)D*Þ Œž\ÚÕÖ¶d@aTÁ˜À` £Ùk"¶}EÏi4$¥žK‹ÖüÈx…‰»Ì8}g 0Ϝǩië}ªüÔ ÃÅ^½': ÞÇ_óyZn*Š/!¦‹{0*öÒ™üA`èäEÃT:‹ñL|Ì̬Êÿp\cL7ω²¡ûÚêU/’â³ýè•yE¨¾jC9ŽÌHAòèw4ßÓìòÁ? Ø—þß•¨“øtD,wgþD¼ÄÇ?æ1ÏÇM ÅVSÏá¤ûGáºx)°¨Í÷:­Q†É­o^ÕHžåÎÇVƒ*0Jó:ÝlŽ+h³FLÛ*+Pd‡™Uc‰ÕÎ(½öoü A•[ÅÀÇùj0p$8æˆMFFi+’+ßy•Jq‡Í ÙÆIÎ[-¼þ¤—i­¾¶m@M9õZ€?û7•QE¼­•œü» -VÕHÖ_3™ÍoZ¬NbüIGÿ´ÁëÊ'ûñ›ÁB©²g ˆn{ ~‘]Æ/JÒL kY©/Ô½˜3‡¢DH´®×y¿+äS@ø4}¶'±ysv·žMÄT±m7@ÎJ†rŠ ux¾xŽEË/Ùª.©E3ç¶´Í=öNCÙG¶QvÊ«‚Ôb8ª¼ÍvwC®GË™£ Ý$ví™ÈŒJ› Sª*ƒØÒ«CTenBüœ*þݯzƲÝS_I‡Ñp’%PÝfp²J_>^x~ Ã8¤RÑ‡EZáTåyª¯„>ê±°&E-ŒlÁ:wc€XDƒp¹ -/Û’¦ìô‡ÕÂN}ÛÖ°õ²qÑ:ÈUC-þ5‰È¡Rº®§J©¯û€ù•†èf2¨ÊÑ>~É;vM:!ýý*â5¬Vcjpp]Ÿ13‚¡SšþƒìŒ]fyÖþ:Òà KÆ@ï—¤‡˜’æµSèÕq›D¾’^Éõt*! 
îY¸˜ÀÅ/lÀðïù¹WÜ"IXxNy=Ï+ÛfîE´¨PzõD‚ÿiãô°®² è»>Ô …aĵàq[Àåõjåm‡R§Èke{kõxe]‘;Î_ÿ•Ž=J§è YiÄžÆw/*’íaq°â_ÇLÓáqߨç«õ52³ðŠW U%r7õéq &ŽtE»ÃÕŒîHçyß QMøcÝ(­jg¹u7ñSÕxã0p®ŽC‘~°Ò èüÍ _ “|NêåµÛûæ2s"@hò æj¬S‚î1›eɉ#ÀIË4R'6ég¹,‰Ñ÷”X+Ê)¥•Ýãa[TÌâi„¸fdà+›8¨ÜTÝžõK¼TÅlTâê6Ëø•Ôncwà-‹°[¨’E䯱¥¤TìµGÈ?Itr¨ç ÑCe1K³¦²/Ô0GØ'Wí„¢´*sÅ•S8Ê-H;×Ö`¼fméú#3ªt^þŸ”U<Ý;YãÙŸŠ£“?pɈM|*"ÑÍ÷ÔÐPµuÃ%v!&ŠSÚ Õú»µG×7÷ó9œ·HÅÒ¥Ph›õLXGÓB“—Ë)xTIøÃcó]Z¼Ÿî¸p0k•˜®›ºÃþb|Ë ¿š£„¿h?%ÍžÀ…2͸t|¨î -ÁíôÜ/S¿.FMË„B4Ã4at&š øy{¬áswfnB–Rî%Èv—ðó2¹ƒ«ôI¨÷vÓ V‚±ýÿ9 ?…Fú!AN„ø,?˜uê6¼â¡uÿÜýUö 4…u±XÙf†Kæ»ZS"ç@?uîãÿhQ+sët²'ÁéMš;ÜÅ|¬ÌîíÚË¥NCÞ9 £Héu -À|Í=ízÑaǹ(b•œi¤¶¢„Ñ)¼­·Ò¾ä aÇþÓ÷±¾xªù.,DŒž f{­V) ~?éû EÏ_Uñª•ƒ÷ÁƳ«GsÀ±Ú ÒëMbl«hc{sú´d¡‚ðrÌ=-š4°)L<áU\éHmRî¸7Ñ™=h̼¢¸K8Y¢Ï¨”KÞ§ æYâ’IÊ¿·Ø¿fÍ™=‹,ЙgF;)«îxë ‘ÜŸ¿¤ŠIíOG Çî;6Ù±*$§5Gi‘BÀPîe©CGè÷‡£ ¤£íRÌ?Œ=Œ‚_ÒçÊ>„1ŽÁ¾U²¢ oÚ7wZ‰){åí£‰Žã.z‚×G£›`„cÒƒC&m8Ô¼w´6Þ¸©…´œYØ Ó2$bP:àðʼnØ\èG¦Y‚›Í`©™1¶TÅSN±ë»tvŠgÉ`ŒzÌâã{ Àfkû)¶ íüøÍ`!QGrUžj9°½iiw¹k¯±Vw5iÆ $Þ^òðÉè’_BíªÏqþçî~y;4TúÕ¼äÛ¦¯†ÓÔ 2† 5 Ã"ÓåÒ Ë•ŽU±‡\ÊÌjXNßä¤ xÕ -Íf‘­åÛRôùÉXJ5&¶*çuìfX¸m1N”é=ªV.•c¯·S¦£ÄpW ïìrËÆ¥ 9ýžþ>–„”Þºµåk¨|‚ -ƒ¡ˆ‹˜ÍtºgŸ9ú*ïv*Q' …wwEèÊ alAòã±®¹@ÂrvÏ·:m{C{çÛ‘8aný}>¡·lNüI… xY.L ô>*««¯0Q¨Ø¶àŸ#¾ÈZÓp((œNŸQý2E@ôC¯¢%Kn˜¹!ë|OØæS3½XǾŸ†¬öÝ3òï´A·©£¬ºãÞßÒBYƒ˜ª³Ùìd1î>ÜE‰„$5ÐE3ðúï»Ëó%?˜  æÚË>4Z‘Ã|mcX<ÆÖju—pe2Ó ˜¤¶àVzA•ûʲíMqœ¡]~Ž@ÕÝÝB¸Àö—ððöC×ÒNapP¨ÝMVf³Øå>÷¡®ò+´?+í¸^;¿+øÜu,¿ ¶J(De¯5È?}wTK\{ñÓSá˜ú«›dšÜ¡e(¶À¾zb«þù(a/÷OÌ­æ`Ý@èø¢4çÙB:ÈAœ9Ú,.dƒWÓ]ÀNưN0aÍÌì}ÉÊûP* i}¤iriûÀ­’ÐtePsªÞç.ˆ›bé~z×ÇÎHòáÒ äøO x5ßµ:Nºþç¶¢Sr%–¹ÿ\¦#­CÐ=úÅä›í(ÍØ_”eV­4Ç›§ºÙé0"½âÌhÑÚs7::`Á¢ä cü:(à;:O}JB´Orßµ KõQ'·Bö±* Ëy;8 w¯ÑÏ©4ë½>@rÀÐñ8ìv#ÅW6:£̃d(èZ6ÔÚŠ#ƒ*³ÓoŸ»,0`ãdÕÆeÒ­n…¬½È‘˜- õ³„CÊ)„P6«å¡b*& Âaâ9·oV‚L“°ðH³hý…¦¬Ûfþo²™>¼t[Dš)i:ü|Ûóãói0Í"§g£JnwÏû«ën¨ÏàÒE5 -ÿ[Ð_s‘–]júç|#÷f>[6³µ…Ü@@¯Œ<×ÿQe ½ÚE /@8 ÿ€RÊìdÁÉ´–ëÿÜê&y.¥MúŽ"C‰I”­ítŽËê“y—Žã&³ËÛ”Â2"ò§n}ÜÙ’ÙÌdß‹ÄiàÞ`càFƒœõ ¡JßC¹‹ EMãitÑÑLS~ðNÏŸ%Ò8)&>ë‡ÊVtâWë£Å=2êfý P@ÚÂË)iå«>êÁ2Í!fQ²6ÂÃ!¤àNëkb ×N´iXWƒpu*˜½bá®c<2: Fþî’¢ÍHùŒˆ¨¢MH„2ïK|Õ§›ªµøX©^i?HÁûÏZÖ¿6±kÜþ^Öï°Ê ôšÈzîâdÔŸQÆrÍÁ°²Ä+±¶äjŠÓÙ®¯ê½× iƒWÍØU ’¥ýÿxj™Ô\²„åf1»×¯ÛÊ)Oðq[T7NÙÉUÑlŠï -endstream -endobj -1978 0 
obj +ˆp8BäJ°ŒUõŽå«mš¤3œònA™Õ&é,oXØ4 ô°+×A£‚ÛÍéÏÒ¼4ßàÃÚ“n›µ«]âÃä·ýR½¡Ü½Yþz»ÂxH±9V{ÐR9–"Ñ:²Ûh—a.^k à( `ꬺé®Vª¾àUÛºú¼wxnLÆEÿÕ`m,šœÅˆâ›ê³ý"ëûç×ëY×TíO>LWA VRœ–{C–.WwèÆÎu5¡KnBÝr-ÀîD»¢ÂRýe°‘…9(=«ñè¥Öú4TS6è-ïH ¿;Œ&hF²§ÎÚÙïjc{nýÅHæU×qn9áÓ³›Z-å¨wP;K«¤½ªúÑIЕWô+eñ‚tãQÁawб/_—b”±ZÆ• ;;_#,ÁŒ¯\»ï¨jíPdUÕ‰~²» ¨tjíÓ¨%†° šŒ$r¡g#*\>ièÁ{¾B§vw¼“Ur½/¡l•Ðàßãòö¶Lóº˜r´¥ƒ £…”÷9Œ´ ÞÆEmÒ«t뾃•£öJ,ù9®®ñ~ã¼5Q?ºðßó”åt©À·c›Ê”~×m¸Ñ£ƒxbÿøz„wÉÏ]~ä ¤D%îõ»É•÷cޱ¨ì|ÄùT½¡_ èü€µ˜Ò¶ê¿›)€3‹‚!¶š•óŒp ˜AWž¤Ä+v‡HQ…è8(»«>×–¸š?ó;?q(Cqý/±“GùˆŸµÏôÆI¦ ºÛ¾Å¬bÍ.àq3Ô;x¬V-1Jk<ãÄFFƒïs/H ¿êñ[ûYlFsxPÅ\W4{fó´´§ÊЖ˜çÅ + • `uv³¤Äý«úfGœP¼†|¤[Ð7âÚKEgð&ù®³œrïN¯C’Œ V²Ø0P᪞E¤ø®º½kl©Øª0öå[Ÿ›Wæ‹#>¯sè>Z3Ï»îñ9á+G¾h,‰ÑG:Cs/ÄATûv-äÅ£ëòš"ʤ³A¥~"ÿú~^9(c¿Æ–¡ÉrÆõ¶ì´B`j9¿ÒõÜ+Íêcð6žiö‹n¥~>EAAÛ*/î}Ô.K°ø`ÎõS •„7¬ ÛT–ìZ)O,&ò])6`í~‘z0>¡Hi’”Ñ:i«•B·–ÉF1c¡FAa SNr¼›¨p,W¤b7€ÖAææ.BÍiÈ2¾ß¶3©\£gÒHŽ?;N®ÿõ^’i7á¦Fú®ßÈêŸÍ;<à€Y_‚€8º@êæz•s%E1,ãäRe5#k F‰?í¶‡ ­ñ'á-XêÖäöÜÆÍÚÅ9îÞûG[ðOLö¨àŸ¬†Dàmd'_RKžUé‹Ã¨}ÑïCœK…Ä¢Tñ“LºÉú›cΤM{+I›e—txøÇ-?¡[ªœ$tƒÌ™}î¤-B’,¹ü àäƒq‡Öȉ`¥µÖ\pæOêŽR ŽMÒÖÄ«0ú8¸ºŸ¨ãs!@[ý4ùú27†W(OW@eæV–– b±÷7R帘V¸š›í ÐpÖœM±8ßõ\ÙìòÇŠlWr̽ˌQárïwàøËþ´ÒfÊROÕí›÷cÖÀÿá:§ÄêTmKž£9*VÉ*¯ ãi%Ñ>ÿu JZ3†@Ú7ùólHûäW‰€¡jdáoMtŽ•s„éû<9íÖMâzaÐ$|íö¤ê «ãs¢;|¦Ÿ×?VæL „1¯.ZâŠ$³•HH…¤TÒÒ‰Üú×YIï–í'I¯9¥Z-_´M•“˜ðÉǾ¨•²geq<ç½'aãòmÔͺñâ±÷ùÁ„‡S2:;$~Rà’lÚg ‹‚ëcFaÕÍ+E-+ÓŽbÖþUIuÈÅ¿¢nÎÄ{?B©çÆ %§ÍbŽN³ÞDé‡Qé +Ò’¦¿¾ bKn[¹Ú?ë’Ž|)ÕÓ|D*,¤æôP°­ßúW¬ )ÖHB)Ÿ]ôª‹üý3=ãý ØŒû©(ÕÔÎþûmˆ¨fkOas Wà8³®<Ï6…æ§G{(VK"¯ì ýü×?™—ÐJd„SܦÏÞpÝhÍ,ÃËbêyžûº˜ZV­çàr½T¿M:V?iÁG2Ö› + üÆä5CBiS%Qàé»ñN.ØgzÁÚžÛ:.ÕíƒÌXþ'ü–°áE&•µVõµùÀ`yZ‚öMÞ›¸½—oÝ ‰ÉÄ*Ø(¥œ>"CcšƒCa̯…º#³sA^Qëõ@×%’„³¤n´ƒ}ö¸šCEû·5vê,ÆPr 3vXؼ¡„ ù¹#•Ã|T wd¶-ëŠxŽâm_äD5­)nGM1³ô¾\`QG«X·óÑ„ RŒBaëS¡®;ɤš HÁœ…î=pÖü1i/’›9»Âà²?W aADjküõ'œÇTóÑŽü9OY7YéTñ&JcÛ?M%Ê"heÃÀg~QO«kkœë}ð?j«Î'Ó‡ZqÊ쾋ÿÇ· ¢ašªø ¦ ê(!×XDªïüï¤öê(tƒå|¬Íã8]„*T›0fÏ~PN{ßéÐkˆä¾j+0Å1Þ|Ç O_‹+œð’CwºvÄÃ¥²éÂl¤™Mü¼M/Á(»Ómq.ê;åO§K4ê·&°ìÿñ­7îÞB*û¢³ÖÐ3H{{SùÿÇæùû:| êÊ+¿B)âMEK³xŽ8“޼®å7-ž¼s­‹Ùé;9®ØIè²è‘¬Ñ(H!Ÿx‘ÎIËr_6ŸÍµCø¢¦Ä`ìGøKOB=Ké)°G<æwB®wÒ,@lŒ‹®HFÃ1`=ÁË9ÔÎÑ›Ö=ªNé]5<Þù¯d¡/B ÚšÅ}º`ðçöPP û*øþÄ4ÛKBø_ÙJP@"æ p°_Ö3ÛŒ¶þ¬£‡H˜ƒb™o—Â1Òž¾ª˜q×ÓHR +Ÿò@|m 
+ÿùj‰uÜ"–ãae8MƒP–BêZ½Ù~ì¢#JE\Ðtš}bK|©m îÞ&ü"cJ4;ÁàÁ¢«)­Ò”N +â…rÓ‹ÜKì +©XÅàû¼ÇH ™Z£Ê§èq!$Õâ=CÕ>»(Ã>AQÑÒ H©Âê=+Ü}ð´çöÄ·ÄŒg à,м`yâÃn!î»*š*=e6Ù`%ä–¬úÀ®n™-m}’hDæ|o⽕AЉ ײQ>øVƒ¡ á|N©2“Pè0ƒ ¤Ê +í ¶:&sƒ8à²&ê†ÙÕšF½‰š}›Rqà„b+¦Œ?;Zγ9À½øÙ4nNq¼w‚ÔÁ…À«‰1[›Œ‚­òÞJ2ÝðaYg`Ͻ¥¬ûÇD(V8Ð Õ +b•³R¼#ªgÂ…½8 EEóh \cª¦ñéþ˜þÀ£50¥ +Êöáyÿi¬J´-Û–iñ˜-JV?yl~¯Mh‰#_ÚÖtÔŸ`LN×'ûyhÇEs”Lþ*‰,:B6ü>„a¤¥@I”zlK“õ$ÿø'y«ÎfúmÀ_^ß]îoÍÑè88œÞ‹ˆJ-Ép$…ákþÅ?9Q´¶|ÀA;3Îh6¤«@–yÈë¹YÍSóoîßì+UîÂìtÛÛOcÐP!^°,’¹¦Á?ê7 ‡å&Ý+ýdñº`ÞU^XF4-Nuï´LÄÆ˜¤ÎræbsÇîÙ‹ °WšFb·j_uv ŸšDHÁzÖéMf忯³>‘‹‹=ö¦üÍQòÀÙ¤ë€^Ú䓬ًm`âD13F Ý%Žl¾ñ4›Œ‹á”ZVò“Žœ^¢UV°„œ} Øaø—s.Ž XÇø»ZâçæÁµØvUãm%k h£`oªÝƒ"l%Ã&CR«@¤,Z¬[KæEÆhÕCЕ0æÆvxFŽùÒŒïßî;™Ù±Ysj—4­”ÃÞ0Cí÷1»èî¯>‰)ACW‹÷˜Ã70xJÄ»m$Tdô¸¶Î»$(Êi¶#so¶¡~wEx ”º”Ã\’ÄlçüYÕîe?»X&a¶'#ñb°{PÝP-aÌ ÉQ •£jqó*‰ Œ˜®Àlw|¶Wé:@ŒÈr¤«w¡Z®_Õ9 v£‚'E}þ«ðè=´)Ì‘ù“¹˜ AFö+½,bC×C•÷û›Eeg-ZìôWóNN`[5õFscnÌ&`¡gÿWbÑV;ŒLONøý<±(6ÎÈÑ,ÆS‹y!¸Kh¿ÙCœ‚ŽtË™âóBÆ Ì‘ÆCªJt¾ô\Ôðû Ö¢û µ€%ë—'ÊZÍÜEoE“OPû\í.ñ»èzôkËÝ0g¾–\¯šÑþžô|Ûši– \öÆgƒX×5¹¦ ñÀ«Ò¸˜ö IýpìôÔ MUò„F5Ão1IÿÚ³2r)%¡"Ö±?&朗£ŒÞ‘+y¥™ NÁ_ÑÈŽn­O—¨öëµi€¨áûÙYò}´F¨3tÔÑúÔäVéÀsŸ8)ÛóƒSDÚú +¨µáK„f/Ò_|¬"‘àØ=æOŠ¡þð@– P˜Lh=3}Hæ‡su†:¹XzsµŽEœÕOOO  Ýçc›Þ4°j9Ùäb¥¦´q +ûÄ&Qlïçj ÄÙkËÃkøk-)å³H›ê£›$BeÑ‚e ÙÕñMÎÞ´_ê 5tùyƃŸJÈt\TûRŒ¨—`§I{@ úi·¸Â¸0ò_wHìãšùco¹§Ikµ ;N •1óì½Xmq²* Zȇ6+ðVó¸[. 
\O8  í'»AþǤÁ®Dg¾XÞ*}Hšx¨ Œ ¾Å2f!‚¯zËL¨ðïb}ÇÇœEF‡rmË‹m^0®±~`õLUxK·Ááú¹ŸZ?tªhÒ˜¨ÛÊAR£Ùê'±C'úò4á)ÜQ»}â‰'fõû}‰L%u?„ž÷¼¾¾nCOc#šr£’܈²ðø×v)ðþ³ô$¡ íxt|””ú"LdðeP*¢Q5P\{#êÙk›º8¦CZvc¬ôµÎ>Néhƒ5Ãa]h..Œiak¿„[]Çù¹>‡Ó€µ¦?`j¤R“®$¬H$æ1s¬= +³{Pë&“š>M4TÍ夲S‚æXL áêÜúéÀ­ Ù?@õv:hn÷…xG·Ãî u‡^²‡‰Y ßJTãÁˆ ÇQ’øßéC“£€G¬ÐømÚ¢6“Ú?^¬k¹À î\ß] +˜²¼BÎØ.”žFlì£ÿÜ•†-Éþ¶k¨…òúÖõ•h`³pÕÁ€’B?»‚54¨¨Ñ]^Ó> lLz´­1ý[2‚ÀÇ>v-²Ê¹çyb¾äˆrL¥¡Â£ÔfË›ï—'TáÐãÅ"ÿ=C2AÐÑ­õªŠÙĈ¶‘ÞIF¼äƒ6yÞþž´2i²€—–Œót‚³«ï¹ÑÙMgkÒûë­ËŒåv\h_sïbN%¶zÍ*b¡¦Ë‘$ŸDö÷íߘÆß$³güŒ£F@Í÷á3èTÔe‹¢¥²9@ +0 y;î*@$”_VU2câvÙð¢õË§ŽøëÇT1ØÕûCÖœ “-—QRš}6ÚšlP_äKÚøÃ½Î.m¢ëžˆAç þäÊõ7sö‘9àš¬¾$!öìW'%äÖŸ%Â^iz¿£‹‹Ñg<£þKÉ[BÖ šF¹qt¥¦ÇTç–W˜¹aш3.Ã=Mª“¬ìgµâä#Úãä&]_ +oèô4ì¨Yf¿Ô†…ÏïJ&6k/(¬š\äG»LûûI÷LÕ  ì-œ†üBVŽ ðÒŽ‹þ³¼…‰l¢²‹ÁFº_aiÞw;±œ×OÑ-‰üš¬b¥Üú1UL®ÔÔ dššº‡ŽŽ7o€¥¢÷I Çd˜‰¾ !õ²²–Òmøq4à²\9§J7é¹Ô„1·¬¡Z0-ÚUÚr©ñ*ý›ó +â1¹bTØQ„<- Ói6Oy–ð ?›½ó} çmÖ ¼gÚ[´þ¯z~] hÙá„Íh7c´P$‡úÇ£ØÊµM§[ëö'ÐÆ—š¹øÛ¸É8õS72j…•ºkt>‹Uuh]¸.»Â7LQý' Ó˜€–LL›·¼fOÁœ»º±Ô«ÙB º{ÙN\¾±u#øÚÔÀÄ_vÌÁíó»®4¯@¥†<ÃÕ%ÕÆ‰¼}ËÑ»úŬș¸ÅÖjú,ļ æíÈnc&e ÁÌëhh±Å3Ožýqr¸’ÕçÓº©ó®ƒÉ2„köG%ì NA+ãɱèA«È“]0–t~*Æ&À"²WFÃïçå¨K´?\õoÉôtôlr8Ûã±uRâPˆ ?¾hlJ?Ž:†×Ш5T¶ÜQ\õp} ÙTð,UÍÌe\§ ©‘?<}”SXÆo×qÿê{]‡õŠ·ˆºñ‡ZØù-”‡»ìξM¯”Ÿ¾}þÏ)ä”ý·ÒJ¹±„ïì…°mµÐ.JK»oס~¥ªß`1å?`‚¾Jtª°19Ç•þE«îLPä÷3eáoœPŽŒÝuPD*ãàø +¯S™wÁ§h”~æõ@ú’>p b¤|¡Œý¶ÚLtëZRí jU¨ÀfÊ:œžAêDIÖrz/ng7Cà ŸÖÈ&öBSyøãnÊH”ð5ÊÞ¨[¦˜…G%‡(L®ÈÛDéK¤©0po $oFÊÒ%%š¤¥ý¥@×ù³@‹×î„ØÖ®i…ë·greª`ÙÔÓÄZ½KК + +Ü æû$¬v庻–ß,e¹°Aå+•J4É#°ÅcgH¹ë›)Ñ®Éàç;Ï[`·e…ü/iC}“Û +äM©Ýª¾aâÉŠÓZHî3Vñ |¯ÛÉ"aOÅ…–ƒ­&?z𨮻†¶í}½Î ÝÈ<¹8õ´X­îOV£ˆ9‹†Ö×ÅÖK-+Tq9£çpy9Q$í"¯šIâ©cíÆÚJ6ª[ˆ'âXüi­ÔÇ‹µ(Ãà7õ¿M›!$=*4Цcô—¤Íß\ëôæÂûC¤³JL±Ýdíö€9‰Ýªs\U0Z’¹=®Û<¡¼¨2Xs‘ìTÄÊh|v,œs#:ØòE›¦N«§Œ…EW‚fUÊnrŒ£o_íNAD×zŒÏ³º5>d0Q]½ ô‹¹q°¨ï_ikœUx-XìÅaWŸ2´ØVm®ÍE£Þz>ˆ'cJçé+3Ò73`K©_CLj,„0ùS +Ñ92[uÓpÁðµzžŽ·Ç:z ­‡µK‰b%]¬Ä7OZ”T¢0È=ÏmÛØÚqub8ðNÊ’o—;¼¨Qu&ÆÐnAØù*3¥ÙóN4² +?7®sÆ ƒ v[Q<>×Q…=v\QgLÁíhÿ¨}aÙî,׳{ò(ºÙ8räd¦ßùl®ª·à=r(¦J_ñ!O+’ó®½\Êëó˜Ø¯î\\‡m…Ÿ&@‹Ž¿ A€B!Ï{‰å›&· x¡äf®@ûÍV^Ë6zê3Vÿ ú/…rî©l@.%Åé¤y0½×Ò=Âyíî%©^¶z¥ÚZ%d oû ¨x\±¡¹Ey™yxΣh…9@›µÂ…0Œ~ž»Ov°ý€ê\ø ¯ž;;šÆìÊ@ö×US%š—íÛÞäQW™æ@Ñ?R­Wõºò—^Ém%XÒb©òe/z9I'µy7gˆî}ásBdn »ƒë 
+ÀD9ާq(æàI ãó·|GéAÆbEÖƒ‘ÛÜàöØc¯^"JËϔ˾Š4\ñåä~C¡†…Ñ©| ¹·¨ÒØ ?©Ô©€*.#ÿ½Æf/ŧÒÖÐ~ 9KÝýHó{1· ýs ÍoiA:ØåJŠü)öØ!†ð½ˆ¹ôü3kXæJÍ©‚Rwj£”çyG0”A>’gX ÞÜópÌæ˜¢­HŒ(ôh˜ác%\ÆU?ñÖâ­EZ,NðvdM´w{rιþÝckêÞ<âÌ2îõkŸëz¸:[]ÉD>-Yk1èÁèÛV'Ú1‚þ´ÝÚ >KOWæúɈõº©Àt9vä–‰ž^†þq 4»S¯ë“|;.û‹Hé<†³¶ŒÔDéþÎCv¶²VÌåm¶{oÊšïã‘PêìËN%à3¡0ñ‹ƒØ® Ã…ÂÍbbµF 4~A{߯< +¾Ì" Tb(’P ƒ2­à¬°yé y–EÏ¿)°V¥ Î ,Vüß0¦œh_3¼ Ä¢úe7®4LHn¶¶ÜŽgþ8FHo.6Ï'˜ÆQeãÌ+ÌðµNeLXŽ]ô‡ƒCQRf‘š½Ó2õ¡#?‹·˜<îÒÀž¡ÝÜBšÑyŽ™Î•3¦Yé9´˜þ®œÂ5{кzµ¾‹(ÁÒø³(§ÕGÊD }óSæÚˆŽ¸ˆŠU}0öÌÝ>šzÁo¯Êù*%åð×»$ÿ“’‘ŸlÏ& +}ã´ûKç+ÑàôÎïÈL5XáçB´Æ`{˜PØžFR/´e¦•{›ùÉþ¿xì³If!«ÐÞdå˜w!q>)ο ç«׫ÍélÞV•]˜$>Øð€Om•Å€„”œKé1îy®ïà^ÑŸ= ˜"YkèK‡ÇÉÜgt¼[1à6eà´§p[hNc¾}]Û#Ü^ékNoʼm, ±5u£‰‘yéá<Õnå•TA³t<Íw£&+•5O–y[šì÷N„þ[ªP±+J ã$ìiÙ/×%”ÖˆÞHEPBÓaÓÒàz`~-Âôa+Õ¸wi W€´êÔ=ÞþYÝÃÊÿ%àb-ÈòÐÈSQ­=SVuIwï®–¥H°¸CÒ8ûq—-º£ ©›¨íW£‰R͢ݼ?ÎÎïÒ‰G¼»g5…áá)Ó9 P9©_öQ‰+Ë[nÒÞð‘k¡ð#ß%J!³„zùŽø6:ÈLÑiÑ)Yð´™exR4ëÌ©b&Œø-Àé¸Îä˜bŸ©³Ï e’ðNæb³x+’ÎÔ‚>êxró}Ô„%öˆSx#«ÆóìðáÀ¼àÔ\/¸aĬ1^BIOÿÌ…Ô•x_å<=rãÔTi¤º½Ôø_ƒä­[›ÖÉ-W¿õ…&íg!zÂ{N•i×%’3s|刽¨’66gú಑do–D»¹^-@AæJ–ðHSPºHSä[N¥©:ORØ~\¨·bj–•1 ãæp +endstream +endobj +2311 0 obj << /Length1 1173 /Length2 2916 @@ -27216,7 +34215,7 @@ s; ƒ$ü-UÒÛÊ3¤³# endstream endobj -1980 0 obj +2313 0 obj << /Length1 1188 /Length2 2740 @@ -27265,12 +34264,12 @@ AÖª^ 6» f žÑrº^‚7£ö1ë5aj³à¡Ašº£¨nWÉ©!µp© cílov®>ÕªI8©3!ÕÜ/ª˜ JÝÚ?|iî$dØ_ìñ(°/;Åb÷üÒ49Ãï•Û%X¾eÊal¥Øw픋v_Õ̹ûœ@dr®Ëp.álä§<çêÉJt~½ŸM«÷[£†yœ ä™û3¹î[Î{ endstream endobj -1982 0 obj +2315 0 obj << /Length1 1614 -/Length2 21998 +/Length2 22531 /Length3 0 -/Length 23612 +/Length 24145 >> stream %!PS-AdobeFont-1.0: URWPalladioL-Bold 1.05 @@ -27295,7 +34294,7 @@ stream /UnderlinePosition -100 def /UnderlineThickness 50 def end readonly def -/FontName /TVMKYN+URWPalladioL-Bold def +/FontName /BDDEWM+URWPalladioL-Bold def /PaintType 0 def /WMode 0 def /FontBBox {-152 -301 1000 935} readonly def @@ -27309,96 +34308,88 @@ s(kz Ó¿EâLGFô_™îßxÄiÞ¬‚Ç5, +€T üBÁ)þ¼‰äŽÿ©.s†€«ÍLÁUt …µÛ«€Ã±$)Lè¦ä xM¹ó`Ñ0õ„ß^üÓº[—U´[éѸ\Üî²™ P0¢þØ“ ëއaWºî³¤ê;ü]´Æ ̨۟eÞ&§6È&þ~sD3ÈR¢œ=‰çŠR–Ck 
Ê‹6ïá>O2‘S;ì—z250Œý"¯²)ŒcèÞd/ArÿÀÙ;òK8+“f^²Ñ9ÀEŽhpFRtgŒ,,zÑf€x0±œ“5¦z êÉl“ʢжˆ6\¨E+PõOCêj“ðˆ6=,¢œ3 .óh­…aD¡ YW0þ„šÉ ‘†…râºGüÆßÇSì£W­=¡ûø˜¶k½¦cnÍÞ0¨7 ?Ý›ú`÷ߌÖD©çì«D}Ò£€Ó$‡!”g *ýV¬µ#ªHô;(ö˜èxSG >Ú|írIúÎx„:[[O£eÉÚ>8‹^tûíÎá8Õ…\-áwÐk(lÙ.9FÌâ&S6¥0J½r‰#D‰’àÂÿ­^b2>PèEoó¡#­C@—Hêg @¢aˆm°1%-½~@RÖ@,DÃÜFXÅP¤c*ûöõ®~G áf ØÁ »»- -kDMu¸‘‹¦Xœêà2+m˜ìQžMý'¹é…õÎõG’¯j*4×<#BL’І²l´N‰ ] ´ÛÂK’[ÁŸ^G |ãhLV8œ¤6K[ƒÜÌS¢…>ü‘¥êÓïšÀHÛw[ísýa…jÿ_:@³A"ÓD"ŸZ0­†äf„Ú’æ»ç®¡v¼rãVkÖO¹C5&Òl€<Ku<¾u{ä-›¢z’-ËGFø·ª“+PiêáçCñq@ʳi¾Ž°9h•BZØ´Üch>ݶ¤7¼§ÛÑMÉ -9³ãÀ»Å Lƒ3À¬}1W_^1³†º}R;ÒXÓs)qP¡½¥d¥ˆª@šOPó-r8#ì•h:kûU¼öphôu`p±)=T•·™ÅìÏ/y<¦h>Ф‘«ë@¶À×T­Xûù&©Rt´ûõƱ}…ÞtŸ^}’‘>ñ!I>5?ù/²Èñ•ä>á’) ŸVwǚČt±¬¹¬´¯†7ȱÏi‹†®]x¡Ö´ÃÝ/X˜!U+–_(¹ÇÎTåþcmç^ú*·µ!‘½¯7›Ñ¾©‹Ø˜Ñ 5ÕãÖÛïæ“Òwù?z¼ÖžnkD{xKúr»ù(ñÙ8º·¤°Ÿxl‡uÍgéõ0_dŽ Á%et®ˆ'3y8Vþ»8ñ:¥˜.qkIË;ZPóÉü©’(fœn œiÖO<½SÞÃxw±,ñ„Ê^~Nªå±ÊåY×°$N¼Ì©ÐÇXV+)2O7=eÙ½¨ “Ý¥õ núY’öÛ3;CpKý¶lÎ#OÝÚ>¡¤4÷Dã¶öô’øÓª·â’އD=oàiuˆ8йºÜ6V£¬4ó¤ònº.Zó³‹Ó†à¨³ä/|¾Ãfæ An&‹0ß&ü8‡ –VoIÑq'ªyÑwZBqÛ"Ìú%cœý­«²nn_ `5,uZ"uß»>‹r—g;¥HLÆÂ߇ðñǘ‚Ö—€÷Ëå·–ÝðifãÜæHÀiÉð¢ m²ƒéI§Gò°ë˸$eV ÒUõÐ -ÉÈ.cK±?ÇÕwžhY°æW’ £G±t™-¸ÕB¾Æ1È ó¶ó£Ì,ÃЂçMEÝLÍkGÍz´m¢™1pÝkøEMcÖ׸‚² …#HÚ…_ U>¿a‚ñWGMœî -åñ0Š#ºë$kaŒ¾H"^@¨ 7œ–´dl3YMŒáov©>_‚V:÷8 y§PK¿[¦±]W6‚=ŽìПٯ#5ÌÜP+N|ÏQüÝ;ÞUO*CÙ$_œqúýj©¿^†ñíÞF]~Îdb×½0ïGgPêâ(Cfƪò²’|ú)4¹]~eT€}Æ’8“KBƒÂ3ÈtOÕì„K)f÷/Ú#ê:zPûÅĨÿ® k•9#&w@“{Zó·)µ~at·F•2)2Ã0ihª?ÏmÏë[ðò2•ϰ`\¶PËVyÅè a¦Qþ¿Û í¥“]ýõÁVÁÕ 0ƒ¥L9{ö{"ª%º€Úû#€Æ©N3g|›¤s *Œ&ÀéÁ;t“oDû³››vÿÃÆ!¯³A)/™Hý¯Q5²3(@0nÚóA%5Áã -ë A’Q Æ6l`}­î>–½~Œ1ix¶¹ç3㊓fÑ•Q¶×?®pN¯5/Ñ -üðÏ™$ËfE‹ì²±¯fƒå VÒ›JC¯ÁãS㓘]k†*pa…§ÈÉm‚ÙœRÁ€pn2Êt¿Æ_K‰Øº•®)TyonŒ«3Bt æ´mèO‹zäÉäfbXzÝBeq¡Suø©±Ž;Sq‹Ê§n¥ºü´ñ»¹ ;°äo3‰ ¢3kî(}Oû¹Š÷Çkˆ jGª,ƒÀ­#7‹úXª7î ò’Þga°¹â*GÈ \ƒJ³‚SŸ»Ø\âù—å3Íó¾:ž%ÕÆÁoökYב‚Ó²5´V"fA –¾p2ºBÞ?[‘ŽÑåWX|€T­ä±–ÞžaÂY©¼²wÚ$KýȮ왆IC@ÿì€+–°XM þ£8›¨dE ™:v'½&•Ë è ‹íŠ:©ÁS&rÜ”·µ}N÷Ť§ø­æRB€z Ö›\cW0 kçù@^æ²}¶¸´Àý*$h!çWþŸ‘úoóiQT›ºnŸfµ.Ú®¨Ö ×–½ë¬“•GçÐ5‘—ûb®“Þæ@zjyö:]o2>é*£FÊù[fv7s“‰Â]÷°±‹ -:W‰z­ÏzÕº°|ÁçîNù1›ãá‘ÃïƒKÞ^ÓD÷¹uyR±t?ðƘ¨û -yvvýûAæÃ)¾*k4µE~nHXè-$Š n€íÅŒwxUîÁIù\…> -ºIuž„ó®—D»‡D“2š„(WH˜¤C`Àb«m‘%›¤m­@QX´xÞJÍHø 
H¼2ŽH¨~P>c{äCzÑsþÍKnðéÞ¶uÑD:¸kUZµÒß°ç¦Î‰ê|’àÆõ’·Kô‡Üèz!Ï{ž%–í-ö”è»e/’ëÐõOTª¢ü:Z—¢Á³©2 …… »³Â÷O] Ú×U–*lGc}d+w•ŒK+œÝ‡ö5Ù3¡ü!Òžƒ®Ý³Q¦¶{¬ŒZDÕJhöç÷z]r{þ©))ÿÓÃ#"Qjg,éiÅOݼ -Ëó@ ±ÈÃýÆZ!àe½Yb[¼WÞxí“ã‰êXZw¤nOÈöwUë¢ñ -->Io£¼;fNÓ²%Z3‘ÉÒÔ‡Ð3mø*ĪÇNA—ZAÀ\Dãæ…9HžÆ$ÿDÒl7#9¢¶Æ¢ï?õìnLs´…eÿª -jåw‹*˜P‹”ùÕáR×mžzìQ_É_A™í·+?c ¤¢%¹s4W½¢0¶6âO”5å$~ÓªV”I„hŸðm²P<Ïó':|Íó¶Ø-NŒésÒF4/ì‰ÖO9®½\)Q±mtzûœd‘õS˜¶"T&f—&ӵǩ5¡Ujƒæ—z&.Û1V’Ö¾)KÎêd·³¿ÜÍÎemô²ô>~V\su2±k +é•EÄ8fv_ -ëê>eAž2þ Ïó.$8‚ÈçSM¼UÞµ¢a·ßBha=¸Ò,i–˜?ããÓÙ–‘ytûR'@ÅøVz›kb,Ñ•í«F™»ê›[Šlpõm¿Â™,»ò¢åª¤{é¢zt‰B§;Fé¹$ -1å†+B›û3Ö‹8â¨lH’EhÁ G}ØQˆV_ Œ\ŠEÆàßÓT2¤1™kÔX”¨ïe ©I½xüW7¹ ö;ãö’E‘Tâ«,oM‰z‹èÈCí"HñŒ/$f‰Ö¸z¢÷½]aÉ¢æºOw 4n»¸™ñöŒ²›O¥½ž}`ü’–Ëlyb%¥—g(1†ɫ@ül3'Hঃ¯ì>«rÝŸ¼—ëmÅþ*©H®s‹ñá["Wúô˜ÆòDìƒp%÷팱ïó„B„˜¼m¹2 ×}A~,òŸÂJNzñ÷E`#Ë”ÙïÑ}’@ˆÖ’VaµµÄü-ØcM lÿ)»5mbÙ›“Q3ð €> »ËTJy©KÁ: anYUª©ÚeÖ­¥>3ë*3p(±FâNpŸßß∔Å!tû_ -(&¾åýC4|®–¥iL[9YûªÝkZøAñjÎùŽFi6C+Ù‚¡®k¿7®ãZÔë)éh¿‹§¢ûÒ5äu•ëi†P‹j™'ëdp{SgžÞ <™^{O“ [ß~½¾)üæõo“̧ïÄšæZCãYõ ¥‹O­­N© žÍ;ñ’Ï™o«|ª`le+ædcöxÁhÕW¤¸¼Å2ÒkÎñùÔ’Ö7÷܋ըé -MKõDúè¨âˆÕÖUO+’(¹…öˆˆ¨†U ”4bEKq”ÉËP«š-爠¹f¶½é¯"çæ¼o71ai­‡ÚG]Ó}|sï7žly–V«ùÞÚ¥YéµeŽÜxhW¹G4•¬DË•@—I~ë8ð./,ËÖÀ©ðQNE,„pñkÛJD¡5„å9K»þ` ’Iží%`ûh+#‚V£ …øÕYmè4Ò¥¶Ljh€½ÕØ]—†2j%Ú#!þ f¢`*Ž_}‹ îe|'8X‡ûzÏúœîßàÕIOÉ—¢åuÖ`kHuåÊ·N£ñ:Î p‚³GþD‡0ŽÚÜ{Áÿ­O9ueÏ%ˆ›pb°©ëŸF Ê ]¦¢Ê.Ôb’= ÙÛHà®2!5ù?•àTgZYÚû7±=IDw@ÚŽñ ‚É=Üäh¤úµÉ8ÐÇÅ–BÌ è+b&W -9Ö7´Áà{qQs¾b)ÁZˆ>`S÷/¿Ï¬¤ÕaÒ‘w-á4{¬ô…nvŽ=‹Â'}×>ÃÇ¡~6~ßžº€eus#!:„ÄÏ -FÓE®ŠïÑ.ÉÈ?*Ecžše¹ô§ °VjAÜ{#}1/«`„ô›Ô{Vàùþ>L¡tCØÀCݼ‚‘uɦVU§ð¿á+@ˆÔwñØ„³jgð$¥ÄÔ !mµÌ8í)šïJ… -U+žÂçr÷²e¹p4‡o|v2¡‰ÏD % »p~´Ž»§4© Qaû¬C<‘Å{óÜ>l\$r½õÅl+öºB?„ÄiSтϖêñAíÈJæã+X«kVà£Jˆvøø;wÑ•ÎIZlÇÑÿÜC0º9þoù~®$.üç¦9íºdpÅYÍÇìP±`% $“þÙ’WNÆ¡éݦáub|óoJR¡~Év9˹ƒ‰]¨÷陂O•½Ps™c–Ê·DÐÔÏ,݈15ÅÆ¥ãRÆtwÚª Ų$™x£6‘츙s`+%ÜJyxÕ÷°í9"Ä=¶ ÉŠ…-°R¯õ¿¦NÉj–çƒÊ„qæAŸM±p¼±×±•CÌ£Š;À‡ŒM.äøÓv’Þ=ô Eßw\jÖ&I3µ‰®dì)q›$·Q“Ä;…Æ%’÷$Ýgº4;ÕçBTä;V¥¥Ü›yôU0\:­?šz2Pµf&ÄðöŸI^œNq˜’ S;ù:¨M!UºÎÀϺoø‚Yª€¡G4C’º” ŽÈûø‘†"ìM]h⵸D“Tí…=xzoÕóPÆSãîmz¬«wvÝö¬áÂpã¬çYÅ6äYá( ÔzÔŠë=„ ˆÊ³ö ÌESQ1f /˜*ý2N0Gâm_Þ€s€d‡Ì¡ <ãV}FȲúH£Ï£/A0‡0eÖ2bèFkHbñ*ÿu)ÞÕ5SxV"dÚ)Ç"=Ñ@ªfJd²ì½m 0R·åŠ ÌÊæh·Ý·W"Î5ªE¥j²«´ß>òåÍ&«åÚ,Ñï 
/a©ñ8óN÷À€ôÕqþ;$ÊÂ$ƒFXȓԖbf^\ã ð„Úƒ+~¸ Üq…ßø|p¹`ã^àv¢ß˜Ì©*É -÷1ÏàÌj…¬"ž,Ú¡&Æqä$?ë‘ -¨® °ÛÂPª˜ÓÐ'^»â¢€Œ pBx•¦Ä"c׳Z¶Ð+æý´¾ZvÚóSM ”½±x˜’o?£bÞu˜¢ÂxZ }·lûÛ7bæŒ%ŸÊÍå3û¸D—bö㣘%»)žÓQÍÌú¦ëû$(6JMØ+¨€§¥@¢M킯2nÏA‡ ã~‘d6‚ªÚNËø‡ÖfŲ̂ß|V5w[†ûÓ¿}:ÑÛ•c€g¾ $ŒŽ²ù@§Ñ† .H/eüØGiÜ3gU;1c *´[‹ÖªÃBB_…Êëø®o3šÊ›>奮œ©í‰‰T™ý&ÙG@:¬½aH…‹<Ÿv]ý™Ö+røˆÒ;ÊZëAҡѪ|ÏÕp(Nîë#w_@#AŽa½we¯@ÔGä[/Š ìúM:X™ëÀ¹Ÿå=ÞB1ÆéÀ:á‡9Åf•ÐZ”‚ÿ­¨Üu„}”­º)R¢".I‘š ÞDk åºò}átCëƒ|X›Lﬕ[ÐOKËYÖýPS†wôiÆ5-eŸ¦óp?Çš -&-ð2Ê ê¾!9Ú_½UܤŽHË˱™VÝòéoÊ…°…ÊÛùÞÝ £×¡N$=ݼ±/&û»œCÝ{÷f]\‡ì]›‚³-sh앉qöÝ]—éïðU°j C á'ŒUBmêN£2ÕM!a¡}ñ8óÙõǯœíBC'¨/A|eâãû­;¼Õ"Îì[|©ìü;iX™±òÌ iÇ]ÚcÿÀr1¦ªÛ ÓµkH\%v‘(T^´É¢@É«æ ÂR¹WßÁ©}R,°ùrÛ–·rùµ%(à—Ö.¼gD¢ÕÊ\ë#@ð&FíÚ¦°E—}#!_§DNÂü¸ñ >=Üæ“±fEÓTåNÖÉ~Ôõ‘b6EÉ’qh•ûúÈ— ¾ ·ŽÍ„n½G׎ÇKECõR¹|s‘‚ÇÆ¡Õ,úv˜˜ùOÜF5/cìä°†ä€ûT’úêHe@=ì|)µÃ>ÞÓwìXŠ×[epïtÀ‚YŠÆâv//tN½®“’%š/Óœ/0q)x–ä'r®6‹Fÿp/ý2ˆ}2ÎÖ":Í}ê>`ŒTÚĪo(ËUÄw:B– -UGE.6†köNIó—oæÿ¬Š­L¼ƒšmªÎF¶<”Üçƒw^E§ë…T…šÃXå‚çÂÞåa{¿ìÊïý\û´ŽãÅ=57xÇÜY“î‘MÚÇ6Ÿ;Òp4=™?"Eví’bŽöU ¶·ö»Ú qØ]o7Ò½éáŒJIî{¼‚Øë¦k¦Ë¶æ²;Út¨¹z?#íû‚ȉÝâQŒÚ ÛÕ "ªò°$sÕcej“Ïmkí¹ïðô¼±U‘M9×UgÖuÞRJqF6ty>7D¤«>N"×ïVóå=szÂÆ¾‹ªé> Ç—¢„µg­œÕ9aêÑ¢hC¤pÇûªŒ-W㉓ˆ~·çí‘ËC#o?T#=eÏŸ‰îð …¢l– ûÞ¯îj ðfçжMÔ±å,¾ï:.I ÷™_l`×R+9†ÐeôÀãM3€i‹c<ŸòótÂö¢;¶-g]©ëzç$¹¹¨Ö~ôÊú&)”>¬¼ ̽…'ð“4"æ&¶äåâÕ}ØÉŸ Â=ÕepÐð‘5iü‡p¤·c†Vj,)D+1åÛH¹°”›|Ýy\Ø<û½5üo<7ðñý»¥.«0ÍaÇDF|¸ê=ê´ï´š§» »jp™w,½J^&*G=Gfr^l"¦óÓÆG(cªi5ÐDÖØÔ‘KŠù£sTÇê«à5ò¡šÚ蘷ðlåfµbÕ²Gq2„h*T€ËÒç1Ù<[p #a;…049]&°þ²f¨.þ%D6öÅòoN(„°…c"µ2E¡>xl4„1‡°º_èwóðÍв¬ÖN<猘gõ¿ÍC~ yÒ½½¦ºœÒ1; øvéš/䥪¨”:½S åœ!ýuŒ´'ñâËTcÓiŽÕ=JÍtb´áµ(¡š.PîÌJ¿ivªM€äû,dÇœN{w÷ÇêÕ…Q¨TýÒ߸"‘Êþ}vpNEâ(‚í´GþÐHÇÚßo/ãò6cÝ=/5¼e¶ Ct˜KŠJåbý5S/Ìe^£…ÞI+²v® ðÓÔ,«#õ”%xDú‚:s;²û¡]ÝðÎgOޏ+î݇ñv‡¦á¦˜ÕV–äÚ§ÆAp¥¿%‚O~® - `‰»‘ù=êS¦†ÌÒÔ1ƺ`®¯9'{±;°žh?Ф¶Qé"à]™5%&i'V (ÔkئÈ󨃫ö 4göªy‰Ô:?ü í=‹7k!ô;Ï.7‘ª§3ÞFüœ ÝRTþ<óVÎѪ¿…¥e&¡S¡½ç°zÀZÎljtx–Mïà³Ïv§Äq·3ă6f“ Ñ’žvîáê˜ko§aÝh„[‡ºøºûl]†Îˆ†4˜BüºjÖ»`"whœ {'ow;`÷8±øÂQg6Ô´û1RÃQå'é“§æRv iã‚Öti7ù¨Ã@N>¯…Oà¼Eî­rUâþ~¦0ñ÷£—„<7Ãf"ï…Á8£ÐŒQÁ5`ñBª–\ãæÏ7”šïú¼?Û⑟ÖÞÖB@‹âw˜Wj(¢ØfúuÍ9­tzéNuîºÒùð‰U3ÅãR´×ÄÁg‰!PCÙf%ÿˆª.uã}ü¹ÿoÛÝH6­Žõ;æŸ;º=ËÈêl Ôí¿@Dðâüü,2»ÜѶËÂŒØ>^6ͯY…Zö[ -’„Þ3ôdýûðŒ‚« 
=îP…It4[ÉΓ³õóä:ûcøG¦ëŸ—¾®+êFcOñvíšYxÅùrùrǧ‘ò²•’Zç:Ô€!p U${¾Ÿ¦$âÛÿÂ<¸žz*m´¼±¥˜¼} FF主‰ Cúu”rù+ßÙ‹<¨ÆJE²ÔÒ­µÎîØ%ÜÌh}·öpAǽH²¾Zƒûã6ØBÏL×È*V¾”ck3 L,JöSöù\Ú‰o錋Ð1^MDætV@ÞF=y³E‰6¹êÁ}%)¥y&?bÓæÒbùs‰'ë*¼§AµW%h†cBMNƒÓ“  ~æåÊÉjrÈæ'ÂB^fdF]Ÿ¥z†òé9HAaÙƒtõŠpvzçÂTÓ”Q6y7„Ã`dIbfô¯<©/享]0€ËÈlÅÛ&ÖÖGºBä¸H䥭‘9éØ,½)óò£nCp“‡O|<`x{]¥|$K%›73|7Ã}ó*±‹ü'žF=<1ééCøÄŠŠgR(­bÚÙ^iNtΤØõh\Q G™§»»¨É11V[컕ñÞ¿e8&» ©÷a 66¡Ž%µ½Íp`jŠ€p¸q}°î{/ãEðB)C_ËÉWEè‡ïìWð.R¥9â–²°tiˆ(½ƒÆ­ž…Ù¶Ñæ2’Hr7‘n(f™“>ÎHZ*êUaqÄ©b£ºMêW -œ’õp2a°›òWI[‘°‹¶ì˜Ê¯·™;§Ì‰{¥$rtÑÆDâBlÉZÄsˆvöõ\¤:+y\Wª‡>¨7”¤¨rbcÛý–5±iõM7°–!- ˆi £-¬Á@Éäêsé^Щœ¶ÖígØEæ/ÉŠÖꡜ'›vÝØëBD„»ö1¨¬«ªg·D ƒâ¯N7ÞB )œk(¡p1®/AçöïQA"4™‡=–âÂÅg5%>eÖÕMÀpü9áÕPRÈÇ ìƒ4íz鯓%žÿÏ…o2g¯(кü§÷çâ$<2Jðbš&›Ã¾ˆ¼oÕž#¿r(¨P«_tÞ(’*¨íO·¸ èu}UCòq#Ô†'6ÔeNñ"ÌF›9ù[×íÿLÿ;{¼¶ìCOpTq]Õ:ê›?vÂFS-¬È´YóqõöôÑ7¶lZ­Ž©A$½Å†õv{g„APl™_Šsï˜oy*‰8IhhÒñø^ŸnSUT#ÔÃloœýiXXiMô»4Oi•ÿµÖø»ß¬š>¡Ôo1=‚ùúÛ®Tš"UÇzß”emŠx–†Fà v»þïý\ -Hkµ‡ÖeîNÂà…>ñÀ¨J´ ƒ÷MÿfÔÿûî¶“OŽÅ~ß9XÎTЗšq>ŒÑ~<óú|ÄÅ©7Ë¢Lÿç©Ã™¢VN—9‹qpÙLßÝñ^2vÆ-ìÈ÷½p‰ æO°*Ê]ó«VUò¤k–/`’AjæÌ( mÊ/¼³¾[NµñjB¦G&ƒ·»„13óˆaòÅ\÷æ}Ý;¾«‘çÁ>ð›%Òx{l7G‡*±1LPòç…hTn‘À‡] <±[ˆ’ÎiA±k°\úT ¨ -JŽÒ³†ã©:Eù8 ù0ÿŸB§ŒÉ®-4‘þ¯SL¥(åUƒÃŸíE÷µ J»~­¾õ@-sûe[›Ä¾¨Øú@Û©-Pܯ„@Þ§ÓØ7 -‰kzxm²+ -™g‘hÁ‚5Ÿkò°~ªââAö| mpˆ ‹,‘½}¡]j ͆jÔ`N…—ÿC\&¾%JçÿÀo¹ió™ép€{ì׫(÷¿2‚Õ$ñÞ5ÝØÕÃÑ-&Ëó,»·êË]6C¿&ÔáO•±•nh§ý‰ÊÅÑVEÿ€nP(4G]xÚA¹ôͧÈÊêOîÛ\vËïòé„ê”Ô9i›á—Û™DØ[‡ùÁΊäzÖ£W'ç}ÞÔºÛ_ á§n8•¬ -ÿ–S¼¼|;€ˆ® ŽÿTŽ™¿6)Q“]éZßâ«÷A€-1ùª@f5VO2 mdn:“\Y·sáóýÇñ›¸®?í’DM`Û•Iß"êé²5êÔÍî–å“¿Üà£é"*\¥0Ò]¯Q¦R6µüjBÁÏp?ªVxš|g«7Yx«[þî1íŽÕO¯ï§ç9îâì†q!—ÂÀ’êo7bW)wRG¦¬eÞûÎeJ"D“òò¡°Ñ?Ý÷æG®(² …Ü®¸ü:¨ß=X†HGvj¹?r­i7Q¶¿Zèº_JáÕ?òõ.K>¬ŒvBÚïgÈ ÎôíWþGåÂøw––ÊWûzù„Ö¼–öô•*¤§© mCœª -aA…D4ñ9Š]yóóqJLÌ/ƒ°ßíkhºr¯†º!äÔdŽGApX¶—N ÉA@@4Zß_ˆè°tuž^÷ QMR N"ÚzLpPyrl/>äÅ7X–TB×gª)éVð*Ó^uÓe2<)ª$"Óv/r}HX†óe—ÿ1¡•yIÏŽÃPFñVoY^d{ 5`àc‘½é %Û›FA¥¶¢=¥F.§Ie¡QèBƒÂÿÉû¨·ùØÝ|¯µŒþY΋n;{6NwÚ0ËùDƒÁ×ÅÎ×W#‘wl8M=©{üsŸ÷ç2Y–kF5ÿ8˜|ÝLÕò´Ú^?1a'a}*Ḉêw"ëB_9g„(‹QŨ \r8«ˆ>>¥™Y£d -9ÌÕÒ ÙÍ ŒØÍ¿£Äs/Î\«N|fÉÎFœ6LiG¾.d¦ÚU¶Ô@¥–ÊmC>LñhX¡ï€äÊŠÒ˜·\îo üû(b¯¬Õú¬×öd×j¯•ÛxLe`=ßÞù,¹É¢_f?j(¢JäMxeÃÐZ!~ÚýØz/ÁZeÃõ˜"ÒåXw`Ïþ¼W9>IØ Y¬›pj=ó.õ»uäÔȨ’þ×¾¯FÂWv\Hûv;®ø¸g 
ño®ž_ŠbKÉaC-ß° -ÙF˜»ú„°9ÿ¥q=éÄb;˜¸Ó·~•;6 ?¡î¨AåíM8g›îqZVõ½OÖU õYñ½†â³ -ºÓ{b|ñÇIþã.Ò‡ AŠ-Ã>ì¼™q“¾À²žOC°XM(öè ÿ•:Y@àÌÞ2ØÝøý€A©ý ΢ü°]ÒQ™®4N_ ù[}ïcBu{hÕÇZ1?rX É¸©ãˆ44YêAŠÑµXM5¦g7Z ß6Î2Êüg±FÚ’ôÚì{åqÀ0½y‚³&Ö5¢ åXø™|å¶D*6„™±–‡åñµšgëß鵎˓9åE.³lÈ÷Ñî¥Ú©7z¥¢Vöœ”P"E}õ¿ OæBr ¨¡ÌyÓxï¼x„"upœ±/à>‡%¨s×½ÏZà45œ‰Ç;uz“íêÏÒIññcºô럗Ïݦ¦¸R‹Ÿ+¤´ün -·%ôbâô:ÛN]kÀ;aœOÊ\­;a¡]cüY–äQ._R¯Ï‰µÊÒÎÜ‹Raï[Ñx×HÃÎj°Æ¿^졺A•Äp›¹Q²HÎNîz×Eíó›Ñ26{7¸?RßE'æz†ôì\ñá;ÁKÅR¼"$ŒlfÌß3Ô•–Ù€ þ¼7‚LÀü`֛ϠÍÀ^EÕŸó×(3Üp,GŽìØÛŸ;¦¬îÒ‘6 ³if˜g“ÛxÎgËæ’Ò$—ˆÁ[x &Þܰ$ß]ì«§­ö‡°ŸvÉ\/<>ôBœ^D_wÌDE¤¢ñ°qB‹ëã¿áØò’вu|0‡7ÏQÜÚáÄ´È<î‹\¹o=š4‡dTþ¯ìÇ—³ÙdA»°r¤îœHtƒ ¯Ebœ]ͨ4 /Q6kAiÛ¶ÃÞÞ.Ç=äŸ@ Ü~.“u’ŒbY9 ¦›è/;çàyî5U îŒXâZ6¾`¡s“‡W}gÛèð}Nch—ä˜Æ*ð ß7w§ßòéq-øS;+mB®¢Šø¡,NbgÈêm‰`z,|`K‰e±–ÏFÍeÉaß§©×X -Á[ÈfVl³,?ºšæ°®Mƒhà›ªºÕÞòÀèTîÍ Û 0˜3/ØÎl -9:±½Ë¾"NÃ\CÞ ZOÿ0^ÑTî3º¨íh2+ƹ†œã¾n…Qã:õ UV¿÷Ì„K+q…˜ð‚\ÙtËz*)YÚÊŸŒh|k¾ij¦s-ºœBr8•W•¸µðvõö×s ~öy -÷¶—¦ðz©_ÂX¹ÃÁxf"b²bÿÏ+{k<å &NâÿuÉΈÑ}L0i{€™ò<½Pv'ÚdìPP:øø —Ï%ˆñ¢KÛ8´Ì`dIƒâ&RR!žÝ)˜IBÎ>ZiùŸý÷}…Ô37Yƒ¼„rù¯?ùéCnˆ†­·¨È$#˜ixWh÷£7!õ£ãª/n>ãžÇ¬o·@øîÍ£6%› ÐË?(§Ý˜•H+骖˜¯ô&<`™¶Ó´K6a§”-GAÚFâ–âÒÄMn½MŽT.Ȧó‡9ìncÜK•ÐNS÷‘Y—XËZM$Cë: Ôº¯÷ x¦Â¸˜¯·Kïu›ßͧz`Ý%i¼¹ -„ òcF*U ê’‘Ù2'þc¡9`åj¬EÊOlÑß\ ©Ÿ…Ï—%zXrpð.F[«œJ=g° ¹ÄìY}®<±¼ÝA à…z´W~&™ñZçáþº|áw¸f­{§µ}Ù–[[B–*I“PepücØ.è±§ÑùO~ÿŒÃ,M1ƒ’“£¬Ñ’­Ê^ÕÌ«Â˳ַÁ`‰ç^gŽÓ™‘Uä¬\šcYE­gþ7¼slõ=ù­†}Ù;h»ü·x³v]B‘^%çht°xLžðyT'² ƒEî™íF€3yþ­|žÌIòÀríÞÅL!WsÀÒ‚ÜH¾Ò)¨óXf»xTH:lé5«ÉU¸HÈüÓ¶íŽæ=‰Ê±¹#Öp[@ˆŸ7oø?¸¤Œ`<×þŠ…ŒÛ£09‚wwUjLQræÐ×Oô¾¤É8K³·I#E³ZQ>áâåhÝÑ®™¥ºkKqÞ•4ÈGÒñÎÁáPõSÙ¹šbIgïÜnlë€k>l“t¶§—ïiÁy®¨"Ão,) èKgšK-lî}ži̹ª–ç3Öµ¥1Uò( ×R¹ºMQvÐzCF‡þê<‚¿}©ËU¹…³v•¢¾ˆDý`\¶o6BiáÒÄŸ™ê*÷Ñå앺îNéÄ0gŠÖ©fœ‰Me´3{K¿;&«^ž@oÒüVüÛ¦wûç˜92ßòêb%k7îÊZ)r½±!Ã󣂅ÞXAop…†ÇÙÜTF¹~:ø ëM¹QvMÏmÃeȬòïjaam+<祒ô™¹.ѲÝQÞX^ —,Õ‡£pðp¤|D¾¿Î2ɄØÛh U:é,K…uͤLÐÓ€S\¤¿‘‰]ûâ,©³32Oc¥/Ÿf±†ËuáÄäÇ¢Úx¾øeÈæ¼œ„)nu -~Àg ®|ñÉ\ç çë´VŽíCÚ(ájòú$‘p$Üšåž,°Ë•XIzfÎrIs5Cv®÷7JáÔæM°CA§ºt]=óÝ Z°(òꡤ{.qä´ñ¢­/,»Ñ L­7_tøjS¨¶!S?j8èº9àunµ¯Uu"]‹äB3äåLŒUÊwª 5`‹²õÞ£*ÄæˆKþfAÀÓMÅ÷ë7•G±Šs){ò°Š€‡Lð؉(Ga°Ú §×=] £µºâC‚Ë:¶H5¦1qÛ–JøB -zV=y]Žž`õ".sCôª_ìêÈîˆÈƒ RnÙ&MOU¯èªEˆÕÞŸ3DË„LÏð¶Y™õ¿ÛÎ^@qªå 
ZÍË!c™b™ú%çS•ª¦Â"­mÑ^]ãf<­pÀ´>>Ù“u×uÓˆ'Þ†È@Ô!;ñé`+û!aq,b’ª¨Å}Vùu…‘êÊíŸu4¶) ”¥ì, -ƒù±ŠN+Ù˜/~Òói“9æl¡MnñJ •gF¤ØÄ“¼ã·ÖÅ…÷‡©ŒPù‚'fL8§M’:áÙÃ131Nãêöô’øÓª·aã¡BÙýGÁ%”â“ne¼*4 -•ƺ÷ÚÊßD2vɧ©{‹ u]=áqéLÖ¾Ö]Ê4¤¦$[Xò¤‹ÄÔ§Vùƒ$æIùü‡t\¯;<î¨MQæÓ×oëŠ×‘œ;IÃÉMÚRîI³¾œr$šÛ!e†‰­'æÇ/mÅy’9a®\!´ÔÿUg A¾·=Þ¶ŠM ”ÿV0co>„(h—(GÏT)¯¨×¶32±=-™Bú²-fõɈ\ª9l%É£Ú“Ö¾|Çþ}‡8tMˆôvzÝý¦¥ÿn:O¯È)N„»žHí -ÈGÑcFÚP•±^×|b‘±! U4™–?ûà›/¬µT=i?˦[æUÉÀ0C¦ž³XcS.è?®48‚.ñÁ‡i÷è†S³ßÆ“‡—ôÊ´lHáð -o‡ò'kDQ°sÿif½“ ×g3‡ãn¬$sAJ®×þ*Ç%$1o‹UD ì½|—“É£‹ß¼[v«§@|“ØáX Ë3©bt‘ 'aO=®Ÿ’ÕÅUGc)ù­”Mò|ü¯ŽŒa7lüÇ,å+Ûý»ë{ÀZáP{H†ytÒsù’5üŠwõ'4c5^ŒFÜó¼Ë¨ŠŒ„Í@ƒ2ò5~#ÐfÚ3‚·Pš;Šd„ºq>ŒT“èþJdš ä¿jáeŠUä+|œÄ™]k­HÔ_}ÔwçœÝ7ý°q1~¿…áL(7iRsƒ‘Wvœ‹ê}Éö˜AÀf‡µ½—?{ìù B15Ý+Ž8Ã*A½j ²æ„íøìâ¹Lª”ùè \3x'ªEbˆD¥Btÿ.Z‹Äúµ#Óž­oþkÿ¼JéîJð@ÝæØá[)\ -hj:úå¥*ú^_8Võ;àk™"äL–£€FhèJÐ툙ø·±»Qá@¡I-o W¾ -©eϼ£êhäåéù4ŸKן‹5‘ÉϽqk$FŽÛ2¥ÕlEÆ€ù´ƒšÞ#8§²Í]áîÂ ÏØvÈ4ô õ,„S÷‘=Òɪe–40*h˜|{±{ãµç¦:x"«V&$ÆÑ稜eX eŒC½ãß½é’/+¸÷Y;åˆUvÊ{tƒ)N4;×ÁÕDý¢NÑ[¡Q¶F·¬ÊR¨¬<~«=nmÇ÷¶¤C‚®Q.„6ÓΗž› s˜E2&¿{@¬Ä‹€Êf5Hlp[Œ -'}Ò#ôoÛöØs ù×ùНk#­_Õo÷+ Ÿ€UÒÅ}'ÂL혊#ÒˆÊ43_³#TÝJ¾‹7çR³ÇÅÏþòñøm5¤ šÏ… Thû:ɽôS­Q¯ã}X¿«$î e£ñQ §^÷GŸéÄ›öÄ8c¢|7ÜÉÙ`6Û9T>û´íV‚¯¦G†¥…eHÞtÌvmùÎZ^~ic}læÃ½²ÎÛfcË?njj¥-µ–W’Šºù$o| ™Ü€ýè9 -Ðûÿš¦>,ë>J[Y´£9^C”(ÔÇFµs.»2’˜û~×i Ñåsö¦Æô­FM¼Ãˆ  ïöoݲq £ÂÜEcµRâÅåÖjp­A~rA›ÈTŸ£Íý“Ìi‹Ô¥)êYmåõº$m¬¿ ²™óŽùŽ÷ï¯:ׇ“¾‘ª£Ì_“nE.>/ Ⳙí7f†Sßv÷ÆÛœH -übˆoª³,ƒxÈñ¶yÿŸ‚àÐìë^”ó­ßWĔѽà³ "ëaéíøxýÅ¡ ßúñp>: áêžVUqIUG¬ô‘qÂ+âsa•´ØŠÒ›tåʦ˜zëW…ârÎÖ $ªó¥$RTH5nXÁ4Ѐšo¯†¢I…WÖg€s3´¾4i•åìË™B©7s‘ ¡ŽÂä»MåÅ~Ô { ÜÁ1¥*Ž1‰ˆ˜’¥:x‘ËTýhï]ßd3¡*ù»â©­ŽAQtbQÊ ÁÚŠ/M&Òùž ÿÎdÈü+èψؠ«¶@r~ åv/÷Ñ2 Ê0ÓxCB¿Oð±¥<Ö3zÜOWÌ÷¥p-´ÿ žÄNˆ%3p–ê‹Û`]­éÚÄUÌG³ŠœgðÁ&ŽÃ3ªJ" ûŒ¶H:e+¥î\5]ðàÊ”“¹³=Z¨ê³B)BÑœÇó¿æ~I§èøþÄðº\zwóü®PŸ+Ö´âµkI:¡»® ’"˜¹û„¯’)?ÞTÝ8“í$Ýr‚0qíÇzTÉ m\Q‰W€ZAäh -Ì£œ‰7_ÀX.!¯_èdü}gSTÌùþëR#êî…¸e}Õ8㥂Z£Öx_ùbÁ}Žt Im¿.¾:'éC:FÓ0@µ* ÔØoÜ$9DƒêEÜ^e2Ê–DÙôBü·ù4ÎñÁ¸¿…’‘U‹É[6üýüƒò—„›ÈñV`}@H¾¦Çõ‚…+À׬n¹'ÙœAŸw,xp³%ÑWõ®59 é»ç!œ¾4 ?„#V’!»$Ï~ñÈIØþóo(ÆMn>ϯ¾(“å"¥ÍÒh±ÚÁ¥pÿ‡m¾Î‡’cc\ˆúŒ -[#´óÑŸ~h„ð~倃HÀˆòÇd!,€+{òöîŸs†{ls}¢òf°ïÇ««æÅyŸ7¬ŸÙ¢vß…,w”…bݼ+LQYùÕ+ÅŸ1oœ‚ýgnøœÃþš:º;Û MŠðÄB´‚­} Ä[(EI:‘Ä|¶¥ê%C”~Qj¦µ=@™œ­Û^Œü&”A.MÆÔ™ê8àì·6dX 
ò›ýñË!±Òþ-öúpÝP -Ú^-³kÎ3Æù§óå®cš1ÑakÉxÅY8$\Ñb<É\¸çDÛÎQ(·½¯H Ðn -q·žK“+‡MyÑþPúþ%£=Mz„KAyÉãŽ.ÝwôwÿÖnñò(ê×¹ªöPyá:YNqó‚Iqй{ƒ€´Gf~§w–Åùðâö‹p„ŸCJOtTŸIâ2ûµyûLD»j}êyfp9x‰ÏQ{ò]Êø‚Óíù· (3¯Ün&N].Úáy\.J;²áBž0嵊Ò]¥3 ª¨¤#í=H#RUÝ[A?«5×gˇ”ê¬Ø‡kŸÙKã¬Zð²K€³&*œÎK§u±!L3æ—\åLoÔ116¹æ‰$¥aõΣRÕ:V™§íŸ€*wisçOø%^ðç-Î\ÝîÇã·6nïòñ i¶égƒŸÄÅé§?4eB~ÈÓ$Ó44œîCõ-³ôÍ®%Ô!déÿýÞi|µþ/+™ô=ÍòÿGÎ&à=Ú!R0q’å=Mž5à‰+9ŽíÁ:+Z纓‹ñ¾ÈÔ™Q.“†\–^>EÄkf,9AòUÌ…êÔ4_Ä R#þþû¯°&Ü‹Mò•pVü‚^øI›³(>üéún¿?„š6š¿“Ye¨Ä¬¯Ï‹ý(3dæÓÑUág#¿îµ¢Úœ†®Vvžˆ?B• ÃoZ:Ú6®ÀuÓdÍ‘p>g«Ïº¼ì¼&v“k¸ˆ5O¸æ4¯ûfãˆüþàðHIÀ¹Œõ~†²ŒŠTW0=1hÆ‚y²¤†O:ÿJGбÊ¢®«ãÌ´_Ð?B×\~2BÐt¢jwË}f»†EpÿÀ¾Wcªjüçä9Ç’PV],G92"~§ !æÏKÒ§¶Ä!»èh;ðºmXÃzžª‡ ¿†Ýxõ]N·ŒGz=?„þT«ˆº“it Ó!^ù™¹SõÖ†ÞÛSµº@Ú*ñ´NªŠà?Þ «íñÉå]”Òï®^òS¶mõ3(AÙ»#δXúpmn÷¤´‚ˆ–Oõå?%ƒ™sù¦Ô¹Eÿçù$ü÷²>t›Òä"š¥à1;å TŒ$ •~k®ÒçX|“*l%ª#ýÿBAX³…®ß @G¢ú»z™˜ö³yè(WÆè÷8©ÄA$€!`ô²,Ö4ŒäÞ—xłɪ«‘"kZh- iOf¾Â¬ñg¦â‰Ñvî:RëWp -—E]÷mk’A¿ÖÃ!«ƒúêÂŽW‚·ñ3MÊÛCÁmÔD“ÕiÍjb™šm^B‹ûz¤[=ÞÎ÷Ž0J;Å+>ˆéÖm7\§²Ñ—yAȆ†™˜ Öc©Pdd)´à®%Jˆ˜ºêÛßCƒì¸D¶ÒU -"Óü)OFF‹3_ÔeѾœóFƒo5ý9ÿ—2H{Ü*¬ZŠ‹<˃£Ps:»; -Ô½Yðt -Š &WxÖ™ _ãqüñR²†Á—oòëqðÁ8p8¤Êiøö¤ŸòªÍ^ë3|î3ìï†ù¢Ÿñïû«cz½™71à^œ‚w{ÞA("S…kÙP2ø=9r¼·Òì@ëóÃöûç ʳZÂÙu -¦Hð@ûc¸ÛË"ˆÍ¿ï ÈûO‹ÖÃQpzýKT¶5žœõøÕÂζ¤LÒÏxýj±Jj R‹¦'‚ âçV–¿Oj:Ú¬+jNvV­Éu_h¨÷—8Û ß½ïÇ>9`Sh¿¿ðï;x_1¬TéþuÇS~Ýb®øX¸çO¨E"Öœoë꒢Р)”L7Wù!VZv¦¤Ç±W®yð«ñ ñÒ⥕dèfÌ^È!ÆÞnÒ’½ËQ¢õfy@uñ°ç?ÓÂTM£|tbñ xHå^’ìÙ¶#¦]Åuþ\ʆêßÜòÃQU9›§g--Ü›Î/zUÏ0/<;;§]2@dÉÖÂyýnií ö'¿­r­ï Â×ĈŸ“qøPÝâ­á>¬pUIǼ5kd õ® GåÆRàâ«î -‚ÆùÜ!›hEñ}¤ttϰê4¡¼C´äœ“'7çÊÅÊ~é—-¤©6Æ>|é¿¿”’nnö³è>í‘FJ㫱ŸJ4XbuìÓä»PDèð99ťȆ–2ZÅC’}?¸^—–ÝQ%‰‰5~ÏØ‹—;~¦¡Æ#“ݸ^'ÆdZî]€Ü ÂOêiöFéuoP´1ì •s½s=Éh[ãÞ‰#ÒþHþ¬¨‡ãµigH—ÚH‹GãùCt¾Kâ}¼j]YŒñª06)8¸˜ošYjKû Ií•Ú€«´`Ì"=€£ÁŽÄP=?Ëþ„¸xfȤž§»Âã™™OvŨ¡^ÖãTÏóCŽÓ#òÞ/‚,G/†Àv·‡ZŠêÿvyÈç H‘O)Rv)Üêð'©•\*¥M‘É09ÂM1uáóFÂÆ°yË_U[ŠØQ|Å4¹‰Ó ,÷×»îÿ51ÒL®'°é”¡Ù—ü,•»3_îÄd_à—¼¢Rª“¢ÿý÷¯Ý¹ç\UÙ71×™œdI°g Çà±iÊòººÒµw,Äç;J…¥Ë”þhõ'hÆÔ¿Xè®v0æxa/û°ôBæšQ,fßL—k¡‚J&£‰€(,qø(ÆÂ=K¬;lâNNö‡^>ò«9*>:¸ÜÙëÅ{ÿ·qEÉǨˆ·èv9àÍâT´¡šÊ¾Þö!QfÃE É”Åh=ˆ ¬Z “MLØvZrò˜ø“E›2Ä—Ô|'’Îf@¡³çé6“‘DüÎz¸ñ’iúÄD‘1 Eȧì"“¶ã™ßyV –u¼ºÑÒ"ŒJ!ç›±‰û½¤žÂWeMî÷¿’:*~ô³ßöR£ñÒî¯â݆§T1…L4;|({+Þt´nÊh%»Ú'àû?J]!b 
Ü'dÁ„ý&¢áúVi³T3pLËó{Û"§/§{7M˜â\ïµ rgkË0ØDç]™Ü]ðh¿w{ŠÌgÄÕœVOêç]×îöM³ÒI\Š -UÅ­| t·“åfkÍ#²¿Yï%”«7òûÈEd ¤„¥fw¼Çwy}»¤”!|žè9=c©Á1ôåaW£ø©#×cQÀÃZ;€§ ÉaR"/7@ðõmDCZÈØïªùr¢î‘R=s€ü5cØŠHë5l÷åИœ"h´; ‘þ¥Ôïsòm*µÒ;b`~ÌÅÈ–ÜTx=ÞŽ:ÓÓã¦+Ø-R¿å|püįbr´®ÄÛGLj$#‚°ô-Ó|>R‡Ï˜âá5ü*räPM[;aS,@ª83B$|_T¡»„|Pž¯Ç®q^kåf3‚ˆl·<Ç!ÀgmgÝæ½SzîÂ÷Ú™ñ!O~WSjeàWÏçGeyÀë4 ýêÜFây8£ÊHx!øÕÚ²AQƒ³è³†ØÍÕ8aVŸC8³D¨÷?D3h!־σٚþk óýœ³ý/TM².t!¯Ô¸?r¤ 'Qyuë…öë0PwPF×K8[5ÚNQúÿz0ÑÖh¡2"J]5ž‘‰øÃ y#žÙÂ<á˼nz²`AŸµx¾ÍE2á«©ù 3y‡U­Æ<Ö…*‹ù&ºã±‘ÈjB8½®A’ä¶²?Pê­ÿ8Mqà£F*AÑænÌy”vŒ:"‡B¾úpXRIµoئê7’+Ývã7ú—Œ“°ÂAÉ7ïGYB0 8!³êZ é‡^f¬’æ¼Î7ç™zS’*jÏp'âçÀ꺵xb¤/VH[ݤQ€¹zˆšÑ]†'MµÜG =µ·û©3ö*Í´0“åáÏ·rJ®kÔŽxÃÂÙsƒÐ©Ò©*²eÂ/kí>0„#§\}à&äÐ+X¤´r“!>d•“­àœvíýùY‘Œç5>…Óív¾Ä¶,ýgmÔ3wØi½yŸ™ØìÄ®În/¾¨qü6Üœn/”äL ³CúJ£ÔÁæ†VDÇZGhXiåÕ™y—åÐq˜ŒÏO(óõ‡}I|)DÓTéédšé§cÍ4jÉULÎTh^Þ/þ×ôþ¹6ÙÏP7Èÿ †¼ ür3Ç\5Š'3ž.Rá_ƒêɦGÀ6CVuÅʾ«/<è>Ya—áÕKˆò·ì¡»ÏæZ\ß{xúçíÄ<é&SéÁ­Ïúø:Ai’oÊ #}»Õ(ÚÓ¬E—-âé…Ì-¹tf_½ MK1ëXIÝ¡¡d*Á´óŠDÁ‰¿»ç€H ã”Ë÷,aÝ„?Âݯ#±:)€57[íf=%^–ÚxÌUþñîû‘ÏÅÔ ÊÉKêáK¬{7J,C!lÜÛùuU¢Æô’A#ti:Z1!Y1Ç|X5¯…ÌËUÑžJÀ“‹Ý¤°IúÌ7ÿÞ›†ö»Gp×IՄȹ×L+„Ït|LG>?yC€¡í¿É ¥’JgSˆÈÇiZK]`# A­ëÏI­0Øâý1Gœ™dƒ,ÆírzÖI¯Î£tYÝÁlQîHpü­Æ?ÉFÊõl(¸0NDï°ù>XVV8µ[þÆžd[î ˆ!¬§.3C™f"ÇÍ h5æ¥øÉªçgL*GXü8ÖIÁf®¹·ôi<¢¸gu> X©Õ‘KÖ°o‹à:ü¨ˆ—ø‘: "ûÅQRT SAàl¸ôƸ hÝ£ÿÀÙé%ÍME©)…zJ4Õþ™ð¹«êúX&\6à9ÙÔ%A¯Õb·$ö?gFw/q_èõ$,Kj‹æ0·ö¬„¤ ~ô ì´íR;ª ¯®ÈŸ²G·^GÓÂK‘¬ñŽÄËüwçád-`¾9Wláðå¹»y¶ûa׳ æèŠ4’ZæAVŠ!!.Ñö¡³~ÓñÜ6׎5¬YÙýwéŒM£Ÿ«¶Úê2u0¿»Ãæù±¸Ø&!Gïa˜=Qy*;_²WÍÂÜU7¿_è ªð$²¦õ.Êûwµ-ào·¦µ÷Å [§å‰5¡ë{Á^k~'Ù½nîØé8ÅWÊÚ#2%õ5³1Ì'êAðn½Ð@Ï祫ê×cú9»'1výL-/÷(Èœø*)cí% -=ùG{ùÄ)óêÿíñôIDJæ6ˆ7©ÐÑÊË y@{r‘&m»ŒÖ/EßHP8Mj'Ðj Æži1•N1ý ”²TôYß…-ÆC N1J ~”ƒ]"ùÈãÉðŽÃÂÌu{¤iýu×5MíìØVY!:“ -M -F_¾e@eO23Éß4FQ÷ïèÂFi9'Z’dïÊk7l݃)?ñ4Þ2i¡S"Ç({x]çù_*g ¯&¼ZÇeª—c$¨§µj$Hwÿmìx¬'à$³Î¬XrÊôÁpÒG· ,g¿@ *5YÆ®>u€#Û/¿]çè–ÆÕ¢Ã.zGefÙéð~N7^_‘©šV3{¨.¶vMüãy'½Æ8‰ËÂzüº³Ao%.fxÒß.tžx8VêG‹FE^Î QíŒÑD27”OW…[(e6Yü³èàtK—oü‘¥êÓïšÀHÛw[ísýa…jÿ_:@³A"ÓD"ŸZ0­†äf„Ú’æ»ç®¡v¼rãVkÖO¹C5&Òl€<Ku<¾u{ä-›¢z’-ËGFø·ª“+PiEÅÄB¢Iÿ³†È šš-¨ýu»TŽIˆ¥€Ý“ò„±&ÆáÖ;1ß]Þ'8\Í1D·zë¥çÅïÐ9?âÌm&¨x\±¡¹ETØ"IÜy¾™ëÌà“÷ÄõŠdÊû-µIæÑy¦S´x/¾zñ¡Õ­NmP+ü Nl-d«íRpåÅo&Q„´gö•áðE0pÐ2XÆ*B¤éù6]‘Mé¹eó.0záV¥î´@ 
aâÇÅÏÿad¦&ÉÝf‚ÑV»+r +ªçJÚrZ»n–€žÑ@~mÐâ%*ë»—þ^AøÆ6%ªNý!“Á_ÄàÌx˜/Nj¶s·ìõêK.‡6»l0M”ÇÏq ¢6 •à›KùÎÌoC +s}Z4IEŸ;•Øi91_ª Ž`~Ÿ]{»þ:U-ðÒ ПÐÔ<_ Ûìyl>Ò’ Ï~F»FC6ÓþÓXUŸ2ÿÉàl©&F_Â’B>ßô…†|^;¡•¿WGy)ˆï÷4Û0J\ß ¹Ÿˆ$›âÎeàÌ_¼‘¼þêh:OÒ4™mFÓN°§#îYsJëÍ|TÛ©øÄqžoÚ]Z÷ð£ +°ïCQÇÓ–‚Î$˜»¡÷nÐl…Ÿ-PýJq”̪àn3 6ý`ú ç° hµ/<oLµàHX€Ä÷Ú8¨U¦Åµ-xÔ#Êe:zaÛ—~ǽÆÜÊgf‚òÑAä´›øì®&–.)OŠðrºcä^ +mÍÜB‚·4Ú„×+¨/tá>IB´ ¿K ·ËÕõ#}ô¾ö˜ú½¬œ…× ŠâíHCöJú È/÷©z©W5…ÐgàÉùYÛOë‚*‰W8¡œÉEv(ŽK³$ñ!!NÓ11™ïwϬj3«Û’|=M‰¾C!gp3 ukZ¸G°C²ñ”Ï7©V¥˜#+OÉ —õ1Œ–ZÒðåo”‚¸¯š?þj`;Fù_~ gœ _à. ùªEbˆD¥BscЇÑ(bû¶s‹'õÑ›­UÀ¼^ƒP¸ôƒÈúY®è=ZL™2®ŒCñ UÝ'ƒÌ‘@º5ò4Ò¥^ý±'u%¤3Üèýåþ"Uþ¾PÛQ²ÛÂPª˜ÓÐ'%¡Y5‘ºࣔÒB¼°š/¬´M[—m–IùX[H:¡× +q6ïÂ|!mm9IÓõïñnÝJìÞ@Ã#ŽÝ ½§s{·‘Ñðd]ÎlËšmÛƒ$2ÒÄp{êmÛ}§T+¥ˆ9›®ÝEÅÅ,7h3̜ћ°ŠVŸ¸ÒªÛǃ;ý—x%™¿S¥éˆw®»O+z}ŽóU|ØGpG‘ëÐÐ+z;Ž*:E›…2“¢£I‘ÞäCñTAR°‡Ö«ƒ…ý6iVùã + Ái)É>ãA=ÓògöFE·[ïK¾/R?½WŠžm˜î[pºg:­t¦yáUÎx4=üÛÙ•˜tÍb¡!Ëü´éJÙ NÈ6=õÛfâ¡Ùn<…ÿø‰<ÄCTÊ +"EAPüaÿ&­¨¯rÐ9l ±ªá‡r‹ù¹ë?Á ZBdʱ~3Ë»´‹Õß\b¬«Àa ø+â\S?;Ř¾Ó6§?ñ#‡ÅWÖ¶g­º".ÆöäyU +ný<ÍÇŒŽüêN;0¶µý•ès!  .‘~ÃäŠE³86ðo=òËm¤>¡€Ê:ø1.C,]9ð™&ës<×yGÿ#È`ÂL™±Êµy!œ´c2¸Ò”=3,]ÝŽ—»´?ZC †šIö³Ú"‰œŒòÌ7®c[‡g Çùâgj~ù°PºúØ/¡Ç*ëîƒÏÂS6‹ãÅmIxÑ~ë\+à¤2Úg ‘­1Ó’x:ÿÉ2ç(øîôä½÷LS&Žg}ãàìPõåùõV{±ÍqCCþ­…Ͱ»V›µUñ‘Z^ò=ê,ß¾Ô¼|ËU¨O~¶ÿJŒö„þjÐa¾ëâ ÓÃãe$ !Ç:!î–mÀ +ka·,´D:øm¾ÆGÔ)¨ßãm4†!‹|¡¹ß4²CY1¦sXHü¡f°•µ+ߥyƒälú8áe»ÆÑ*Q2DSðkŠ5$q¥ÍÜ:¯4~ð_÷÷þž¤Ê°èÑWö¾Ó°° Çòn`‘µÔû²Ø×£i™ˆzÔÑ-Úä\ªÿÕ[h©‹S´t\Ý*z|äTFl»è±çêX¹aÇKl•å%,¾­¨që×ÕÏ×òKWv !ÐWYðS¼E}è©:­Q”&ýZºŒ /)ó™“n¡â‡©4‘BÖÃñüNØT‰e,!"ïš bJxœÎ¥«>1qUMáVäB‹¤Êg¶`4S@¦ý-å²’ÙÑà_z¯!$‚,•¯‰1­Ú¤‰…›&8cr§”ðN8®²¾ ³”dùaפGÐ\WªÊ½ƒú¤ÊògÍÚ6CȽA4 öJgðKËË'<ì¿©Ýwx$›÷̵«Üj½è§O_L{´(3‘#çrM/î {¦ +C+cÞk¢¶ÌSÁð¼ðQjDùjžqK¨f`Áœ¨ºïð2õÙ›ûXDËC·7(¶´6h íå²ö„‡§ÌÀÀÕ„›ö/¬/.)ïàqêB¾ËÕꃙøÂüÁ#=~Úmü9Ï&’¸£‰ £èŽëÉÓ³ÃÂdÿjc„AŽÌQ¼hSôÔ—£Žiáçë²ïØÐ¥Û œ6±]ûgCP[Þ”‰ÞXi£Í©gÎ<+³Î¡æ±ÿ–—‚Àm¯V‡)Çï¢ ƒ\Ö‘’Bï˜=IÝ--Ÿ^}»í±¥Üh¦uM‡ŠîàäÊF¤òóžE›¸S^Ï_‹IEýy J˜[¢E¿¹/I{Ù3/´uIWX~:ñx…!=D,u^JxúÁ÷ðÌf¬»žÊ,ñÏÒorï¦Ï\MÔ_³¿PtÞËÚlBýã]ÙËïê Ì£Œ Š32…í´q×Òƒ†c°À>#€XÙÐB€g ðxÝþ×ú­“ÁbéàþâgðÙí]m  :ˆ±¼‡k߸„šõlS2&­ŒÕ¨ï¨¦j'~‘‰‹\lÅ[GGÚ:jØO„»Í=ÖÓu!Ç£¨Ðv®…}™»¤·Bµ­ôLže†¾Þ›AIñÔ·Ò®žžæŒEÃ+5'V»º`—*Gx!”Ðú{„³ÉŠpä4GïÂå 
+V@ÑÄƈÜÕìG‹bI¡%¾Úò39öö$~‹)QõrFe•òì[úÑ«â{Képc ɯE$$µÇèº$L×ÂÊ<%þž¸;J±.ßÉx³$ze´F«Â0»4#îþ*Go«±‡XJÛ94¾ô|ÊKÍt´­›ð/;%I£Hå4âfHJCÔÙb4Æhð\¬$3vyèX ¶ÈΜ¾K€åÆRM—IˆìHÔm_^)7É[PÐ TÞ°2ü@¬Až¿é"ÜÒnL ö:5Ww~¡ \xÅ+²†•¸z(0GnÕÙaY›™›úš-L+M$ã³"Öu£ù›VEÔtL$?¢,¸¶ówë®+Íb]O½sò[kvc§Ujé™ùa)Žc(€uÕ®gæFí²”§ÏW0Î'ÆÎÒYÖCØ"%++• ‰h˜bJA¹¶é +£h@&×>rWeQ`ÀSMé´míàe謺v¬ß 5%kªVÓ¸¿nkÁh@öèÅÍ€åOš$U¡¡£]Ïû@ŸÌ¥ÈÔ‘q8–ÑÜ[»zòTGù¸WÚž¹xÈïhÂÃâ ÞEæÆ|i¦h´:ã ¤À[2Æú†ùÆžg‰;¢rßTŽÿ:ºŠb×YºÌƒ¾*í…OÈÈÌÈ@¶ jNô?¢Ó…ÊÓ–sÏ€fœ%Û1w„ +ìsí?ù…›¹L@ãB;DÈcM7ΊÀì™V)ïj“í×ÕKà)š÷+GNÝ_ýð ’• +¯[U@¿†*ÌRo>_Ÿ^”¡Ü×Û¾miz £IÅxÝ0'õÔ›<Fÿ–ÁA˜÷=U¿~ž´3DËjó]*©eï*9=7³Lo-É)<œÃ¾*LþìåÓè‘»ÇÏpÀs3~(@ qjqM†sÅ}™éæøþJ|YìžÐKÐyƒM&ç'Kûór„6ãÑç“S ŽTH(|øõ•ñUZªfœ(´w΀ðk@Ùd#z]ŽD­ê¯®C&"¼Y\й& Y** Sp·?0¦¾ ƘuRú©x“ª²ái‰9Z©QÜ“½®ø@&²ÃPw”¢i9»Ó>L»®x„!ˆ’ôƒ½‹ñ[ã/ËÁ°.¦k&GæŒÙ—¿7õ˜¸WRh´Iì.Cœ>>ƒ\ö ‘¥ —¿¾Ä(Õ&Ö¯ø©¨XÅ$ý6~¦ÞÙŠy±™?ã; K€ÝëWŒsª¦ÜëÅ)ì±Eĸ4+S¹çß<ž¿‘BšÎÔÅìí>Ubq)S6ö×Àî‚J*.¡›²Ð&õÞâÃ$X o´ü4îŒî.Æö µ‹Ö*izµƒ\Qª}qPF§ºƒ9]q£ Á$ Ù‘“2ç%cê‰j]”Ú +;é#î%$6üØ>OïÑP(}ú nœq!$Òý“OXÙöjM/ã„A…Žz^ÐÊ$kq;²’á8¬œ©„ƒßzÎI$š‚ éüÊRwŽTi™öÙXIs[~}Ä,dQü®½ôc*¦@/¸”J ƒÂäî^l¢©Î~ÃÞWA ¡.p6ÂÝ#sDå—ý äoöA™ŒktÝ åþ ©ÎÇÝ¢¼Î¯*Òœ‚f3'Xš~é]kJ€Á¿¨'äñ^b:–úLFyCĨN$=ݼ±/&û»œCÝ{÷f]\„KûvN¤ÖOЫtc5##K2z¸.'p+ø3 Ã¨šÒ¥à6Ìf뢨¥z·òÔ^3—›Ÿçñ1 ª< Å z%šó/LÙ³”pÙ™<¼:½¼Bb¢mgžÔ؉²Óÿˆ®ÆqïY™Ö¬žÖøãœF¯{j- »²`çÕ!7½nå0ã2¡›ñ«8mš4&5Œ•sRHà`†ÐA¿û•å¸$Œ(Gú¯¿ P LãYšKgadŸ[»ŽÓN.³ú8!übÃËÈ!½ó nõ/ #ü@öÉäŸë°ёަ`UîÈ12r½  Âb#Q8ï:“ ÔñA~½‰õyuøåÜË‹3ØÖ_Öi‹+q ÚxÜsúÃMßãkêlÉÉ΋a²`À¹IQâùlÈmÎiûÿJJ÷Ðu½6óÞ*øÜ˜uÂ"ûùÉ-GnÉ8PÎÜ‚ÀTñ—"j2w<tÈ·S½(·×1R5Ïo„õ=¿íÚ-I×$ñBÄ™ÓjéÂyœR@´PÒ +· +‚ó,· ˜Xœy?QHE|®Ü<ÇŠoR1<4êå4áÅ,!¹éñâY§ßk¤ fí¥?#;™Ó»0¡ak:{•#! 
êJÕøÈ_÷ÊÖa\ÁÚþñøc™d;Ntæå”sî >8Pú«œƒê¬6]õ÷ÌÕaÜz¼8E9BB9ÆñÙ/g#椫Òƒpl×rÓ0Úy.<<©º÷m¿µx=³ÀöÖ4¾YKe×i·äˆœ#¹±Ô©o>¬]âŸôæñN„E…ø>!W›£jÆìÜ_:†rRu8Y‡A¶Ò<3)›ÍÁ˜÷™ìÓ‹°,oG0—¿‘’>„èúýk ¢^ǘâ3â½TÞÎvúíÐÙCˆ¶®³ó öHkE³9&ަ\ÏE{o;¨–94çÊòj7Í̓ˆPйÁ((É̯7¤F½¥‹Ä-6Ù†QJøF|ÊÁçåQ'[pû5Cf×§vôë-º£`QWiBy}‡Ï0fz1o ¨E2Ü °PVp*–loübÀÚ‹:J¼êmå¹kR¬M+¢OÊvñ0f¨1 2§hô_bDÆ:RÖ†#Ü÷˜¶w^¸5Ñ4¿aM…Ñ*êµR7ÌÝA‡`µ1ª¶ möÚq'±òGÿG<_‘¼N#5{b¨Ô„r5^r»¬%%7ã=y·æÉê}¬Ž5OêœvÑW낉FÉ3©L\õ:zEø“(Õ󯚛šË <Ëü'^sqâ–Sw˃×.áïÞç²6 +¦üg±%ÛãÜ!sKBÔ1Ce>w®¾_êñÕòãÖ¡»¤þXâ¨Æùpä+‚a·4œæîR¤uKAüû_€†¾Ù'J‡ 'ðÃÍ#2%lñ*èy}S«ÐÖ‘›Xù‹Èntw"Kv!.L!¨^Aëc‘0̸ÙIâ]}´>ó,âÌÄ¡ sûŒäÑÿ£¹BÞôÆÇ”SúµšÀíÆ ñI§2Š=åííëõ aÙ pUÇk}aÙÁ1I™tˆ¦b=¦ÉßÖÀ04ŠýxžœÈÒ²8­:I#_ó¸Ë%‰8søKïÓ n}èˆùˆßB<`û{6ä=ÆØwP>[ŸŸâåú÷wÞ{M‰õÔ½‡iÖ]ân¯Å6øäð4÷6*þNÙ'3—Èê‰ÔM;óžÙ6Ö°M„ì'*…bôOÈ”óßþpÄM–1bM‘gî³ÓÜ›þ„«]C~ƒo´çå6œž j •©¿¨Å½û¯1 aƯ^$Œýr¥S§ê)³ˆ¸Ã08IúËoö CŸÒ8ÏÜÓCKc +]=@°BßIÍOö`íÏäIÑ_ŒÓ1Gð®Œeq„Mß*¨öߦE˜åûSø„ˆHƪŠÍ@j†‰ÚÍZ×cÔ¯—UŽ1>@†ÅqBôÈPû©Ú|?.}i¯k÷­õÚ¿þ«£ÀÙlÖ–Ç Ðw`dþû{9ÛI’|á›üã%Œhm€}Ì8Œx‹A™Àâa›Î|ÁºáÈàuR¶oüjc×u§­2e‰‚ƒ4fôVqÜ‚I#Ø'—£>K4Ìhô M‡ª‘øUòbŸŠžÓªï(%ƈÚâºóµÔ§Ÿg{~‹˜ÿóÌLT±{óQ!XŒÃ”ý••ÜXfÑFe5˽؉ħäò0ÀöÀ`{‚›ÑݽèОµ5±‘!ÜïB»÷Á!á©÷†²šjã±xÕ +­üîD±Z¿ŒÏ[LÂâ­?Bg8Ì&ú±¼„!~æ-ùÊoŸ€FÉ)! Òùؘz¯­#H,K[Òšýÿÿăed*eDœ·š®Ïð%@ÿ3Ë™épHݾúzÓÒŠÑn—í®ç»~Þì—"eüA#Ïê†ijz=ÄÛà“véE«ž¶n˜FÆ×»Ú¢£^±?!¾=ˆÒno_h…]I~'.I–»O˜µG(r{Í/¨ê„oá +]mmgf³Ù£ºï\ÛleÉ!5¬cG‹”ƒ´U [d] ô`ÁÓBÉC"îA‘ʸ`ìϲ¸˜,aÆéo'oÃM( ÐØßVÙžÓÛ> +í)×ß=1î’£}¿Ó?uÎùµèa¨ p‹ç©Ú©ùßz%沃ࡠ+ø{oJ¿c?ˆˆiëi’‰ ü˜ ÌÂþìڰ¢Z¦„­wÊ7mORwl"l›Šè¼™@3삽å‹CF÷¸33¾hr‡ ÿkªVmç,«Õ£gÍp1,üb1Cã²c”d©$È´–õA`•`hã.Ë1ë(dKåªUs7#‚©zYVdièk0XÊ»>Ñ"ó➤Ԃÿã[ÌW£}×-ZÍ !hÂÃ:%ýó+–Äò·'ÓÛÆ³gfä`»ê!wÈrã>7^EÊ|Ž¿C%ìYäc0¬ t'úh/©œûÝZ~~y…AGñ7›W n*È:Ú÷QŸæª.YW‰,.^¹vÏ×ÈÛ)}J ܈è…)ÿÆŽú-ËÀÂWp/MP÷£-©Œ@É"³´¦®AHl¸Àpf09X”Êè­9 ýXØÖ)A} %T*ãPi¢wisÎ<-«'Q­:Ýô ÖœÀ‡8Ì¢Z¥±â°tãÆ4T¯² ){, ¬ç<ª¢‰8•3¡þÿ†.ߪ–ËÌ䌙¦U¨A½Õ[Ç-´ÿ}ÖT*ß›J‚†ay&„ã ²'ùÌa¾_$1æb{U±H#ï†Ìz¯ ¢ªª—Ôf"ùÔ‘Ü~,Ú”ß*Àù-9‡¾ 0—cEtEO™ÚVÕ^í_ºÎ’8”¶\0G“¸.ô08(2³ 4 ä+ç¨âô ™cyJØÎAz*B„×#ªß åÌ™d5 b…@ä a!%ú°M¾‚¸j[§t?l³ð¹ç 3É¥Þ!ÜU{UÓŠ–/á@JÓ‡sK€V+!ÆÐºã"ÒY(c„)*.Ða½"ƒm:²wp_Çš‹òËbí‹ÑÓÊO¶¹û ~\Â^æcò‰¼B1uëÈ$°¾rå%mxÀ’¶øýy$‹µÕ{[ÆîÙãDïP. 
÷‘fÅ7› +-[ãµþ^P‰Ö¾?M]åÂP€Ló»ØÃì˱9äý¤Å}Ër8fŸmQK…·¥Š —û¹™gx`ŸQ J»7Ö7X…ÈEv1ûN ß"PK R±!×V¼ãʪիkÀàåì ÌHÅ5חǽ-¸NOÀUÊ8&þ“ØãX;ÛE +BmC":ÚDw|ò™¡²FYVf(÷ZŒfF=b«Ì~C³fw§nꘅãä ðõ&i^«´)H’uWkõüëP`š…KÒ(%9©»´ /:=°œ'œ!/™ß8=JУÿ=¥§9r¼««}<޲‹Ò,tMD{p°Ì?äN k·ÜNvò7køª³’õ±{ Bv?³²+ƒhAÞê’Œ=¿£Ëî—JsÀ«€?iHÐé1ŒkV—:Ìnð Ï‘èŽôrн ³â–mÙ.O„FæD‘.lLçò ')aãw¶¬`t¤ˆ¡ìA#;•FæÝ8€~ø%ô„ñHšÀá;×I›AZتý™¨:ý®Íà@n:†Ú(£q';yûÜ‘1j—lóðÜâ BáBq–'ÚÝúÑ܊ﱪýÆzÜ>|Æ÷ÀÍkù‚æ¾Ç=e"ĨåK¯ÕW +ipµÓÁµY-§bJÝÏwpvõS:õ4óøn¸?,b)Dós ‚S®‹¼¼è­GCŠ g°œÆB´n*X]Ø¿€Ö Ô*f q[ï¦}NÎ yZÂì¹uVìÄ@GmÌLϾ¥åô:÷É’0oUý®àb›€k~ÇBA_€ë°Ÿ‘M¾,ƒ–¬˜‡¶8ű‘ë–½¤m6è×WÜ'µ :§¾£R¾ºÅ~ ¶rƒQÕO>é%³*Š]¹eŠ«|x„ë|´T".ïsiX­7”È“¹Î—¿•ÒÄ~Ð1-%·«çÐØÎ·á¿L+ÓÜŠ‡rÒ ¤±ŽôQxpÍÞ)QP9ܬáúì;›§´æ êdÈß÷!-q k× Ø%ÅÑm*c_8pI ÃY!RhÉ¢ˆãËÖÐ,¿öp¥e¹I¢C¢„-$âcŒ¹Ø•Rɠߢ>b Mâ\G0" %²0RAê·&±Âc´OB`)›zÈ£cÓZh4|HF¼ÞFFW­ùø Lô“Ë¡W­×îÍdðLld»Qßéï챈0CQ9(O^½èÉF:ŒÝoíż[(ÌÏuGÝž"q8j +ˆ –íz.F>ip"fûχõ–HÂÓ_A ;–71{òø5h¤Td}³yÄåyÖ`ïـ܌¹©ÙIž•‹ã}TZ6Íð\“öAºÒµw2ˆ•⺣T,bÁŽçÔG¿"¥Æ­øÆû-ŒÝd!ñ¢èlÑ™OÂýHY˜\e&þ£ÄuÌ=§âùàÏá!(54ùþJLê¤Ò¤þ0°öRüzá^kRU“Ÿ£Þ¢u÷I@Ã,ûÎóí)ëaàáªÓ4ÊÕ‘+{(På4y Q¼®ß…\Êmsyö¾/õ^[iˆê?v@ðé"jÓÄ„ „\Z+ìµ¥ û“/!Ù_=aSú8¯‡¹Ä +¤Ükæ+ǶçV óåiýwíï·ÈÈÑ5UŸük"@!ô½‡ôl6›3Tìõ\Ùí»ñ õR‚ÀŸuŸbÔ¿#;ô¶¬E âT;ûõü¢G§9›5K]Ý“zŠöVVE}£jHÛGô+¬w>ðÝž÷ gFÇàý'î8ð + +eª\Ã6OÐM9Eâ´yIW¬w#û<ËŒ)Ùp%¦¤æŠ¯þq\ÎDs“#¡ÔWô<Ìœèèúõd̨ßHJ«çVkr5›Q޶«· ‰ ©£œ_ùç !=;¾2t?!fš÷Óη¿ëÞ¢€%þm®.÷õÕwËÜr¿j2ÇXÜš,Rªm¤Ñ™«%mè•rcâ4ç€2—(2^œAªÙ¸IÒk™¬µrí»ru®¤,G}³¥jЄ)¼î³ó<òˆ|ÓÀVäZXô ¾ë‚SeAAý/öªå„<”™R>ž¤)\:qPš4±…= £a(uê3©1~:ø^ðàZX»Æä+½8iù7_[ú™ÇNòþßDp)„3.©Óƒ¥L®›µLíÞ\ñΗ³}VòÖ"Agýlvâ²33«òáYþþ‰Ênˆô>,±G`ø¹¡ZZÍ +3è¶2šx}]O-iD¦u¦VQD%émÙpØMçMî”ÑÎß-)»C†Jµ·p¡L˜¤6ãÕdÖ1A±í æÿÑØ,äЄ5.—ð•ïdUšS¿Ç̽®-‚1»|јé|u“Ã`óÈî:zåœf\ÖBõñë£F=ªˆ.©jKǪ¯ž«Ö»'+MÚ¢jþ»&õ׿@ ÜÑ^­¾Þ8uR<ßÿ>¾.ÊŒ°w¸£Qƒb 8S•uÝ€Œ^#TÐ +öO1PZ®ÉÓŒ$|%Ï!¿)OãÎóz¦Y°F1òH/$Lt¥èâ‡MùšßÒá“‚Ìßû%æo·a+¿ã×™ p"D›vz£©ë.\'ø±`+žÀNcéã¥&8‡{ìe‚œo3§Ï¶Ö˯·fuL‰4ù«s‰0ëvTÉbƒ¯Y7^XWG–Bïgc§n[w½µ¾àÄ9´>š§\µT®·vöžÿí'nÓDŸ·Ó ’­qr¿L¼f§ˆÒ Pü*/™±‰»…¤ÏU¸º‘´wˆÈ¡ˆåÉf3 +=£c3oy&˜Ü8¸|1H‹™Ý#g&¼ñÚ49TV]5•Èæö¬8äê‰mÒÚ¹Ù†ü]Ú±MÆF ÜUÃìÁ¸·# f°'[Ö¸´òÂK¥Fˆ‰KÑ©Þ͸W]Ú¸¯“XñÅÒ:"¦¿M=sX¦ ^&Xµeö‡É§uÙjIüPdóX—Tå =Dït œ‰)¶QÈ©ú¦˜»";垎ã«ù†:ò 
\E:Là—#‘’šEøÊòGXÄÔdvß@ºuúìqÑçùæ”è?—Ó§"áÝSØ•}Ö"ó¬zåÕ—ÙÊo{$i0Ýûé¡Ïˆ4æ/Ô pvÓ”þi혤 y±×¬¤ëG±ø%­¼ͯ:ÞâÜ‚ ØÿƒãH7l#Uÿž úÜ*+¶X*{ò¾§NÐ,ÛÜRh§˜ŽR[‡ê!{´’>î›eÎÀA˜FY"Fªžƒ9ñ“Ÿ•ø¿é®þ±É·²jÞ9Y‰Ã\²Óäö_Nœ'W)Cûdï3²ôB×LzMLN óO…X[`˜®j¶H5¦?¨æ³MR‹ÐðäìËãNîŠÐ;>#G3c;¥)§`UÆvLfioÉxY’õ‰¶&;„)•QܱÁ®G^òýØ"›H¦£ô™™>jHDkº.­”_Æ«Ï>MÁ&:0–‚AhˆýD@CºÈò7¢’æ A›ÍámIŸ¾T\€A×Ñ2‹u¯‡lX6`òçnfmÿÎ{_Ü0´‘(.ÜW—ägcmþI÷ZfƒFan½dÒÅj,SÈ©cyÉUâèЕa`•‹‰a5“]€*Ü,Y\ú57÷©–==´5lN,ŽnÛÕ·-Ðk£JCq¦µ +÷÷éº!á ´ ÏÃ̺¶¬8ØàC‘L'2•£ ¡v³6Æ Ç.ßßÂwŽ”ÈÒ+·¥Ný8ö"‹@gbp}&©¾\kš Ñéî‹9¦Ñs\ðž¿FxϫѦÓû¿ƒWÔÜ'É”úðA(øcHûšQÉÅò¤”ЏëPeB½[À»>¼âçÅn sulÍ®¬’¦Ú7"^Ž¡ü\¥V4É8ˬŒÚ‰—W¬ÀÏO)Ã1ÃtÐaDÒPãQ˜;ñF!Œò6'O布î„ýô6­;dmýõgƒÄfYúw1ù`¢=PºM†MWY"hã“¿ Fêù¼=†ã`x2ÐåÜ:ab(™¦Í¡¶ºwÏE%é7ó óùðj¿dm%Ä©¹7úÕÌ0Z6ÅÇ@¼[6ŠŽ}e4<§ºðñ:Zkn¯Ã¼¡ŰþðÜ¢“ðž1,´Ö½enþG¥÷Œ4³Ýø:ä#YL*GÇoGÐè‚üv›![©N»ìë c l_Í2=çu¹ôëø@ãòÙAa“Õï´,y§x’ˆÓ¯p +\¤ëøýô_l7´&Ü$jdŽˆ:B»¿IPåÈ¡qJ+ôµ7Ñ\f¶Àîº .»ì„ÉØ9ÏBbdìbJ^{.B²Á¬œŸ{v˜Žö¥ûVÇÅ3Ä(lH„Å|ì૜¸uçô_‰X”™5âNûŠHn‹½‰%üL›÷ϦÀ Ê»·!5Ô”´Ö¾ªØšÐ¾ÈSÁ<ó£m/|£:QJåo>'û(Ê: DX^U¯Y½ÌMÊEõpy‰>Äz‚ã–µœ@€ëH§wØš_†½‘ C UýkºýoZ°ç&ÖÜnðú¶s£?¤ëHrª0=3ÕG‰•µ3†K«ÚX©¡‚ ĶL›ýQÂà? 
TæëÂá@C6@²OJ¾4]j:%<–{ÑöÐæÎ¢tÅT¸„ý¦€\m˜1«æÃÈ,uò¬îU ‹€kC%ªôW^(.ãÐãÁ£•‚Š·WÖùÜ¢-!´ +½š|ü AûqOîPïGˆŒ…íèU\a¡}Q[&IqHñ†¨2Þí\Ÿ´&5¹C{;§þºéórÃ}K"bw ›åGºÂòàÅ ÇGiä—ðó Fæp•• Ør®æš«Jïy[¾xÒ±š§óÌYƃْ‰MÙÀuÃÜIL€ê;¤`b#+ŒMúNº±íùŒM$ÒP¬?k÷רUêà—ÃQ ÒzaMò|:r±i¨è÷a×ix¹!ªñå"°E'Bi—ÔC@ŽBšù±Üi/…+¶—éÞBâx/íL´ŒÎaaÏ_|‹F·¥pËV1¬ê4êºðÞL"6Ó~Ǧ¸îO/³9e`äV†.‰Až°iY–¯™áR%!e +ïšéñ¿Vxúýz‘ͪc6P„˜2)toŸÃ×0š v;XMâÀ2R’wM Û®ÎÆÙh*Ø¡Ñ{ÒWP‘<íUM3YŸµŠÔuÍK·VÄaÎ9 <Ä•N‰Ú +cä°(É?Ìç#ÁºKô¤o~L4~¡5¥ó^EÄZlã0‡“¢°{ïc6VböD|&ëS^‹;’·o½ù`0´á™™­¢3y€òHÑ â° êñR½HÇî;c%Is>7Ú}v)‚n˜]Q8ÁŒÿ`ô­ö‘Í]ö!$Tá(ût6gù[éòYSÊ4Ï4k-mž³câ[*FÅmi¼«ÈH­w#@™ÂRš÷ž£©Ôo¯N,vžÄÕ6ã`Ké}_PÐâêí8µ`¢ß3¯›ï™––ïU—HE\'=êk*·¥¨KìBŽIjÕá¡“êÌãX%Yú¦ýÃßß‹×=Eëõ[#6•œJ9é°}Nªwß1û4Yåõg‹)­´6ù“Ëu…l(ŽêDW¹8qâ¨qB~8^~^L)®?Y½š6?eö8ä6îºIÊõë©ðÃÔ@ÿ«RÌãÏorqFán¢8íؾ¹d__Ͷô_ÚÙr½J!Zn¹G`~«|ØgòœÅ¢*Ì^åcD>Å-2Ý_ý€â_c„öõˆÍ}òÕ)p™†ú#µŒ<¼U‹z¾!JÐK÷‰@NÿžbÔøòIX²G”ŸJ°TLäÀíuT9F +Ý×(c Ý•ZGkðÉÙ„gÙ\1Ù6‡¾í.f¿üIò8¢ãŽ£’dJ¤ ûg×=Ò8]„Sò5©æ98·+[?KM9MÚ%ã?f#lTðEø ¦¼—ŠQ,j“0bœbü9s§J.¿õð2qìĤ÷Àx!RˆÝ¼ÓæÜÄ0²:e÷âri~¬ìè•ÏWcœÕOéÜ®µ§ãIÄKtå\rU<2—»ÚT­Ä%®,­8¼Ÿ ûìqftÎGéµ/$’¶ÄqØoc!ß]AV웚Y3NÕc`‡L—ºå» ✽õ ¶ ~FJ¸ˆ÷²T€Ž^L õÀoþÇ%Ýʧ9NûRýŽàì3`x¾z²…«£$Œ‘Oá†r&;b“¨¿‹îÄUEÍLÚ<~ôB!ËÏ~½võy8ð æ’ V'ñÜ­˜¿öÓœšÏv„Ys†ãŽ\öË.˜ü4…D=KxŸ;Y8â¦Aù ©ü_zû1ÃIP¤;´S¨l» ÞLi¤}ˆ§Ê/!RŒóúp¤:g¼¢Ÿœå·† ód"Á•˜IÍ~•©?è$a:Ë"d£õ}2“žž±tþäE®Lׂ§.d øÑytïa+ïÄ@çlY™.ðêb‘ЯqN Ö@èŽ5¿¯äâòwr†e"¸DÐPX›w¬Yî0m»%2ÌðÔ +5ú»$['·‚Ͳ„c´V"ƒÞ¨à@õŸáøì‡ÞB´QÕm"Šx”U²¿™’„‹Kéa­h†rt]«¥›Õ +.p›˜]&îpO%ýßW¡4Dß}åzà²fîÞNL[ € :û1SSQLv+"DCPžîooñ½:LBygnUÞœÍÙ«×”[®žûúeÐ`Ê;»¯XL5¬–ŽÖp„Ôq-b¼‘€‚²Ðezï,¼xY¹-)…í==£ìø3E~‚a “ûSŒ'NrûL8«ÕÌŽbZ῎¦—9D£t-íÿ2RÛôÍ#p£YÖ9ËÝ«…ݧŠcUÀƒ«ÃãÕEå‚ð¸æ­j²™%0C¯H–ÈÄÂýÐÐÀÏ`s„¦ ++ö3^9zî IÕ×¹Œ¬}èŽn“†;ä}€¢I5>Wuí´æÑÍ„8ÏoÇ+ýa Pˆ0â!ŸM§¸ð þj“ì"Sk/mÇñ’M¿£¯NbÁRäZù&N•ì|8j5ô¾¯E;Á£A*¾‰-›šŽˆî Êpô›èßÈ?Sœ¾ÓW÷ñs*NÔÒ`9Ÿd$dô1ÍLZkЙ± «2ÆÊpñN:®Ædk?›‡ 0fêéÝðÛé4 +rätÌU©[ øþ‘«jC-_¨A›ÓøÙœùxÕ'Ôê»vPµ0çÛÃV±³äÉH06h|œo3‘/rŒ¤ Æ×k¸3åÓ]‚ ßÂÆ$I\ÓꪢóðñåÔ£[Üêwû羪$Ïüâ Õ6\[_%ö!Ã+߇–ïgÛåÌó»– ®î ˲6 ˆ_uª`‡C§õr¶®¿[Û|¼Ÿ;ª{| Ç­ûúú›îÀí7Œ¬QŸè¨÷0û Õäßáhæt”Wzn˜0œOb`E~ñìK7’ 60Ƕq~‹yJ²ÛjÜ€œŸ·#Êö5wŽ{4c²~¦ŸKìKØ#”R1°Oo\+Où~prИ¸ôlà–á&„‚@±Ç?‚tÇoã¿!Õ÷ŽhÃÁòœ 
{Ú¦CòpÑÒ0Ý„ÔÂÒ­B,ÆÕÍgåÕœ™ +|¡5æø–Qš€ÀÏn™kŽ ÄŸÂ™òûZç e›¹´¥§–.@to½W¥=Ñ’]u3%pÞ°ó8Cxz°Â:‰ Æ÷·a>²É …ßÈl‹s÷«Û¯L²6&CÓ‡âè“”Bú.Ù‹Ii Š¥U9CTƒ#¤( zÝœŸ(K•âá—á8u(ç¼qCS)ŸO6/Åkªúôú«»ÅßuZ*óì6lðqÄsvjЃôk—_ur5OmÙ{(UºòÖT‚wO +Nˆ Ãl¶Iì¿ëF WR}7¿Åù¸±‡¹…Æ[ãCfô0$ðäõñ<Ño…¯eóg­ÈÓyÒñ{p…jü»kx ØÌõjÛ¿K Uæ(–ǵÇ'Äzñ4÷èýëò7—ÿAóoùR§@á@ˆÞÅþÌ9`á¤Z凄Þú / 4˜otñ©?r]êiç|Ïm1«Ø0OÆ›ÒÑ‹ý^u²^}™›¶ºÇ‹|Ç»ÍßKËê¸ái¥ñ,di ™¤üµÓÃ]}ÙE¿H‘$,¤:Â]_³|ÑŒØ?T 3¼ÇETâã&ýV.Øçìù"‹ýüŠø "‘4@·ôÃÏQ§ÕŒT sHß"&e0‡KÍ"|=Ž…Ø7“Ñ®ü‘½°†G'íÜîý”8b"#UÆDL¶ ë)­s|õäÂw>…Ñøº’}3}PþqÕ^‚þ~|#R¾™ZŸm,Ô¦ÁÄB‡à£îÙåâÒ é™Ú¯¯¤¤zÝߥ±–©· èó\JñYz ãçhRϸ«é 62ùâßÉúâ9ùuDˆŠç ‚»ýrÖû#‘^W\T¯˜2ä<ÎÛÏÜ —ßÜ/ +ù¿Ü ú•8f©'¶ùÀóuÚVñ’™©.d§Ú,i ¹ú+HTzw¡ïõqFÎѾ™N¿le "…ºèX¨¤å¡’æZ˜ ’q£ÅK‰iß§Ê"½úJ¦5¶Þ!î¡ì#ü$ ž*+ J×ô•|ï)åÆüQÅËÅvŽH¨¡÷ råÙu˜üŒ ,Fµ Ddÿ`qõÌùܼu„Y½ íž4šs +Ü»ýžo6Û¿A_* +×õüp¦ÅG½´Ra$Ñžl~ð«Þ9éÉGâa‘6l{Í„ÒiM×oª~Y·+‹ f¬F¿KŸjVP3{ÿ”žÃfò¶S£ÞñBSïçP7Ðîd!Õñ-lÔèl;Öá—r³lz¿'—Hhb»ÿøû¤}Èw²ñòj\îgð”@€nÔNà qúÞä» ¼6¿\ ŽmuX"øqßmÔ) 5/H^ì#ìï<”º‚Z^O{DtË+ó‰|ïÿÝcÎÌÒü±nÓþ\3ÛNsñòÕ¶t‰Ã-…˜nöÖÅÂ{ã±bˆ®÷çÓ÷óˆeBý’ÐÀymÊvȉÞÞ#ó¥¡þ‡ö!9Ü«“u¨#x!F¢"†8{Kpv¾Út8áÔ’öã3å%Ü×&43檑9t‡IlPhïb”w}‚.½BÑÏEûKj€Í×°‰ÎG?Ý“õÂÑÜÏÜÒ*ØC„yR,@dØñÛ¬ ªÄI€‚G~ÝŠ toAvAö¶Ö³ìçß<.ì‹ÿA¥úîsúâŠoTµc§µÌ‡Ô†,̰"kÀfwá`ù?ŸU­8ÿ·á‡ÔÄÿt&ë½ ùÞ:nAJ`uúdã­êìš'øýuØ`S6©¯Ë)«èÁ¶3£Ö©›ãâ R„‹vç"–ÂÌKE±+2ï*[h!œŸ“…m"Ž5œh:gó@uF æ\‡š¥.X»(øµ¹Ìãû,Ôà•<­ww ý[p–Ê–Ú¢Á‹Zª‰aF >vC%{¸Êÿ*z<$TÐ.Vyñ=K¶âW;"%Â-goà½uÇŠwÎ+´•ÞbQ?Ú,Bö¯1JR¦ïYn±l/;]ñ)5j„é çÜ>m¨îobꡊe/¹kY„\¤^ÃmæüF. 
Äåe}ûDÿÆÂ@ƒUî0 ëaë„ü«[˜Ž©¤IÄßô¶d|Vg`ü”ó®PR‰OEѰ¶4¦qúÌK½cJâ±WM‡Vô@íÖD‰‡±êƒÆ±‘Ñ%5é»ÝcÞÆ°2ûö*¯éŒ«¼˜}úÁ¦ ;å:YxöFJ4¨«ô[ ªô·3‚•Ï’/öÖ%¢Eo™rŠ—µ?=Kå­B b/òlÆ òÖÅ,£¾šq1:ð?„Þ˜ÝÜ(r²áF°’„ßò±±Kþ)îh߈~ÌfÀÀ8ç~/ñHN°æ&ª¼Š?µÿßëq±z¶l©üý,Ö˜$ ¹Ó~>€_Jb߬J_PJ´—tø';^˜ ·*·†¬}½;àÚÈ "ÞówGñdU:d ì´^3[ÑDª\›t‚&þÛOq“Èscy„g2KLSTºÕHKûÃ;91lZ©¤§˜á¿‘þrÉý…òê%¡[ +aU[ ð¼ñ"cÂ8)¿²ï‚\amFñjËHão0.ÜV‹óE‡ŽçWI×Ó&ô±ïUëSj—uXÄãÃqÀö%kE Çíœq5·ÚŸÆ_&Çç,IþDzíýœ3×HÎÔ2„À ^¢%µ’>*ã1H´þÉA[Ú±Z$`NÏŠ§ö±§XmNm|ê#{â€7jû¹[øXä4ë:ÛÕ°¯YŒ6vOO)†Â»›Ÿoó`ØÐ¤©+¬Â¤wU/Ì`»ÙvúÁ¿¾M×Ü WŠŒ§2¹‚/ßׯuÿö`¥Ç©`1ÂMé >¶Øëòš{»ß„¸ ~Fp‚Q6'‡ü6Ô”ùýÔ³qЛ‘òWE˜:Ú‘daže#Vip †çô’ñfJÞïjs%ã‡E˜¯p½”y½€y Ç·‰yG+ðôó(Â-ö¬0ZZvO:c]ßçSSEZ¿·7®Õ`Ù ® #"’t¬ +xÞîBSÔ·HÖ_f,~·&ßù@þôz]ˆÕØ!/öð-¤j7Ò½éáŒJIp•kíþ ­cìA/çRÕÃ8·ú©Ç•°Ç£ žH:ÜžÓ?Sû#„`%ѤMánóÀÑÑ;’êF8ÿøU7Ú,r†Ÿ@Ûà³==Òò­€ø¦I/³ÅxÖT8)&fþ[ƒ_‰}Y ‡L¤žD9ûÉÏ9­ŸöÂrÏ!hX¡“/w4xz°Ÿ[qXPÅ[< Á\¤Y.Öì#©GcõØ„?`˜°[Q°`¡>ƒúa<Û ¥ïú¸u€~€µg0.!ßÖ7Ã,Ò%á޵ײ+Ùοô$G±oˆcJ+‚m3ëµ;ÞuA&:è@ÒÙ|ŽS)ÞÛ1!,¹ÖSjÜÉqÔµ[ñ:E‹W: ÂDYçe»zê³þmFeg7l™sø]gÍa^C¼&IO÷E¤»àoŒÖ>ž47¥K»m÷üzüŠynThm1úG cg@%‘ +Žψ«"û\Õ©>ö.êÆC’Í"E5#Ëæèkrò.aÓûsáóhÐìî‚ ˜“7UÿÜã´¢p VÜ\Vï¦ÂH'½ ±Î„H¼Æj}lŠà®YI½^~yHׇ®¡3ÌSóXx¿/|Þå7ž[…ËŽN ò¸(FÝLŠ}ø6ŒêŠ¥o9®èîA®€+*†Ä÷½c÷p‹ÕãÑ+¨õíÙ›Ë ^tÃð +ef,éX¢µxdc +~¿H+Ø« Ô˜'rè®c´¿«Xƒ7Š.]¿—ºOì‡q¿”D@QTp %BfË¿lôòÁ75¦Çy’ûÚÀDý6·l›pme¯øŠÆ©`ìb÷õ.×)V㪾5°™鵕Iœ‰é?Eù–AÑîÀüOö-)|µv¸æ½1²_+, +¬ÂëÖ±ð7¶JøÇ¶Ò£âí€æ.›¹§”åmG£«XlÀ ì¸ïzyD.æébÕP/„îÿûœFrP4|ÇÓ~ ñ]®õšfïóÞÞã¸|§§üPbKû>픉ŠÛ5žÔ<‘ ·¸ñW;›…¦fÏaûÌ +ôQ å¢Ä©Z nÑ?ó~ÓªàÆb朤qÀrªÇߪOÓ#ïÆ#éÕríé¹–cà¥8ÆbE‹’í‹Áx¢9eðJÉ{ÀDÎëžÞ™K´hVÃ¥G¯=,qIM„`ÌG€}ÂÆ”õFúp[ö„¶C=y‰R¤Õ¸8­åm.ð(õÀ_B²œßkö}úò“âý{ 2V¸¤â‰x÷]ˆðήA>Vi^ÜóúyíH¿Ñ>Òõ5æôAÌdf/“¸'Vß„EŒNvï×à©PÈÅaF×Gáå¶böEPË'K½™–Pp'¿  Õ,9©QîаBIŠ©H{1¥ñ<žw%*ÞWK³o¹…ÿòÎÿ:rª¡Ìfð! 
Ì¡–²â(T æ"Étx¥L6nÍ|9‡ƒh¿A‹`‹($ZÏ¿ÿÍZŒºF­êo|ÉvúÐÔ³à_9Ò°ÚŠ$³mUËì=Fh®`¢¿5ðg®c´ŠZ½qŽ‚ž»®r'áÆml¸ŠÔZ‚l¡Æ`šFk; ùœ¿½Ââ0ÏcoêI q?”kÒÇk2™‰°#™«7²Ë¹A0ëXႲzzµ¶S%å7&mùâr!©K¼Z¸y’î¶K8¤ßW=²ÚÚ0¼ªãAÉû<%Ý){ZQ*ktªñ³"~1[‰ÝÜOp¹ôé„ÈÄäÇ›‹µ¿×ÍvT³»(4ZL»Œ+ønUâ(‡0µhDqgÅä.ÿ¤v†ÎÈà.9–Å…ÃNkao”|—å(îËÃÈÉŸË3œJî]¢[YFñE(“}zÑå•XF©ù Ò2,‹<}|s—ÝCGÎb¸6ÚtÛù*lrny]Éüð©÷Åð$-ôɦbnF'1q#…P¬!¯jÀSÑm£Cî@õõEe¿3O3É•ùú=ÀÑ’žÛéÌ]ü•*€©~kæ²f$»zìNGé7v +s‹üs¡!²¼wfbE¨Ý^ìŽP/vV"çæ}£ +þ·"þÅp®®ä€&DxÀ¨0µR+Ð1DÈ<4E{W"†Ñ¸§.ENÖ>71,[ÙúÙhPèWRI‘¯ò¤–„8üÏëý·éUìø"M—È,½þçmˆ P0ñzKz`´>¾µè]}Üû£M?0—j÷’Lçž,ijFf}¡QJ‰RQ0N•-¹@pd[LÄ¥ÃO*½À™ãå2ýmˆÒn"C+.GæX“7%¸RÿÈOŸéæÖÞù:úx‹cÁa%>61jÑ'ÀÛD¯x(yð@†ËhUÝ-fð÷z2wêæ;¯ÕÜçB1ä¾&&Ág92ø<|0/•"×”,"jܒѱÁqmÒ×^7û߸úC™˜³‰c0ð/ÚL€qè¹)¿9lcé‚ aƸ–¿PzNȲÀ)Å®þ„Ah>6V-ÂÁwœU?so$]v¶Ú÷rÝ&¾[î´ž”ÞŒà,‚níYlO·lž5«¥ˆ{ÝzRiõ«N6_™¤doÆ¢SNÐ ž zoú.¨Û4S»vªV „H{Õ>i†9dÞìæ÷GwjVâ*Jß.öÖz2ñCðÌVÞžH[gçeÌÑlÖŸF¥bg’’Šw\7¤êu%1d¥F­ºCÆJ)ô”?x¾AFÀ­¥¶o]ÅÑYÏ_¹ lüÕ:…áí×7´®¶·/0F€Ô(rçí+¬€Âsíw)Â'>èˆ7q‘-i‘ú¢µÚž¨Õh†Qá ?Õ$%LòÔ«fÕ²=Ö¾!4ì !Aþwr¨ô\ ÝNSÓ~ò8œÉð¿JŽiNJ…¾píx¯¶„›X¢Žp ±Â4sÒ‰bÆ;eMnÏBBê£Fº˜Ð&éUù]v¡’g×ÌŒ|z k/•¹3;‚ÁnËøé4N\%8 འþj|.ŽWgÖˆd¾Ì4º£ ¾ÆVÏÈðò#Üž …ªÈÀh>NFV)†5*uQ”ð™v˜ÁUU#,Ú?Ô ƒ~·ÁÞcõ®Â3ÚE$rn›#§È•»AúÑé÷&"ðoR¾Aò*§LÌ)WÛ°A«I Ê@ÛöûÔE ¬koʾ|,]îõAªïò]€"7M_Qc®©Ñ kÊ–6«%7#¹Qãf›Ik $.ŒÖxˆpŒôÊò Àsé+¥tB|¾ ìS@;2‚µc x{baTn¤WZ +êeä¼>ôòf0à(hËh ñÙžð +–¼Æs4[œR‚ƒ[âÛ^ý*FØ™e£^™ŠbÖv†Çþ¤ˆ× °Ÿ„ôj©Ñ«|_¬ó)heS.y>y•*·&6ØæP`ƒèª¸ öíÂ7Ø8öM)IªÃ5&À¶Å1@Ý”±Q¬·˜çµº({8еÉâÉ¶Ì +éØžzBX‡—¦½ƱÂöî´«ï.~K55>=&-†Ú7æŒå.‘qòù«JÇ‹»Æ÷uŒãäiêþþÜ¡ïOzP\'L™½[«©ÓJnOœÅ|Á–“˜Æ6?ÑÈï±IËgA 0mÊPÝq•]c)?çdºС–Ù%¥$J¤lÚqâó?ò›,õk=Ä¢¾Ý·”4RÈâ"ý£PXëcó¦0K\J‚SM_Š´é“¢¨é« +ñëN^Ï¢ÙÒ«=ô½É¨ÜØNÙÇ6(°ñÅêÊY¸.h–Â5Ù‰£][ÿ4ø·ýqwÑÙf ¤Ç"/ÀX•hXd/ìvI»ºm­Ýç¥GfØX÷ +ÿK=ö² ’á(ÛHfUïÎãrÒø¾IQ$W0LÇòÅÕµ8ŽÅX‘k¯ß‰ô¹­JÏMÐz{äPãÀ÷–¢Ô“‹¾>$ yò§ÑéÚ"Éjrº½¾‹Xf¯yÓLEœtnÖy“ÉŽœ‰ö•ƒQ]Ex.ò¢B‚…;®úˆ[„çðÂXánÍUîOýÅ›1Nd É¸&÷ïÎŒþ +endstream +endobj +2317 0 obj << /Length1 1616 -/Length2 23613 +/Length2 24418 /Length3 0 -/Length 25229 +/Length 26034 >> stream %!PS-AdobeFont-1.0: URWPalladioL-Roma 1.05 @@ -27420,599 +34411,1700 @@ stream /Weight (Roman) readonly def /ItalicAngle 0.0 def 
/isFixedPitch false def -/UnderlinePosition -100 def -/UnderlineThickness 50 def -end readonly def -/FontName /TCRNJT+URWPalladioL-Roma def -/PaintType 0 def -/WMode 0 def -/FontBBox {-166 -283 1021 943} readonly def -/FontType 1 def -/FontMatrix [0.001 0.0 0.0 0.001 0.0 0.0] readonly def -/Encoding StandardEncoding def -currentdict end -currentfile eexec -ÙÖoc;„j˜›™t°ŸÆÌD[ÂÀ1Æ…p§³T¤¢€®o¿˜ˆà9«`ü¯….´Î:þ¹yÕêpýäJ*åÈÀl'¿–eî¡}#)Á¢Ý&»7+å‚/^§ ™ëWŒ{ïÔLßZ60VåáÌQR^¦üλ3rï÷)€#v¢€$öp~c—²¼´°£êë&“è÷ö'ÄÉÇÆÅ¿ñÁä+ž %;a~•ê!”ty`rô³cþúv5øb‘/¼W©¹™zî²#gß|fà‡ -ËÞSS°C޼3R>›NnÁ'Р(ÒeadÌ/³¾•BÒ¡ ¢ÑnKs­Ò¡¦ï°êß w8H3øETzðÒX‘H£W‘}ÎàæÒó±EpúÔÚÅsg“¾cxHSAÑB±Î]& *Pí>9Ýš.‰ÎTZwŒ½¢^±q$ÛšÎÊS²Ï ¾·QÂgÉX¯ušV,Ð’"ª%_â /w!¦•ÿjßTšƒûaù­¼ÝJç@¢Æâ„Pq[¿vÜõ·ÐM~³ïˆµ­ZIêf ·†îÚèè1÷N4Æ6{ÉAö"‡Ç€ôޤò<Ãhú8ôØ›Ø)¢²¼˜Ð—Âæ¯xl¸aì®-Etö T"ÿ5ñÚµ^1z=Ê̸õe.°ä`•¥Éâé`̹–å,±âÀl‚Iº þõpïpšäЮ#QçrWJ^URòlUïQ6ë·O {Àµà6OQžD´;•Dn -¹`]NèIHΣ{¦ü+¹à×Î#ß逞(BUndê/’‚¶*ƒ ÓošâÀxq¸Ýð{ïo1Ø1«€Hÿ(Áä虸·ˆãÏ–à ¢Ÿ•[äävå ™‡½½i•ïÓ'¾²òj"E`$•Ì·…ÄИ8K¦°iÈbÚîjë{äÃ0eLáÂUluJP, ãiËÙfÆà'ÛªhŸÉ7 › ³®$6â༊:f‰W½<Û6 -%)p\àÏçèeþjÊïÝa—ß\" ûÿ9!œg‰Ç @GˆLmúÂs0½_JEn|‘&$SÁ…Ÿ·úÐ@È\mèªÿý<؇Áu®Zý=½]¡m€jX “æâ}i·¨]ÌP»Û¸ˆ]…à'`€ùL¥Õû}½ÏY>ªÔ;h÷ñŽõdã¯Èg ¨¡û‚ol^k¨‡ i‚âŽEb¹µËЯ'ÕA´vàÅíYqe}¼±ØnŽ—PÞÔA 5Ñ]«X]a¬î-ÎæñÞãš]3ÎÌP›ïi1{#>â訇6sàIŒæ·•Á„÷$G(ìF´ VèŸ[y•·}/ônÂ÷§òñw"Ħ¬¨Ü"}©y Ú8çUÝók¸n‘éÈ'zØûaâñyÄâ5vóT¦g¤g!ï]®¥`WÙn’mmòº7Á[ï]‡lÀ•’2N>|k6ë—Ù ,Wã:á/lÀþv_*ùpñ¸ -jÛFSÇÊ­h1`Œðwým wèÏ¡>¦,,ÕÂ.$aGª:>kÍc½ÐìÛaF‰½VG¨»bF>#ì-¸]cÓÝÁjRÔ¸úŸâjrÃÜxñËTæ;{ê^è1ÿïÿ €æTão¡ ·)a_c]Î#ƒMÄY¨Ò%ð™"#¸áoyÌâñ4g&Ôö¶²è’)^Õ_8ƒÒô‰üÜ‹KŽ?µÂa‚£•üáB…òÔY¢ù.cñWRó²׫İü§Wãœg`Àöó ?Sù[U Òh"Ï×Jwc|µò—=¥]T$ÚX‘×{ÄSHNÄ£d«´—ÍŽÏÚ£½gÍy7 íü œcÚÿÿ†&R/4z~TÉi  åÏ?\u(ç£Û­ +· üñÒô/oªà>¥<³uY$éÿKBžYxrMt![I¬<èæ²1†:Ê—Ç+uªnÓ=ëÆ(-ž¼˜»lço¯ÃÕìiŠûî$ФûÊffd¢ÜßCVìÃQ4}2\qæiØx–¸%Z¤ĸèÁQLZl)ëëã⟜n¿ñ'y¹Ý¥q5LÒn83£V­ü<߬ËÙjúÎÎ7Àf>„%6Hèdu[éJú ר@DlîÐôgóë¦q¨Ÿ‰a맪¸.^©óêm B#§ß&7ô¤ÍpNޝÇWoã.®¿XHà -7Òu,š_ !ñî´DLȂٙõ2ìmTÊ»V-íMV‚2Î÷7?„Û þr™GŠe->4paÓ¹Ù2ƒ=ƒü·|Iö ¶ ~FJ¸ñBE=)?Q¡»—ÚXbogèsó“V»=…’É"¸(LìÔÞWþ¢ˆm˜‚I%Y -èoŒ1Ü©imÜû]žiÑâeÛ/R¸R@ö»óÖG«æW>QÅ×9Ú+@ì7?±7ңªZÆ8õ_ê8ÕódG¸%1ÐBå ¶ 0 
kC[}Ëdü{žÑ“Z[x’œ)SÓÿ#WÖß³dÄš¥ÿæ MdFÖ,*Ác‰ÿ=uói2D é왥–©)ßcS6Ò:œ>³–èª+5xë¨-´6â”ñ%’k“°Ú0³áu¤šÂÿCÐØ+Ÿ ÷“Ý3Á¡cr?nf®ä hj÷áãoÔ¬D“a#/],mE(SmÔúz¥¾&*›#™$’bmÅÚ0ÆEÉò;×øÅRº?@zx~ÀY樜ª'òÂ/:Ûœ2ar6Jžš]Ãþî[‰àÑeýÿÉ9'‹÷C¶ ΤÆäÍâÁÿOžQkšâ®Lm9r ·6œòé¢@gSù s‚È¿¢[ùçˆ0U7…]:çaª÷°¹§AK h…ËS5©²zöL…mÚ3IŸ ±—©ÔUœrRxM›×¼,åQ©û Õ2òµ¿Ë"ËŽxgDôõ%¿kXS°Ö††äÍF5W@^“jYªÅš80l‰x?ÑRè%@ßõ1œg×±ày _¯¸Í”äÓäœ%ãg¬×gƪÀàD‚Íüîže„8%MÒùö\è§a݉ão.ª³·vT$R'l.1UøX¼²Ê¬ûºlnþ8LÂ!ë3ž_±'”/˜9Iýêñ¤¯–^|‹ŽcÜjx¶ON‘Žø”ÏÔ°’‚QL%"¸“[‘>ÇAñF -Iâé&†¬Q-RÅ Ö Žø‘3©ã žlz{%_> ù{—Sži¡:–ʵ¼’åë” gò­±BœM“î¶ÇÃGiü kEªÑй©ƒÚÑß펎V==ÄážÈÃØè[vû±@ðÚÞ嬽»C0xSøI¤'¥‡4ãhÞÁ¦¸ÕßtZÎà!îˆ$ JÅý¢'?›²9yA¤'òפÜ3UŒIi”r0Àƒß‘å9Â8)”JQˆv‹O0ŸÎ<{>hçá«ò 2CG‡¡+ý>áÛôŽ˜%„EâÓïðÛ<±N1/± -ꃼ”Xp>î¯ýXþH¥™Äž¿'e?¹pµ‡€G(¯ôCc+XŠI¡\æûTÐb“ó´Xø3åZ$V£ |ÜþEsPæ± r¹Dß"euˆÝ(ï´7áÌ ö‘@¯¬†þk¡ -<ÚzW&üíY83LÎOª*¾~ïb¡÷ áʺpÛÝ;¼‚Ÿ¦Î…véMÁP+C -¿.å–³+³ÙËbßÂ<±™‚sê^ønC¸JÎ4Ö7 ðû¤%ú‘‚žAÉ"\Ayý„&7ç;Ÿ ®ù_†3'‡)€j2á1“oÖGáZþ>iJ~®G Â¹wÊ@Ë‘!<˜‘ôï)‚½_?Ž=Ðÿo*›|Ib\§§W$L)µt+Dq—r­3Ö°ÀYl؆5Ú;”á­ˆJ¢ö¡„ä|ˆí5¥9Ü ?oU -‡ÇÃijØ<¡P™ìß:'>Œë2 -‡Ÿ’D°n;Ní»’Z¼µ¬n!¯êïM' ²N¢ çq Aáçý -´K±h94°˜·çˆÞ¡ŸeÙXŸ‹ >ξVŒßw‚ƒBy6¶±µÈ'Ù¶3ý\Ó.D1ô”ö{ýæh]åK¶ù…Ç\ÕX*äØJ7Íämºó Î »QÖ[u«û.žÄ§Âö=ñno‹ÝDD<Ý FYÞ£ƒ aí—äOÌ4õ7gI–yN¤k\Và郵´¥Þª4¶`X~ª›]c»g·¡7 -š»¼EyS°³F`Õ “Ïeä’‚ä•…b¡>ÞÙINtÞ& Û«ñ×qv­è6¨¨~°6òèóŸ·¶{FeDlZSVFB²nRTPŒ¥Z/›èõ º -µz`g­ÆM` -ÔËfØÆIiMæ+•4Øûÿ5€Ÿ j¯ýœ:ÐV¹ÍÈÕ]S_J¯HA‚c¤‘1¨úzw¨¢Ÿ#ôgœò¶Î=›°+Uk²(â¤Ã™é 4í}ç•ö7ŠÖÀM°Êæ^Ÿ[å©\1d´ñ<‹Ï -*½­ê^òºþeu‡ËŠVYlšMœý›Mü;ì -WR¥„ÃŽè°û ½]\ ÒSZ|ÞPÅ?Wu׬X“È÷.ë±î¾µj\ß’hž„S¯WC¹Ï }3¥À¢ ™‡Œž ŠÇÐR5 ¨pÓWà îf"¿Þ#Ègr¹ìjE?9ñÅNdðBl¯µ>M§†ì£‰šú‰ û"¡^®HÊ Båõd&¬³Ú²Ì}nŸ3]ÓkÔñGoõP–¿eŽÜxhW¹»LFåÀÜ*$(]fµŸV\s‘ا/ÓzhUSíéÌÒ}è(㎧õ§õâÞrö¢3Š#:>²G£#{æë;ˆÏe»¤³±MÚÞ¸g¥[ò4º;‘Їf T:¾˜î»û:¾£Tjë˳%¬X?1Z¸ÂLöbÌTsÃdbUçûG1ñ,uµÞq ]WÍ‚´Þ—û¤]¿­ù°‚€|75þé½Ïs;ξùE#„~/AVx³áÚ&ü¼à,ŽÎÊ!ÓÚ+;±¸ÃO™¾Ù6Ó÷p<⪳’Ыÿ·xq±fdãéu³ »[‹›Gf <µQæŸ_$Ð=nûqRüGÕ ß z >pÎFù}Ñ òóÎÎ]¬ð Γd “,ŒKYÍ2s6âäŒÄ‚eÐ&¢Ë|¹6'šÒcœÆs6÷º•¬.’»â×ëk• -œ÷(ûãöJæhnë¡}Kÿ) -–a~ß̧½ù°-þ Œ·LÚ9î'*ß]25RqŸ6gOu–´»‚~I΄NJZW›1›Tú 0*ÞðUgÞ??¿½9¯ÈŸx.>¢AÊÒˆ¢E@Óâ’eYûáØB8ùncç±$ø†Ø¬ˆsí_N›õì]__ñKú«ÖV…º>·wqZÝ,2 ;YÏÔíè«À¢5œöІ‚ÌžÑi³X©FóFxbº×Óçøž ·ÏÏvê,,ì" 
Á-è²W¬9«ÌÛcf¨šü®X=j*çŸUr~J{<×Ɉå~û ¾N /ÅàÓM¼ÆÓÍ›* tÌ*ášh îøÒJüu3GÚò¶‡ÝŸ× .¸*óû|X`‘D',âjt/ªz&ÔéšõœÒÓö,¿‹J¯Ø“°±®'kØZ¡rÎóQ‰µÃéO®ÃÏödž™Ø,ë ««ÆjWþ:G}PM”‘Šï¼ì"Ókò1qô‹ÜvE¶‘ñLT'扼šº «FŸ,O"ïTN§çscF¬gúÎ\T‚ûøÒ”#äzh[;ÇÞÎÎÅ-n^ß^¸œ=Bî·9<Œ\¡¿O.ðóƒßŸ_0ÖïMðP)7:ž>¸mR§7ZØhКcÆD}|˜{3kFÉ"¬;‰ñ@%æ+t1Â+©ÔªJ2:ìI?— C(§ÎòîV³Úç ùa#û­ùo±ZãU½›J 8ßyX€cú\x€-Úž!7»°Pt~}Q/œg)l·5ÄŠeO5Ž~lì”äÏÕ.f¤ÿx°é$±=È“ŠUŸ [òÚÔAr^☬qÖ7¥hƬ7‘Øå B(p$¥M§:8_šønöãõ!¾#Ï¡tz¯ `¹G0¾äüê˜ 1‰LøÒJ <¼=®F{,0jk&ÇG®j¢·u\Kα…ËöóbÁF¢Ú?ZŽ®–{g8yzü÷~\J2’ͨ!#ä቗xk§.‘º'Yu>–£'¶ÇÌ Ö-½Ê^µ|ƒ-,Å6Jµ©núöEd]'ܵ iÖç8ú´ü¦Ã/¼%¥ž†K¬ÒárqSžT=ý¤'/Ï“î:4Ãððá]]&§/H—ŸŒ<çÖZ5_ÝbùŒªìÖL}„hÚmÏ'g§2~IÓZÉñ8y`û„>²ž;jPf›Ñ¤ƒ†Ù©Ú -h["šð¸X­Ð£EÍQ8,ÑšÌJXxd5i檾5°™é“n›,„ˆ¸¸úmTÉÈJ`§9óÍl‰B¶Ò.LEÜËÑÐ< Âó¿ŠÂäÎó?jÚiS"õåü® ~¢kšL^Š%wk÷¬½Õˆ…:ùß@ä"¶7:˦ð8âþF¢(Ðo.•uàæE_ô%A¹V¢CÊ ÿšN™ÅäPKp–àaVi17.DFr’”7Kî%”a}´¨;¬»È"¸°ÂÊæàщՌ¨LU €ŽŠS(tv/,,å€ xß8¢=Z/ˆmWÁ†hÅú.ˆ‹† -×Cs/%ò„pµ`¢°u7®® ñ ×.®4@,99媦VȪ›Xc‚œ8Ñâc\2‰öñF]Ï3½.›~r ß|ImÃ…9ÔMãÎ@é¶PæE©o£šÁy ¥»/!-]HPIíŒRnãQ%( â= -€`W²‹a"N¿$ÿ}ø$Íít«ª@ì¸[vem Ùgùd&òLÍžE*“Lffp–10<‡,šûµíO‚"ß÷&ý-½uƒ‘ê"ª¤‰¸zHm Û´:B×Õ[@4µ)ãåÒo¼Ù:$¬^”V›¯68À¸ ÑKƒb±õ¶,sà ŒÄ½c>í†Ö-¿¸&bWÝ­f'ÑÜ?âÀÝ•§•؈u{5oÜVƒÁôÚHí°ž`s¢çK•äl9ë K7Šæáž8‚)TbNoNÙ(!z¢v:ƒ—ä "ÓuçyîX-ÒÔ½ŽÏàh- ×ËÔFÑe WnÑâ1lÄÖŒË"3‰C•µHNÓw¶¹ŽuUëG4 (|—Âל}´à±í h°ÝÉ'3˜`«3³iëú«ÁÊ-Q4ô])¦³O¨Z¸K²Å=¿i@ä™UÃ3ðÎLx–¥h{Ùó=2vͪ, P‘ÇkŸë츜k~"Æ»]£û‡jd8n… ÝFLfÃnäÏ¢ÐYK²Ïlî”-k$Çh¯‚“YÙX^d9 %Þ¬_ŠG€%Žô„<9íÖMYý”æ˜Ð¦P£ ѳ[k¬×°€¾wÛ/‹œ Ë©%y®r7ö{1=]˘ó“yùæ"GßxË×=aM Ñ‹.¸@áo¡Öû gWeW8$î$é¥c¶ØØ‚À—·¼÷Õ­d -%¿t*tÿŒ;,kîOf6”RuËJF’9?VÃ9_m›Šäý î¬ûW vÐ ò/Mj7D¿[¢äYÑ]ª,mÕŸLµQºãÄ‹ɇ˜Ižc½YjóížáÓ´¤~Z°á¿d9C¸Þ÷ ¥È™|Z|ö}†mÌ’™Ä¨ éªçŸÇþ狵Ó*8pÞù—T¡ »…•š)ú‰u*Á‚ÿ…Éïa‰#ŸR7—­6—K‡!n<¨¶ï‹h+ôEå§< ,·Vl¾æ«€R÷ 4CCÎÒÃЇ1࣠™ø v×T…ÜHÇk¯t"]ç2Ÿ_ÅÐà%ûBÞLbR“Ÿ{QJ÷–¼Ý…”#öSÈyÃ?6¥ã¾)~~<BÓéWe$k7‚ýÜT‚|™F¸lœyS•tÂÄž#ã³æ(]æw‚ôSíÅH)P5iNy˜@Sü›fÁýÐ1Å¢HóÑÑ8Â3þÔÓ¥Æ^.£)ö¦ØšpfœˆJv]}ühàwÒqò}bÝG`ÞoЊº3·+âD!Íöñ«v€Š6ÂÞlb${ ­:â*š£9‚&³«JeWH­Ä¹å<¤ú2 YêÏÈþôJ€i¾—¾;"ž_ôfÇêúðÒ˜fͨ‚˜<¤èþXIöBÀ€ -‘\ ƒI¾Å “ÝP†¡¼‡Œ=‰P¶ÝÖÁ²ùÐ7Ÿê(Üõs`;AÛ⊸4ž2¶Z”y¿U½6dmåì r‹p#ÃïK¸_¢&LUÖ©˜6ˆ`ñ-V Sÿ€CÉuâo€WöšÞÒ(FLÀ†Mú® -çù·8VÁ‹cllJ1 Rí®ÇÖ€MÍË›0µŒÁmõ: 
-wiÀ6›}›ì;@|:Ñê=ÔÖÓïxí¬"éÌz2ßù rx¥– cmÓlÕiµA^a<ïÖwPæ“pê¿=ÕXZçš.B­d•z{$ì¯'hþxI’'@þìr3[û,èeÈHïq×ÛPßþo–´Øô¢]­é±½\ñ”ÎÛ›*žxt)o ƤPÚ‡gûIOf rö…º¦<ÄÈ{Xp¿®É²¤•î$P N‰ôÍ1©<1.¤gG0Acîùï}kB<¾÷ê;U%™´!?̲a?d/»ü‚ ,j5›¦IEf¬e8þ„ö;–¯7Ìž.ã—Q!öÀóLÝ¥ÆÐÇûޱ²_P//8Çáy¥ºk™”žË xN,ˆ®÷}Õ¼N™,%&%õ¶ãÔûÖçݸÀ³ÇwN@«`ЀÖŸN÷¼›Qõ¢qèIåþ^<ÍnªÐ¡b‹câ1ÕPh'†8@~¾¥ dˆöJA=±¶ìHø³°ÜÏÿÝÈÒÑ«€´gqÛ -'ø‹—¸7dÍKË=ù÷¤•-¢ãÐ*U(ô`¾ßÝÓ‹üàvÓKØÿûªcAHôb¸¹H`£2@æó¸íÑGƒ¿OFÊy‰Ó+Lå¯-1¤«Ó94Pdeä¡·=DÁùC¶Š¦¯?)«Æg Þ?)§nטQQ -èæ".ÔÕÆ_p.T¬šdFÞcV¬O‰í>Àß‘ÔyT&³ãÉí?±©n´†¬›aÐH±.Àb kÔVÛ5-‹få›å>Ú³Ú}YÌnúWŸŠ7Š 0Ì鍊3óÁ55á|ˆ‡Ò7Fbc ¿À;<¢Sf&^R:üÊgýÌC‰ób•¯švz£©ë.\'ø±>Š®—pñ6Cwå÷^4_=`ØÅUº-ñí¹¦Uî4Ÿ VÛmbhÆÏ °Ù>\¡Xí|›ùaXFî6n2¥fÞJÚ(‡{k†vÕ?ÖÏ#K -X›1aÃH´E±;lÿ•G=0H#õœíÛ †íœÔ¤žFÛDkjÉäEöô½k© Él™®B¸]´˜‹ÃTó™§jV©”þõ‚#û%º‡½à¡—Pʔѕ<ûïùT‹äÔ -À¿.' K?Ë-¸ø|ˆwAÀ¢U¡mêHàŽµp“-D\ÔßÌ„šp#ñ¼Ü™;1Ó :¿.±°êÅÖ ÊOÕVªŸ±ŒÓx¨ßmc=Åvqà vü_?1 q‰™/Ëk9’,+ÃæÜYþ6ãà!ÈÏâ¤Áœ‚mÑ´Õ¡õÓW&¦¡'½«J[2LÕ´ÕR«é«›ŸI´¶¦»(¾_ª} Øå))x=ÍÈ%‡fGo°µ G¼>ÏS^ÅF¨ÍbAfÖÇÁíKÎ@îÇàÈ?ff%h…æìoá}Œ 5WyY×ø¹]ÌŠU¤úÖ?ÝúóQªçK¦û¿31ø{O5t(œÆ*¼EñKEz‘J°wÄvížz~ßv%¶s+å;A…´é:ØA'*ϯëˆâ>ôE…ó#jÏêÒÏE´3Hõ~½8¡ËÞ\¬Äx´Ãö÷T²6k9¶*çÓ7í´f8ëã<±ðCŸÂB~³°àrö±0 -/ qvP;*¾Òk’ã‚ë/¥å³úœò]žFÍ8=8Wñ>„„¡þa TêÛoUw²ÒGtOpwN` ·_[gÕAC Hż¨D%ÞJ€ ÐÊÏ(¥ñ^+ÓPI úÊÎèÜ ˜ÝmÍlRX£\/6ÕŠáQÈ)Âo›ˆ¬œ ó––fü^¿ ‰l× ø „‹ŽS<ܧäT’0¾—Vþ#”[^{£Mk|ø¼ë´iÔ 9§%ìŽñžþO‰Q}LS°èJÔ…Ìçk“7¦°¥£›P¹/ I@ Mš‹êPØ©õAº<C¸tÏ·ÛïÑ›¨½nOY¯×Nr­Í{görìxúãì~Äc2gD€ß¢BuÏD¸@|+O}2M ë4±ã±ú¤,2ÚM$8Cµf˜bRÅ A'æ?ZÕšañ¬Ä¯§Sm“…ô–mÑKÛ­ñÝÌ-(¦ÔÉÐûˆ*À}Ò;›˜‘$é–e+&ìÝÇÒô/áI§p@C›;mFôÀéeQq){åÅÈXxüdž½Níhá§ÿf[ïpçmŸòY±HEµ³AUøS™‡Àƒ -<ºÁ¨Fjýê#÷Eä/÷)¨3\Çcœ(ÅÀŒ†r]dÖOPR2È9´øÏšT }]â(­Wæ¿y(R?“C- Þ{³UŽÇ­BÆîøJC¡æÚTGúP )D|"6Á—rÖ­>ÎËu*0 ˆi~¦rS¥Zé©Ùµ…ùÒ1òø‰¦‹²£ƒ-÷Lî°!VÄ6¡þœ£kF±¤® )>AôÆGü -K©’Yˆ ê©>ñÃ'O0r¶QÆš…WCÔ%ÊÂhÆ» Ó¢ˆi+3yÉŠÌbh¶’̃$ŒÃ‡zmT'¢ªºtôná¹gJã'EJjI\%LúåùôøJÍvb •×P³.Èés =)¢U¤½¢„¡ µ‚Ùfê>”„€åí–ˆÁ‡.at<ÒRí劙 ã7xÊRxQD¼³;j%À_”·®fRI쾦Ž+ÆpDè|¤¦S…ŠV kU«pNTÌW@;Lµô—™¢ Áш֊ê2º÷°O¯¬Ím ´ŽL “é«]ÞÒ/“Š÷ð^b~Bò·RWzYTí°­½Å¤ÊÕ‡æ]"Íùk€#<•‹w¦=œ}¨üØçF2ÿŒ÷9Jµ¶E,ë†t®#C`¶¬²év¬Ó'èGØã2N*Ô§ÿ£HÓ!ú7³\ÐyZß%m$\»Vi2SV3¦à¾UBÉ©q)zf†o–ï %4Þáâ§B¹ðòvM²6„\ì)®"&T|N¡wb­%š°6UèÛžYjžžzëÚÈ}^%:1‹¶¸¯ •-ñÆ…vûDé²²£0H 
ßR>~“ƒà_´8¬“f½×°ø{àG J·c°ä‰à;ÉÈ€É?XÝ,ÿôCŰ*ŒS EÛC´Œ#)¢å:¿tÃá&¹‘xÜÊ›F/áø¢ äÕÉ¿0"8t -±Ê”ó‘C]l˜å–ðol½I^†èó¿ØèÞ]Xº“4âtâl›0ªîŠ7»Žëùé HOÈÈÖ7ööÀn°£¶…x0ümì­µKÚÒýé!qä„2»«•Ž¢:ù¹ <ðh—â^ ·YMåøeu…z4ƒµ, ÏrFÛ !.qÚ -]q‘‘ƒ)Åä’™›¢ Ô™Zb+¯~!z.cŠÝΈÎPÌÈ€Óú\l;%H£‚øFÌ Å '®}q÷qðfXßÇýiáfæ¥<ú·+©œÃòö -–ñ‘lNú¬eó‹­ÖålPlð‚Ž›‡8 Ô[Gnm˜šS¢˜6f wEÔ$›ŒÍú‘ˆ‡‚¥$jÊÒæš#',õ™1U„#¥XÓ#Ö (Œ5Õ¸y¡þƒ:¸NçF'â$YNßtk“«–)Jz31ˆà@W]´OˆS5Ë(mïu‹Ê/é}Ž=/’Bv®‰üÀý•>I·[îÛ½›ëIÂUý]Ö{6-H±05>¬Ê—Ôæ$²MDí=?v(Í-Œ§¡±3§EØüì2Ñ')f$!œ½áG!†O)ÎÐÉFÍeÉaß§©×X -Á[ÈfVl±J;Á5ŽÃú"á_Æóì` VË–>n¨ BÄjq™ÇQ›*Üäùyvwpÿ–ÌÙâ?rãõ -‘Aªüæs;7ó`5å5¼F8öÑg.ã7ˆm €áÊUÞxu´äÈl÷bÈkyQšû¹¹´%‹R5ZBbÉF{–£õ 3ÆÙþL„¤ÁW1¹zߟ–‹O÷]ã]4c/" ÇÂhâvWÏÕNÿÖjõZ¡Jñsõ­;¯‹7ï Kkåg®ÝÞ§)E+eÒ¯ð³iû~—iJúvwOÃúZXïø¬Ñ2Ʋ™ Í=¶§Ñšnõ-(· ëËÕ;!˜—PÂÂNi—ÌÏæ%šõ½Wms`®¦YŠÚÍOº -LÀ œ÷¸FcÕ/R"ÚÔJ-»™¡s|½”W¯Ëš¤*Ã?bÀ1|Ñjçº.f™‘§7y QºÑÔ ¿£í üW5öÌ5qÑ Å } X´¯§6lO(2œ:>";òÜv,…›Ü¨hŒ‹z0ÞE:8¿ãn7ܪ=§áº¤@þat&s‰©°c¡h±†Ù¬,)›=Ä.ì±1Œ°_\26=ÛMNiÈçO€þºþ\5YKœ½$O­ê‹«sy_øìo9È[¦‚ÏœÆRg#Ÿ¹ý§úê{n5,fZÜ3ƒA<Ö jÚË’é-(¤´M`‰= Mçœ_{¸^£y=w÷R¼º ´5ÁøÈ¶qÚ·eÞ‡”¨Ÿl æçf3Hº‰ñ«¼ä ön¸ÐzûÉ9K©1¿V[«syÐÁYjN(ç8ÐÅWì—ì¤ÁW1¹zߟ–ˆ"M™}àeáŽ(ŒŸ¨Ï{»_ë,K^—QÈÈ9qšÃž.^Ér1’bʦã|ôÚ{³¹Ì9¥a¾¢7 r ê*çÝtô3<áô+%ï+AH`ƒ-d¶¾ -“Tm€þRîu°]fi””±m+~&ÜO$‡!—OƒÏ@Q´y(È«ÜÄ„%|2SÌ=•*éO0À #a;…07L½ºŠÚf•Š ©AvûÏÉíÑ]‘îA•¦[ÀùÏzÅ% ,ÞS@›ÎÊéDAi ÕêÿÃËÀq‡9-"ÿåZI) dDÁ[Ò]Q(==‡…ÝÖ2£‡.ãÇkÙ\½jcóË=UBóÉ–™?ñuü²»Ûó¨iœ¨NpK¼…ÖÚ]§Æ¡Yl*cú:*@ˆ*<ž6CìY9É¡­„Vv0E‡¼Õ~”I0ÜE¶ÈYlAQß}]µ­w‡©b+j6“yžŠˆÒZèÊ 1[a¶¾Ž/s¢#U8·BÏfÈmá°d¢µ›ê—,¸Ê>YŒ¡¸CÏóÊX€͖ʵÑ“Æ]õ™ÿí|sn”_„S«¼ ™êèΠ®Ú¬è4‘#ïãQ=1Ýq ¼§éÝ'ÌÝv»›‹†{ïaæ]y¢Iª>ËP1ßÚÄ‰Ê±å• KÒÅeƒ}ËÏSU=‚”+ý;­Þ)ö#ïPɱ*0 -dC¾ü5 ’1ºHÂ?¤|)QÐêyµëIg´]™$ØñL@<ÜüÇ9 a2H½)¶WÖ+©P!Š:®£Gok¯ÒRj(‰s¡“RÙæÖöe3i‰‹\ê õÃŽàïNZ‰¨G×—¦’ÄK>"(H¯UóRÒ«T)¿vá§SˆÒ…´ä¬0Ѱžu6‰ŽüÑb6Yœ·ŠçR6—ä åÑ_³Œjæ¶Ç²_ÓÂ&\‡0Ù³¦Eší¶Ç©hÄÍ–šo߸’ÑímÍŸïÆÝÂAO+(£oý‹z†ÿÀH§ÄáþË|¼J›$5Ö ¾¦ÆÇWWn äKRÐÕí.‹rW6ˆOevc¬†^~ö#BÌ'L$¶³¦„Ý5ìT&Í-Ž »Šº ”Z?ÓÉìËÚKÓ¿Øj¨[ý¢Ù&‘×D*wsgÞÐïo’­Lxõ\Mm¨bµŒ,»»…ê2#I¡¦º¯Ä&wO ¬¾Ì‹ž´6+ÞOÉæ½±\ï&¨‘÷‘ê4ª»9‹„ŠËM¶X˜2©K*Ø™´oJ2ü|ë~æ&Å0ÕSèÑ÷WQÑ‘BO5Šïµ‘Ítcgj²WPš†Ê¾&…-V8Á¾zÉEóøTuÅâ¥Ax³é7ËÚ9û‹ÑÙˆÙ"œ6lC»Eü§P‹y±g&Õ>„ 
¸þŒaºÔ©ÈÉv°-–]jÉÁ±|¥¼v²ê}AùÅÜí–zçšåM˜RH'#ÆnÆ…zBèSÞ(oK8;õ_˺“Qju±•=wi<2TÍsôÆpDõ÷X `9Âi–ÕÚ«ê…W‡ †’Eh]v&<íÝK ¿ÙNÁv!D{À$µ¾)v$µ‹|fÛÚ2Õèêö§TZÀkv~w+bøM*òø:Ž%úšóèA»ÑrÌæÍ+`á6ùš­˜„ÂÒEKñ=âõ®¶š‡ -”ÌAôÓ›m_Åá˧¼Îß²"hg#$Ÿ\ïá³Áaà–[¡÷1}Rõ¨„V›G¹ëâ×?¦¬ø´~ã蜟»g°E¢.‡Å)qÇëçD¥]ep[_;¾ ÈÛ‰ Ø4iص“ ¬{ð• ù¹.WàxNåö-¤%¤' ]œaꙂgûQÏBî­{°ª@êÜú1Š®¯œ&3ƒiö‚™‡Àûƒ˜D ~` ¨ ß‹K¯å¼Ét¼ÂŽU‘H° øøvíÌO8@Uý7ÅuraY<”¹:L:eèÈ á„+§‰ðUD)å]ôÍ,Ùšå­"óyq(Íñ „U_ŽÙIRwN ü6îpñ…ØT='ȱí·Ä¿üx¿»ŸÛä§âF«kI'$Uv[ÿØ#Žf.Ó” SsVƒ@:ü3‚¬ÑCÀÒ}!ø’¹®”Y@…bYMÿžúºqÛü›a?ÇGn)´Ö}kn=F·JŸ¿½ËÏÏÇ?|®ýÁWœq°‚߃ýãÐR½.Ìé0]MlChÖg– Ê‘`j¾˜Qnë”Bcm2^y÷¿¬á9¸†ÿÔÃâxÝ¢Ô—ðšÜ4”‹hÔkD qB‚¾!x˜Cq'ÎßÂÁ!Ô\`dx¯Ï0À’ñݶöЋ”KG2‹XTD,_¹·Aõ²åþǧu>Fí^ƒÅ]cÔ.`=õ®Pµ_ATÖc8œKZˆ9ùù³mc=rØ«ÜËÁï…2?û'*#>žúÕVvûÉ=“&MÛ¶›¬Ë+Îp{Ó›HžTEÿhÏ ðàzJ2dÉ ‘aй¶›-;±c`¡è¸/þ®ºnÞÅþØvº˜ß\´¬Ÿ#ÏU’м~ûÕËŨΟ7N»qïóƒÜXümª1ÄuåÔsÍ\»%)a‘Ø©ð‘a¤¬jr|é£â,n”8äõY1f‹i‹<¾& C¾õŒ.P1îz_38AâÝsR%³¿úêKo@Z“u¿yjõ èïºæÃsú*Òˆ°|)ŸiÊ€‘õè,jÔ9< _ .Ã4¢°üùR…%|B¦í·ŒqYPç -¬Œ®±ž«6Ø µ¨‚(aGV£$}sÆý_»¿ÿå…²GFCõðÔíõã"K''ËúŠ’¿$+È•óšCC6]|6è*L5áòšÅþº…2>#6ðÖ±òm´¾ì׋Šãµ¤I¿“™›U²«€lU¶ÿpHÕûU‡,Ùå[.:⋌_MqûŸóùWcE‚†ß‘á~ï²3ø¹äø'‹ÇÏ3˜:sÞýé©À¹š ‚uñ/ð/pÌ~s¯ääô8jÙ«úN4Kª5H~&Š£Töȼ#°ïÌÈNÏPFlp²6À¡_Tre—ô»É&É*>ðÁÉ’¬™¿Õ¯ -Âb`…¢½³êØlsóÉVÁ#ƒƒM%Bା¿ð­ÐÌ`|ze‹µ¸†mYKû½;3àPø^P„!øã4g„„¡oœ9õA†ž:éÌ{`‡ÉópâC(Ã+•Ç*$ݬÝúØRÞ´¡ò#a ¥W:XÛAfæêY¤½0ù5»W5>¿›WSlú÷Ž`Â(ÍqnG,(ŽßJ¼-ëÕÃÍ×ÓtEÙÜUЦhu†BÎzñe} ‰žèÂÍVfumŹÏ‡»™»“Ï#Ön§œ>Ì\ÝGA/}ðÊÙîop†…ƒ)ªÓDZÛÝ­áŒÈ—Ë0a¥±Þ/ñÙˆïàD´ê¾4îÛMg HF÷óÐгñàIc­áÙB4J7gÂÃ*-¹áœxÄï/Ÿ–Ã/]\›-æ `#f8íK‰n#ÝNgv#¡Ó¬u^0Õ¦1Ô'ùG~úx"º30æì7V´Û¨MõΗºS$7ˆZõWÐ’j|³µg¹‡vù]›ô­h7[úŠë‰’³Ä¢ôèbÞ:ÄN%“Ué4«H§FcôüÛOÛ¤|õ1q8A§I;Ï­b p;¬;«¨Õ^fëÅÕý'-K„4ƒƒÐƒÜ¥HÔÅ"EºÍâK4ý¿Üó¤Ï„Ò*_B¾=Óèw`2±] aŠØ4ÎI eâ¦ÎŽˆËtzˆQÛ‘›A\ÓI¢=ƒ< ‹u)îøsf­_&‡‚+“£ú¼8xÀcI‘m*çhùÈ -Ö©/ê™ú2¡‡îB˜Åà6^AĪĉܴÊ3,-ų=@–û2¿àí /ÑŠÈÄ< A…ÌŠNðµÌ/òXS©dÔ5LUî×zl×õ¸Rëýí9%d"‘ß&Š]ÈóÑR“,}e…Aè$[}…–ž|.g¨È·C©¶˜´ˆaGoqo<_o­>J?*ÿB—œèºPõñL¦ä. á‡úYØDàmž uõô¯#ZÜ¥­9[„ªp€Á¼ -™‰kU'¾ eŠÌ-i4¸³<‚‚àn5ë \Õ}@ru}ØM[G£±±¾ß*Ï4³~Æè‘¦~>}ƒ=’ ×lØJ¬‚qUÔª õq5ÉvâéÎÄ>‹¸B­M“îxš%U*ØþîD(®><㹃¿ß8Í%ŸtF¶føòÃèѺÇy¡Éh×cÊ/ %o8#rtÀr9/ãÀEUÀc¨ëjU“tb'ø!«—ds¯Ñ2mÁeÇÛ{‚¹Îb. 
µæÃ!¸ÕgFë‚VyÔR/â!/™?Ëf>I)tItÃÞ^ÛDã~sRg½gpx Ž¢ÚGÖp÷¢2„ÚÝ©±ý<P«p{ᆛè†6á<:Ó¾ÒLaJ«À×HßÂòÐ)[ÇA0"Ì÷ž’¾ eèå~&%«ï)}- Ø:þí%ÏÕv 5˜5Î~®˜Ô;öùq - L¥C{€~é>ôd†ûEÅ:C;”æ§.‚aT/«VÊ/·I½­tŽCf c7y–¤è -qqv~Þ…[R -ß -•€$\GŒÊËŸ~LSÇôßFÕD² Ô2ûR7«NÙ­œ;7½€ÇÏ¡Á.È2(€†±ŽýÀñœ4ɦ—’ÌÞ¢“;d!òêãFªMº€êvÈ®>î¥oˆûæÀfT„h>›+/,³=†|T¥¶bÂë°Ù-ÝJ&ºú—tÓáÅ‹å8ÎZ„lí8§–Àæ £,wÈ$¬A•šŸÍɨ†ÚœÊòÜ7v"HàTr¥nE­^r¿Òç­]…KOÓÉi[áøvÒÓ*‡À¬K0ªFoX;îQŽºrË`Ëâ¿KÆØCóœ¢<–ظ†D¸â˜Nk¯èáSDAôš¦Ïm… ¾\RR@Ô§U¾°4_Ó®÷˜«3ç¾ÁŸÿö&}YbµqøÉÛ§òû ¤ìéS *Õþr¼ QŽœC¼ø@ @9Lß”VËßÇE iLI&T(ŠÏØÆÅu3ÅÁBCÔË­góåòß[Ê­8zI?Í48‹ÜzìÀbÜÎÛb)u€Êæ(G÷TÞR[¿ß_\Ww‡U‘ÛÌß²ÇÁدGq3ùò#óØÖÀwCé3²˜† {ÉFíë°ÖA½¬‹R$¨Œ´zzÊwØl–es? cF+Š\ôqïh€-“áÆ´[šH¦Q8êõyÌSé,ø´‰eí÷ïȰ‰A"X‹–î ²',Šj¸RM“,›ò¶)0ËÀtýR` ƒy®Û“^õóšS™b’~6¢ä¦Vã6?:ìz»fùÈGýÅ7x9‰*¾ñ™oÀ`ÃïäÑöé 4Âê”d¿­6±úƒkOËqEKÌŽÍÒÑ-À¼][wœh‰f„æ9•È®.eÒîÿ!îëRîÙ*•S1ö芷†1üªŠð`‡]ÖfÁ,zÒH˜Þ“è3:±×΃Rìv¼ï}òññ¨I-C ZKp*I‰4ï7`Ø+ÒߥE§²%ÀZð€ fÜþNú§ÕFVJ¬°Ç¥ƒ ù(d³KI 6s)•OY5#ÝPu¸x¨þ©½¿cØ e®PÚùžÿEu×d­d‚<ä"<âÓsánÛRÿÁÔ‘f„ßu`ƒàÕûF:w5—oá«a©ÂþÓÍ™ÙL 7¸×HæS-“ý‘hD¾;¢;ÖeÑl W#d¶-L¨Qû€»ÝŒ›¯è)\ïòò›…DãÃçRjG”ëvœ¼™¡þk3°ÿÈrÊu;Qdµ°ë ±­¦§½"#eô`[ªÖâÜ ¬´³ó©JÁ;ÏДs,ûC] › -QeVÕ~ %rÁó¯zK’FÞ—¿||œ.W{¤Íq`S~iï/Èõ_«—©ÏwvêçË6T¡nªÙ™1[aj¦:“%Ö˜<½¥ÑV¡ò:* -v ÄWô(V¥¦ò=3=ía-väTÓÎ.볫»Zü¯ít„q­ƒE˜o‹ŒIZûfþ,)ƒ¬X& /ÏØ Iw½RÀ8¬ŽZ¹$kÚh’[ÉNXÒpyL8e–Gï+Xh¤òW"þýs[åp†pÿ$9·Rÿ°eÒngÞGÌåÙ{¶õîK5­zsÆd‡ ga1ëL7Üd”¶Ìiœî_Jç.°Ç×L§(¥cr¦ô¶Î•”Á@„ëT VƒN5Ó ÷Œ;E"ž½ -ëm³•d6U2ñ‚…M^?o¾ám#"|Íi s„•–Ñ  -¯ôÓ7’Ã9˜®ý"u)B­ÐÖA|ôR}Šyzi—ñÿJL}Ä{úWNËu]¹¬Ò¸n?Ê…+ -ßc‘çÄ»}tõeýgZjI„|¡Úwñ€Þd¨4raäÛÚ æ+ù_]bq KžÕ}‚ਪó?—}ͤ 1»ðÐõOxÇ «f˜jší>LXêÔ³©wêßsPñEµ‚‡"ðúu6ƒ¸èKFýu”ë¯Rë‘¿xo³ 5M…Ç15:„\uå—FòÅâ$´TZL ©NÔx§ØøuÓs 2¿_ÛèaTœ†;²˜Âì/{¸s”u½®­qg]ô²›H² fbÒ(³-p܆l®u®;.y{”¨,Ÿ]Õåòûþú8ÙY -8¦ê¶Ý눡ktðŸ‡Þ‚0ñ±ü‘ĽTÏ@¾jŽˆ·K£©¬i¶¬9€)B†oJÆ e Oì,V}]IŠƒ)Åâ—‚63 þÕÁ -°4®{«'(× (F¸¢SÇ|Ò[Á5º§&¢ ÚæÚžu~–0ÓÐz1fë²Ad|_i ÏAižšîîKƒ×ûú½ÙÛ¤‡rýœåž¾-Ít§œ_ÑûFˮݹW¬vѳ-÷ÂH§[X¸¶ÓIT;ŠÀ§íÁGPèÿ–!¸Gžq~Œ#§p¼îM!ììeEv_vžŒJW–)4œàÁ(šo;à%Ícpºn³xx¡5; ó€c¸ £9ÄŽ‹-<ÃFób­ LÛ²R 0ÔÀû–@&„³”uÓ¿ß¹0ð=-®Q±#Å Óúe°õæ1¶”8jŸ·6¥ûß@„“VŸZÿzβ/ÏêJ($É””Tr§ªR‹° Zìáùp !ãz"JG@jŠa@lÁ%‘O5(¦GfåFÌF¾¬ÑêÛßÃ3jt©ùªÆ@£÷m Æü" ¡cgÈx‘ÎÍé2Æà‡}D?9 (ߊ¬Y@´Œ[êª×ui¦+[÷¾Åˆ½< 
ÌDIÜ>…¢•Šèáù‹‰(îz¯"hh1a¢Ò›'ŒPÀ™Ÿ|Øpú6ßå|êBÆörýl¿™í?Àd^V'jÆ)÷†p=nÐpî´÷Ô‚ ìÛÏŒ„=Êè¸nÿâu¯ Ä3rD ²N° ǃO¼m¬¨ÒXèw©ççû -þ4žºDp]Šñ`ïZ=áDÔK¶CTœÍuÒ‚®“°dMÿ¥ÂJÎ’à±Ê¨ñ¿Òã`ûHK~gO”ê èönEµ\A¨pC8»gÙO?xŸ/¢dáqúCé¤Ùø&äZCßi7¯^ðºû&ÿtÞ—$CN‚‡òÁÿË™ØgP+ÁRÕü´H+¸þÃçâ{qQßôÌ0€”Üm%]߬8m^çŽþÊ}Â~+Z:-`ãLÓ¾<­6ãî—ÑD—~ö«šlFpzðY¥›Â(üÏÇC7™*fë fš£;&áôµÈ‚™ß?k>ô˜iÀ[ۃ܌Ž(+:3{&ú'òè©Ù%óeÄÑ黋ÞŽÄzõq_µÙÔwH±wyŒëAȪ¿ZyDí3.«‡Ãæº]§ädžªk¼êMâoЉGY­„›ÖöªâÃè -à¨\os é‡SM5E†YÍbü ¤¡H¨'›éа‡Âîjë®ÏJŽ‘ÄƒhP§iÑp|Fƒuˆ¿®Ïѵ¯ ,•;Ü>r -¡•I#“Ä–³3MÌÈóXÓéíþ+À?ƒgÕALÖEXÁóÀM÷‡1ö,”9ü˜¹9|nË­Àç~ps•›£v>An^N6ÙÅz¨ºæ£1*ŒˆVØ—e_9hÜ=n2£•BAÿ×Rêy]žcÁ2ÈgX`M•#X@>êzŸÉýØ2¤éB¼$èÇt—˜Æˆ[àjD5Ÿþ'ç‡_{^íÃh™ë /¿‘-޶I ŠáŸÚÜ:uÑ9¾¦Ø´këÁÜš§‘,”¿k¢Ð‡-"6ß-Zû9:7/ÞSÂqôöjÛ¸`•wKçj{|„\,UíÃÄ˦tcaÆáY‘÷ ¯*5…‹ -Q ÄS!1m†w¿ÖªdT¬!˜|{¨¥˜ÿjküÊ,Bo+;oUH·B¢­=Ô® ÖUûdÖp T‰X§Âúz§k”¬kÈ$ùè˜YÐ<ºslF¼j»‚ÖcØÂ!c.ΪåÈz{PŽøâû§îqÍX½%;³W£˜"K 8Zãù½ƒðƒ¾ÁÓ6€Æ~X«ækZÚç< gõ€Î¶/ôÓƒëo8‹5ÛB€³ši‰ÈÆŽ~¶µF4ÞO{ŽÝW–Ô…')ž_À'Ë6ÿ)Š-`Í/­£®¦íœ)‘%¾ššä0ãàß­2d¬ÙÒ„lL²a'¸ÓiÈq‘ÿ¨u=ÞLYu'~=rW¡©š?ª:Y¢ô&Â%3Xüªo¢Š+_¹e‡†¨|MîŠîÈ(ç†2äÐÜ´=¿Ô|[ðHМ¤Ð•…EÏzÜ}Š4ãB34ú×êµ<‡ -Aò%T>³9¥Cë¤=ÔÑ Ú€c…¨ÖÁè+=ËÖýB‚ä†uæ‘Îd5T&<)çÍÕ¿MQÅÈ0T ä×.Go"TàK‰ìhô2‰¼7aŽ®ñb¼9[£‘híßÛ<• ƹ̉‡ý™ßSã}-ÿð‰fY„öäâô‘ä¼ìëCGiÔÑ ­€Ò“³‘ÄÔfºè †B‰ÂDCŠ|Tø¨vÁ‚·b2`6 m{Q'Mˆj:¿ê2XvÓ‚IËR,|µÄ|\~¤t-)Õ#IÈA7 ½Ð´€Ø|ö /ät†eÄáeg—W›óÅÐk²Nè$Ðlo~÷v &^i …庂ÑÁ7»tÜkÅÞF­©,›1ßÑî–l×­=µ}ýÓs#1’uÆÀ=ò÷³ºbÖütˇx—Æ›Ì”Ì AÏuZí·©˜äÆ=Æ U èø/Ãxû’„ IlŸÿ\Ù¸ì¶ïOŒ¶†¯=æ;>Ù+/0ûðŸý¬ r%8(øxF…µ#Q^ÁXˆíõò{cl{ {M^‘=¢°y^Y\¦zÄe±ì×ƈÿÐÍyÖ b_ ¥ É­½Ìâ¨t@ÖOûÿR -º}‡/ÙœfŒ7¤úŸjã-M2ðÀ¿!ÀÑ$rÇ?°§)/2Ø][Š¢Õj1 %Zœÿ‡š »)sS7ùXŽÈ!¡VJ–l”*›kT½0| òò)i€MZˆÌÏÙûØ_Ôý[Ñ$4!hôv+Cd -'kÜ&£X¦ý¼–e›¹OOâïqaJ:“ÔU xXÀ³Þ^BÀ-7H $ ÷nU«¥ -ÙÖqìéÑWÙŸ™±- €4äg,òý°*Ïùà´A%äm«*aRHd2ß+*t¸ã¬õØ rLlöð ‚~´¯ê?=s=ÿâæ5HT -vÌ•£JUYGTy2—›æìLüsƒïðúÕM·ty"-åÉHÐ=]7×B$K´;´±jíëÍ3ÓùN'ïÁ¶”n^°hGv1HŸ ±2×Bå,É{šI ­€là‘ål„}ýa 8|kZ¨~üèYÂP§Ú¦f€Šªg@<ßL\½É¥QdEuÛ„®'YL3—)Å`…ñÚíÛLÛ"vªŸðRn î;‰Ñ-ŒMÍ×Àj6ê€äöÞï/p18gcöù§aPfРŸLÐ HáÚ†ê#:¸u}êjV4Øw¬8×z) ´Ú°Aõ£œüÄmúr½¾ÌÁ0°í`ψš‡ÎFRÞ¯j¡È| #ÔÔüȺr\&´èL ÂT"·s¤®Óé ÷aZìÑxg¹T7ï0|Kì¦mñ¶M±bjþ…³,ŸÍìÞ9”êfrIFà¡ÔØBù(­Üf+ŒOSº³±TÓBÑÿ7ÈóÂÔ#< 
“V‚Q¶¤<Çô!»Ê8œ‰tõm¿Â™,»òn§¹æäW=ë÷ÁUÍVžI³¬¶Xó컲s¨bϽË7âvz{ôEùH6NÏfÙ*¼wK9íUn*¦0’ó~Øe“fšëô¬,3¡¶Öœ ÃeÕbºNÌÏîy¹}U¨ÐuK¡ÙG0?8ÙŠÝÁ$ Dâªæ=2[ˆ¥ñÏÀ„=ß+s‘‘ƒ©ßüeDæ(ÓÆë‚$‚8tªfŽkÍf~âÞ|L7‡¶'4?=ÊóV«·¿Ei4$ƒ‹K¥ÚF‰nð<‡S'ʲuÿ丸_&©›f@KR±†`ëµè™%'•Z²TJ•NL¤óp?<PªžB;2ò›˜ŽNEÍ?ëòdÜÇCØ(¶-£ü@y¦$ý¢;¤BÈeg-2HÔ¥©«µ¿rª„Yl½uþZv¬<Ñ;V,^áE«—Bµ½k1oÿ|›™§­Àõåôgb¨P%À_ûUüVî‡Å˜õÏÜÇZ3ìh™¤Ø\Û»¬úK猼üvÑ$úÀ…¿EÊh¾Ý=oÏôÌ"ZjâdKDvqΡîósÕz¶H&ö1c$ sJˆ>Ìõ„D-qª9»®¾šÝ{‡ÏkÐŽÊ:âaçéäGb±›÷%!ÚBÂiŽA¡ «Ùp©°OjI¤nÍð…”SŸñƒƒMH»iÊ‹Îkºµé°e[¡óG­NnÊÃû›1ÖÆ%¤Ì†Â„>U–@ˆ1›mJ˜­DIë+} -â×¢W.#]¿ïƒ×°§šg™ÊåÂÜ[L,z¡†Ô×ûÞz¨ÉPV>ú.PsþV1†;2\EަïTv»Ü7»uð¨æŒ%vñ1\òTg û~“Ù6 ºå 9ñÕŽ2Q”^r4"g -UƒBO£œE+Ó/’dIƒ‡¶W|Þ«1¹%Òïslpˆµ9c±“h$¼ä|¥ªƾΑãî9ʬfúÇ^€Ë÷Í~Wi-Œh û9G:ŒHä Ca>0'èé—]&{¿ÑÈ;OZärJYLWÍ s_(Š©97¿–ö“CUýÇ›\¦ðµ‰¼îÊ ”/mm¸~fSÑ¢Á_|­×d³ºû舘DÃn çF½1Ó(§úýë¢ o}ùR—÷ƒŒÝ&865IVºŽ™hÌçª B™¬Î -endstream -endobj -1986 0 obj -<< -/Length1 1620 -/Length2 17956 -/Length3 0 -/Length 19576 ->> -stream -%!PS-AdobeFont-1.0: URWPalladioL-Ital 1.05 -%%CreationDate: Wed Dec 22 1999 -% Copyright (URW)++,Copyright 1999 by (URW)++ Design & Development -% (URW)++,Copyright 1999 by (URW)++ Design & Development -% See the file COPYING (GNU General Public License) for license conditions. -% As a special exception, permission is granted to include this font -% program in a Postscript or PDF file that consists of a document that -% contains text to be displayed or printed using this font, regardless -% of the conditions or license applying to the document itself. -12 dict begin -/FontInfo 10 dict dup begin -/version (1.05) readonly def -/Notice ((URW)++,Copyright 1999 by (URW)++ Design & Development. See the file COPYING (GNU General Public License) for license conditions. As a special exception, permission is granted to include this font program in a Postscript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) 
readonly def -/Copyright (Copyright (URW)++,Copyright 1999 by (URW)++ Design & Development) readonly def -/FullName (URW Palladio L Italic) readonly def -/FamilyName (URW Palladio L) readonly def -/Weight (Regular) readonly def -/ItalicAngle -9.5 def -/isFixedPitch false def -/UnderlinePosition -100 def -/UnderlineThickness 50 def -end readonly def -/FontName /RUEFYH+URWPalladioL-Ital def -/PaintType 0 def -/WMode 0 def -/FontBBox {-170 -305 1010 941} readonly def -/FontType 1 def -/FontMatrix [0.001 0.0 0.0 0.001 0.0 0.0] readonly def -/Encoding StandardEncoding def -currentdict end -currentfile eexec -ÙÖoc;„j˜›™t°ŸÆÌD[ÂÀ1Æ…p§³T¤¢€®o¿˜ˆà9«`ü¯….´Î:þ¹yÕêpýäJ*åÈÀl'¿–eî¡}#)Á¢Ý&»7+å‚/^§ ™ëWŒ{ïÔLßZ60VåáÌQR^¦üλ3rï÷)€#v¢€$öp~c—²¼´°£êë&“è÷ö'ÄÉÇÆÅ¿ñÁä+ž %;a~•ê!”ty`rô³cþúv7qÎ ú¾˜hˆü‹¬Kê[z‚ñSß=¸E+öÃGQðF¦–½Š¸ë’WVMüì¹cY*ÚK/i¬¡ãËi;™t—¬EÆW¼(Ð$Ýôôe€‰oLD™œ)å žàA¢“…t[Xä7‰¶þÈ_ƒUæ*D ¡Xõ½Q“ ôYrôvB>¾Œ:—#usò¦€ -Œ·«”(†HõÓ-àk£ÿx¤;u àÇmâCõwWUåÆ/Ûç3kñªè2o{–{ºˆðñ€]Qå”`†^@Ï0Zé¹>1í£°` waÆÕÏ)&]þÛÄûÞu[x߯&‘?¤N~ &?VDäµ€P·(³».y-aîS¸lV„î˜6ˆÉÝ?!ˆ¯–ÒÛbŒ‚PÍGŽÝ°uµT†åN­×ž‰ q*«ÏÛeü¼­ö<*VΤ‹niå`½Õ™ˆoAò0¬U |¿)P]Å_"dÝÂN`,g¡ˆ´ù -U@?­ÒÃéúà$¹÷r8ú~L*vV™ö)‹U'޲ov7tJÍ\Àá{ù ›$} ë2ÓØäKáö(¸¨„”µ¤Üg>šç¯[/ˆéiÀÇ­2Q·Ý“ Ì$íBÇÌU¶×?–ëXJ·G(1‡ÈÏ|ÊÑïY²ë =Y¨'¢Î?»õ‡U‡âýâ¯éáZÒœeß¼çvôñàÆ·¸g¡ú¦ä -ò q!K:ÔF¶Q¸ðÎ&>)¦RÙº¸@×!ø·÷åbëáÝZDæÁÛW%aÔŸ1IŽV¾<'Ú+DøÕ-8¢2óuDª¬¨Ú¨l:rigº¾yä"—”ƒÌ ¨iD“‚¾«nö.À/ü¡nQ¸ «þˆ©Ùrr3§Ñ»É:cw·*|°4è í×ÄsÝFÍ8ì¸$q=çÇW6"CD‡U%ìˆrâRqz p](0R{âï™?uá‡Wë ZìƒáÄ ¹XQo!oÓBÜÿ¨ŽÝjX!6«wå™—Nåí15ŽL ²ÞEý’ Ŷõ.;'ùâ(öaRt™º]•ïÓf#ù?uÖøXÇõºPq-@4e@Ô·YžÂ6i'u oÑúzJ;«-›“)T°vèWZËðˆª¯™m–jæ\ãÖÔ3X{ÒÑo&_×Uü¤È»nn\Û b"—O tÍé7üRá'±7HÉûE€[adô'X½ÒÈÌ*Œ¡¦‘RèžÝ"Á -<¡Ü+ ׺HÌÙxo’›úö[Ÿ ?f®˜Û¡ë\}$Å ¹ -fðÿÀDÑèRЃn mÅ ΟËŒÙÇÀ>ŠÓ\øVÜ»=X¼¿‡MH2&½ÓB¾fæ8̽ëÓ܇“w¡<]ËJÀˆB%aLážæ]e~ãU½RZw˜žy‡V[‡hûV³òÉŠï»È‘M H&Áì§mȽ¸U+ îJ0V‰7ÏAø»®7ï²gþÄsü15§Ö7B`¢ßX±ä9¹úcµ…!Dq–&/™cÅýÐ@2¦›-Õ¨QŠZjÆ"xÊ)Ó+J0c 0V!f $øæÿ.0ŒÂ|·Õô ¾:R†š(fÛºy¹¼E{èÖKaÉ'Oq†2ºÆ¼#¯~fðŸ»È° ºÔà¨È_ؾ\I9Aò Ì0¦ 
¦,A²Šx„aYb$™òl–ÿªÂ5¼†6ˆ´>È£ûùLïèÕ5áíG¢g!ÝÖP²ÏYÔáò)ftà׈[jyÊÞü™sZL¿´ìª"Jày,þÉRo·aQû):=sØÝž»K:¦ï’éݹn4f"•8u?"îN#i™ÇÑ÷^Aëú¹f˜þñ¦—BÌæÄÃeͰ˜†ÎŠ»FªG:ûgžÆ"Lr±rÝ[muÏ‚ê;äÇ´¾¨} CÆ\·ZË‹ñiÚ¿ÖžÜ(¾Y ^Ï ,uýŸ‹oܧïö!-¦j9˜°¯$O¼Áo$úNÔ ôïþX ð f£âoò uïÜg‘à Ì9B$Öús~Žýu‹]—ٵ͖C^:oê/oúüÆ«GÍŠa¥²†õâ¯`´Kgv&†:§Ë -ÛpÆ‚éÏÕXØ~ ·®GÒ°#zß•èˆVI±‡$r–rÆ}¨WÞßE2dߟ· ÷¬ÜÄð\ØÛ£æ²ï~*‹›d#þaIAd¿TY¹í/J$×ÔŠ–1òó`¾^a¯Ã|x’.s¡YN5¼Zò1¹kw¿’k˜W‚*”íf``´Ã9žYͨ‚dêô!Ðtt.èú`”ûy­sÁ!wp•d×KÑ–Ñ«†<•b÷E<MQJœGØW-ã: §ä¤öJn\98%ò7~ÂUô»•uß?ðj-Æ«“$Ó,…b€¸Û´KõKk„‘ÞÙk4²€Ü~tÂt¼~>‡(Á0È·ïø`_èî>g²€Ú2÷”̨4HÑLV_˜‘¶Ké&åœõ«¬`Ò‹/2É‹é&ø˜w?fµÍ^këL(žb3Óâ÷Aôl9(7>yù6Wâˆô| a‚ƒVËD©Ô7úö»éW!'c -œ•œ?Î(™³H à‚ׇªËõ&ðZ€[æ!â ¶ƒ0 ´›š%‚#€'¹¸á/˜èDÕá#ý»ÔŸãj¨ùå‘ȕՑÐÏœ¹mmV+Î2–•þŠƒ ùl•Mñ`{ËÉ%2“d4Vª`SF 6ŒõéY _?¹‡ˆò’ß/Š>ÔÊ hGyIâ<Ú+§›* VoÂjN=v¥ÛTÑÇÙÆ„’è_VwË(FŠšÛ3Ú¨,Å.[¿¨ÊntTFiÉ«Ë@NËdÈ…¦3Ã9¼C€hrZ"‹ý„G¶›Íoq'ÔC=gÔé¥Í}rIÕ‚¨ãõÂc—ÜÚ‚½¬˜;OPläÁÙMû¸;„‘Óž Û _õò%«€ -`!Ã=ˆYs“\‘Ë¿žA¢â*BáÇWÉŠ -¸=0׋kDTOk¤ydÔtãÓÃÔPö{ýVta?lÆx«hÊ»âèp n¬.Â×2V«rœ%v%i§å)à†*–‘¬™ÂDà/çP800…g†œ[Ä'bSÊàJŸŸûäðæzÄ/›·Ò$½“é5r¹ì5¶Ó˜]¥”–0Þø:N™ñiB¼%…Ä¿H)¬ÎeIÄ”€ØhS!ÈOX£Í㤿ìÕO4 ¡¹+¹2­®== `™ŸøÐôæëñ_4ýÙ»N’U–þMÚ“lÆ%ãŽÖõž¼å&°kaŽ÷ ;› ÉБöz–››¶›_vÂùKôt©f1F[Ñ“}á:ßF9>q0(Ñ"ŠÏØÆÅu3TQ á`cìµMŨ”Ø’£<Òó²Ý–ü»°­ýõ¤É¦Ô!¼²pE<"+¼ÚÀnÉLg!:˼ßAb^ïàlO²0®ÿÔÒiS rþû~wÑâÒ˜¥_ú™g@¹“ÇŠŸ…¦‹‚å¼Á‘:nfâ–Í%õ$î77JNÏHIH“VweÊž·%¯.¹ŸÑŒêx ÕãPÓSÓ!·Þ¢49$úÞjúÊ«Ó)@çÖÍ  <¥´Ê,­ÖG—KîÅ5‰ôKlÖfÄ–rÝÙ%LYÚï7“? 
™TÄ.¡6¥?~ F 4R¬£u’–4] AÄ%”Jïë ;°âGù?è›òLb?çþãiôµØ¯(á4pÎh@©0…›1·ÞSUœ?>œÛœ–mÚÁ l QÏaêlM¯6’YŠi^¨GY{G`Rse­ JM®×2ÃŽÔâK^Œì~<™i¯§÷ά™¢ Ê5s®É7‘ä~®û÷µ%,tÅ{w[+¥a£¥:èžó‰ªª˜Jlºž““Ú,¯ä[B>Ô=<½ï&N3ËQ&aÚWY†‹•Y|ˆÔÍÈï˜^ŒhfÑÌ£×Ù0ñùè:«0:ÊA!éIeñθ û%ìü¯çÕ…övФßÎÞüö¶zl u¯3km%Cñn庞xãotÐ0(sýU$o#j;ËtLïÀ@N™x¥o3lNùìžw¦ÄÞzšô–Tê3(RØ¡ÂàÓ°w‚ỄD&;¢¡ùþÆC¶[óÂáÕxòœe™(÷Ú®ŸCwÛ[ôšH€wï32€Â|xÏ¥“mk”Z.·ÚºÙ«ÈñMÁˆSš‘é$‚>½“ „iF²kÝ!´:¨Ÿo¨¡ðj8½U4afs74?i79 b‰}ƒ4{Qù•NÞÃUׄiÂN5š¹ƒ‚ºÎ ž.ã Á^‹›ÿƒü ¢|úžíõˆLæÇ¥À0³œµì‘kv °z­Oxu3qñ5¡ƒ„fÀšV€!Q'z#Äaœ9$ÑÒ> ü-šeGÐHu„‹4‰Ñ¥U*—Ò"U†ÐøÍÒP" ^É÷.EåðbÿÃ&uB%´ÒªÒŸ[æï˜×È®äfòJ)SC®È"âÏ‘UCö£rí{ÿ½{<9ʼn7 ‹º[3SÚANSí §çÐOø€ã̓ÒaW††[9¦£Ú­07L ”1m‚þ¸µ>)ñüÕ×öµ\в•n+tmþ; UÇ~ŠÉ•:5xÁq>à‰V£b¥dSA陼æB\j gf—CX6_:Æ` ’Í÷o3U6\<(L÷ÈÊT±oÉ´îóÍê7oDDÈ‚LD“˜ŽŒ™‡ÐÞŸ8D}T ¤‚öJЛjœÖ&™1vW -`ËúçFE2F¿Ó8ÒEï>Å×Ãþw6«(ô¼Ó™ô}ƒ0ÆÉOöûM@×9àùôI»² qS¾¯ð Ik&\}R»jâmòB´ë=Ûk-óÀw-½7õ7l d–w¢p7BNúDÆ9h~QŠ)¥’•Ûuåå˜,í&¤O1+Ûk›‚ÛRJWþ\0 oArœ@Ê‹þ«›P`ˆÛ<ùœxa:ö T éªÝÎ6yd·±iªg>·‚âuí¿àÄ!º¢ûúÖ(©þEÙŸB:‰û¶î™­΂«OŽ}€ mÆi”Zè†a§Nv‚d ¿Î²i²·D·qBüÊ5šõíÈŒ[$œXÎÕÆËÏc´ý"CäXL„š›ó‰´Íoõø6o6ûv®çÊªí¾˜ÆŸ‰ççtÏÇQ_søg*ë¦Vå´cdžŽJ…7ÓØSÙÔ•koÖ»° üÅÇ g»XgôLÂ/­þ†oX6  -;æ çw$‚’¥n^ »3@|V­c@Ù¦ $þràîHÓ¯N­'õ|ÀøRÖ5O©´@ÆÝ WóQ×HXbÄ@w@_ðW‡ ‰‹wþƒëËMÚ‚ƅO÷ÍáºqŒÑ¡SÊg@Š–ÝÂ…ØßüQ × Ÿ}c’*Ͷã~µV‚m¡QB"]|5ÿ*|P58ùëd»)Ú$Üx¯Øy½vÒ:÷æ2 ÇP6<Í|»ïë™ÿAž;G'ß4oå_ÁÓy“¶ýË5 îÏ-D+ÀäpxpÿràZ8;IK\ênNy;•&v.IˆëŒÖq3fx^¨ lá-N¯j]q¼úïÒldÒ2`*AÕÂÔö½ç$™jm”z„§p´—X„5¦Úfªc¹I| ö²Ù‘Å\®=+\xѼ<÷’¯ýg—Ó -BUù9º¥l˜>Ð;ñR;š`EƒžškÔœ'^—ȉïHC¼NJÝû¯ŒÊ¤9ëlV4ù”¼‡f¤þý3n”dt/µ ˜é>¹§cñº3†àQ»ÁÇû‹\¾=)Z2Ê%Ü·ÀzŸÔ—<©“l|ëÕ^µnBPø¿‘‰Žý$r± ë1ôåŒÇ`º÷>Þ– {ò®èfMjÒ)Ónw`’"ì‹:œFî¦sóŽ®\†/n½tµM¼Œx¸~]¶,†‘íbÐmšÐC¦­²£ýÞ¡Èbɽâ±R„+øþE#^?Í Ú­ÀÖߺúYËæãXÏù›\2xŇÄ<ÕŽAeo{5.$‰`ý ]òÀ€Ðµ¨sv_¬T*Ï;ɈidbMÍ“dБûn2,Q+|΃f%Ôz%žzâ¡"}Ü’=)ßâÚl)«ó˜ÉLÌl–èØ Ý{DliTþ\CÁ-¤¢½‡éœwYãZj/úîT⌠qÏ©g+ßSVtf8ÍÙýhãÝ­ŸSí! 
ºY¸‡Ÿ÷.£za§ßåNíÃõkÚ–:K -ibh -¥Î¹œ×_GÂOB·¦Y(u÷ùJtLgöÀ"©¦LS‰œ="ÅV–JÌ{<ÞÔ Å s¡…s‘Ô>P]âýß©áZ¾UÈýÌÅЯ¿lÝØ}3R„!íkuòSÔ¦Ì0äÏ÷¢MìO†]ó"®uÑ¥úR‡kjÈ6Ã]€ÝîA½uƲÇÿ .mH§g8‰µ\àzÛ•ú•Ü8+li{Æsölt®—¬³i“~#“o~hêl¿Þoâ¢z„Då®èçð/Ù{ícvðöj¶oƒÈÐ(Ó³„3ÍÙj^éaH -C§Ü¶L|/úe‹²¬pØã‚>5‡A‰¥4¨Øú¤þGä²&‚&›}9;‹GEVæàE…´ÚTê‰)ºÓ‚OÍjú -Öɮ߫1Nµ½w-H­ä5&‡7¾’¶ëÑ­ä­›zÒÌÍ(– éðC%ÇT`Ÿ\]ºSr*úÃ]…=‡³?Ý"-ƒY!•ƒšqRsá¡¡í4Í©lb_A.Ñ3é¼9÷ OÈtE9o¯lh'ø ) ¨‰4Ž2èSfÅ–9rÉ@‹M°N F{5µÉ<3™¼üÔî»ðrÀ[°R…7í¶ì™Ä™}L@¡/*”Ù5^ÌökÆÀGúª¢aÆE?.¨Á1 W<Â8Î#©Z×Ê›Œ^~P`}‹ÁPâ”Úùò‘"†Ód´±Á’̯ËÿàÉ…el3žƒj”wòš„¸ÒŽ–p®V_ù:v'©{–1eéÚ³YaHüŒ.áÖèTõצÛh2–hš í o*´™ó˜ø-a¯¹Òl 6Ó,ÇNÖ7ð»™¶‰Ùlç=Ôb Ñ<~sÒy’ã·­_ÛƒöíÆÝ²Ðßg^ª¿B>ª'äMžjàšGt³mAŠ=÷ü¡VÔoÊܪ²£vLWÓc‚#ŸS%Ò­.–^xÙ5Ï“¨žŸ oÆ `;nbBš‚ºê†Ö‹µ=ZõØŽ27ž,á7Ÿ6ÛRP˲ÌÌjQò‡bž»¢ºá6½0™þœ8¿Jo©§s¥%8 aN>sàú¾$ÎI|T&Ó¤=s>¶ ±MBÒæÍ"Ø¢ìä§ÑO¡æD i\g,ɦ-2û{W‹»Ìäߨ¶æËÇÑÐö á”'û°Uȼ¦’økUxbùm„Æ¡»2PJIL"!)\ŠCXqEë’‹È¢b–üêî†xnñ¿pÀb¤VC‚NG…wB³sF8Ï^H5”è¦ •)&‚ˆ¸$AÅk >¢jÀLÃø›M¼0'³Ê™ð@:.è!îw!qÖ#q*èc?•©ê¡?I¦oÂñw–ýýy"9º7ˆžõ§ÐÝ%ø9’fy³ÆîÜÁ†yUôóØ3}Dý?¹„EkêÕ@ÌøÚB÷”.Ðá© -åLY/#K]¿Ö¥"—¾‡zýH;z.EÕ˰ j’Xöžù.:Ⱦ#¾mÒ³bÊŽxi£ˆÀðk%®hM¾BîVèõѤ-@ÌûE¯ÛÙ¹õã°­瘲(u8Ø‹$tVŒ]ä1Vœ -bhM¿$,ÐÇ‹´X:ù,2Cæóe³â±|¼»YJ3¬ ô\†¼åÄÖ.9 ýŒðó`†™¢úÝ>.ÐÍ2qˆoò²%8¡8)BY&ózR¡z™¾…R¤òÇ©9×É»öÄFøZ¦ð÷SÅ•)̰ÄÉÔÎ/l?%BîýÌ)X£ÛôžòtÆëìËRYû -oøï<ðÎ vàï?ƒX8ñd|”¦G|ºíwÂm’¦ ÍëÛ£”¹Ú(ê|´˜1ú‘uWJDó[¤0T!!¨8‰Æ¹¶)5ó¤-îáDÆÅò·ñˆ¡;~)ŠÀ1w"Ã9ö¯óí­Éº`s+8Òƒq­Ñ>&<%I$ð•/ÓaòE“)ã¤Ѹޭ)¬6béX].Žr8vŒƒ Ð•hˆÍ"ŸX ûEÅIÄ8]û½p´­˜\ÿ‡¾Š")fD-aŸx¢¾äP 4‹ìMœ3G£øh ‰»ë•$޳ôóá¸Üî1:øÓÍ(•2iNLÝBù6‡w„½€ôw¬ -J4¯a_¸— Ãx µ®•”_ßO”îTL–ØsJ8€h‰®¬ÅB7þPæLâ›ÈêØÏù¿kÝKv]ôê›VH=öëuKµÐèï´°O›2DþGí©ÜÉ¿*”šˆ“7-’o>g®Â¼ÄÞ°¦…”ÄbÙz,»?UÈûäp¸ PR‹^±¥Ëø–p_*„שjaº»Ò¨jº0`÷zBí*c3Æé³çeã¼Nzܦ±À½U¬6 ,Ò&XUééÕ ÈÈpYßøÚ ,jHÉcº¤¤ýó’ûs¬3$xç@ ‚ôb“˨?T!=]B* ‡Ì9+ÞÐyïªôsû½€Æ'ž>ˆö‹ ýÔÇ@³³*ÅZ »ÞXM•²®éÒ$‹ -¦I,ŘÙ†úmà5 áŽÐ;+`3&¼Ó„’ÀŒ0D!’éBœâQ˜²…#^(0¼Q„$÷lH’®&bqØóy_gC¯ÇÆ(°|»}‰‰ _îK®TSHݦݣTxUßæPœ0ì¿E>Bçúb°(«÷*¬vš8<Û‡O}$’˜vHŠúŸ{~ÅhR^†àA¦ÞÙGùÇë„NR¨K éa”«8" -FWG†RÝRfAbú°Ÿ †écYî#ÇæxÏ«‹©‘½óE v¯}SŒê®ê{ôbçWZ„áS}œ¿ žD¹¤…˜g:ä__·­Û+®Ï“©Ÿ®ZƒEu²¼&¤>Íœ‰²FZ¿Õ34× gþè+¯ÝKŽ; àc6Æ¢O·0|Fú"Ð$«¤èç fCÜûÃä -‡´¬u¼Ë¯i†@Æ úÕzY{=×Ú—Y 
-$´–‚G…gº%ÃfÜäÜ~j¤ø$ÝW,;|}ìàôŽ3Y;ýs‰‘Áö`ŨƆôä£ò—#‚D3«ŸCB¿0 5ö½åe†W½ytý1…Ò,òì)5œ×T›­bMšb?*½†²šXɸñX_-©¢@g{ƒ®ØGŽšÏ‘3ü§¢lGÏ>ƒ!üÁü³"^0¡4Dv¦¸%~Ô‹·Íðú§ÈÌïÿ†Ö0cV¦E.c˜Ì € ËКšU2†T²ë;¡n™YC÷Kð>¨¿ê-WÀrÎàÒá{æù®ÛJ «´s' -\ªyS|Cþ29‡%y˜ò­ÛÞYH9dM)Á¹ Ÿrñ —™ãøX“âÑÆ­DKý¶Jœ¶îA/TÞu³ì‰®²–ÙñV®9=™h³ˆØdÆ•´;vÈÖT†8JIfJÓ­·Wph…$à´Ó^QÖ@ÇAÅõM6 ÎYRA¼¹ µJ¥pŽOx5‘ÊYS£6{£Â¶:æedÀ5JA‡^:ÜPôFW“AQ0:3¹ôZeÚeü_›D-ä¦>b³é±™IéàC)0ÏÃNôÿ¨çÈõ\£iÝøH/ø%¶Ò90ÍjÔZ0oï¶lÞ‘±×îX„jÀžëY|6>1\ÕiªD€ÛO+¸¹œºÞgÊ7P‡C¯ °/)4E›Ñœ¿q3.=®JÑfL¤ˆÄîíh£KìŒîȃ¥5F¿µ·ÈyÑ$`J¥Ÿ—* *D¹H‚fu –»e½õ¹ -Ø•¦ù´ê ×þ G­ŽxIÅ@Lèf¤Â·mé¾Ðýø>ßÉp‹A’x$Ô÷ L£k¯óuVSmF,^™@nR9áT€ÿ¥zžYøðA¯@®'1¤×Hr›Ãä¢p‚2gܯpXé¸ìMMî0ŠÂœ®Ã¾3iÆ4žm ›¡«'qŠr¹ª™±&…‡öý*i…mãy©p€©ÌA’h` ‰LœeVV}£Zžõ+ñRÙ¡ÃÌÔ[ÓmÍ®tî?ÇŒœ‹µ5Y¿óAb{ªïû™×Žò9ž†* û%Ä.V SäÂ}B4ÎAcƒZÚPþ¡:}Çú¶—¦ðz©_BíFH9­²—½]5Ùáf'Ý®¿ôôé T¹ÒC»€â™žGâÿq¿ÉÕàAhªÏú]‰õ=îá¥sòÕ+ãÖ³¡»Ùê’ãÝÊXzdPÑÍ“‘B¸m! §Ândí ëôš9½K"_z æ…Á  ‚'j»åòØWQ¥¹WZÌOöÀÌî6ü{õe~–ÜCŸ1Þîz¶¾'<ïô¥“S ùrF,ëkiׯv…-Q7ó;8:nÅÍö¦ ‰újíÍš ˆHã6(PŸÃú9Q™úì =…:›Ýßzóòb´3%j˜‚†€CAêeÔA7È'›pÂN¶÷*=εŒÞ¨Ž…g/Y‡‰Ñî ™ñ‘2icEí“Y*læ 5L-Q¢2T¯"R…‹1‹&†êñ0‚_:þv`†•Ïéz³Æ¯Èõ»‚ï+:ÐÜÜÑÜç€U€z:Ž‚(P‡Øgé5´?î)¦ºøùWbý?õ­ÁФżJÏY<¨>X–´”wv×O²ì^eâ¥@C+—Íi÷g¦zóºà,“§i’ë&æ ±ëëztvØ1ÒÀ8?9=‹ù?ü‰5~6qòÑñכĊw‹¸¤|‹M3Ÿ8u ¿J®Ã‘CØ\Ó,”Ÿ=–éÙ]o7Ò½éáŒJIî{¼‚Øë¦k¥88áìÌ‘bÖŠÀ1œ’5}ú]Úç®KO1ƒ9ƒÚw¯ l1…‘ÀìªcÃÓ¢g@fÛ­œ<þð…£Û$¹Ý–¡5Þ$´ë–ûk8nÆÈ$à¤Ê¶.§l¼ØÛÚÃJÃgh)™e|TÝÄç6Ì¢*Å46¯ôâ™7?­kýöj=Ê< *i’¿¬  ¦k†:jé^X[þš‘ù>íÒ¥€§¤4ʴПg+ÍMö×YÓ ·- -Hì$(¨BÀMPä.Td¥'÷"²¯&¸Ó%Ÿ–Û;¨2±\ßl©Ú xÁ|©©µ\ûH”TŠZFEŸ×V0ã«Ú³±æéb>‡ß«=ʯ ^8^äòãõþs·Ñããì8õ<¬“ÜýCûVˆÓk ¹„"¹JeÇÚœ„bå¡ ,éGÙW¾¯+ýÁ“OoEŒ}µ«ïJØnŸºå>*ÿÝ/÷Á¥Rµø,PÿÎ@¯Ñcss•6K&Á‰#Wqì……èê5©À¬ŽÃ=!º©à¼8ÆÅ>§–eªÁÉËT{3ê³ÎRÿ€LI"gl„d(Ûu¨E‰¾y¦nl)¬åiqÃ"k<½F‡-§ät‹¼‘7žã0¨…ý_Ë©ú [Ü ¢Z&ãßO¿ŸgÈz{a¯ô˜}>v¼Qoþ»~™“[Øxë£l¾Óû1Íu®ÌË{þîß6N¡ª“Þ‹9·|0”¦tá6l-“ÝtðæÜ¦ŒKgÑQfŒørÿŸW¯ÆÙC%{¸Êÿ*±9×uã®$Œ7p©ÅÐòÏyCõþ)ØÆ>í·dQp¦¦GŠæ-´½ fÊ_g¹¸;q0s`Ãý_Óç°‰˜q¸êIŠQäpâbªt‡/H쮺۷ïG®¾Ay¸¤›äøþº°:LSCÅû@êÆ?õYÁKR˜.ézr[}fK³RZÿ']à‘l‚ýn‘iX±B6ªó¥ÉSþ\,Z'·–'Åq¶^‹ ÒiV25 "‚?âD˜]Ã(Ìçâ™È—&?…¦AW#7š â&C€ñ5ÿ¼Ëÿ‘›!º£x˜Ÿ¢+˜zdÐÌ1éWW6øŸHáÙßô8ïŽ 
^Ó¿:aH÷úpìöºRK%ŒËü‡‘¡›ÜOïºwLys¦ëC¿¹#sœ©?à@¢lt<”^ˆf"™oò…øëMPÿ²Ó¯ÒðGíwÚSlú÷Ž`ŠôÖ†!zËiC(fÔÁB’ú){™ŒV¾¼ê ¸ „Æ“JYÝøÏ¯J{8kþ£‹éš¤‹Å7ú¹”õÒhÉ+g©ºDó„‹2TŒwqœøy0?¶÷ú£|q毨¤ÙßJ›Îôc#½­Ž[QT3âÓ—„øÎÀ¶“ äÑŽŒÀšOU¼'QÃåõGâ¾Ií{EX¿_ú7áþ‹Š{%=̨Te‚8¿«Y¶_í„£€$k‘ÚŒ–7Œ«ûïÙSW›Ú~öåe€ÓTÞä•щŸôÔQrQL5n Õî-«ÖÿQ){ôØ©ó·O€‚Ý’‚»ÎhuÚíûutIƒÏ\ƒûûQ»ïENé‡ÞÝs€ØPIA;M^45n …’C õŽ¶Ê³­Xà˜™¼^³ûçšé|Û¥6í¸»ø:LíÀ¿Ïqgœà+ˆðt(Ð;gg;L‰žB»¶=Ö{’w;I/£0ëÔYL}$Ðz6ÂP=ÏÆk`i¶œ}yüø$,rãî þhÅâÖmé̺¯oPzýN\¼ú‰ªÄŒ`C²Ã†íX3¥ƒÜ­îÑY±eTÈÁ1ú¤&3ïx¥W·Ã2û*Ò,Dep˜À\½ÊŠh"\,t›6ÞÑýYØ}ñ9cÚëqFS½'#•ÙÎ-¸)‰)]æ;7)–IUs -øb˜w‚ÏÛÃmöPñoê?ñª¦æ×ÉÌŸ‹òVÛO!¦­_îÏ·­BêµëöѤó›ÕOL b'ê]Ý4öHÈD³±6ÿ) Ò< ärÂaÛìÝAEAF)¦€†êœið®@Š#‘c¢s‚ZöË];^%#ýøhÜiêv’Ú¥±5ý Pu^ÂrÞ™¬t,­tg®ìÀI{û¡¸ü‹h¹®?h‘ÀðCðXá_U!¦ïê2åhg¶›®ò ¨K†ékïöõn7 -åq±søBÂõð—\ºÇߘàçPM²"öDXœ%žÝ…¹C½ÏÜÕ z]œ4˜–³à(ÂöK -©Þ1K!0ÐíoZ¯{ö–bÂ0¯q:ÛË&^/†@ck5 4×Jj%—L^ m£ÎÎj—W{?ÎÆuš†õ‚W꣖ùh£¸·M$3[¿åCdìse¸wJ]$$´&ºÛ€{pyü5-žïUwU_Þ_¬:¯×«ÜèŠâã£ò¨¬š$¦Áööó4–¬ëNs™0S7Í™e×Dnf#›Kgð°±eÝ*ç:xÖÃ[ãÓ ^Ø2ÖWw¸/Õ’ -¼×LògÅÎÓ_ˆl•$6û/ŹkM½ãy^#ÆÝÞ1G!|&0ä "—­–”0Ž! m!uPRMwÔ7TÁ˜îåoC9ð¯GÙ*ŒÄ ôBè“ë㉿š[‚KmI“îA^˜˜…V‘žµöt78:éÕ¢ë•Ö³cÖÍŠ_ƒ·µ`ÜS NA Gmõ\ÿ6xËYaøìûeÎç’½Gœ¼aH•f'T%™]äd–ߟœñ1¬¹íAœwY¾N³lzHÊY7ÏÛËsT×ø˜Œ„›à×c).†.-Fo¿£÷"‘(Û-ìȧ¾à2zÒ)ÚÙVºô¿á®…YsR—;]9©R}ü¥O¥A¸Ž6Ù®uÉ—èñÂÞ•ÅBˆ'•ëñ¾áwS±xÞô;¹û3¹ƒ”¬üAñ£iïc½åll|­|îCêšj`ä=N‘Ë…¼ã°Õ{_TqÓÂJ]¦òò>¿ä#eѵxö½­U×Χq¿'’U.—Œè”N˜óT®~Fò¤áüIðu±¯Räæ -qÒ2פíå'f±+y3ô¯ÿŒâL¯e°Ð9ùÞuÌJ¦³îUßÊà\öŸ† €Rkv‹;…KßÄIíÚ´4¿9°5=>ÄKІpCS>F›ÉTC9B÷¾a0æóüñ× aà0wÒ¡¾Z¦áÔ]ô$¾mÙ=„Ž5IÅY@3¾Ö,º¸l¢té³ü$^hˆMÜ瘑dü?¹‚‹ß«•S´É±tºÐ§Ïê©dÔ4QP\.:ìå(zõ8Οèý“Ð ¯Ô‡½eF22L1fuˆ:×#Ê|Cr;ÿ»×_:«Â5òµ–2Œ¿1Q‹Nì-+™‡³ŠÒˆzZ?o‘†±r …̳Ò ã €ËDc_à-~ìk?Š3,­â¡Ð}ã¬3J5%›w™a˜!¬÷>˜ÃdR÷-öÞ¥StÜù íÅáJ§4ߌG[鯥O‰_MÆiG¢C̾õO9€]‰Îç4™†‹TUOëÃcoÈŽ®±wˆCªwqAÇX(V»=]Ívjào<SHWiôŠq,üuxº ò #7´èOg°¬hn—»þfI.DÒ9½f²éÌÈ.o6£üSß¾Û;àÈÞP²®—BÑØMÁÛ†mÚíC'€0 çp¿ Ã>pÀ7©{kôögg¸þyQ±žÐlôAuØvõÅ%Ó‰–»àwjpA`I诬j7[£éÓ¯ô'¯‡~1Ó³ù‹ˆˆ[gп~ñ*cS½õm0¦d3ËB -mº2òX!lN×7ªîeå†wú\ÁÛ¢“S\2²D?0éÓÔAk òpåí·I“ˆpI§uqˆÖÊÿ:)¾)ü }.ÐŽæx—ü€tæòúLºKöi‘(þ²l|žI¦Ò¤”0€'U"j±—Q1ÓúpÌ …Äà H`¡’bšÞrJ!Õ€+´Àr »t·ægßêÚ ß{6öJHƒp“d”¸t žì— þ:ñö°ä^Kãâ|JìÝm ÆCCþBíxäÓ¨ýN$«þ$P–z1äþÎòv=|/â{? 
-æ#yUm¢|SÅÒW ²Í¹ÂÕ]^&ºØb{|¦H¬Ó* &x³¥/Ê¥v*KÅ unšHÉz)ß&âkYC,6Šp Ê`QŽdõmÉDögd]ÉËvî0|"³±ÓÄd$Ž‹ÿôT]…ºÊl 1ØGLe–ÖóåL§|ÿ]+r=ãŠ)ötëÑ¢˜ªßŠqàËw­z)ÜÚ9ºÑölDÏa>[æ 6ˆÐ´ãµp9ûˆ»ôÍu÷n~ ;“ÓjêZöú­wáÛ^Âêþ ˆÔN§£Ç¸i÷0±¢'Õp™°#=cdÄÙ[—äsñ+Üà‹Y8T5´@hÑi§žâ§ÈIò€Ç¿F|¢*òóÍÌÜ D#`Á䇰sTAH5_€ŠÛàEBÛØŒ®2!ü'¼I5¶''³Â¥`Cgzõ£×«ø À1jËi°0úÔŒ»êA<=LÖAä«f -Ñ<ÝlxËVóE"“³°”mG;á50xX÷»&£5#gfâ‚T0Ù¼cK¯½.ûäL²kô»A~ÑU=/õ%ÒNÕ^-”¥@¢›Œü\¡Øbläv²’þ§&“ 7ÌC‹f Ivš‡o~öš$oSΖ¤h4x˜ê«GÅ".b'þC ‘4hüÇùˆ®î.U÷å/Ø8æbÊݺ2%duR#‡NWK–,nç¹2<ÕuÌI×щb£µ‘ëBbÃjÝ=ÐÁµ‰‘}¶°Š­¯>\~&ÙhûxQüž­tË_AHF¼ñv èêÁÄ6®7rÈ*Àí€ê²•d¥ÚiöÁaÅJösPµ;lL¾ž¼vÏ9 ¦ï•Ňd4@Ûáf·¯8éu¾<Â`ôùüpÃÕÄ„¾Ü£%AŠ>bѨ…c>ö7ùXÎwÂnëìÈ‚Y@cJÍä`z ‰|Ë˳…Xšò –<=þ?Öj©uw鎎’»ì2¥éBõg0$×+ž âEËí ámòlÓ¢MÒrŸd7R…¬h>팿O˜%PD¦Âö6‡‚‰~e;WÐâ—Iÿíâ# `õuª¨²sí.aCÃq¸W­‡+¿ ¢„ñ€á3xþ¹ài¡­¶@Ìe¼ %L-)a…½Ï²nêùó’dÙ¸1YPéðÝЪ«¨¶¾ÉKÞ¤Ô–zT¼ì¶} ©œ^ÚS²–+Ù+ƒ:‚Ô–8 V¿oʯ ã­#œ0›Lïõ¦"¢—xGµgmØŸØò¸ÿE&Ò4ÜÚ™Yu>Æ»v²ôm)bg¶ò/¨ŽÂJºŠ1ÃØHÒŠ‘ù²âè~™²f4!µzbVÌŒ·øXgZ:Öæ¾§:óc…TPœBrq5-¢º|Ôàru¤`vZ|„.’‘<&5„—R^LÊ(^¶(½ú/ÇÜÜû5ŽÇÖîNU¿)7—y0€ü¼¶äR…îf’qOw‘4WŒ£‚Ò†ŸÏ¯ŸÆÙ|–Ÿþ„·±»”Gk/V“»càïtt+vΗ$F÷ ÷>DîÃiÆ¢’VžêŠlÀU(æ/ùý4̶Ýg4!t¹Ùæ[MfðWW2&<Ä-á Í; £A·OÉRqÛ8`0ìÅ_0(¼§mîŽp³ß^’tŒLÖ?èZõó&ÓÎÃóšî!ô… ˜bã{Îö†š5½A y›«©¾bšDÚwÚ¶ZwõÎQ2y4¼¶ Óˆ+µO‚æ|€šªBh³1Íhy‘´ÂôØ©9²ö伫üòW;Æ£âuêáªmïqÆÏ1‚5i¦þ¹{ë\½‚ÎÁÉ{XšËØ¿zIÎz€²¤¾¶öÛÿ‚ ñÙ3P…Aõ5 úË_£ÚxêÀ÷Ò׸µÊ¥š‡¡Q…ùÿ³”‹)¢÷épY7æj™Î¬BÈ»¼ÇQTÿìÓvyžì+à_mk—ÁJâ›ÿ¡Ã®wRš€#Rî¹·§£U ŒWäů­ %û -0¥ÓÙŽ÷Å÷N&Ç¡¹4#päF¢è]Ö@/P¦ -}79B8cîÆŠ -V >Ÿ&²V±~ Hà eÔJò³šc9ú"â“‹­¸ïHZVK¼Ä-!0,+\a§¤†´"ç·pÙövšpáW8j|X3$œÄ×D‡_<Â7è‰lÇMxÅ]æ£D/6b5Cvf «Ê£¬|îMÉÒd°ìÐÒ½ÄyóîÖ[ç9d!uRnb Ù죊*›^O:¼³xE¹•{sŠDcÑlÅ«ÈÅú&×Yæ3Û²Ÿ.Œ¡Gösuß!1÷³ùIïrcìüÁ8ïf DÁE]P¹ÒØš©ljüœg¿SCºàcé¨2çÛ‹ä;%âÆ/ìõÛ8 ÁÔEçáHk’¬°ló§ÙýíŽôˆ^§,ÙJó©°X»ÎlÈæ&bnÕóŠW‡‡z¬‡,ÇhX« èÜG-iXwÔ‰­t †Ú°FyãúÎ6ŒŸã—õ-˜³èG)qêâ,C `µöœ :ä1*?¸Ûšh‹MM2ò÷ÀdöY"¿¬éJ˜cˆkÛÑ[*¬mÅá‚dGÎó7ã¹Q˜†É2B®UP†e}P[ºŠG|q¦XÃŽ{ż潴ñíó¹ÃÖU°\uïÉ$Ô§tvÃë¨ïÆéqËv\z°ÒØè ‡c­sõŽH¬e~M)àu~¬RÍç ¸]ÕÍ i°Ólø)âÈûî½oxÁ#§+MõƒÍÀ± kLO€Ãè^ô]±eÜqy7wo Å‰ë¿Ñ¶h5Ó1¨âi D 塤îõÅF4ÍÌ«Ç!dòÖÙ #ÆâXI¦ -qçyUûN¼ e M*%€Ø9LSÎKö¡KŸ1Þ×ñM{á¦qÎØè‚Ÿµ Y€¾@ùª>е?êÿq[mR^|ŸöŸè½)äŠïy»rƒÎxàä’ðh1‚V§õ÷A-šUÁÒÂù…~ëª ôê}f~Oš¿y(糺pÚülHYvp-WÄB4ËÊzŠ1ÉÎ÷øpßP.¿…¨® 
رo…vLü¬{‚ƒ‡ÌµXv~dh…Ð*DzÚY£jö< ìúkñ~Ñ´öR\ƒ%§µê¹µªD‘P57xH·?Æ‘žr2í°Ìræ®g<0µðWiXEBz͸h-´8û| ×aÔ8+‰ŸV÷Ô‡+*‰Þ完•½°¢cA_v?É+-#ª|Ú‡ï×pÁ¼Ö~ÖêóB¾FÐ\Úšã麘d)$“¯…ŽÚª»G«4QV‚`†Š’5ž¼/"¬[z›°Î‘—XÁ¨Süaïõ#ëØÇÎj˜ñmR=aò.‡ ì`!ªxE»ÃVl‹{ÑôdР借\¯˜„3e¿I öß÷„uå"mŒ£©h²e3jn[E˜ÜÇ–N50ÅE¢Þ»n¬¯’jÎJ×-ÈPﶸc{™‹â)n?oj¥‚5o<Ê(G›sŠààô¼™âDÔõW“®‰Ü+P’J¡†³ ìÄq¾Ð²<RV={t¨ Ú‡(¡ÿº.Æ}·n7ĉ Î ö‡KÑ`* š7²k‰/²È™­¾<ŬÌÞœ20²–u -:öcžž£p¿4©Ü°û ßSò’¬Øj;;)º~„?Á7$SÈÎ2õ`.õÕ5>Þü)ÕîðàÉP›â×Î ßVQ³cÂõŸï· - JEô/Ùâxµf?“ÇÇîÚ8Öª9Ÿb*t9憴Sóƒ±Ą́2‘NîP@=i•Ò¼2$@sƒÉM0xÀÑg‹˜^[2PÔi:Ëø‘…®8EÏzF¬Ç,~T‡Øí±; -}Œ)•¸Ï -Y”ŸÁýUczƒšZ uô#ñÜl‚˜Ohï -†0:ƒäålø=ºB2‘¦&«ÙùéjÙØÙ¼÷ZŽSÓ\iÖ$åÖE£Ñt>¸æP4äO† hP±Wç-Í1Ä]­¼·µÀ{†ÊpÌæª­ã½²á0íT -²´Ö—ör±Ó„7ÊP“KNfÞÔõÓcñ:-Þù•#ŒÊí`F øÔ{I‡˜ï-ó+ŒIÜ›“hðlkêåÇþ.þ‘Ħx±ÓÞÖX_WpYþ! —M'űÖCÇÙ3âw:ÿ@9!7¥Ë%0®=mÓ%7âJÍåÔÅ1Û_h(;TYòÏApËw´ß#˱ì?\Hâ9ÙḼu2 -!ÚªXöïl­°ª®s?2>¢~Ȩ¼è•ˆ¬IŸ'÷⤢ϱ‚Äðüeò -]X»%ë&Øûcg›8ò³¯À}¸Ü$ë…‘ÉYÕÆ -endstream -endobj -1891 0 obj -<< -/Type /ObjStm -/N 100 -/First 984 -/Length 16452 ->> -stream -1887 0 1893 120 1895 238 561 297 1892 355 1899 449 1896 597 1897 742 1901 889 565 947 -1898 1004 1904 1098 1906 1216 569 1275 1903 1333 1911 1427 1907 1575 1908 1724 1913 1869 573 1927 -1914 1984 1915 2042 1916 2100 1917 2158 1910 2216 1922 2363 1909 2529 1918 2676 1919 2820 1920 2964 -1924 3109 1921 3168 1927 3315 1925 3454 1929 3598 1926 3656 1931 3763 1933 3881 1930 3940 1935 4008 -1937 4126 1938 4184 783 4242 1939 4299 831 4357 830 4414 789 4469 790 4526 805 4583 786 4640 -787 4697 1940 4754 782 4812 1941 4869 1934 4927 1944 5021 1946 5139 946 5198 817 5256 788 5314 -785 5372 781 5430 784 5488 1947 5546 1943 5605 1948 5699 1949 5719 1950 6070 1951 6101 1952 6260 -1953 6283 1954 6638 1955 6767 1956 6890 1957 7536 1959 8007 1960 8638 1961 9109 1963 9684 1965 9909 -1967 10157 1969 10387 1971 10633 1973 10957 1975 11459 1977 11691 1979 12063 1981 12289 1983 12520 1985 12988 -1987 13540 1958 13931 1750 14341 1682 14484 1360 14627 814 14768 813 14907 812 15047 870 15188 915 15328 -% 1887 0 obj -<< -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 
585 0 R /F83 813 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1893 0 obj -<< -/Type /Page -/Contents 1894 0 R -/Resources 1892 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R ->> -% 1895 0 obj -<< -/D [1893 0 R /XYZ 149.705 753.953 null] ->> -% 561 0 obj -<< -/D [1893 0 R /XYZ 150.705 716.092 null] ->> -% 1892 0 obj -<< -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1899 0 obj -<< -/Type /Page -/Contents 1900 0 R -/Resources 1898 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1896 0 R 1897 0 R ] ->> -% 1896 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 573.77 387.374 585.83] -/A << /S /GoTo /D (precdata) >> ->> -% 1897 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 498.054 387.374 510.114] -/A << /S /GoTo /D (precdata) >> ->> -% 1901 0 obj -<< -/D [1899 0 R /XYZ 98.895 753.953 null] ->> -% 565 0 obj -<< -/D [1899 0 R /XYZ 99.895 716.092 null] ->> -% 1898 0 obj -<< -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1904 0 obj -<< -/Type /Page -/Contents 1905 0 R -/Resources 1903 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R ->> -% 1906 0 obj -<< -/D [1904 0 R /XYZ 149.705 753.953 null] ->> -% 569 0 obj -<< -/D [1904 0 R /XYZ 150.705 716.092 null] ->> -% 1903 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1911 0 obj -<< -/Type /Page -/Contents 1912 0 R -/Resources 1910 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1907 0 R 1908 0 R ] ->> -% 1907 0 obj +/UnderlinePosition -100 def +/UnderlineThickness 50 def +end readonly def +/FontName /GLTUCO+URWPalladioL-Roma def +/PaintType 0 def +/WMode 0 def +/FontBBox {-166 -283 1021 943} readonly def +/FontType 1 def +/FontMatrix [0.001 0.0 0.0 0.001 0.0 0.0] readonly def +/Encoding StandardEncoding def +currentdict end +currentfile eexec 
+ÙÖoc;„j˜›™t°ŸÆÌD[ÂÀ1Æ…p§³T¤¢€®o¿˜ˆà9«`ü¯….´Î:þ¹yÕêpýäJ*åÈÀl'¿–eî¡}#)Á¢Ý&»7+å‚/^§ ™ëWŒ{ïÔLßZ60VåáÌQR^¦üλ3rï÷)€#v¢€$öp~c—²¼´°£êë&“è÷ö'ÄÉÇÆÅ¿ñÁä+ž %;a~•ê!”ty`rô³cþúv5øb‘/¼W©¹™zî²#gß|fà‡ +ËÞSS°C޼3R>›NnÁ'Р(ÒeadÌ/³¾•BÒ¡ ¢ÑnKs­Ò¡¦ï°êß w8H3øETzðÒX‘H£W‘}ÎàæÒó±EpúÔÚÅsg“¾cxHSAÑB±Î]& *Pí>9Ýš.‰ÎTZwŒ½¢^±q$ÛšÎÊS²Ï ¾·QÂgÉX¯ušV,Ð’"ª%_â /w!¦•ÿjßTšƒûaù­¼ÝJç@¢Æâ„Pq[¿vÜõ·ÐM~³ïˆµ­ZIêf ·†îÚèè1÷N4Æ6{ÉAö"‡Ç€ôޤò<Ãhú8ôØ›Ø)¢²¼˜Ð—Âæ¯xl¸aì®-Etö T"ÿ5ñÚµ^1z=Ê̸õe.°ä`•¥Éâé`̹–å,±âÀl‚Iº þõpïpšäЮ#QçrWJ^URòlUïQ6ë·O {Àµà6OQžD´;•Dn +¹`]NèIHΣ{¦ü+¹à×Î#ß逞(BUndê/’‚¶*ƒ ÓošâÀxq¸Ýð{ïo1Ø1«€Hÿ(Áä虸·ˆãÏ–à ¢Ÿ•[äävå ™‡½½i•ïÓ'¾²òj"E`$•Ì·…ÄИ8K¦°iÈbÚîjë{äÃ0eLáÂUluJP, ãiËÙfÆà'ÛªhŸÉ7 › ³®$6â༊:f‰W½<Û6 -%)p\àÏçèeþjÊïÝa—ß\" ûÿ9!œg‰Ç @GˆLmúÂs0½_JEn|‘&$SÁ…Ÿ·úÐ@È\mèªÿý<؇Áu®Zý=½]¡m€jX “æâ}i·¨]ÌP»Û¸ˆ]…à'`€ùL¥Õû}½ÏY>ªÔ;h÷ñŽõdã¯Èg ¨¡û‚ol^k¨‡ i‚âŽEb¹µËЯ'ÕA´vàÅíYqe}¼±ØnŽ—PÞÔA 5Ñ]«X]a¬î-ÎæñÞãš]3ÎÌP›ïä7úÞ <ÌA +³ó£¸NPl…G6³/†E†ø¾’?¯‚]Ÿ`©#dÄ'ñ¹Û  õƒ†~ÊþxL¸sóÿ%ˆ*4d‰^¯|‘Q™ÉìµÍúX½U¬ú=ƒŽ=Êh-lvŸö_ÃxÙÉ÷…±OJk܇âi Êé¯DÙ‰¼®Ì¬íÄ 4ÇÞò—”ý ¢trBÜœB§=Ä,vÊ© ÒTÙ³0$íŒ <*¦qkp¸`´h1=݈òDy.Ubo\dˆÔŠÉNí„ÎÀí"¾·6>ó”ÀU7FäPö•ðÄÌb Á–ùäBbŒF¶åK×úêPå©°¨»Äœ›oŸ?&D†…×М 2áE¬´ž´¦FŸLXô§³ÊàÒœÜã'(^˜­´H¤?l‘€þ}ÜDçO(ÑÇ¡¸UI.2 µ'ØmåçëbŽ™ßƒŽ#Xíö„¦²üÁîâ‰ÕærÂÏX¨"b_æ£ïV¶ü`)è­…1#ǵøð±uRÿ?ª%ÇÃ{K·btèí2ø›ÈÓËÔh‘h˜Ðªwr‹Aqhîœ2r÷{ ¸÷…ËÚKÊ^û;8ˆ‘á`“JÑ:;œjâ€Gꊚ1ÔZS ßÚm¢AO4óøS"ÝÒëø ˆ—Mq|E9s-­&þPF þ’zä‹÷é¸k|¨ÑNÕ8̵È}–¹·Û€aLCó³Â‘ f§‘ Å»]´Þ©Ch  ‰àL¸á²á7Zg§Zq†ëÊ‘°d“¬®¸ô;¢æãÈñ•wöäøæµ @I&ytбR€­7X^í=iŽ?+¨Cð«bÓê+©îwVªŠÝ\0“±S´ó ÈÙâÆü œzK|V– ÂÐÐnz ¿…qݼ‡É³i;Þw ½vð@%ŸÜC±(º,$…‘ƒ‘ëiŒg2A‹-ô³CRu‹9à 0ü*o‰ ŸÏ@çè¯%«1Ü­´Âÿ­>Ø'éÇø©¦(fôè×¥Z[T[DùÖ™¢åÛ Ä>b´-¥·\˜‚oU‰Œ­k•÷äÃøÊ#Ê’ÉÀŠý¤pÇûªŒ-TR!šõLÎr«³“ÍŒ^¨ÇvLE·`#â3¤Á½°œŸ jÏhØm# oÞa|•…š[F«¾Ño¹Žo±š05âføŠö´iw¥-(8íŠø/žØ/2Sj²¯·Zés8QI@61Z?eA*ÌÝtêÝØlù –¦+#?ÞñښĒ|“™©fóÙ†>™-`›sjòc½ -"íèØ•ŒJê#å†!í³6? ²w­ÁÖ„¶ºn•‘ð?) 
Ô(Zжu(Y`Xë¾å`À-–Zvô¨k@Q0Óвu“=½–\œ‚Fà +ù÷gî˜J­çQíƒ0}б“˪~ÒÙü+$b%Þ]ÒïMr¨»ÎñŸ@]Ð)nÕYÐÅÇÒǶ6›;µ=wA=š4vºƒçìg5š«Â!OwãQu7Ž~8ú%PM½ª¡Æuì°Þ0Ú’ +™«;.}0ØñvËm¤ +w–Ð,²˜‹„´o‡(8—Úd½MÄ$Ðz5jŽ äó%!¶ßuÕÀ´¬ 3°Npü#ÖºA ¿4û„ó[<•ÛåwUv5—ÖÌ–TNÃÐæ˜Ø!êìÑšøây{µ, VñÆë-¼146r/¦Êó’^•'ð¹•4ûI;ë§Ú€þf¹tÑœâ9™ÀÓ˜ôHÓ*-÷èZ¢?6ο–È] ÉYÖ„Š(8g¹k‰Œ©FgÙÄùP”ëû,Âj±OÆiæÒ2ÄúAfG=×a¯;6R°T„HbGmäºe¶‹îÎÐòHÁ=NÎ`±ÄBЧãn¼¢-nZV—OIÀf@ñ¨•û\ˆvª”Iä‘Ë]ã$ÍÃrÅï 5Äݯ¸=t8ú>Xe/¨‚z¾"›©”ûJ…ã€Sg˜ ƒ?s†sf ÑÇjWKŸ‡Ùz1—žqë€Ú…¹ª mÅ7ÇÞ‰øW2Wøäº„I·šXâË[öºhìZ½âÄý3ëPVL°~€àìõµG¨;#pÞ:ÂlÑP¦ «uA·y—»ö‹%'±Z‡~e¹HõÛš.¢­/:R\Âi¸¥¾ÎqO±«°ï§¼)? :°„×p59I½[Ÿ3aLŽÆ°Û]Œ˜"ˆ«À¬¹&WvtèÕKéí‘dTmÕZÏÆuÕ*îm·REàDD+9 ߆Ê?î™Yø¤\÷ËîÛ‘Û®nP7­šÃ¾úX¼o|X=¥š@ PÝÃׂ"~‘eµ62p» tõ+”›AßæÂs–íz ¢—y0yË«vNâËÐÍy6îz_38BÈ6»ÔÀU§šó@êñÖ ¥)å<æøÀ }f·’#@Ñ=•sߟР•Ðßtè9ê;éÓfÌ©u:ÀÆšàñò¤óC.4èVÄìqú{zÊ!pf;ÐjÒŒö,yv€9ïÏÊØ|>È ¸ý®§cܾîo ]øœ46xØáþ/Á©ÚI©`®Zê&+d¬#qI„”ýDžg¾H™ ÷”è‹*K–ƒgðº)Ùá<ïÞÑ7×6}¥)øüŽÆ ±RÀr}þ49óØtÞMãc±^$KK\ù “ù~®Tr¡ +f€[äLì]Ëøu%×Ò7¨ú]àµÑUšP[Ö¹›ñ'Ü®ëÔÆÝº¨øÞ` z†a^œéMJ‘ÊñyLìårsØ)›*ªzüo±Ÿ‰ùË£ä~ðvj1æÌx³ÒRò…B‘æÿ’}æJ挋±Àáo<¯\;Ù ¤ÊÐdä>P›—ò5ÑJ¿F[nüÓÛcøÅÏf!Â6¯|—ç—ýÚïõt{9µ¡¦ßÀfGDѺ ™¸J'Œœ²é´òNŸÐûÛ“av Òî„1Y¬Ú9·% 1Ò •ŽÁ‚ ˆM…!=BŸ«Q$E<æUÝÆ â¼H¡è‘†õE|Øï†pqapÅê}·vßgœ ×z²N†êûxöŸ/Lb©OŽ=*Ë +8:ð&¸?)HªfKù])ºœÜÖ î†E`å–Bdl·á~éÊ¡ŒÈ?}1Öp’„ÈïóV³—Rñ—‹îã>P ÿF~ß¿‡ÄôÌ0ß ÷óæÙÙ )?'Y`Ð)‹sR¼|–Ï΄WJüG!pzÌEäÜͧjc"ý.å_=5½E +*‰„ýkÈÏÆå(Ùæ‰z–E6ŒÌbäû]q@øù{ˆÆö½Cþ[®/X%u±Hä[Íp[Šíå <Ã2ܶ±ÈfÊ‘ N2 ÒŒ#_ËÌF @hšyw¢Xšå6ÕÝA\‚éDåèš……Š*%-Y»šB¢,G?Þ–ìžó~r¬òÓ*€QUXÍ&^à ®70l¹ûú)¾Ö(„•škRË(‰ªLxöµ†X%yÄÇšŸU½x.Ï™C5ù˜ÙÅÄ!ÚÍ•þñàhùšžJ-ái-_á*¯îd +hA¾:kLÑŸGt‹¥#Ïú‡Ö°Ôš¼ûçø×¨åð¹á¦bP•±ÿ¦0z/„™¹#¶E\µG·qððãÈ;]-µ"j<ænø)XMŽîI¨w \£×;¶¶[êZ}Š]ÅGáúÀíÜqú~G²AÓ9‰ á#BîÓóh¿ó2§Qûý 2I‹×ÿ/ÑJ(?Mq”DÌ#¢?õý,gðÏ„lå‡ßý?QçÝÙÆB’®^.`†ñ~þAÝÆžC)Ý—¡ÍNÒãßÑš:BÔr½]ƒév^/"ο6!ZÿÁN–DôÍ{~’‹BÜ:ž–ý{6aZ£ÈÏ«œþ?}/ÿªƒ–žT>Æ÷P_ µùő˥‹Ø7i±«CU´ÙuueÓ!Ú \ì‘“3ëàë²*æô¸S\nE´u§ßvÍL„Ú uiÛ +]’X‡vãu( ü>šÊz˜—Ï.–@º¸éÏߣI–ÊE«0‚³÷ù#ç:õ¬(4ÞaÞW/47-ø"ZÉ4ò:מ„[ÆixHÖc„žBœÍ± …—€ãH[܈¾”C;R £Â¨XÙw›ž\ÌFŽàsšÇ ௹Ê0¢ˆ¹¼MÓ*KÔÚkH‡pahwÕ*¸gÝÞç¼æDQ}õ ë5ôNk)aKËA÷cü Åðb“·wî¸PÇH6žˆù‡˜:ðþWT(šZ[]†îÏÊsÏþ[8¬‡±ý† 
¥&§i{Áwÿø(N”eåBv÷7;F’¦jrÑŸàÂ+õXf•úXTœ[\GT0EjêŠNã%7­Ù+z¬c1<²^OÙF «tÿ%TŽ{}A@ÒÍ‹óE Uk¿›ô]Ì9é‡W ´jðî#2ãY߆¼§Æâ†Ú‚ZÞùí "®#"x4Û¢lÏDïRõœÏ0i<’W2Æ¡¯üš öS@“/ͽÜÏqÞè +²8`×\åØi=ñuhº2&fö¿YÆø”ËV¤¶vAx.2˜ÀÎÖ_Ý+ÒߥE§²<€M¹(\-¯F­À±°S2þÓþÍmÿÅ>ésçËì¬ï¬Ä|§%eÂÇó:sL©ÖWáË)Úõ·ÃNÛˆßLq;´UàVßZPOIõ;ã_¶ߥ¤Ç|–}»ð~å&ÕF½é’Uud\ÇìsíšËÙ©ñpL¹+ +„æÓ%°èw[vÌ;e.KgEVÐ,À0¾HÊú­§}íþ²³€®í…±»’Ëõy ~S+‡µ¼úZ(Ý‚ e/Øww^ç·+ùk\¨ÆP<7p;2 foß?”é#ÆÕžW1í† +c ­mxþK‘\©Y^=F3‹0ÍÇB»—‚t÷•½OŸê4x&%–xàX¢F|ƒúΩ¼¢üvsßä_ê5»õ¯fþw.R/z€ùIÊ”U' V$ËÎ÷ÉŠLîp©ERŒ:†^E>ÐaéCó+8KvWJP5Ì]§SG€8‡ÆöÎ%ÉܽVèƒäžäù_*g ¯'SÝž«º;ük] +æ¶ûÂ}ÈÒE}Ú³Çæ¢·L^„†Â¿æ&´Rûû+§V9[„ö- ‚òÊyÔ£{œðn!5vÌ£èTwa'ý|ôÃ9tª‹É6õ,âÛ¾?f Zâ32¨°ý„k(‚äX=Mâ‘©Q›…Æ]’Ù⻳‹¦2×R!c²‡Ûõ†'p«š•¬CîI×ÁÌ•Ëç?/V3:Ò4H}“Qææ×èJG©…âá`[ãÒò-á}‹"%Ý…7ÅBµW9ŽßÐIôM§É6YýÎë‚Á[Vç7U¸×²’JðM‡;” Õ7t¸ðqÐ^Û¬OLu_H(ŸïNWê~ŸæÉÆò>( –þX‰¹Þ©Z²Q(ôU ”èÄ·ý¸]´ÜP¼õRb†'sµú?ðQµ¹ -NöÊÆ×Qd<±Á-Á”â˲ŠðûšZ–7¾„ZŽ&£|¥Âè¥ì2\&ö0ލY[å-éÖ¯g~läÝ´s§3îg +Ô„L÷“á1êf° D®r o–èÐ[—óô\û,Òiv™2¡I”É_RÁe¤‘ 'ŒMî­ˆAÜ­ùX¹Äuì8ôE’ Ì¡×KM9ÁÝm°ÄsA–µk7ä“-Ø6¥’=>m¬Ú‘}¹nbÂüZº¼u-¡cs ¾‚`—™È‹yÉRD(›pgUšbô,vb",¾SyUišµàô/o•Þsü‘[”´%VÝñé§mþ ÇÁ `fd“þ*¥«¶Y-ÿ]Ó/OÐF +P$è.Š2_'9Îñµ»_€TcŸoìovZƒE.ÇÑIEu9·oõèüÃÝ$e–ßúnY®é‰ >™~hU~5z¡PßäþÒ–ìÒ‰¥›onI@ߣÑ%#àKâN +-¼SÀ!CLÂýÃZO܈ãeô Wg´Â)Ef6;‚ …{3O&Õ]Òéz¶kà÷xâ_¼~]Z*eÀ}O¼»¹Kà]ÿ+?½ }×5áá†ð~Ëï³´G÷­E fF¤î\©šÔÕùuÍŠÛäj¶¼Å˜š9Ù,’ÀùÔÄYoÁ 1ÌÕ+­¦]¿ ë© )“{ƒÏk߆¹2Ðl?Ù‡¤ š½­‡‰(g±›†Xë +žd½û _Ä}#ûZéM„µºÊýS³µbÙ6n +»GŽ|'Hèr¢³ñÄyTÊl‰ˆ0r¾¥R°üN 7ô¤E?tæB!–6àéøy·”@N`ÌŽ£;R¬%–)|u‰Þs-³ ÷þ@aÆì‡— ].ì|7 PäÔàØ#J˜‡7ɦXU%ÐæFO?©éº’Š19ú ÍÄž*ÐÝQ{oõéYôhàˆð©¨œÈWÕºaòú˘T÷ìŸÌÏÅ#øMTpý—}A» Ë,‘=v‡ï„GÁ70g™Ä V¬õÞlÅ4R;›×ãCÓo£‰a‚/¡åƒ !¹O´Æ‘kwõ|tÖ•Åð<©YÌÔŽþÝ ‚”‰Á€>I?¨lùØÇâ?ÛO|)9;µ,²8KƒƒHö@.“D)µZè«& +ýŒ]¢ÔÌzÂ’‡uPÞy·ã©……ôÔ¬U¿µ7Ùí{‰SxÌ\ÑGÂø8Ç”Aé@Aøžçƒýç“j£üƒ$ÕýnA¬X¦ºi G > n„~¡”OïÆ2@G0”Å懪+äB\æK¹P€!x"³ Rö™V7Ó` xŒÂûðotݼÙ_÷atœIÌ•æð/ërz.†U’˜+ÆŽïX¯¶j÷ÌÌ€ù«àQ‹¶òí•0ºæ u‡õzäb›ºäŠª:ü:€?7%ߦÜrûµÕsC@uT¼×Á? 
1» îüÂ2_‰rÎHàD•Ö*¹×–“ל³Óƒg:KKCÇL §ã&”z‡™æ4"‘F©sz³Ëåo]¢Äq6æ›?C3Q6ûóè+”Ød€F|§À¸gM"3C,+TÕÊåÛÆ"»ëÛ£”¹Ú(ïo—`ÍD×»rYÿ=tP3é†<$¤Rêǘ,?i`µ3üÛ_ü í€±‡ÚIÇÐ~“UFèó)líüiÎé: þÌ÷šôQô\îpõ½´•ïùè^â"Œs6à/(Á#•¿IК2Âòí€â³ +àEé(Otn¼úVsMÜBQ6«Æ‚€÷fËÔ8q$eh1†/ðˆ~•ðĆ|ÝoÐ7ྔîíàwÃh¦è„nS]¸S9V?÷J¹m”^H@¸ÝT`åŒbÔ\©^cÊ<ÓêN¦Œƒ½Á¥I7³=†ù¿¸",Y(G!â¯Vóƒ-ÞÍkölàSK7\Þ<¦ÝqELôª¥±¤–¯iàÂ¥k#ÿ=º$Ð@,ž+öô` ©áÄHß‘M'f­y;Ð;ú0¢á²c« +Ãm‘»;qà@¿å#!¿^3‡öƒ&æÅšžvXQXnŸoC{ç‰&)É…8{ Ü;Ʀ¦ +±Å(N=çÞ‰ÂiíÞ‘Dh]1îw!Ÿ…}ŠA‚˜¿½Fû¥/б§#Ä2´´%ñÿ4ÿÐÅ’'j`Ñ·)$‰Öùqàª: Ë™T¿ѯ2ŒÐ]ž<¸c‘7?gädØ|^ñ>·,®ºtüßN…⦌F¸¦3è./Ú@R&æX ÂáTÇÊÐ8ü›Bù>JÛ°UóAÝÇZÆ«QUôƒ~å† ½£ý&Ø ¯qÏmgråç}j‰¿zÅþìYgrô>ª?±î?CsüvàÐôî¤ è +¾‹ÂÕI"8ê/½Ë©n§6¾VF‹w^]*[äFî À|¾>²IÙU£:&Õ¹ÅwÔV˜£ÈnGËáÓˆÆï„«ˆ4‰¡LŽ®Dá*7i“)T¥ìÉ#-%ì9ZæÚwR@éWå¿B/ÁW)å7Š?m”wû=´"óTë` aã,cNÌý÷yOEoÌ\PG÷Áj¡Í¼Ò­2ÆÞ«A ÃÒŒÀ§7Ç =.ò3?0Jº+;ÒøßŒÁ~|úU_¢”¶XBå"(~¦Ø) yO{ëEÆÙÐ¶Ì ™Ÿf5—îž0M3žooD 3œö£ ŸIö¯ ÐSL +C¯ë]ó$""oÁ™Òa£Ò–Š!#\û·M¬p§ÎÚ-í +0êX¬¯3´ j1­šW«©©¹%&AŸsùmâ˜]ºCÎ,\glh©/Ò&GðhÚZ + ì(Âé:è~1ÅÎÔ!bZJ¼xØwLSïݘï'Ü ¬át7ÔeÅHÊÿiÕâÝݼ) wAM9Ö¨2O‚¹áÔ×õ I)õ},=’cÓ¡LA½ºYJ…ª<ýHl…¯ë–íLЧ"ÚW˜ÙX— ·ž" ^½•?Μí÷×ÙVððcèyÑ"ôøÓr‘…Ïw +c.áËŠÄ{$Æ4_>–ØßˆB ÍæÚÝáfðF3K€pý?‹å…4a´“1WŽÏkðmÂ%± bgÐOä©>wí+u5n‚^ðfVª;^]ÉÇ÷µ§ƒ5ϯ¹á«8-.Våïû=·‚e_~SŸáø¢ƒ,vÅî©€]AÆÙ²0CG\ 5%ëW)دP[&ùž¬r¨rÃâD¾@út²°ƒTì¾²‘RNI¤d.µY‹ó±ú7C«tHžjÃpdµ?—u’PežPµW[.¸~þ ðƒ0†ß¿>6ª[Á2nˆrn-og#R¡^ß/e>´õrRùUŽcom&ù'Ëè*0úÌ‘ÛÊÏ¿ˆ_Ë¡ãÈ\qS“Äq çé³ý“H"#S>4“H#¦AQc›½\Fx(ìº*Å2MÜ’ï ÄÙ(q—¬…Dí¬êñ9îšüéo ¶×xøüø2[*+/² 鈣†:”„5›ÿç”§[°}TMØ ¹üñ8nwEl±ËP¡bhè~.´ÀWžA®úW«eÄLÀ™ˆá¯’lJ{ 7¯/¹K >ÉQ:ÖÌèŸéí‰J§7›ÏfP.šŽÌySÂC±2€ªÃ /DØûÉÖædj«º©ë§^n⤋2¡h»ÃÔ˜À‘[·Ï?W,úÑT_S|Œ)aFP–K¿ò öˆÏ4*Ñv Œ*Í.üèX+L˜·60¹äØ0‘¯#ߟß,Zð6ÜN¶÷*=僔Oë÷›„‰FìêEfò8G?„V:Ò òâ«ÎTÒƺ¼‰!U$Œ1w<ðxÖfXØ7î +Z_<©:lþöÌyw8†öí²®€Kaßëµuåà•c2Ñ9:ð†#ö®Ý ÙôS¼„äšA›™»³wÅø—«IRVxìèªMƒ„…„ŒCÜ•`äkcÚN“?-8Ý€óÒ&‰bÐv=8àùaRÿꜹÑ÷‰cmã:Òÿ½È‚€O G¡t·‚ÛîúJâ©ñ•aÚbÚüv%ž%† +7-«¢¼œ>µgòé%gŽádY,xĽÜùxFws—7åaË«¾cÚ®Ât×è­£qúåHg¡šî8w­3Ó”ÅtÚ¼™<(ô·F©t%cQeÑ[ác$ïET£-§·éʲ®÷§;ùñwë‘«æJÕV¤áÒÐ.'ÚKÙUµ˜Óèxâ“ÿvuí"÷@òÎýMxZALê`L"[´Ø ±ÛN"~èý3b²w„–X×sdËÓJAºß/šQ½½"Lò xZoFç«d×(7IÀÕ)wÛñ‚®’_/¥ÖÇ'Y4óþ]9Ë/Qfÿa}bß-°ÔußœÚ +’obÖ#k…m5P>cìNˆE;äraŒÚ laÓ´¶°þS¯r)xÁekæʯ} ñßòRè 
ã\¸héã™ÓõPbù_VÐì‹;ÑðÙÔÁÏÅâÿOFÍc¥&%á©p}OY3ö%" Ž#+sx­b—bïP‚ʵ€cf_wTX;OŸ¼ŽP1e„ßséÖçÀµ²él[qYñ+ÍpÜÆÇtpsË ²0Z’dR‘ +·ó¹}ÙØå +s®»jXª™'MùW! +@E­úÇ! YRg:qU¨3`0O Ó þHJþü|z7®#íÿ8ê ŠPú2ª×¸{2Ÿ‘Ð+`æ‚hÌZ•÷m¹I3®¬YG¼Lغ!}¨Äm ÚÏG3n›ùKј¡:pFµ¥í”º©Ï÷}p^^°‹ÿáÕà,î·"åŽ + ?ëÃYžq•õý«ef¤òsŸÚ™æÆ+æõO=3y(VóïPSÄ.Ïw¥ÉòÄÿß;ø•ÿÍ/¼9´ŠÛïõX¯fÁ[ÑøSü¢6Sás9@;É4b.k ýZuÜI9ÃAŠb _sv­ +].#i}Å\`f0bœdfwJ T/ h÷KÝ0rØxü’°ÙOTγzVýI®¥u’Y|)ûÕlnÛ‡êÛ­ðÕ[ XÐ;= j,i ÎNa°á! %¿5€-ÍãNËhÐ{…ÀÝ1‚¬ÝnqƒÅnu‚ì‰;"FÍgœL{rÓ[Ó?¤):˜|‹\Uô ’îÊÔHÓ6¢†(­¯GxÆ12d¶*zœ×Dùrëúý>¬‹q&‘ë +§˜]§OHP £B_VÂí*LµŽñ裎A 'vvuͽr‚¶ñ4RËR¤Ý@à ¢o’,çÚñ×b”þ>¥yÒkm¾ˆHCÕZèµ Fc•¹ŠgË]—ëîÂÕüBþH”Š/—’QnKfA£"LFŠ»“<“ØÜ·Ÿž¹ž€nä1ˆ € %Fúû¸y\±õr{˜¹anX<É­nÛU³Ð:o¬"Tè ½WÁ¹nO{X4½="\9¦KÞøïR¸û»£^KDZ‘>H¶=Œ³EÚGÝyˆ +N¬Èý„Þq¤ˆ9ÞTxE;ò_æäòÂOšš6¨¶fDÑ!&™Ÿ'™³ÑÂ`óé&®Šë÷1ðe"8ØA…X’2ʺ~> —änpç§1† Z€5©¿£ø²ù€Ìç°RŠ.]íËöû„査@càÚé’@i\fÎ(鈿R–ŸðyŒçRòËë(gÁìÇGÃnëÁè’Bœ¬Ù[;Šç<ç÷Ê]\ í +o9wü½(Nšï¬¯Î§ÙéÚ«*uˆâÉé8ªy-*5òËÉ£czVK<÷ºp^ø17Û_Û3[Øn`¨s=×Y’Ö['@‘W‹È*êöÑT_S|ŒªÍÜW)i†Høë¥%‡"WRE׌´/b),Ò©G[–s¥ZÙ,®e¿]ìÎ÷j–ÅvË:aÍÖdZœ}´/Ý_ùî_©‹/Ö˜ 6SX`R;À9+ ¯ø7Çå¶Œ˜7„ÌíjK aô·p(A\N‚Òm6n1ú°Nëú¸›M¬p¯´Yòæoý]FPiÕ_–Ò 7;ÞF·8‹6@ G+Ü´¹&uüÁC®lœÍRü¯¦é(µI¸,ªŠ"|«eèöÚ7Ô8š—}ÝEíYBYvvb±IÂ#c½nl¬ü¢Ê +7›_½e¶LZ†o¤"/ߊXO„ò'¶ÊÙâ¶Ûø×|Å2ÏõñÐõÎ]ÒÊH, † ºÞƲ¯*¶Ë9»ˆWnÿ\´—mQêšFÀ´ó¢Ö²Å>0W¬9žü©c|Bà99óS³åÏRÝŽ ¥a#‚וnW^{^ä}¤½d-úعð¹í¬iˆc&i%÷{5¸Áß]'Òírqñlä‚fa[ÄÒ/ºurY¥Ó*­:jéF2 “FRùo7½òÿ¶:‹ŸAX?u¢Ñ3šF<¤?XÜAeMIuqýè¤âø]¡¢øJšoÏ ”X}§©;v =jl…R¿¿>;ìÀ<§óAdfænµ°Q·Öºõ¢Zõaî~ú ÞÐ)Ò'02áȥ͚Åå,Ô;‘ œÀbÞI&f#¯š´»6~—QŒÎdÒ”d÷¦jÀxr˜¢Î°ÿe©¯JÅÆŽ··¤5f1Ê‚U³?"§'¦£-^4yoϲ"ŰÀôàÙc¾ÜpZÕâóÃ8äÊFéˆZðJáñOS4H¾ëjOÀøSö‚Øv}Ê]HÏõVû¥Êþo§áîàõƒø–Ñ{÷…oë¬Äj‘w×SýÆ5aÎy¦êß$ ucÓæŸ¼ÌªúáA’=2^°3!#½I$ž¸>R#Bu5ì¢ûËáIöH5#ˆß5ÔÈc)rœ5DG'ð*¦ZfçÓõ·i[/5R7Í÷|ÊSÿ WY½7ëÀÖ¢Ô¾î!&ó©bi¹ŠGx\ï»MÆègrRBÛG|çg™IAé¥×ÚIÈ͆öj¦ Ð%d¸ £òÕ’‹l–Û3)ðoßÊ6¿ê •~O´»<ýHh½¢Fg9 »[±fȶµ#Õ³ªüÀú$\g1">ŸhÑŠ')üZõõ”³ÆöÅ4'ù£Ñ¿ïÔø<ëÀòÙ.vçÿÑ»ÕÖúK¹¡;~šˆ3Øx1±ö*Ul­oCÌl-×G䯤J5^¾J|µ 62¡¥¬tYñtôúk4%üb´f%)+ø§.s+Ý~U*[á¸ãŸ'b“ëûï þ3§Åî½´LkZݤÉ;mÂy9…q Jª¹§d OÊ kaùx3P+%[sÖÐ8u¹ü4jP7,X!‚ƒg2âЪž–ÑÔ””dÍ›­J©h»à¶CŠ.?äˆMˆo;ëƒÛþ8-stßœ!—o G€¦X|öé¶#Sé{›H¿oOõE˜y‡@ý€Ÿ,pãQ˜êų½y6OEÝž×cmb¥«‰…6 Ã9x 
T'±¾Ù4™“ŸülÅ;Þ1šbj2¶äIä1ó*ý=lž….OŸÂϺÏÊñÃ![67@oýÜ5»¯5z2ÍN_ì q’%½J°åE¤âM*¹²¥Ù0Í1œ°l"ñ•£¯ðËïYó+GÔƒnÌHÌ®·Û»n×m¼´ Y¨ŒšádCÅç Á’CÖö·¼8U£‰>JoZ3¬4 m®^‹»š[ÖTÿdtv3pîÌž`Y†ëLKË=ð&JK%žK¬5\ÙÑÝ o·DS°ÍÕ®,ü¯:ñE¹¿þTiNëwþÀ!H®™ŠËòfþ·Ú'ÏÀß«Gü§5 ÿ£{lÀ­5üýÛ÷´¶Â¾iß×,×3ðí/¢ÝnÙ)ò3o”øãb†¬†/Å’s “9xý1Q­8þ%§\†ÍjLÚQªu¥›¯†ã9mz…ÓÓu5ëýÈõÙôÅ&[Ò¸|¦žóØ@¶n*FœTÁÜö61âÃç¶áôuõ…²æ™l½ÈKÙê%$p›9fk_=ž·¡RÇxJÆonÇRÔ'§ ^>»Æ­£ç}êܲeý"ˆàØèÂEü•ôH?í€BKÍxly°¡öJö¡EÝâá1—SÀÂÛ«;ÙqÇ"hK”=‰Ö9fü˜ÙÊ2¹y3„*½Ï±=^1}Ó¯DähþÙ|¤R€jí!ªäŸâÒp—°‘Ü •=JÚßÞ)¿(Ò«‡@†¨JR¦ýfò^ä¶þ3€Ìƒãò³Ik!?4†hÝ÷Zý`A#& ecÚj…×ܲA¿f%ŸVß¡uŒ6ûOê4µ‡´#>i0Š=t¶Ýûºy닇œ×/bñ4¶æVͣѺ·b[ä±Ñt¬°’F ¨¹ˆ`ñòcL7…³Kž>l@ŠÛ5Jd‚ŠFhkÜìÓVi¾–(”ËÚ2ÿ+p2«‚(M ‹Ë¯¼jpZœwµ!†õL§ÒàwwöÑíR€…O@¶¾MC©Üt¤óKgÉ—_œŽ2Q”^rù£3û`ëZDT`%%¬ÔçÆdÂjòV–U7ÐJ”‚GÃêwÀ1N5[šN +P¥—NÔ³ǼŸß–§Š°K#>ÚŠÉcn³ÿ°ÛÙ)(<éd!Þíz*`ûßÏÓ. èÏì7ÌÎQR6›-VVŠKÊA8bÅé–¸…dy Ê\»ì™í(Ò´§¶Í—¿‘’†À;n¨v*BâíÅ—´s73] ¿^PÁ(ÿó!ΣåÕÕ5.̶‡Z¾+•J¬6ì•B"´ñëF½k­ë[ C‘Ї~J¼GXyo0Nâ¤TØC¾$‡\RrƒbP)$'ŒøÖPÓ¶àS$¡#ÃrøMÄ{T–ÀÈŒú)jj&FȤ'+žÝ6nkô˜Abͦï\ÑrÍÇ A ‰}9±ÂŠ„ê-ßDqx×QG’,Áš‰bå˜óî®Wϼê’yIg¥K†Aqhîœ2rtØ +×'•šbÊÏ_¥73w¢¹*] +fM^¨pÞsêàäÁbèfÜÔ‹¶{œáÐ×À ²õ(ûG„FLè¡Iï‘!ˆ)Óû¨3ÝQ¦· '£•ŠfÔr ÛíÀà…{}›´ÃÍ•}L±ÛÏutŠ~ÎOÜÊ?ùÄ8N¿)ˆZiØÕGàŒKZ %_'\Ñ÷a÷‚ÜÒ<†FÆÇ +[•„ˆR +FµÕÖ%£q¥øÜ•Cå3b©éÜÎW¦‡‘ÚÿY™¨ ¢jF”@ÇZK­gï»`àLÝü.›äp™VX?;žï äÔ›£åšÑÌwc‘ÊÏ—µcgïΩܖ^V„tÆ ïÏÐÿ¦±Ì£‘¸÷ ~£šÁy ¥»¬a^d5œÃr¾÷…]Gw`/s-ÌÚ·²b¸¨}ýšãe3ö+NW0©:‹ÄùŽ¥ )“A»º8…,t±Ò|%hL;—ó€4àë ÿð§÷Å.e\éNhôéî]"óXˆRñ€Šêi ®J¥³9Á‡õ&0µ# ƒO&mp%?q×DÚO—jâ8®èâu¦£ÊO|pØ>ÓVž^Ý#Zº!Œ‡çôì¥>sÀÃG#N…}†‡ôÐý(ïb¶lÌb…e4¸$¬Ê<ÕŒzæ>æ=4¦e?WÃÆ *>¯„]ÏÖv´I_•öD°8 £ü’ªš¦ê©Óýà{®¶`²ÒàÉpÑû´Íë_Ù}¡ûúó©$K¨´\¤B°/ÚQUùÃÙÃX´+ÙËÁ@õÎU5ù-¢—‚¡ênš­s¡ƒ›i¯0äþžC¢Š„\º‘)š„‹Á ª¯…`¢MÒIåùù÷ÒixDàýnw®áÖÐÊTC†Ò Ø<.ß·Ù¥íÔ·z^V±É-Æ<ûîX4òÂô‡Ê‹Àiæhê#óÏãõ1îCa/׬õÉC°céäšú)q¼ýÚ ¼Ök ï &†â5b5-c° æNW@$#¿ZTÁÝzñKÒúbX”ŸeŒ½DG'ð*§ʶµž”2bòæï²#²\ð¢'õy¥|´ ë°±œÓÑõ¸h]tÇEMšÝ-FwÍ AÞ¼±üO;(…ÎCq5pv=$êÇÿƒ“þßþH2§:òð‚–'ô¶]…EVWF¸îÿûRÏôƺUÛ¶(ŠÝ‘à|æ,àÉf43l=C +½ÉÄ¥Œ+ì=ï½pÝM#»x¬-ÉÁü·Ög‡Ýí—ý|/Ù|åZÆ¥•«Œ(_" ø09ÑkpùÒ:²“.ïò'æú§ÛÖ–z“mè}¹ýêEñµÌ¦%Ò§éà’¿ðµÈr¸…@êŽÓ)íØÃk×it³s¡žé¸ÐÑ7i4²€Ü~t¡Gn}gÑ­à¡›ër¼ÍqA©½ŽÍÏ…‡í°‡JEÒ’os/W­gg®´™÷Ö 
!à]&&,Ù½¹ÒZ匾“¼cð´ÓCù+]dÚ?Œ‰j’ä_×å…¤Š,Œ·÷?>Å-4c´hê‰4wþÔ(~•ÜÕ¿¡ƒ¥™ŠÞVÜÖž!Ø ÁñÈ »nê ©uIÓL йÛN£ŽÑ(JaŸ½’½yqŠB «“.…“L“‡b…ßÐ ”éKÖc“mI‰SÛ>ôؾ >h©Üt©Ò”W†“®ÇZÀªÝ °^*¼õ2ó‰[á»qgú$;û7®BEr¢Nü$WB´?KˆTS}m”R3o…A¶3:–­ ËÑt6¡ŒS¾Ó’äA™gàÕi[Ž/Së&QxäYY½}<ÊiļâÕéì”_ j!zÁa72¾I»Õz¥Ü(^£lØ7§þºéórà ¾˜ÇÏ$ö‡Sg›‹ñ³ó±ï$—MÖ‚JŽ3Z·ærq[ÙsWŽí5ÐiŒË•q",Í)W61EÈÔ—E³‡¶.gz¹LCØM^ï1t—² äw¸U5©5Öõ[€›ƒX+0ù¡L½Ç¤ÑÒx ÕÚ»{Çx?•Ý-ÜÆ4ÓÖ4'޽I³ËÜîü¬ r>ã ANì ‚KÓó4ƒ!EL·Y³Ø)ÑùŽo¯cû«8–꩎B챜{(~®b̤ÓöÚ´ní@„]šûT:ã¶Ïts©ª¨—Y¶ú£r¼K…••È<½Û% Ñ¤‰ë ±iØž^|øây,þ[ƒáÒÝ3åø\R6eè›à…q$Oýß²‹ª¿§žI-ÊJS €f?3ø}DsYKÄmÊ…‚ *Kd&*gS /sîÛØéɇU0㊮FÕÝL£›PÞ_oƒiŒË•ð§ÇÞ{Žð°Â2 lÉk°nfeÉŽ0+> îf¡Š1)1®Qâq:{rÖ#~R—"[g°Ô© +üï§öㆰû¼ã`9¥à NLÊ·5šØhñH8Ê’àº5¯F;˜ºJZ²êuÀ›2×ðéó± „u§É¾˜O§|6¥îêÄ|þWZ?Ç€ßí¹‰Z³! Lc‡·›«ð½üÖ&Žf&¸žå™f)ž*Uñüõ~'Ú"íÆGSö‡BüIºLÕÛ6†ÏH®Ïˆú~?ÑÌ?E +§@Ø×8Sö¹õˆ X}Ÿ^.5n=þû¢-´&GËúí¹½5G’¬Ø÷LñzXp`Ô;N]çûÖí‡æ$ÐõqCQñ¬õˡ׈à‚ðTÁ°O+½Œ@^xô÷óª7ù8É[S‡ÝÀן ”Añl™WÝÝ%OÞ»šLÀ™®Dævý®{±”ÿ„ ÙdƒkSêï8s<5'ûüýõî¾ü#”ë7¿™õ¯ò8?Þ6³9†k¬Fýu@n]‰žù/8³Ø¿@ 0‘E¶û¥ð2Šl}‚û÷½ä\†BÖi93*ö¹ÓçÉÕÝøY—4>zÞò‡é‹vôûʘÒeWŸÃUÂÄ8üIà&›õ`~¡z`ž6:9㬟 Ð7;uòsN‘ïK(Ÿ,Z‹ ä'¸-Ü‚-@ºGÜø!ÿjiZ*j(ðñzhÕ»Þ”[nØ­Vù$X½ÀŽßx&šý¿2ç!ã4*o’‡+ ÉÄ>/ýÇ ›Â¯Ôà\²¹€ˆ¤TžkVç`MùFª6°Ùm§¦¾2¥v9ÍGÎÖØˆ¡›ý’^ QìÁŠìº¢Åž*æ‘2Þã`ŸLÅ9*ŸnܸÙç ¸/)B”–¯K +J!œ‰×LÇJ!{¨hdqîžÃˆÞ±Q6›ï¶Þ¢ òW±Wo¸mR"Ø÷ ¥‹n£ê#(íý—¤•ºÅÍÉO}Ù-V¹àZõýçãĉaŠj­„ü<®—çf<ÙÍ…8éû¬X<1œy¸í¾GÃáz -Ñk\’lˆbƒ?suÈÝÚÕ.g§Œ¯WilyÝL&Ý=Ùßgž€åæ y +ž¾;ûéh1¢ª¢Òrݰ;¡¬1½þIöNÛå&I'9ƒ_øw€€zöRW²;¶ØQ«°•¥l§SÞŸS†4îñ %^ç¢hÞ‰Ð7c-ZwVë_Æ,éž«±mb!Î’Ø®IY¯H‡3ƒß}^ß‚ŒÐL‘^°=%‘* + 8{^¼ßy¤ÿ§! ŸøÏ(š²ˆÏMšÂõÝHéSŽ+ÖÞÞ?%ëá02•¯pfg%@s_AÍ«XPˆ)ˆ,°~¢C*0þ茸Ûô™¬ÌÔÅ@nˆZqtTMùÂZlòÄŠ Ql„l¹LÃÃF-ÃÍ‚««Ú[¿ßË}>œ°Ÿ\‡ÕsËÝ…Ê›¯‘À®²‘ Á—žamξC·æ¹Ût&­<+c„5Ü. 
+ìveMN:kí«v’ÕGùÍ&~ ¡ *TÓ¹OÊÈÑ?÷梆ÀhÈfF›;Cwî‡&=[·•ÿ]Í`_hεC°w'2¸åWw-p[Šc’ýO)æîVðxN&×KI³¯‹óºÔ¾Z2JóÞÝmU<‹TŸ(¢®Y²^›>0}õgaOzž´ô×U„LŠhØD•üN‹%ÑîÀ›…ƒœµöÁè!é¡u×V¶­«<Ïìc2€_ØÉzvÇ‹»×Y£¼Ç&–Þ@ëi=&ú¸\áDׄQëTC&Õ¶¶¿@û€zÊøa³äuŠèI¨£^B70»Q¼KUʽh¦ ð®k+¢³nÐ[s½=)¦[v¤²ÓKnÞÐoSlra”ÞÝ.ïlv“:"R+òÇO¿X–ÉmMÇbM¸õ²|©Ó¨L+dªóØt7*Æîv´Þ1^ ?«ÿI–ÉV­Å—í/o¼áhWúèȪt#k5ûà3VšÜ/å–-Ãøâ’ŸC¤½ý/04ˆU´‘ïL åNžñpþ0,%G¹“ËÐTŸ +^½Ø¸;i†šݨ ÁAmé|¯òÿ±‘•v©±Ø²‚Rã›øÉda` û"8‚‡¦ßá–”K$i-{ֶ󢀘#w\q$~ùøÔð ÅX8 ÿ%zÂ\N;-50xÿ÷â…¡ÙB)èMÖ½ £²êºFìe @Zœzx©HVa‹7Ë8jÌ^ÒÆKsñO(\ÈY­j:?ÿYîËq›c#ÚÊ;yƒ„ò{÷¡4Í܋ъP<ˆðŽJ`ºT-XþŒánÉ÷Tý"𦜌vÇI$¼õSJñ—ø%˜ƒ·Àžs™7ÚMM_P¯}”BÉÒ8ú›Çš”„Sñ‡nجï‘h×ùÅ‹IqD…»5gêÁ5‡*­¯¯fCBvwè>lúøÒœdS»bª:TXº2ùºîýŒÂü[/-=JƒÆq ¾uúGáÒ¤wàW¾™™ºÎÂM®o2IÝ\þ‚žµúªèÿ‘šÏÓøzta~½,“E?©P Á½}-ì°¬…G)êÄ€V¹ê†æ1ZÊï Ý®|oÚ-jßEp,@áÂõ©,Ó YA;½y’<ÛGËÊ‹M؆zËH¹Ø”ö‡p«hç×’a¡/!¬ %Þ+P‚5é5—D8¾Øb°$)B0hru@ZÄ]Ë(áÏ0û”‚²5-é™ÑIëuœnò„”¶î¶´úP¸§ƒeØéUýê‹ö€Ér;8¦M”¶Ë×oÞéÀwrbè¿Ôñ–‘øây{µ, V©ù1VÜRÌC–æ¹²·5Sÿߥ—@_XqYüÛ± dY±½12G/µ«5e;.©GÅœ¶ÔŒ0éUd½‘à}IȸӿD`ˉ‡JÌ{Ãg/ØEØÜO¬¾u:lÅ'»÷ÈÚñôõÍÇÄ‚}iñÎeÌ|¬±®eñ¾SÖ”oîþ»< py12´ˆi¤€©*N»„´߬A?*½ÛÓÕ¯ÖZD’¯Q]›ç0§‰qmOÀ½!Éá‹|ÂxäÉÅ;’ƒTBtâR‰¯’ÉöíÒ“©nOgfãÁU†sɸ|¹ÙÁ¶Ó\Év:å!žÌ ºÐutÙà™SmÓ'èW®”К›O~,£½Œ mß ˆ” í·jÂóë5Ѷ·ßyc@˜¤Â¢ñ#V—4/62¾<·y™>‘A§r= CŸx«‡Î>]¾[{ÉIrç• +$@ÒW +¶?þ…Ò0㨤 +B¡èþõ+ïví06Å?ŸmOu['kUôS1ð+”ÞõÌâ4/஋±9u7ß$áuc‚Â'\—ËÇóe¼¾€[  †57É–ÔâQÚðw +ã¸YS8HG½1GÍ09 ‘ Ûzö¦ËÈ£÷Gr/nÇã·J]žÓÀÆ®K/+©ÁKŒåƒ¤ËÁ'îÙl^-ðÝ¢ˆÎJ¯þ¼'•@Ek–ˆí®n€Q‘,MšÝyMtc’$)ïôS ¸%실Y ÓUÀF 2 +J·µXnŒ®xÁë”ƬÌáBqåÏv†/Z§>bÛÕCLòÍN`M# ÖZyìz^µCÿˆ¨Cp÷(ØÆ}‹!n9ÀèUÁGÔt™áWµ|!å|?ÜòñmUá.“·é§¶A0)ã_sêô>³A·F‚Üð´s7•@ Ek†wˆ#îÐg÷GNÈ‹_sr9·2wbâ´k Ö +—Òä—a °ö*‘Òë¢gÈ_Ô'IÐH«n”íNŒcbñùÿƒF‡ˆ†´Ô‹–_­“{ „ CâxÜÞù¢ëm®a^¦lT/ÑmUºËß×: ™&S„S”®¤JS¶zÌÖçƒÆV 7¦ùœaqbýƒz-ï¯2Ô|£Jê¥æË­{øŸ–ˆ›‘ÒŽ!åÄ—;f@~æ(+ílø¥ûøt%v?'|½HpH¤3r)+þ‡Ñ»U!&ú•*‡¿òŸjYwÔGnPû¹ pPyyÌj¨¼>åð8˜V¿s×.'_ï˜a  Læ F,‡×É÷±éeÁsàôÅY°V7N­†!R‘\ÜAvgF¶a/%]¿¡$æ©+/Ë +LþÇ8D*û2Ò:ÛIŒ)úuqxaÖœë!¢q3y—Š?›2þ'#*YEî+LžÌ:O«žO|¢X:ñ?¬æ.ñî€÷1žB´ñ¥cÎlŠ/ZýCz9!Rz=ˆ²þ3²`e¤T@ÈàÕÃNÞ´¸0Ê ?2xœk쥖ٵŠh}Ï„#=ß'c~Cu’Ðøô «/üAXSä~$ +5r×»òqyÕò8[Љ†êMôXôÃzÍÉ–i´—ÏÂŽ@„à;8‡#Oªïô¾ä(\Æ™T¡ +™9| @%ä¸ÔÃVb#½>Oè‹«Uõ‡ÛhsDRÈtÎìsî¡¢767ì˜ô¦÷ì(Æ£ú·‚! 
·¢(Õš|5D +€ëZ;ã*b.ã è¿N'Üue˜N‚ŽLa%ò' »Î·V0š(2pŽΤißЦ~ä ú»œdc‚`@ç4ø ¯W“"èøÚöM:EgÈ0÷‚MÐì§’$«Cç™d¹sᬬÞlpüàôc»B†á¤ÚÃx¾%á%µS«ñ –r—%œômθÍ(ÅçIÞéôq9.ËÖì“ôŽñ亥͎➀TH{~ú/Õq™ˆ˜O¨(á^­4J0YšE¶XO*_(cª%²µÎ°”ÓÞKœÛ¸¨VL,mÛ¿yD÷oIq—CCv1‚À&~ª±¯¼®$;ßOŠÔ.0t™»‘6½Œ?Çù.V̰ÊÈRUœþçû,ÍúrÃÅk‰XóèöF’=“…Úû©†›Ì® Ær>V \½kŒm¤|I¯üS JpOÆx3eBb“ÔΦ§8 þïnó²¡Ã&58‡NÌ!J5øç1Æ^½Ôþ°[ú|+±pG œÉ“‡Oý©±g¾*ӥ ‚Æ«è 8!üëc´S m“«D&»‚ÕkÒS‹yO}ð->ÖÙR¹!Ó£¦¢öÆ Q¶#,Á8ª­½–#€&NdÓ4\Ê>\ÁÏq˜ù6ø6Œê§CÙ¦ìÏ"-$K¹´Jh}Nµéî ÒÜ“½~EUFH+îä"7’¤"…Q¤¹žš]ßM¯®P*@nuÎ* LÞKjði¶6”oÕIýsr¹gWìÚ^,cÃ-LAßÙäŒwïzìØê”JC‡2 §M¶»zkL9l%_°ÌæVÔd•—Š-ptÎÀMgÆvY÷nt&l¹¥cNñ÷rœÛt:Ò-6NY;èD™ øU®·Šu:4r¶âÇJg•'õ›UÜó(?“L ÔgXï`•Á¬Lx~§[|®— j0#^3µ˜ýŽUúûÝTÎüÐD»¨ ƒ‘µ[ÀŽTç° T õ‘ñ‚2Ô„T·2ÞRuª/ø¼dÂìУ¸–9Û;úøSª:OÅ«2>çæîü7R9×0ÒÂJ¡J–Ú6Ö6¨Þó=Vx&NÕ>ºzŠÝ”ÞÁnA)\ÆßVº0jhr®Xì½ZfæM."z{9û”¨î_JÐ gcã¿ûQìOrÀ)ôµåÄVÄ £Û‘ÉÁR@ »‚çé9'šD)²“Yó±¢dy¯Iä™UƒXÙ“©RiÀô|–¾‚ÐˬËëI;®³ÝñŒÈËÿZP-TîÁH`ô<í¬1{Œ’b¾FÀõ.j´²÷\è—ZöØ^‡þØäìfì«·Såä/IÕZÙÂν~xï3d‹ÿÀ=õó}†CTq{õüDQU…‚énÈ€Äñµ礑;çPb…¯RëGñ‚g]#äÛ jçno/áFø“/lÀ©3’?‰Öèí¯zü6®/þ]®úš2´íúPÄî¹›7öEÞ$Æ;u"ÅcgóEΚÊý¾þ/î‹qcúHŠŒYÅ×üVåÅL¨zç«2 +f{[Œ±0Bãàô\ykÈO‹1rÖ†b­Œž¿ü€a·ªð…øs_7ÛOª3_b{Ì«̵Z*Ï~ÇÝ;ˆVo…£†Ü'ŒÑÄוּ…Æ{\¾ŠKš(YÃS¦dOîðx™¢>—HsOZs1¬óûÚ둇@zÚwÀ=zÎmP³ivyDªß6³9ª´§ë žÛ¾æÀô¹ó5†~Aów£Ä7’'trÇÝDŠ§ì·ª3}Ò7Æ Ç>…û£ö=´ Ž›'¶Ž yi\K ÊsA&ŽD~Ê{›·Ã˜IaÂù®¾½ý²VEÂ)×›eúB|9®/ÙŽÆïó…´ß.n ÿ¬åþ1ŒƒÒ!˜ç+&[×}öŠ÷ž'b¢g/sž"±ŠœˆRM%å< +KW1fM[n¸D3„ÎR‹¾ÂæÏt…;“$W¾­˜ ƊЮ™ÃK(:ëPW¶¥·3a/²q÷tñ妳Nãu—fE€)8ͰkSŽNîVi-†è;ˆþžåìBÖC†œçﳼ›`:ª£*îÿN´P-%wœþe°)‰CRÂé¹ß§Âc+9‘¦¾¯+!Êo0 É”N$hŒk¹ (ŸcfÁlRX^‡õ ‡0ûÆ~!;€D–fjÚ m;4ú @¯r{*Ü©òºMºSAÁß®jV|dÿ¬Ê\–Î[!ÆÕ̰ٓ =ÐP7· ›—”ß«¨¼H ‘çÝÀ +G±H^.ÿh+î+*àlýñlP¢SÉ-Åo}¶Ý [(¥@¬<¤äLG¦w­7$Ä,¸ûÛÅ9ÕIt”Hß­Ã~–J’ŸPu€—yžÖh)L‚›öõ#û 9:Qæ!ü¸ò×gì¥$Ä„Vùy²¡5ÞüötMI2û¶pˆ¡¾™ÅÒ¤£Ñ¼F±°?Û„»U~'Ž€ë^-\åÉÑ»gœº¾Itcú1nœ!×ó’P%)l=퓈HÊ[µŽ º=C4#@[b_‘æWò’2ƒcÿ µ v*Ê8fú“;úíê$r­~ؤKéU5jÍ«%·†ÚJ8àþäâ½ ívΈ±ô9âü×åB(†\ ìÔYé"¨öí:ŽÖE¥…é½Êd…™¸_YJ)Ùõ2ÓÍ‚| ÍêlKÛË#c­<í%Sa&P”èôï¾m„¦KÊ÷’ñ|—NpòÛ×Â*°ýu?´ÁÚíóZé½$R”SÿyøÌ\6Þ?“ ´pO…(fØY¤®¶¿ÄéÚû<(†bóÅ*± 9à2Ç¢ä¹S3S –×LΫ㱸ߔ žÌß1sºpK©VÂVT\æoObêÆôG255¹' B1Û!V÷dçV¢ÁÚò.[7õ°½‘8ª¦––kNÒzÞ® s?¡(Ñ¿†]CÏrìÂH×e"ý†ÉH¨H%ŒZ|veˆI™É ¤´, 
҃ǬŽÏaÕ©Fô}–Àšü…‹ÈkØ­%tÒp€ v ×Ý»ìÍκŒÓzIoŤKJ0"û†¸ö?½æ<¦Í•èì¬ãp-Å ´‰´y{¡:MöüUÍæôÓ¯#`ªÙcô“)ç‹y)+MLECçÍâuÃà’„ÁµGÆ5Qh¸Ãú#£)&‡¬‹‚ˆ+†–è20&èÆŽéTzÒÏâ:„_O€9覅‡ÃøÀzî¦L3X[»ë—(v¬ Ak³ù`t>“knµHçÀ¼W³MU§€L›ý?FFâý·_¹âÖ¿j× Cç@{šb$ #Âb +½2äòMá…: +æ¶¡KžAQ‹àÕÕ©×i‡Ž©¬Tñš˜€÷ÅŠ¾½„1b¦¿ú­ŽöÖÝwgJ‰ ¸Ô¿(ý-_æÐ$œ…@Š.5È» º_øÃäGËòT `·1A:GPD®ŽÓÀŠuXS®¸xSÎÿ‰Œ÷‰ÀœíÔ:ð$Ý—›µYÒ_1?Hö—$ŒØ–‹ô§¹ÖôÉY]o8å™i:±C÷èh„`µ€-ˆe”©j(Wö`‘¾Þ¯RðųóøQ,”ö±ó wúÝÕ\ÆÚ= •ESd‚Ë1€ ½2@ü#$ƒ埣Òæ—9¥&Üc})k>Ü;’°•cm9àGDIrmEcÚ‡¬òáó-lu%z:˜A ‘,z |Y"•#çÆÕÃðD¦Õ““Â4Þ Ñm2²o|eæQ<û¼š2ª¹3Üf4t+Œ¨Öc»Ñ»´9¦É›Mu Ða^òd¬¨ôJk´£O›Nã6¯{Ö)‰Ì‰ˆ×Ó Q’”üo…ºBm:”R¬ˆš”‘Ðv•ÛD¨f3ó¿q¹‰¤îwyÂ}¤(q²Ðûy\sØ€úFBî6¥ºÂ}¨¢ªârÿ€.çŽ*vÑÉX?ÔïƒÜ®–%òóOÜHä(åøÈÛÊ‘ä<-6ÒUýçÉî@xñ£rbÂÍÓ«CÇéýw»ÜÉrj §ñDYä~—Üø½ ÞçE’ʉýíFŠßîúޤWwÓæ)ügÃýž|…Í ë|t‚†ÛÆ€êŞ׎ï®ü߯]òä)ÍŒƒØáŸft%sih2¯ª#G£ÁñÍ¥„"Ì€/œÅ`Ad5#¸Æ£¿´a.õé¥e§ûb¬©ò ^ö&XÓ^MÛµëŒÄ@s¾å·¶ªÝTýCtnS ˜"véâ5Ó²†P`ÃÑ +c51Ù;^ezy—ßk þ7Y©AÓr¤Ô¨™¯à%—÷*P }1°ÝDrÚ©ú˵¨°ÍÌÑ4€Ô61NÕ'ÃT¯«qò_‹8zb«þù(D]•‡?•ѺTjpá÷¼”ήƒ´^ÏÀ†<ˆfœ‘«‡ eÏà$]·{¥Î»í·i˜>„k•AÞ†‰Q¾-9Ï 7ÚÿÙÿß¹F"àùqMü½öŸyÕ¯$uä_:!ý¶KWØïá”f +zùr©=™2&¤4ýª")îÀËm † Ïòê÷± Kë^2‰ x|“WÙˆèOß|r›rXÇbÒc=ù™Ô;JKŠWó-] U bÞCV™§%7îIÒ»ï*Ú±=Õ:îV@Ñî¿zW®ñáÜ2±ªåã!”¶U]ø‰€”ýÖ<À Z«[àZIuó_KþŽ}(6WâÁŽO‘3ÕŸnK’^TÄ›ÃLîk½ u +AÌø2C0ˆ¯5§Œƒñ¸ûoÔ]}‰I(&*㤠½;Ã@ar½’§×@ ž\-@óˆ…Ô|†5J¾ÏZ¬¬Ò¿ÖTÛÆâ¼ .áç1åT–€"aõJk:‘¹bÄ–D»| +endstream +endobj +2319 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [329.163 275.278 335.44 288.868] -/A << /S /GoTo /D (Hfootnote.5) >> +/Length1 1620 +/Length2 18334 +/Length3 0 +/Length 19954 >> -% 1908 0 obj +stream +%!PS-AdobeFont-1.0: URWPalladioL-Ital 1.05 +%%CreationDate: Wed Dec 22 1999 +% Copyright (URW)++,Copyright 1999 by (URW)++ Design & Development +% (URW)++,Copyright 1999 by (URW)++ Design & Development +% See the file COPYING (GNU General Public License) for license conditions. 
+% As a special exception, permission is granted to include this font +% program in a Postscript or PDF file that consists of a document that +% contains text to be displayed or printed using this font, regardless +% of the conditions or license applying to the document itself. +12 dict begin +/FontInfo 10 dict dup begin +/version (1.05) readonly def +/Notice ((URW)++,Copyright 1999 by (URW)++ Design & Development. See the file COPYING (GNU General Public License) for license conditions. As a special exception, permission is granted to include this font program in a Postscript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) readonly def +/Copyright (Copyright (URW)++,Copyright 1999 by (URW)++ Design & Development) readonly def +/FullName (URW Palladio L Italic) readonly def +/FamilyName (URW Palladio L) readonly def +/Weight (Regular) readonly def +/ItalicAngle -9.5 def +/isFixedPitch false def +/UnderlinePosition -100 def +/UnderlineThickness 50 def +end readonly def +/FontName /ZZXCQL+URWPalladioL-Ital def +/PaintType 0 def +/WMode 0 def +/FontBBox {-170 -305 1010 941} readonly def +/FontType 1 def +/FontMatrix [0.001 0.0 0.0 0.001 0.0 0.0] readonly def +/Encoding StandardEncoding def +currentdict end +currentfile eexec +ÙÖoc;„j˜›™t°ŸÆÌD[ÂÀ1Æ…p§³T¤¢€®o¿˜ˆà9«`ü¯….´Î:þ¹yÕêpýäJ*åÈÀl'¿–eî¡}#)Á¢Ý&»7+å‚/^§ ™ëWŒ{ïÔLßZ60VåáÌQR^¦üλ3rï÷)€#v¢€$öp~c—²¼´°£êë&“è÷ö'ÄÉÇÆÅ¿ñÁä+ž %;a~•ê!”ty`rô³cþúv7qÎ ú¾˜hˆü‹¬Kê[z‚ñSß=¸E+öÃGQðF¦–½Š¸ë’WVMüì¹cY*ÚK/i¬¡ãËi;™t—¬EÆW¼(Ð$Ýôôe€‰oLD™œ)å žàA¢“…t[Xä7‰¶þÈ_ƒUæ*D ¡Xõ½Q“ ôYrôvB>¾Œ:—#usò¦€ +Œ·«”(†HõÓ-àk£ÿx¤;u àÇmâCõwWUåÆ/Ûç3kñªè2o{–{ºˆðñ€]Qå”`†^@Ï0Zé¹>1í£°` waÆÕÏ)&]þÛÄûÞu[x߯&‘?¤N~ &?VDäµ€P·(³».y-aîS¸lV„î˜6ˆÉÝ?!ˆ¯–ÒÛbŒ‚PÍGŽÝ°uµT†åN­×ž‰ q*«ÏÛeü¼­ö<*VΤ‹niå`½Õ™ˆoAò0¬U |¿)P]Å_"dÝÂN`,g¡ˆ´ù +U@?­ÒÃéúà$¹÷r8ú~L*vV™ö)‹U'޲ov7tJÍ\Àá{ù ›$} ë2ÓØäKáö(¸¨„”µ¤Üg>šç¯[/ˆéiÀÇ­2Q·Ý“ Ì$íBÇÌU¶×?–ëXJ·G(1‡ÈÏ|ÊÑïY²ë 
=Y¨'¢Î?»õ‡U‡âýâ¯éáZÒœeß¼çvôñàÆ·¸g¡ú¦ä +ò q!K:ÔF¶Q¸ðÎ&>)¦RÙº¸@×!ø·÷åbëáÝZDæÁÛW%aÔŸ1IŽV¾<'Ú+DøÕ-8¢2óuDª¬¨Ú¨l:rigº¾yä"—”„üSÄs§Áâ­«Øø€¥M§:8_šøns1$l}–l# aJ¯4<ÿàsëæ¾i ºyÅK‹‰Fâ}.äèí&òk¶‚þ&³ÌE¾›_<ÌÝ— +Ñç û¥ ëâ¾(ýŽb|mÖ¾p:Îøle*™ô¦DÑx~Üz-ø…ƒ/ÏÉê¥ep*|1<¸yi°·ËÓU1dرî-MJņuP̳¶^&¼–‰•>h ­˜¤ü·V`Â9ý6IÔ5à†¢U0OÞAb@½9û& Âé8œ+‹;™Ž-eÑK皆ëk8c©"ãxñÌ~^ú.Ž•‰ìoõVÍM¸ßðÙ^S7 ÷þl"åC¹¾ô- +Ã(u3‹[Mw´íLÕµ¼›Y§"~;ìW„Ç@çMíuùzOýu£-ƒ4©0òëR`‡&s×. +gmªº,†¢çÒÓណc´Ác“:ÿšþñlavw®ùs#€_ÑÍÝPôdqíøg³Ðž—a:¥|xB*;óó sÈß–ßœå‹lí/¸YÇRŠïò˜¦Çoëï†Ub\ã}ŠÇQË$ÆÀÔ›¦ct„[ž‰Ø¬åXtéúK‡`ìn[’ÇÛÀú†Pkjb»v™×ý5Âû„Wc®×.(ЏŸJ½oú tkN÷ +“âh#ø Ôi­lh^¸§u(Ùè%b—.ðnë›Í;v«Ì=熵‰%¾d¼Õµ uAÓMH¡o./ˆŠ7”xwy÷i”CåoQ.<èÈV¼ÓSQy|Nžìr ¹<éjŒN¤…f=IƒÔ†(ªÝ…ûúÍpóPÙÒ5úéÜ$4Ÿ)aG„Ã3¿LMM•$6`ë¡V°vva®Ùˆ®YXhhyü½ˆ¿nrþª!ÅY¹E¢"úñÕ*Î Ïwþe«Íª0‚Xü¯­k‚¤*¨³ré:îáznmM(]bAd‹²MüdºŠšr‰zT¡M‚ʇ›úd?wö †—ר/É>O%“ÉD¬"ɪWý†Œ2"½ÞR:±T×½•Ý,aÏ´ã†{ÝðK¼ï ’ÃïÇ£½ÎÈõëWÏ’ƒ<Ó9ºU»ÓP|.,©QK„Ügj.Þ¶¯¯ôé#‹pÚË“¯ëÜõvTîÇóŠòšÅà»* ÷º]@•ƒ§{Ü‚­§±ÐsÍâø0³Ç™ªªá½Ñ0*qž ˜b¢{ÝJíh 4g¹)MÂ?$Çñw5 Icëq àŒ–+¢¹5]¡À¨4‰7ÿÍÏ«ë'{ul)ÉR¹Ì\ Ò + €ïwÏ-ÀKÁÉÈ¿Át¢Ñ¢h›ý«£{”YT QõHÓÝAq` ÈDÇØØ€Û¹t¢ËC]²úSqUnåúì&ˆc”»þк+â+B™©Zß?€óÄ2ê3Á»¤#FHsP;ˆ}}øAÒd¡ƒ6`0(•DŠ ²ñQ/¾\T݇çÞâ”B}Ã^ñ‹¼5¯²½áå¿–%®>J›£8ÎZap̧™Ö‰/ ðz\6×’µ1s[ë:¾ËÿÞ\óeg‰JWÒÁM_BФH9sð3äxºôž>B^ÏÖ5$OJ@wþ²5Š­°@!Gfþ_o¦N"L?"„N¼Æþ]ͺ¬ËXòÈ£×ç ú°½SP†ÑyRîZœ„©O—¿ðìó%®÷½ÄùÔJÀŒ Þû*¾÷ÿ¬ûgS³4é§D Qk~÷&ÎøÎˆÇÛ•öÄG6¤Ô(®îÜFÖÉÃ9RIQ”eŽêsÁEú+{¬;‹±>V.PòK,¸Ø{üÌ…±ùwß9Ó"dÉ9G#äŠtòÇ/~h>Q¼MV±[4 ,¤0¸k? ½Pß#3Ÿ^ý¢‡›÷@q 5ž²¡^Þ`dš¤µIŽ×ªŒ‘“aÙ%tQ)û¯¦z|弆H%8½Oùø£‹Å@„@s©Ìb0¾‹CqrÀœÇ½v±Æý3©›¤žåâ® [‚¯zPÒU}E@›d(´Ëêðg®t£òÈ}ψ¹Õ1“œ©l囓–<¼î2+XßhyÙ¹À߬á¨_öäl”ÕQ>RV­f5^æ³Æå¥òCŸgéãáÝ‘–F Éüé)çÅ$/ ^“¼†¹a¸¨R?ËÈýCZÕBCý¬ÆÙ•ÆM-6«3MþP÷+P„—F/§Ä?„í}qöGÕ×7';˦‘ÅBP_;ÕÒ–âÎó€j9 U^³­z@Šg¼1Üã/uýÕÉÙÝ×¼M3¢ðB³÷ÒÈwíZúÁÅk†v2c}ìàZ4¼û&èjöHÅÙ-Þo œèšúš7…Xdý(xV‰+VydÃðáô°fÊ!øN –f`âí¦ˆµiN;³AsºøŽº}÷¿P“§Ì¥·ŒþÌ7¡Áç$¼Ùoú›¡ì>¯ÚyÙž +„ó¿Ì_Z¢ý|±M¹ååm„®¶lÚO½•`l£UÐ.‘Rt   +ª<öq-á]•î’³F’bÓp*½ñþYW«°ýš/ØXþ1N5]²Q»Ë6`Þ:fjO½·»s¬s±R ÐT‰þ®òó$rzìYQð÷®k¤ŸÐ3š«k¥ŒrAðï ÆÉjÆ€?+Ài¯Þ粃z|8AÝùù©ŸiRG$^5'æ +¸½zv]®4ž‘./&ï.¬7éós}šc9ûv½woûäJ ‡ PÕÀŽBÂ~srD[ø[sf µ|‰³ÏöÈ^†ä(3¤BÛÕPôÓ¨_루±šJ¹sMŸ·˜*Sím)i'~ŸÖÇv¥Ž"L;fAn}L]²P¸ç€á·. 
áÎͽÈaÝ$û³„52–lò—ÄŒ—À*!UMÃjF²©¡•þ[5Ð+ïÞøìõy®ŽÖ¾H…4:Õ´1MÊ Ô¬3ë*Ê®<ÒÖ†;õÍUNÔð¼Ú :BvÎÎÃÍ cÿŸ[ðǺbÞ4žì,BfDÜO›öinqO"±nБ8¼‘û†' ŠúÍ͸#‰ut®é³C˜"–ÑìÑ®Ÿ;·W‹÷}—­êŠâÚ3ç™q¡ÂY£¡É>|l5Lz`×\Š*þ·e¦åÉílÍr-Oƒ•’ÿŒVÕ}ä‰r„²ÿùªMÞ6‘s~Ǿȱ–¬¦;_ЮAWÈÚEÆÎXà{^%¦M<Îi{ -AŸ°‚x“ÝQ~›¬`;K¶GáÐvw½u[~¥eH211ÍiþJ!és‚ÂÃ/»ÿë‡ Ö­gÞ¡dŸTD+`F.ù dZÌÉ[hÈ?Êß;ÄPY,GÔƒnÌHÌ®œà¿d‘„€”lžÑHœ+ϧ"&jý92{L%&· ¡9'øU¢*ZŸ +ökÇ×AcíZÒø2£éþ©²'½Œ|àYƼ™x‰@ølIlL$aocúø1±PécÊlRÃFëïHo]ŒYž÷4J8¸Ã5leë¶‘bE¨â”ÊìÖárqSžTÇ×—&£0ºæØõ>W‘ ê…¼i~SÜâM 3k®žïo³R‘€{Sûß78}Œ¿¦ 46°æãt@[¶î[VíÚ ‘uÕjÏSé…¬nkft~Š`îb\°5µ^û¢VãåvÕKý ˜#F§j`èyÑ–Vç4Ÿåx&ðþ¹—$–ô6K”½¡_wQïÛЋ#ú£ š±0¡ÓŠ ììŒQ¯ ®CÞ:;C~L€ò•"&Š-¿ÖžÙ›ËpîÁ3(óý©á8’#«›TOÉԱƨ1©‚$©(ª5<¹ºjÏXè¢u‘øòä<£×§;å:ÒâHÏ£Ÿ‘ÑXRàÊ}<¼B%-«¶mÓƒ;jöçr>OŽqñ¶iÞ÷—×àCǪ{MèñÐ\FœÈYjìæîmªEzÝøÒ,©àïØwaàcÚ4Õ3­HŠåŸRap÷5ÎÄ@®ó«në¶ÄPÄj®D:•WŒ£žª-RyŸB»òÏä|Óëv`ü';gõ­>¡åÌU CaðÖ¸{ÿÅNßø¬°Yœ*Îõv$—’O­,\êa”`Éær@¬Æú?`^ì +‚íÎX?×:^Ø ¶ƒPlnÜ’Û†L®ï#C(ÛS¡»†2ªrjЪº8p/×NfÏbÊ™ä ˜OÛŠºaÿ…º …Ø£EùãÚsmNƒ‰9»íéÍõ{û¤ˆJâr¼yîw”‚(’™r#¥ẵy”‘ ¼þZÿˆºÐ´BÁ(vÌ›„ipyêŠþ–d³Þ ÑöÜÐàt'o†½ ÒO½¶­Š>•kX6›ýÙò@¸S*‘%Â!°ç  |C”+–JæCáBU¹5bJ¶Žƒg™®QùSœ=k©´5³.¥u”\-ù?_aËC®.¯FwÆ“'NÒÅÊÐÁâ®Ã7üLKBL{9Ù—2õºäúÊêÙ9Ov_¨LÈÒú´WÚÓÏ ¼V¾±†%~—¡IiƒOkTÅŠ))•«Ê‹OÔçzzåꄜ=õ˜“Ë«ÄßÇìà° ínCüÊ¥glÞ ‹|tÉO·÷GdV¤‘Ú?HÞ ™‡Îè†Do¨)Áü4ü"ÐÅVQxО~^ Ku^§¢ŠÐ‚I³ nÎJŽÒGÝÓqÉ™‡V{Ò]jéhöPÕù¹ÚúÀu½qï‚7«Ç§æ³ˆ/t¤JCŽvr`ä ®Áé-8§ŸT3Æœ;.(òKêàÿÍ.,ìXß³z`[²-©m¬S® +’°¢ÖÃ{¯â† oKÊ×àÔMƒ¶¿cheÕÂ!WS©ø²6ØDVh˜“Å ƒpáÝd +«YÚ9ÂëqG»Ë2¼¶T~³²í`ø³‹’@ºè@æïwßðÑå¹ïŒ¥‹³˜AI!ã÷ÍùÆ_”è‡Áèé,þnœ¯ó.4@@Ûxn) tùú(ü™‹ºÐ$Ä´ÙõO¢R|ŸØI4IŒ±Ê)M¦ëذN†àÿcîi»ÿ1›j{œýÅÎY-Œ““bׇ]E‚à©¶ÖPO³¬F^|\»:OGj l“™I’Å&Þ éWù1™\ƒê±hL¦7BMŘSíô<•b?™×&½q­: tËÖ*ùŽ­üõh¿f¤;:¸×ãšK³:°|ˆuÞ}–&¯;'ãcRøÜšö½¤,z‚á{‰AÌÞJ<æÉ!¾~ïžÌ"µ×´èé¡•ïÇZln÷ó¦è¬)5ÜoÔÒ å†“‰~ƒÅB¤\Ùߨ“dFàÈ>>W$D9Nß/.Ð +Ñ^„l…ä½'Ùüå $§Ò·­<ÊXˆ¦µ“x•qÖˆL7Ç%©Î]rOW¶ýy=?,tº¦âÉôÐITê?ØP¹8ì +8†ÃŸ«R µlëv^9a’O¤hÔX$kfØ“yû[n5¢ÉÑ EvÍ;rÜáÿTn¬ ù¸ÔÖr,N¥|×Ûl‹Ú‘®²4Ã$†xR6̲øˆ2Ê<¤0¡vY=ªôµ€ {:þ€Gƒ€N8ó¶ꄉïÑûü’Åî7ƒ «A[KV6ñªš¤ºðÞüEc_ŸCÌ,±*Ï_}&Àæd*Gþ +ÍÀÈr߈ÑÂêˆ7Ryšž¡[£þ ¥\vèë ÷Ò f–ý8=ÓÃÄUÉÒñc-{¼E¼í@ÞeYß|Ý·.;jž¼ë +”ÌNh.¶rÙ‰RCÒÁeŒÃI™ùyd —¥Š®`xÄ66»'П1aÀÝÝóï«£êãœ*«]¼kM"OÀ½´äõ͆ӊRóI#çÆ·?åÕÁzB¢–ýr4ÄHrhB +s˜J 
®ô#Ê{SoH–©œˆ·’@‹æ‰.^€´êW0õ‘Þˆ£½xìKÊP~Áö†¨'¾ +ïˆ/J’ÁOô‚´¾ÂÞÖ3ÄîV˜J20Û­äÐ8ïqg|N¹Œì r¨Š~íµù‹ÕTU­1TtÜ:gì +}©%vH‚ â­ó.~žµkÎæ2­¶)w0÷?üã:î³m3C#}†‚#vô…£”¼àxCK{é϶¡¹e›g¿‹(Ì‚Ré€Ú©ŒúêliëT˜†ñ×ötýN‹eRr2þo€¦ˆo¹khZß+ô+¬¤d§q‹6^e‡¾r=~ììˆDÎs™¡Heà’–Î肺V*`-´÷ÅѪ8þwÔâ\yP”®òeÂþ½ŽõÜ\,±bšÏö´J‡}<\ô³×gÄT*FÍ •ƒ–d¸¬´Bܾ˜À«tk>j›|®‘UÆ¥³º±™â“í`!e,±×)ó_¢…i 8HÞë¡×`& K ‘U×™~À,y¨Ó”«¡³ÿjœH´¢ÙÌ»¸u?æˆ@V>}ûµ*ïj¶tI§ô™aœ'õ§ËÆO’§ Ti'6ó胷5XˆúÖ»{ñô–wÿÀ“;Kí¼XR>ÿ{NÉ ßdœ&ù`ŸÓú}«´/ô÷uŸ88)-嵨û$Mã²ÆÈhUr÷*Ÿ¸^ ¨^0ùèiÿMàòdNCfìôØ–O³ þ—Á'âÁå¤EhOJ'qeQ*´ø¦Ÿàdq¢Ä š¹ÄËå©1ðØv‰Cr¸MѸØ1q ¤KÙXz¾£Û™Qî”Ü_Z±¦ËáîD˜·ø®¿h¦iL$VgÄmW†=lv3ï§7Ï@ÅñpíܳµQŽ(ÄiRY”Hêw˜,ÞxP1UЫðy|ÿVvçzYÖút¢øpÆ<ªXΟË&j›‡ÆÉ:¬G´Ô­1ç±¹²—ÛÚZ*u4pE¥®FÜ>§xkùyÁ’(ð ñãŒBcxôS+á”ÅÖLRð`P•ŒÕ”¯øÅŸšsˆ…Ò  R]°j<6œ­hbëIÿŽ«=ÙA©*ÅzÿªÞ¹•o’ §žøùnu|œ¼³„w«[Z%Ê4W8~ó8yÓÂå +UÔ„N*¢òÆÞs]ì’5[ x+~Vþ?*wŽÊ¢A¤\ÆfG¥iËö¸}`ÒOC[¡– óGh_—,Ý~m>ì2°Ü›¨&*EÄLÃÚñÚ‘¨»«Ýõbüg–F‡·n-N¿:üœ½}òÐF¹ª'º¦ñ|3¥²…Ä¡}¯&ˆ×ÀwÒÉ쬻;£8¦@8lP‹êúLgå#Ftëö¥Ô6™¬ek…˜/úÖâ ã–']Ô»2Ð>e'ÐTúë|A …ñ ®tE¹P‚áûF¹Âßyy&•f¿œšnT燥D' £ùŽ»csmJh–¿1hfJš“)T°vèWZËðˆª¯™m–jå%8¼ÖÏfåtN+ +îùY`Ñ™%N#>wÄ,ÞP!M¨o„£ìR™š…ZÏÌk_¢waн›‚‹Ž]ÞuÖq:·ªpÃtbLºFžÿR¹žlMá=ݪ†. 
û\ xŽWÒú´WÚÓÏ$õâä,+³ÄÆv³»“hc'h˜‹­»Wr·åã˜(U)u‰'Õ­f5ÒA -0f\dƒÅ/'·£w H¾AP˕¾ѧÀŽÍÒÑ-À¼]òC?]å·ÿóØ]€;á†Þ°)š×¤±Ú ;“¼Ëp¡Ñ'6Þ, U¿Q³Ñá…‘‘m_<‚T,^4¼K»³K—% +k Ù¡4hd†&ØoíÓ¿mÞl3‡YCMÐãûA±êM$ nZüÆ.+}sm°ó%Ê&0áyð¾» ž¹Žæ|Ø=öª™Ž×UE+õ#ÅsÓÿ6Ñšµi­ûûŒ˜ô:ZE–³Š•g˜}µþn7ë Ð 0}júÁyô„ªö o¸~ Ъ²§x`|`&‡”3¤Ê#ÊáßÔd¢fባùÐïÉ‹©‹%³e’Á‹®ãLø¨‡+qêºÅþAÕcG•F£ƒ=PñÇä#ÁqÔXSžf\L²³Í§ðKÒÜ2CÅá~e‰I”ÿŽÖgn‘¦¼Ê¤I¬Ì<-ÖÚEë#…:® Jž‹ÑTEÀØâÝÍgí£3Ø9M͸ rFÀ×öÂß M°îÐÑ0aßäÀ%Æ4óü–©Y3Ä;ߘ²%K49u`ÇŸ®Í\?t~8˜q%®Ü>‰þq'"¬î©±ûõy9â˜"áz@r#¶ÎDñó¯þa D¿Â%©u‹!i¡í䰪Ϭ¹(#ò’8v`Äg8&q³*rÜž–X•Ûe…iSA/O\v‘y êó>ÇÊZFþSDr)zܽ4WòÝät¼ ïLM8œ?N*Ó˜3%¢`3q||½Fv¥¡9Ž¡ NU G/Ða ®"C,Ž éËÓ÷þ¥¬~ë7|xÈ3à+M¦*P|·ØžÂOk ™x…meþ6DvMS]±PQ·Á–[UDm°ná4:Œˆ!Ô L¹èÉW_Œ$XE +í+Yªm\$ÍùùÍýàˆ1å+p~N"M EXY-u;ž'FG¿½öNlÊÁ‚Ô£Úw)a°9ºøƒ=à¨í’p³óÓ›aFÜÎ/˜76TÅ~~¿àxü@™Óžíø&ECQ¡¶ãh3 !aë™Ç õŸú~šÅŽT`ò·7º}|Sì+·8þ+^#IFÏ}¨-’…t fâK’®»æüS-zj9ÌÑáKsÚ»5Ù4‰%¢Õ؈eek¤€LD´+‹ÏS’mf|Ë@Œ‡“?v˜¿ëÓ(:}0ý3¦ÝUu@g)Æj¨u_É»SL™ ›CG~e$©¼ø«îw+ø8„6‚¦åóáµézzGÆÌ®›=×úz ~`"éÕ (»þG¸.ºÿ½mH67'Và ×”HOB‡±V¯j{£TèL–ý<äx7,Ãöæ’bªXÀd9è¨Ýn&Tx˜øï˦1>=?MêãIâ÷–Ð>H™€(«wc¸Ð› y:º³ÊÒÆÖàB…†tbì4ÂJ×Ã)"µß×ôb8Þ㎌ùüýuÑáÇ?te(¸ +¯‘¨Ékw~qÖûŠ\vºFyÊÐôût°>^œX|~QS¬=.W©Pœ§óUšbW´¨r»ån ý~ÔÐÆšËæË2'Ø’q§»©í\È宨çŽ@o>,B’¸O¶Ûù0?í K`”P†… G+Žü`þˆÔî(š‹¬W…(;²û2]zöv3géIƒ÷é%ò’:}U!ã_Gi!?~¦/ë² ŸEs¬‰véÝþ$»ö*[¬nROægh•ur•àöåÝNh1½°¹\RÍõ®Öàó#§ôµL´(9¦ªQ ˆÀ»¬ù敪@%˜¨7´Ÿ8;3éÿ`7R~—ÎaÁ + ¢ƒ5š|Ò=6>þàGj¤ÔÖô|u­ƒÉýñ¿¶OP] 2ŽÓ3š‰§Œ Œ¹’Pä(áùÉÙYùŽ +Ã㎷ PÚÈHCìvoê†w%$çš¶ÅË+bÚ¡k[ÓÚ•fUºÀ%¤ó™‚óWãd]mÊJÝ|ºsP" |ŠjSøFå˜ùEìed~/‰Ÿ] 1ÔFv-OµKOš%jhRfêÐhamCP1k%®Š»Õ õ==m^UM5oŽohÞ`^O » W¡ªn‹DKÖ^¼„, FaÊŸ-€†T6¸7rÑlûfl £‰$¸Mš ~ê‹Õ~0ŒZxÜ_÷ëLOúUÇOE½#=馻sü‚Pe]Œ*u;Òö‰ñsJ)`­äWÄŸÁÚÕM{JÍ}7ðÃá±K0TÍ=à=g&IˆŒêðÉ^è/÷T6È ]Ã4ËX BÌ4TH‚V×pÈ útÈY-Ú²—ˆ—G†˜”JíM>‹äHÔNM¡4>4¤òCñµdi·¦…ŠëÿØ[{^ú}‡å“8š?CeÝ.ƼÖWαúR&7õu +?LUÜÌÄ><‚§ É…ÿv»2·mæÄÔo„ŽD¤9=Ž9Ú²à5Ü@øA`<çÙÍ:ä¸6=¦–[ë@¶–|Q8k T…‰"‹mm,™Ã’"Y¤sd2¢Ž%’Ÿ- aG›¹èö—ú…DÜ9zŰÿÒØA8=½¦ˆéχÀS¡ô/$?ÄP-–úZð )Ý-<ëÝ&PýÔêȾ¸7E Þüæ\=Ò_Gè¨(ßÎ"1*Téä^ÃøÆpy_v‘ +»ßc8bª ¬ó–E™EøÏ³¹YÍt™ h¡.bž4 ýe×d´C!2Òðõi’ç‘É;SÚ¿†‹€óžæ©¢<à{¥OÚñÐx~¼þÝ8›?ŸÏL¤7 kþÊV³úZÏ%ÂÄ쉠t/¬Äý'™Y—õrG›§Áø/…’̹ ®Ý+]Y9u¢\ú2œ +Vø¦™œ‚jiÈTÊÑç–âɪ‡PhäÂQ˜¯ÄxÕꮉÂBÉ;³ 
î¦PIŒ[Ÿ~×$”ˆ³Ò~­ySgQ•_O/oìOÌ!ÇøåI7ŠRA­ÊÅÊv/èqëNm΋€-Í‹ÊèçìULV/e‹ªÚßJê ÌÈw ^M*ìóÚ,{1Þ7…ðˆ+SMÙfß’RÒ8!™E‹ëÄç_/<CM‹lNª¸]ß‘==HË(Rù‘Ò¤±ÙE–e9£8jBgNëdXS’ŒL @ð¬bïp_FýŠ +»{¸a†¥6BÈã}$XTÓÌ#ˆGM‚2H²áw×RN÷À½.Ž}IMøª¼ôG^\[X­sÅRœlž !)c€¿<(¼@—ê3¹gÆeé‡Ì +s²Ï0ÿ¥]uZ味Äiד6ç`â­ êЙ#’ž®EÓNûŒ' çQL¥­VüSƒXÝú¼+ õM† o¦:féõÊQxiHþÕAJTa>>jFÉ·±’ìË;piÈsædEHk®®FÌÊÕ†y²Œé|œ¯¬ExN[}ñýèXQUâqB{Æ(×wø…Ó½ä9œFÁBoO÷*„„ɉ㼗1 Ûy«~MœÈ¤Ê™±á;Ôu€ÖÖrgøßðêi㨒Až 9ÎNx‹æÐè›§ÔQ-U˨1ožhذi N™?¾K–—¯Ãçkâß/ sAMº2Gñßiºˆ ¢»\^‚ùhçàåÓ爇/ ‘'Åq&Ââ3hÅÎpð~hÒ¤‚.8iD ­7üq ”t©UJ; fh^‰ÿ㫺\…Óë4ëHI86V¡Ak«¿j“©d€ àhÇF^mÍ“:‘{âÜ`¡0#6ßKz«é¶>‹o° +¾ëm˜#Lgš¡Îì8&–s©hõÇ<%Á¯¿¸AÛžãÙqf“e|óœ3©øËTQ  þKÑ ‚·ÛÜÿIùÔªõ¢3ÅS,HU.Î9±(Ì´&éÄ)J7<Ô0µÞ=™0••øÐÓ0ó{IZ™ð}ü2ưð¼ Læ–)—/Fç}bSSøÁÄñëŠÊâ³Jþ6§ŒO¨Ý‹"X5ž¼'ž³øúÖ6v¯¸.ÓIöî&g-}ð`oéxcˆÖf1‰h*†6®3æ|Ç7B¡¬Ñ¤›íÒcýô4:—~Â^ElÀbÓ.h úÊu|µP5œ¶—ýÔLF2îàÄO¡\ë,”N³óNÎþríÊ)¥–º,ÎÁ£Ëu7ȵ`­Á¢ç}ž¯Ý îôÞÀôjwÁ,d´ƒTáÍúª5ÃyBÉÄ„ªt„þBkRàéy$ç³Î³aûû¤+=e’2,y$=üH‡Ò„=°I”Ja‡–IT"¤˜kjê[~ÍàP©Ó×9Ap3ŒDPöÖcÞˆîÂ+³b¸kM¤ã‘¬ç¢Qè&Ç‚ŽM­(Ç$3ú IµvÃóÔÉýSx‘ÌO6ŽÝ°V/³ÑÃ’Ÿ®‰Øïp2wÉCUÝÃñ¼ÀŸD¹û#>7ÓQçõ8•Þºu{êEú1éÂü QEt¶r:{ +[³^¿$`xÒ¦‹ U6åVJ¨OB eê«KÈÎ+xš7s ’·™‚(pó;ªÑx/ÕÂŽ©’1LôÎm]Vkk9ìçaD½áîÁŒJý¹?ú¦A§Êôêg‘–ñÑ §ÇÊA÷¬àÖÃr•$KHDUg{ìhÂó^‘§Ø{)ÏÐT¿îu†äX{+PWfj_i™O¨è 1võ&cr„HcþFÌŸ–úƒ©Ç—{¤˜Ö¼¼øZå$ùæ*·¢ue¶ØÀüjNIÚ¯ÍÁ–Q{pöÏ;Ç—î&ªþeck†‹•ñÊo +ÇÉM>ó +ÎÇ7›1MV(ÐëIJ–‡ã©$+íóºH9 ÀÓ¿ê3š¸™5¯7­i«DD ¹·E`ŸT¬êæîü]™:S¬4Âq <âÛZž ôiVÙ~•ÖéÙ«±@›÷9$#CÝÎ2ŠØûhKÅÈWà'ë¦Æ7[7l¨¹§"ÑÍzOÒ§‡£a,£”rªøÈ‹Éˆ}Y•¡ÏñõßäMÑhK½ƒO[“+-/Q+f—åùäÎsÆß¾/TûdGD&ãÏpˆ'SRLKa3&A…“¹óû^àj<¹Ö×N<·ì›¢¥ö"’†ZãÞ°°ZWFÂ(ÇI÷éZ¢;ƒ{ÇÏSatÇ”Ò05ùnú?럘ÙüdK_ÁžÕ}ÆLÒè\[ŸrŸ’Vöp›”m;ØIE½png… K¯C³Ö:˜@»€_Ã(^†i\9¦â/QlÑc0¸[æÍ­–;Z  H.ý\îåMD ‹0/ËOQrW-äh\"]àéöÃÅè2Ú‰ÙµLã­ÐMfVÒY0ØŠÒ£Æ1K²hÀ#íé œû(ª\£[êH‘ã.e²C?Ç&vî®m^Éâ<ŠíO,)ea°0b ŽúÔ¨3X,6G¤»WØxClyoEÃ3ü2“?ý)¡Fv ýP¦pþ^@(®mŒç¬ièo‰Áåí¥Ì'†²kLEÈÎm¤h7Xh÷ßÒ“=ñÛäÿ³àô¶Å+Ú'2LÁJX&Ü 7 éúsš.dC½BˆRÒ>‰û(ª]ˆû"'³Ð«·òrwp>Uˆ&2ÎîSU½s¸-錛üÕRe‘O›{íÆàÓF…=§0õD'™Ý|3Ï 9äÝiÌ‹ÿšòèö²‘%Mö_Ó{¡Pè‚]½)Þ¨‚ wz Žá7޽ŽYôáýnX†.Ÿ‰_¥/L))AÝÜÈ\y5–î»2//”œžf‘×Ý×!?Þ–¾G‰,±âHÄËkx(LC× Ù;·°†‡äÇ·žô‡!惽Ƚýßý—§œáó1‚bžcËq¸·yrÕ°ïpò£Æ ²7ÒæoÒ†×}vNÿú'ðéI)i§Ñ®f³´æá¾ÿL‡l_ˆ€ Ø^ –Ì+AâûXTžüÐD 
«Å39èÍñÙá»ÍšôŽiÖŒ© .§Wƒ",ØO5RúÐ+óœ^“º©ß :m¿u%–º*>övŠº œÈä¾–ÄHÑû‚Ýó ´o©€XªhìM0G¿ÉÀòó¹m^(#‹oÛoPâžn£É›Î;jžlÅ*DÇÅ“8aSb¬É2þF¨\€õO8ˆ)c K5ÈJ5$ðs÷ìS×¾ TmJU’¦9mrŸ[ühBZ5›;=„Y¡‹0mZï85oDicQÁoñe%r4}ô¤\ +µŒgé3ŸPû5x0æÎñRÖ˜Õ}¬6ãM·}æ(Blã“YÁá×–«Ëý+Í ÞBv›ÏD×ÒKgŸ?îvdÆp¹“a¯NA¢³ªO‘ä+rÚ Z@ˆ÷¨¦Ÿ4@¸ª¼b›êWÒ¥E!“Ü´¼ó+•"”Z‡,ù4÷ôÒ„]{9N:­ÒðŒ©„A½IèyQ¿"’„>ÿ S¤ÿ~GMvf™è/øé$/>ÉçùFù÷òÛí:Ï&+J"n:~˸ËS³cf9ƒMÕ…™¯ŸÛ̈œA¼™Q²ÈR|;§gÍ¿ùký‰‹€–_$\ý'U!Äer("Âq²Â“#&¤5¯]}åý¯çê5Ñóà\(†m2[©øÂÛÞbB  Ùú¥xcÀ;„šGææžÛ¼‰u?ˆÂ1nd+<šn¦ætã¿Ë©`Åò‚ذ¼TVþð±õRŠ§éª—Œ¯±å-2X€Ò¦€ÂÅûÓÞ¹T:š®>—ÄшE_2‘–»¦·‘OÑÿ(¥K[®¥ë·±ìâ +ÜŒ;AZ˜tï³E ÏÉÎËdJ5~"b>Ùzz8€ ’c*µà›¯ÞaU)§´A §yŠOµÍ²^ÇT¸¸äÁÛTWÚE¢9ZdÑjœógס ä!ëô3ÛµÈGWqP‘Ìë*råà]}k#ÈôHŽÙéJ +âÜÓµ0^suájÎö¤ÂéSa-U<ÝÞ šbØ$7Ê‹0ƒI¥éëâMòF÷’R¢¼ {r7«Êo˜›ð³KJ’ÜþX±‡TþO.öËÀQÜšô¬ìש€O‚„Ä‹Gê;ÌvŠ(;+£×°=‘;’¶C6儃Ç[|ƒ™ð,Õôúb˜‚O—´·×s²€[¬áš‚ÏmzÛìú÷ÍŠ?‘dÀ³Òç0ã1¼8ÄÚ^€` é¨-¹¼ÆM Â7eWu-˜Ÿ‚£³ùT‡¢ Ý¥;é-a:ä¶&ªà@ýs5!!ðK ¬PfÉ i®" QÌ<‘Úxò'ûè‘®Ÿ»ã>yw‚¤Hõ˜¶mÌWo¾¹|¥œ7|0†µ!69ûT ‘BQ{-ŽÍúËfYT‰¡‰ÑŸ÷¬LKéäb™ó&ÖìPþ4a¶a2˜*PÖ­ 6·Ijï^“Èòuµæ²!ï†1•’¯³D¡™KÎ;Ï}BG«¼H…›"¶ãÂGh™8ÞÀ÷¨a'V$“=m'ülÿ Þ–}ƒÌ‹îv¶Кö© }>û¿P,&iM~ý„F×?c´HšÅ1‡V Ù¡tö잎VË}¨Àc§OÈG¸¾,î£dOØà5à«óÊ£¼PÝ¡|Œn¢„~Ѐ’Ãg’D€þqu%Üž~K´‹pïðÉe]K+Nml¼˜[5>èÓÚZÎ@AW¹Ö‹+I +Gɽ¢@d:4ÂêÛ·›“ZïNõT@ê¸óQ5x»¥q`ÅÍÌÃÝRN{¢‚kwVd¡F_vjPטÙÁÙvÊ£%¥$Öv›¿Q†èaÄî I0‚ní‘.ÀP«ŒT#@Ã6¸§–5bÄù:füŒO”‰†'F* ô‹ë¨ä GÁ–ÆTìÛ^ŸÇþpx-CÚÖY¬ÿ%F©ƒæ Æ“®Ÿ¹õ¢ †ùÔ·hy¢ìh©6oˆß?:$¦F$Êg<‹nW¬„›2ÍkŸæ€[X¥?Öñ53ô #1ÌJ§–“Ó EÓG]~9Ò¼u ðÁV{G.D©÷WHIø¨ãêf¸HœÞè¼"<Ûðhf‡§×6éõýcß"SÉ­ÎS CM¯8aÀµšÆÕH“±Û‡7Ñþw¯¦…ž> +/Length 1007 >> -% 1913 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmex10-builtin-0) +%%Title: (TeX-cmex10-builtin-0 TeX cmex10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmex10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmex10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +1 beginbfrange + <03A5> +endbfrange +31 
beginbfchar +<30> +<31> +<32> +<33> +<34> +<35> +<36> +<37> +<38> +<39> +<3A> +<3B> +<3C> +<3D> +<3E> +<3F> +<40> +<41> +<42> +<43> +<9F> <221A> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <2126> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2322 0 obj << -/D [1911 0 R /XYZ 98.895 753.953 null] +/Length 1577 >> -% 573 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmitt10-builtin-0) +%%Title: (TeX-cmitt10-builtin-0 TeX cmitt10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmitt10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmitt10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +6 beginbfrange +<07> <08> <03A5> +<21> <23> <0021> +<25> <26> <0025> +<28> <5F> <0028> +<61> <7E> <0061> + <03A5> +endbfrange +71 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<24> <00A3> +<27> <2019> +<60> <2018> +<7F> <00A8> +<80> <2423> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <2191> + <2193> + <0027> + <00A1> + <00BF> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <2423> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj 
+2323 0 obj << -/D [1911 0 R /XYZ 99.895 716.092 null] +/Length 1535 >> -% 1914 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmmi10-builtin-0) +%%Title: (TeX-cmmi10-builtin-0 TeX cmmi10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmmi10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmmi10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +20 beginbfrange +<07> <08> <03A5> +<0B> <0E> <03B1> +<10> <15> <03B6> +<17> <18> <03BD> +<19> <1A> <03C0> +<1B> <1D> <03C3> +<1F> <21> <03C7> +<28> <29> <21BC> +<2A> <2B> <21C0> +<30> <39> <0030> +<41> <5A> <0041> +<5B> <5D> <266D> +<61> <7A> <0061> + <03A5> + <03B1> + <03B6> + <03BD> + <03C0> + <03C3> + <03C7> +endbfrange +48 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0F> <03F5> +<16> <00B5> +<1E> <03D5> +<22> <03B5> +<23> <03D1> +<24> <03D6> +<25> <03F1> +<26> <03C2> +<27> <03C6> +<2E> <25B7> +<2F> <25C1> +<3A> <002E> +<3B> <002C> +<3C> <003C> +<3D> <002F> +<3E> <003E> +<3F> <22C6> +<40> <2202> +<5E> <2323> +<5F> <2322> +<60> <2113> +<7B> <0131> +<7C> <0237> +<7D> <2118> +<7E> <20D7> +<80> <03C8> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <2126> + <03F5> + <00B5> + <03D5> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2324 0 obj << -/D [1911 0 R /XYZ 99.895 444.811 null] +/Length 1724 >> -% 1915 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmr10-builtin-0) +%%Title: (TeX-cmr10-builtin-0 TeX cmr10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource 
begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmr10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmr10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +7 beginbfrange +<07> <08> <03A5> +<23> <26> <0023> +<28> <3B> <0028> +<3F> <5B> <003F> +<61> <7A> <0061> +<7B> <7C> <2013> + <03A5> +endbfrange +78 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <00660066> +<0C> <00660069> +<0D> <0066006C> +<0E> <006600660069> +<0F> <00660066006C> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<21> <0021> +<22> <201D> +<27> <2019> +<3C> <00A1> +<3D> <003D> +<3E> <00BF> +<5C> <201C> +<5D> <005D> +<5E> <02C6> +<5F> <02D9> +<60> <2018> +<7D> <02DD> +<7E> <02DC> +<7F> <00A8> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <00660066> + <00660069> + <0066006C> + <006600660069> + <00660066006C> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2325 0 obj << -/D [1911 0 R /XYZ 99.895 444.971 null] +/Length 2050 >> -% 1916 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmsy10-builtin-0) +%%Title: (TeX-cmsy10-builtin-0 TeX cmsy10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmsy10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmsy10-builtin-0 def +/CMapType 2 def +1 
begincodespacerange +<00> +endcodespacerange +27 beginbfrange +<08> <0C> <2295> +<12> <13> <2286> +<14> <15> <2264> +<16> <17> <2AAF> +<1A> <1B> <2282> +<1C> <1D> <226A> +<1E> <1F> <227A> +<23> <24> <2193> +<25> <26> <2197> +<2B> <2C> <21D3> +<3E> <3F> <22A4> +<41> <5A> <0041> +<5E> <5F> <2227> +<60> <61> <22A2> +<62> <63> <230A> +<64> <65> <2308> +<68> <69> <27E8> +<76> <77> <2291> +<79> <7A> <2020> + <2295> + <2297> + <2286> + <2264> + <2AAF> + <2282> + <226A> + <227A> +endbfrange +81 beginbfchar +<00> <2212> +<01> <00B7> +<02> <00D7> +<03> <2217> +<04> <00F7> +<05> <22C4> +<06> <00B1> +<07> <2213> +<0D> <20DD> +<0E> <25E6> +<0F> <2022> +<10> <224D> +<11> <2261> +<18> <223C> +<19> <2248> +<20> <2190> +<21> <2192> +<22> <2191> +<27> <2243> +<28> <21D0> +<29> <21D2> +<2A> <21D1> +<2D> <2196> +<2E> <2199> +<2F> <221D> +<30> <2032> +<31> <221E> +<32> <2208> +<33> <220B> +<34> <25B3> +<35> <25BD> +<36> <0338> +<38> <2200> +<39> <2203> +<3A> <00AC> +<3B> <2205> +<3C> <211C> +<3D> <2111> +<40> <2135> +<5B> <222A> +<5C> <2229> +<5D> <228E> +<66> <007B> +<67> <007D> +<6A> <007C> +<6B> <2225> +<6C> <2195> +<6D> <21D5> +<6E> <005C> +<6F> <2240> +<70> <221A> +<71> <2A3F> +<72> <2207> +<73> <222B> +<74> <2294> +<75> <2293> +<78> <00A7> +<7B> <00B6> +<7C> <2663> +<7D> <2662> +<7E> <2661> +<7F> <2660> +<80> <2190> + <0020> + <2212> + <00B7> + <00D7> + <2217> + <00F7> + <22C4> + <00B1> + <2213> + <20DD> + <25E6> + <2022> + <224D> + <2261> + <223C> + <2248> + <2190> + <2660> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2326 0 obj << -/D [1911 0 R /XYZ 99.895 433.015 null] +/Length 1543 >> -% 1917 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmtt10-builtin-0) +%%Title: (TeX-cmtt10-builtin-0 TeX cmtt10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin 
+begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmtt10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmtt10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +5 beginbfrange +<07> <08> <03A5> +<21> <26> <0021> +<28> <5F> <0028> +<61> <7E> <0061> + <03A5> +endbfrange +70 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<27> <2019> +<60> <2018> +<7F> <00A8> +<80> <2423> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <2191> + <2193> + <0027> + <00A1> + <00BF> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <2423> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2327 0 obj << -/D [1911 0 R /XYZ 114.242 129.79 null] +/Length 1538 >> -% 1910 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmtt8-builtin-0) +%%Title: (TeX-cmtt8-builtin-0 TeX cmtt8-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmtt8-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmtt8-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +5 beginbfrange +<07> <08> <03A5> +<21> <26> <0021> +<28> <5F> <0028> +<61> <7E> <0061> + <03A5> +endbfrange +70 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> 
<03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<27> <2019> +<60> <2018> +<7F> <00A8> +<80> <2423> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <2191> + <2193> + <0027> + <00A1> + <00BF> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <2423> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2328 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F61 1360 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Length 1538 >> -% 1922 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmtt9-builtin-0) +%%Title: (TeX-cmtt9-builtin-0 TeX cmtt9-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmtt9-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmtt9-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +5 beginbfrange +<07> <08> <03A5> +<21> <26> <0021> +<28> <5F> <0028> +<61> <7E> <0061> + <03A5> +endbfrange +70 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> 
+<1F> <00D8> +<20> <2423> +<27> <2019> +<60> <2018> +<7F> <00A8> +<80> <2423> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <2191> + <2193> + <0027> + <00A1> + <00BF> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <2423> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2329 0 obj << -/Type /Page -/Contents 1923 0 R -/Resources 1921 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1909 0 R 1918 0 R 1919 0 R 1920 0 R ] +/Length 853 >> -% 1909 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-fplmr-builtin-0) +%%Title: (TeX-fplmr-builtin-0 TeX fplmr-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (fplmr-builtin) +/Supplement 0 +>> def +/CMapName /TeX-fplmr-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +0 beginbfrange +endbfrange +20 beginbfchar +<20> <0020> +<44> <2206> +<46> <03A6> +<47> <0393> +<4C> <039B> +<50> <03A0> +<51> <0398> +<53> <03A3> +<55> <03A5> +<57> <2126> +<58> <039E> +<59> <03A8> + <20AC> + <221E> + <221D> + <2205> + <220F> + <0237> + <2A3F> + <2211> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2330 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 654.503 409.811 666.562] -/A << /S /GoTo /D (precdata) >> +/Length 1113 >> -% 1918 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-fplmri-builtin-0) +%%Title: (TeX-fplmri-builtin-0 TeX 
fplmri-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (fplmri-builtin) +/Supplement 0 +>> def +/CMapName /TeX-fplmri-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +3 beginbfrange +<61> <62> <03B1> +<6B> <6C> <03BA> +<73> <75> <03C3> +endbfrange +37 beginbfchar +<20> <0020> +<23> <03B5> +<24> <03F1> +<44> <2206> +<46> <03A6> +<47> <0393> +<4A> <03D1> +<4C> <039B> +<50> <03A0> +<51> <0398> +<53> <03A3> +<55> <03A5> +<56> <03C2> +<57> <2126> +<58> <039E> +<59> <03A8> +<63> <03C7> +<64> <03B4> +<65> <03F5> +<66> <03D5> +<67> <03B3> +<68> <03B7> +<69> <03B9> +<6A> <03C6> +<6D> <00B5> +<6E> <03BD> +<70> <03C0> +<71> <03B8> +<72> <03C1> +<76> <03D6> +<77> <03C9> +<78> <03BE> +<79> <03C8> +<7A> <03B6> + <20AC> + <2202> + <0237> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2331 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 584.479 469.357 596.539] -/A << /S /GoTo /D (vdata) >> +/Length 1477 >> -% 1919 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-pplb8r-8r-0) +%%Title: (TeX-pplb8r-8r-0 TeX pplb8r-8r 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (pplb8r-8r) +/Supplement 0 +>> def +/CMapName /TeX-pplb8r-8r-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +15 beginbfrange +<06> <07> <0141> +<0E> <0F> <017D> +<18> <19> <2264> +<20> <26> <0020> +<28> <5F> <0028> +<61> <7E> <0061> +<86> <87> <2020> +<93> <94> <201C> +<96> <97> <2013> + <00A1> + <00AE> + <00D8> + <00ED> + <00F7> + <00FC> +endbfrange +50 beginbfchar +<01> <02D9> +<02> <00660069> +<03> <0066006C> +<04> <2044> +<05> <02DD> +<08> <02DB> 
+<09> <02DA> +<0B> <02D8> +<0C> <2212> +<10> <02C7> +<11> <0131> +<12> <0237> +<13> <00660066> +<14> <006600660069> +<15> <00660066006C> +<16> <2260> +<17> <221E> +<1A> <2202> +<1B> <2211> +<1C> <220F> +<1D> <03C0> +<1E> <0060> +<1F> <0027> +<27> <2019> +<60> <2018> +<80> <20AC> +<81> <222B> +<82> <201A> +<83> <0192> +<84> <201E> +<85> <2026> +<88> <02C6> +<89> <2030> +<8A> <0160> +<8B> <2039> +<8C> <0152> +<8D> <2126> +<8E> <221A> +<8F> <2248> +<95> <2022> +<98> <02DC> +<99> <2122> +<9A> <0161> +<9B> <203A> +<9C> <0153> +<9D> <2206> +<9E> <25CA> +<9F> <0178> + <002D> + <00FF> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2332 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 514.456 469.357 526.516] -/A << /S /GoTo /D (vdata) >> +/Length 1477 >> -% 1920 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-pplr8r-8r-0) +%%Title: (TeX-pplr8r-8r-0 TeX pplr8r-8r 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (pplr8r-8r) +/Supplement 0 +>> def +/CMapName /TeX-pplr8r-8r-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +15 beginbfrange +<06> <07> <0141> +<0E> <0F> <017D> +<18> <19> <2264> +<20> <26> <0020> +<28> <5F> <0028> +<61> <7E> <0061> +<86> <87> <2020> +<93> <94> <201C> +<96> <97> <2013> + <00A1> + <00AE> + <00D8> + <00ED> + <00F7> + <00FC> +endbfrange +50 beginbfchar +<01> <02D9> +<02> <00660069> +<03> <0066006C> +<04> <2044> +<05> <02DD> +<08> <02DB> +<09> <02DA> +<0B> <02D8> +<0C> <2212> +<10> <02C7> +<11> <0131> +<12> <0237> +<13> <00660066> +<14> <006600660069> +<15> <00660066006C> +<16> <2260> +<17> <221E> +<1A> <2202> +<1B> <2211> +<1C> <220F> +<1D> <03C0> +<1E> <0060> +<1F> <0027> +<27> <2019> +<60> <2018> +<80> <20AC> +<81> 
<222B> +<82> <201A> +<83> <0192> +<84> <201E> +<85> <2026> +<88> <02C6> +<89> <2030> +<8A> <0160> +<8B> <2039> +<8C> <0152> +<8D> <2126> +<8E> <221A> +<8F> <2248> +<95> <2022> +<98> <02DC> +<99> <2122> +<9A> <0161> +<9B> <203A> +<9C> <0153> +<9D> <2206> +<9E> <25CA> +<9F> <0178> + <002D> + <00FF> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2333 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 374.41 409.811 386.47] -/A << /S /GoTo /D (descdata) >> +/Length 1482 >> -% 1924 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-pplri8r-8r-0) +%%Title: (TeX-pplri8r-8r-0 TeX pplri8r-8r 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (pplri8r-8r) +/Supplement 0 +>> def +/CMapName /TeX-pplri8r-8r-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +15 beginbfrange +<06> <07> <0141> +<0E> <0F> <017D> +<18> <19> <2264> +<20> <26> <0020> +<28> <5F> <0028> +<61> <7E> <0061> +<86> <87> <2020> +<93> <94> <201C> +<96> <97> <2013> + <00A1> + <00AE> + <00D8> + <00ED> + <00F7> + <00FC> +endbfrange +50 beginbfchar +<01> <02D9> +<02> <00660069> +<03> <0066006C> +<04> <2044> +<05> <02DD> +<08> <02DB> +<09> <02DA> +<0B> <02D8> +<0C> <2212> +<10> <02C7> +<11> <0131> +<12> <0237> +<13> <00660066> +<14> <006600660069> +<15> <00660066006C> +<16> <2260> +<17> <221E> +<1A> <2202> +<1B> <2211> +<1C> <220F> +<1D> <03C0> +<1E> <0060> +<1F> <0027> +<27> <2019> +<60> <2018> +<80> <20AC> +<81> <222B> +<82> <201A> +<83> <0192> +<84> <201E> +<85> <2026> +<88> <02C6> +<89> <2030> +<8A> <0160> +<8B> <2039> +<8C> <0152> +<8D> <2126> +<8E> <221A> +<8F> <2248> +<95> <2022> +<98> <02DC> +<99> <2122> +<9A> <0161> +<9B> <203A> +<9C> <0153> +<9D> <2206> +<9E> <25CA> +<9F> <0178> + 
<002D> + <00FF> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2274 0 obj << -/D [1922 0 R /XYZ 149.705 753.953 null] +/Type /ObjStm +/N 100 +/First 1012 +/Length 18437 >> -% 1921 0 obj +stream +917 0 933 57 913 114 914 170 2272 227 909 285 2273 342 2266 400 2276 494 2278 612 +1074 671 945 730 915 788 912 846 908 904 2142 962 911 1021 2279 1079 910 1138 2127 1195 +2128 1254 2280 1313 2275 1372 2281 1467 2282 1487 2283 1858 2284 1961 2285 2120 2286 2143 2287 2598 +2288 2727 2289 3025 2290 3671 2292 4142 2293 4773 2294 5244 2296 5819 2298 6044 2300 6376 2302 6620 +2304 6891 2306 7239 2308 7745 2310 7979 2312 8453 2314 8679 2316 8910 2318 9389 2320 9965 2291 10383 +1882 10824 1813 10987 1490 11150 942 11311 941 11470 940 11630 1000 11791 1044 11952 1285 12113 1157 12278 +685 12448 687 12638 686 12828 688 13018 894 13131 978 13244 1052 13361 1087 13481 1117 13601 1158 13721 +1205 13841 1244 13961 1300 14081 1351 14201 1400 14321 1457 14441 1492 14561 1534 14681 1577 14801 1626 14921 +1663 15041 1698 15161 1739 15281 1780 15401 1805 15521 1841 15641 1875 15761 1914 15881 1952 16001 1989 16121 +2034 16241 2070 16361 2115 16481 2211 16601 2261 16721 2334 16823 2335 16941 2336 17062 2337 17183 2338 17304 +% 917 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F61 1360 0 R >> -/ProcSet [ /PDF /Text ] +/D [2267 0 R /XYZ 99.895 433.422 null] >> -% 1927 0 obj +% 933 0 obj << -/Type /Page -/Contents 1928 0 R -/Resources 1926 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1925 0 R ] +/D [2267 0 R /XYZ 99.895 398.456 null] >> -% 1925 0 obj +% 913 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.493 554.876 418.548 566.936] -/A << /S /GoTo /D (vdata) >> +/D [2267 0 R /XYZ 99.895 352.81 null] >> -% 1929 0 obj +% 914 0 obj << -/D [1927 0 R /XYZ 98.895 753.953 null] +/D [2267 0 R /XYZ 99.895 307.164 null] >> -% 
1926 0 obj +% 2272 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R >> -/ProcSet [ /PDF /Text ] +/D [2267 0 R /XYZ 99.895 261.519 null] >> -% 1931 0 obj +% 909 0 obj << -/Type /Page -/Contents 1932 0 R -/Resources 1930 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R +/D [2267 0 R /XYZ 99.895 215.873 null] >> -% 1933 0 obj +% 2273 0 obj << -/D [1931 0 R /XYZ 149.705 753.953 null] +/D [2267 0 R /XYZ 99.895 170.895 null] >> -% 1930 0 obj +% 2266 0 obj << -/Font << /F54 586 0 R >> +/Font << /F75 685 0 R /F84 687 0 R /F78 686 0 R >> /ProcSet [ /PDF /Text ] >> -% 1935 0 obj +% 2276 0 obj << /Type /Page -/Contents 1936 0 R -/Resources 1934 0 R +/Contents 2277 0 R +/Resources 2275 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1942 0 R ->> -% 1937 0 obj -<< -/D [1935 0 R /XYZ 98.895 753.953 null] ->> -% 1938 0 obj -<< -/D [1935 0 R /XYZ 99.895 723.717 null] ->> -% 783 0 obj -<< -/D [1935 0 R /XYZ 99.895 698.622 null] ->> -% 1939 0 obj -<< -/D [1935 0 R /XYZ 99.895 640.564 null] ->> -% 831 0 obj -<< -/D [1935 0 R /XYZ 99.895 585.057 null] +/Parent 2261 0 R >> -% 830 0 obj -<< -/D [1935 0 R /XYZ 99.895 532.2 null] ->> -% 789 0 obj -<< -/D [1935 0 R /XYZ 99.895 474.043 null] ->> -% 790 0 obj -<< -/D [1935 0 R /XYZ 99.895 431.766 null] ->> -% 805 0 obj -<< -/D [1935 0 R /XYZ 99.895 388.215 null] ->> -% 786 0 obj -<< -/D [1935 0 R /XYZ 99.895 343.387 null] ->> -% 787 0 obj +% 2278 0 obj << -/D [1935 0 R /XYZ 99.895 299.836 null] +/D [2276 0 R /XYZ 149.705 753.953 null] >> -% 1940 0 obj -<< -/D [1935 0 R /XYZ 99.895 256.284 null] ->> -% 782 0 obj +% 1074 0 obj << -/D [1935 0 R /XYZ 99.895 212.732 null] +/D [2276 0 R /XYZ 150.705 716.092 null] >> -% 1941 0 obj +% 945 0 obj << -/D [1935 0 R /XYZ 99.895 169.848 null] +/D [2276 0 R /XYZ 150.705 687.379 null] >> -% 1934 0 obj +% 915 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R >> -/ProcSet [ /PDF /Text ] +/D [2276 0 R /XYZ 150.705 632.184 null] >> -% 1944 0 obj +% 912 0 obj << -/Type /Page 
-/Contents 1945 0 R -/Resources 1943 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1942 0 R +/D [2276 0 R /XYZ 150.705 590.403 null] >> -% 1946 0 obj +% 908 0 obj << -/D [1944 0 R /XYZ 149.705 753.953 null] +/D [2276 0 R /XYZ 150.705 545.192 null] >> -% 946 0 obj +% 2142 0 obj << -/D [1944 0 R /XYZ 150.705 716.092 null] +/D [2276 0 R /XYZ 150.705 512.037 null] >> -% 817 0 obj +% 911 0 obj << -/D [1944 0 R /XYZ 150.705 687.379 null] +/D [2276 0 R /XYZ 150.705 480.156 null] >> -% 788 0 obj +% 2279 0 obj << -/D [1944 0 R /XYZ 150.705 632.184 null] +/D [2276 0 R /XYZ 150.705 448.276 null] >> -% 785 0 obj +% 910 0 obj << -/D [1944 0 R /XYZ 150.705 590.403 null] +/D [2276 0 R /XYZ 150.705 407.09 null] >> -% 781 0 obj +% 2127 0 obj << -/D [1944 0 R /XYZ 150.705 545.192 null] +/D [2276 0 R /XYZ 150.705 348.649 null] >> -% 784 0 obj +% 2128 0 obj << -/D [1944 0 R /XYZ 150.705 512.037 null] +/D [2276 0 R /XYZ 150.705 304.874 null] >> -% 1947 0 obj +% 2280 0 obj << -/D [1944 0 R /XYZ 150.705 480.156 null] +/D [2276 0 R /XYZ 150.705 260.978 null] >> -% 1943 0 obj +% 2275 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/Font << /F84 687 0 R /F78 686 0 R /F145 940 0 R >> /ProcSet [ /PDF /Text ] >> -% 1948 0 obj +% 2281 0 obj [1000] -% 1949 0 obj -[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 1950 0 obj -[777.8 500 777.8] -% 1951 0 obj +% 2282 0 obj +[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 
525 525 525 525 525 525] +% 2283 0 obj +[277.8 277.8 500 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 777.8 500 777.8] +% 2284 0 obj [853 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 666 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 747 0 0 0 0 0 0 0 0 0 0 0 0 0 0 881 0 0 0 0 0 0 0 0 0 0 0 0 234 0 881 767] -% 1952 0 obj +% 2285 0 obj [528 542] -% 1953 0 obj -[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 1954 0 obj +% 2286 0 obj +[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] +% 2287 0 obj [531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3] -% 1955 0 obj -[388.9 388.9 500 777.8 277.8 333.3 277.8 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 277.8 777.8] -% 1956 0 obj +% 2288 0 obj +[388.9 388.9 500 777.8 277.8 333.3 277.8 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 277.8 777.8 472.2 472.2 777.8 750 708.3 722.2 763.9 680.6 652.8 784.7 750 361.1 513.9 777.8 625 916.7 750 777.8 680.6 777.8 736.1 555.6 722.2 750 750 1027.8 750 750 611.1 277.8 500 277.8] +% 2289 0 obj [777.8 277.8 777.8 500 777.8 500 777.8 777.8 777.8 777.8 777.8 777.8 777.8 1000 500 500 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 1000 1000 777.8 777.8 1000 1000 500 500 1000 1000 1000 777.8 1000 1000 611.1 611.1 1000 1000 1000 
777.8 275 1000 666.7 666.7 888.9 888.9 0 0 555.6 555.6 666.7 500 722.2 722.2 777.8 777.8 611.1 798.5 656.8 526.5 771.4 527.8 718.7 594.9 844.5 544.5 677.8 761.9 689.7 1200.9 820.5 796.1 695.6 816.7 847.5 605.6 544.6 625.8 612.8 987.8 713.3 668.3 724.7 666.7 666.7 666.7 666.7 666.7 611.1 611.1 444.4 444.4 444.4 444.4 500 500 388.9 388.9 277.8 500 500 611.1 500 277.8 833.3] -% 1957 0 obj +% 2290 0 obj [525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 1959 0 obj +% 2292 0 obj [605 608 167 380 611 291 313 333 0 333 606 0 667 500 333 287 0 0 0 0 0 0 0 0 0 0 0 0 333 208 250 278 371 500 500 840 778 278 333 333 389 606 250 333 250 606 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 444 747 778 611 709 774 611 556 763 832 337 333 726 611 946 831 786 604 786 668 525 613 778 722 1000 667 667 667 333 606 333 606 500 278 500 553 444 611 479 333 556 582 291 234 556 291 883 582 546 601 560 395 424 326 603 565 834 516 556 500 333 606 333 606 0 0 0 278 500 500 1000 500 500 333 1144 525 331 998 0 0 0 0 0 0 500 500 606 500 1000 333 979 424 331 827 0 0 667 0 278 500 500 500 500 606 500] -% 1960 0 obj +% 2293 0 obj [528 545 167 333 556 278 333 333 0 333 606 0 667 444 333 278 0 0 0 0 0 0 0 0 0 0 0 0 333 333 250 333 500 500 500 889 778 278 333 333 389 606 250 333 250 296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 500 747 722 611 667 778 611 556 722 778 333 333 667 556 944 778 778 611 778 667 556 611 778 722 944 722 667 667 333 606 333 606 500 278 444 463 407 500 389 278 500 500 278 278 444 278 778 556 444 500 463 389 389 333 556 500 722 500 500 444] -% 
1961 0 obj +% 2294 0 obj [611 611 167 333 611 333 333 333 0 333 606 0 667 500 333 333 0 0 0 0 0 0 0 0 0 0 0 0 333 227 250 278 402 500 500 889 833 278 333 333 444 606 250 333 250 296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 444 747 778 667 722 833 611 556 833 833 389 389 778 611 1000 833 833 611 833 722 611 667 778 778 1000 667 667 667 333 606 333 606 500 278 500 611 444 611 500 389 556 611 333 333 611 333 889 611 556 611 611 389 444 333 611 556 833 500 556 500 310 606 310 606 0 0 0 333 500 500 1000 500 500 333 1000 611 389 1000 0 0 0 0 0 0 500 500 606 500 1000] -% 1963 0 obj +% 2296 0 obj << /Type /FontDescriptor /FontName /MNPEHI+CMEX10 @@ -28025,12 +36117,12 @@ stream /StemV 47 /XHeight 431 /CharSet (/radicalbigg) -/FontFile 1962 0 R +/FontFile 2295 0 R >> -% 1965 0 obj +% 2298 0 obj << /Type /FontDescriptor -/FontName /MPVPBL+CMITT10 +/FontName /SFGIZH+CMITT10 /Flags 4 /FontBBox [11 -233 669 696] /Ascent 611 @@ -28039,13 +36131,13 @@ stream /ItalicAngle -14 /StemV 69 /XHeight 431 -/CharSet (/D/a/c/d/e/exclam/n/o/period/s/t) -/FontFile 1964 0 R +/CharSet (/A/C/D/E/H/I/K/L/M/P/T/V/a/c/comma/d/e/exclam/f/g/h/hyphen/i/k/m/n/o/p/parenleft/parenright/period/r/s/slash/t/w/x/y) +/FontFile 2297 0 R >> -% 1967 0 obj +% 2300 0 obj << /Type /FontDescriptor -/FontName /SYFPBV+CMMI10 +/FontName /TPELEW+CMMI10 /Flags 4 /FontBBox [-32 -250 1048 750] /Ascent 694 @@ -28054,13 +36146,13 @@ stream /ItalicAngle -14 /StemV 72 /XHeight 431 -/CharSet (/greater/less) -/FontFile 1966 0 R +/CharSet (/arrowhookleft/greater/less) +/FontFile 2299 0 R >> -% 1969 0 obj +% 2302 0 obj << /Type /FontDescriptor -/FontName /GIGFZE+CMR10 +/FontName /SOSTRQ+CMR10 /Flags 4 /FontBBox [-40 -250 1009 750] /Ascent 694 @@ -28069,13 +36161,13 @@ stream /ItalicAngle 0 /StemV 69 /XHeight 431 -/CharSet (/equal/parenleft/parenright/plus) -/FontFile 1968 0 R +/CharSet (/bracketleft/bracketright/equal/parenleft/parenright/plus) +/FontFile 2301 0 R >> -% 1971 0 obj +% 2304 0 obj << /Type 
/FontDescriptor -/FontName /DMJGRR+CMSY10 +/FontName /VKSUEJ+CMSY10 /Flags 4 /FontBBox [-29 -960 1116 775] /Ascent 750 @@ -28084,13 +36176,13 @@ stream /ItalicAngle -14 /StemV 40 /XHeight 431 -/CharSet (/B/H/I/arrowleft/bar/bardbl/braceleft/braceright/element/greaterequal/lessequal/minus/negationslash/radical) -/FontFile 1970 0 R +/CharSet (/B/H/I/arrowleft/arrowright/asteriskmath/bar/bardbl/braceleft/braceright/element/greaterequal/lessequal/minus/negationslash/radical) +/FontFile 2303 0 R >> -% 1973 0 obj +% 2306 0 obj << /Type /FontDescriptor -/FontName /UFPYIQ+CMTT10 +/FontName /XIQVGP+CMTT10 /Flags 4 /FontBBox [-4 -233 537 696] /Ascent 611 @@ -28099,13 +36191,13 @@ stream /ItalicAngle 0 /StemV 69 /XHeight 431 -/CharSet (/A/B/C/D/E/F/I/K/L/M/N/O/P/R/S/T/U/W/Y/a/ampersand/asciitilde/asterisk/b/backslash/bracketleft/bracketright/c/colon/comma/d/e/eight/equal/f/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/six/slash/t/three/two/u/underscore/v/w/x/y/z/zero) -/FontFile 1972 0 R +/CharSet (/A/B/C/D/E/F/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/Y/Z/a/ampersand/asciitilde/asterisk/b/backslash/bracketleft/bracketright/c/colon/comma/d/e/equal/f/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/six/slash/t/three/two/u/underscore/v/w/x/y/z/zero) +/FontFile 2305 0 R >> -% 1975 0 obj +% 2308 0 obj << /Type /FontDescriptor -/FontName /HZGQIC+CMTT8 +/FontName /KPZRIA+CMTT8 /Flags 4 /FontBBox [-5 -232 545 699] /Ascent 611 @@ -28114,13 +36206,13 @@ stream /ItalicAngle 0 /StemV 76 /XHeight 431 -/CharSet (/b/c/e/i/l/n/p/r/s/t) -/FontFile 1974 0 R +/CharSet (/b/c/d/e/i/l/n/p/r/s/t) +/FontFile 2307 0 R >> -% 1977 0 obj +% 2310 0 obj << /Type /FontDescriptor -/FontName /BQXTWV+CMTT9 +/FontName /FYMOSO+CMTT9 /Flags 4 /FontBBox [-6 -233 542 698] /Ascent 611 @@ -28129,10 +36221,10 @@ stream /ItalicAngle 0 /StemV 74 /XHeight 431 -/CharSet 
(/D/E/I/K/N/P/T/Y/a/ampersand/b/c/colon/comma/d/e/equal/f/g/greater/h/i/k/l/less/m/n/o/p/parenleft/parenright/period/q/r/s/semicolon/t/two/u/underscore/v/w/x/y/z) -/FontFile 1976 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/K/L/M/N/O/P/R/S/T/U/V/W/X/Y/a/ampersand/asterisk/b/c/colon/comma/d/e/equal/f/four/g/greater/h/hyphen/i/j/k/l/less/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/semicolon/six/slash/t/two/u/underscore/v/w/x/y/z/zero) +/FontFile 2309 0 R >> -% 1979 0 obj +% 2312 0 obj << /Type /FontDescriptor /FontName /IKXQUG+PazoMath @@ -28145,9 +36237,9 @@ stream /StemV 95 /XHeight 0 /CharSet (/infinity/summation) -/FontFile 1978 0 R +/FontFile 2311 0 R >> -% 1981 0 obj +% 2314 0 obj << /Type /FontDescriptor /FontName /DUJUUF+PazoMath-Italic @@ -28160,12 +36252,12 @@ stream /StemV 65 /XHeight 0 /CharSet (/alpha/beta) -/FontFile 1980 0 R +/FontFile 2313 0 R >> -% 1983 0 obj +% 2316 0 obj << /Type /FontDescriptor -/FontName /TVMKYN+URWPalladioL-Bold +/FontName /BDDEWM+URWPalladioL-Bold /Flags 4 /FontBBox [-152 -301 1000 935] /Ascent 708 @@ -28174,13 +36266,13 @@ stream /ItalicAngle 0 /StemV 123 /XHeight 471 -/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/Z/a/b/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/quoteright/r/s/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 1982 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/Y/Z/a/b/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/question/quoteright/r/s/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) +/FontFile 2315 0 R >> -% 1985 0 obj +% 2318 0 obj << /Type /FontDescriptor -/FontName /TCRNJT+URWPalladioL-Roma +/FontName /GLTUCO+URWPalladioL-Roma /Flags 4 /FontBBox [-166 -283 1021 943] /Ascent 715 @@ -28189,13 +36281,13 @@ stream /ItalicAngle 0 /StemV 84 /XHeight 469 -/CharSet 
(/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/a/ampersand/b/bracketleft/bracketright/bullet/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/quotedblleft/quotedblright/quoteright/r/s/section/semicolon/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 1984 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/Y/Z/a/ampersand/asterisk/b/bracketleft/bracketright/bullet/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/grave/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/plus/q/quotedblleft/quotedblright/quoteright/r/s/section/semicolon/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) +/FontFile 2317 0 R >> -% 1987 0 obj +% 2320 0 obj << /Type /FontDescriptor -/FontName /RUEFYH+URWPalladioL-Ital +/FontName /ZZXCQL+URWPalladioL-Ital /Flags 4 /FontBBox [-170 -305 1010 941] /Ascent 722 @@ -28204,447 +36296,690 @@ stream /ItalicAngle -9 /StemV 78 /XHeight 482 -/CharSet (/A/B/C/D/E/F/G/H/I/L/M/N/O/P/Q/R/S/T/U/V/X/a/b/c/colon/d/e/f/fi/five/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/period/q/quoteright/r/s/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 1986 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/K/L/M/N/O/P/Q/R/S/T/U/V/X/a/b/bracketleft/bracketright/c/colon/comma/d/e/f/fi/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/period/plus/q/quoteright/r/s/t/three/two/u/v/w/x/y/z/zero) +/FontFile 2319 0 R >> -% 1958 0 obj +% 2291 0 obj << /Type /Encoding -/Differences [2/fi/fl 38/ampersand/quoteright/parenleft/parenright 44/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon 61/equal 65/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X 90/Z/bracketleft 93/bracketright 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 147/quotedblleft/quotedblright/bullet/endash/emdash 167/section] +/Differences [2/fi/fl 30/grave 
38/ampersand/quoteright/parenleft/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon 61/equal 63/question 65/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft 93/bracketright 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 147/quotedblleft/quotedblright/bullet/endash/emdash 167/section] >> -% 1750 0 obj +% 1882 0 obj << /Type /Font /Subtype /Type1 /BaseFont /MNPEHI+CMEX10 -/FontDescriptor 1963 0 R +/FontDescriptor 2296 0 R /FirstChar 114 /LastChar 114 -/Widths 1948 0 R +/Widths 2281 0 R +/ToUnicode 2321 0 R >> -% 1682 0 obj +% 1813 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /MPVPBL+CMITT10 -/FontDescriptor 1965 0 R +/BaseFont /SFGIZH+CMITT10 +/FontDescriptor 2298 0 R /FirstChar 33 -/LastChar 116 -/Widths 1949 0 R +/LastChar 121 +/Widths 2282 0 R +/ToUnicode 2322 0 R >> -% 1360 0 obj +% 1490 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /SYFPBV+CMMI10 -/FontDescriptor 1967 0 R -/FirstChar 60 +/BaseFont /TPELEW+CMMI10 +/FontDescriptor 2300 0 R +/FirstChar 44 /LastChar 62 -/Widths 1950 0 R +/Widths 2283 0 R +/ToUnicode 2323 0 R >> -% 814 0 obj +% 942 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /GIGFZE+CMR10 -/FontDescriptor 1969 0 R +/BaseFont /SOSTRQ+CMR10 +/FontDescriptor 2302 0 R /FirstChar 40 -/LastChar 61 -/Widths 1955 0 R +/LastChar 93 +/Widths 2288 0 R +/ToUnicode 2324 0 R >> -% 813 0 obj +% 941 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /DMJGRR+CMSY10 -/FontDescriptor 1971 0 R +/BaseFont /VKSUEJ+CMSY10 +/FontDescriptor 2304 0 R /FirstChar 0 /LastChar 112 -/Widths 1956 0 R +/Widths 2289 0 R +/ToUnicode 2325 0 R >> -% 812 0 obj +% 940 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /UFPYIQ+CMTT10 -/FontDescriptor 1973 0 R +/BaseFont /XIQVGP+CMTT10 +/FontDescriptor 2306 0 R /FirstChar 13 /LastChar 126 -/Widths 1957 0 R +/Widths 2290 0 R +/ToUnicode 2326 0 R >> -% 870 0 obj +% 1000 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /HZGQIC+CMTT8 -/FontDescriptor 
1975 0 R +/BaseFont /KPZRIA+CMTT8 +/FontDescriptor 2308 0 R /FirstChar 98 /LastChar 116 -/Widths 1954 0 R +/Widths 2287 0 R +/ToUnicode 2327 0 R >> -% 915 0 obj +% 1044 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /BQXTWV+CMTT9 -/FontDescriptor 1977 0 R -/FirstChar 38 +/BaseFont /FYMOSO+CMTT9 +/FontDescriptor 2310 0 R +/FirstChar 13 /LastChar 122 -/Widths 1953 0 R +/Widths 2286 0 R +/ToUnicode 2328 0 R >> - -endstream -endobj -1988 0 obj -<< -/Type /ObjStm -/N 100 -/First 925 -/Length 11101 ->> -stream -1154 0 1027 145 584 295 586 465 585 635 587 805 780 918 871 1031 927 1144 962 1257 -991 1370 1034 1487 1079 1607 1121 1727 1180 1847 1232 1967 1281 2087 1330 2207 1371 2327 1409 2447 -1456 2567 1500 2687 1537 2807 1574 2927 1612 3047 1653 3167 1683 3287 1717 3407 1754 3527 1789 3647 -1828 3767 1865 3887 1902 4007 1942 4127 1989 4211 1990 4326 1991 4447 1992 4568 1993 4689 1994 4801 -1995 4897 574 4966 570 5026 566 5137 562 5211 558 5299 554 5387 550 5475 546 5563 542 5637 -538 5762 534 5836 530 5924 526 6012 522 6100 518 6188 514 6262 510 6387 506 6461 502 6549 -498 6637 494 6711 490 6836 486 6910 482 6998 478 7086 474 7174 470 7262 466 7350 462 7438 -458 7526 454 7614 450 7702 446 7790 442 7878 438 7966 434 8054 430 8142 426 8230 422 8304 -418 8430 414 8504 410 8592 406 8680 401 8768 397 8856 393 8944 389 9032 385 9120 381 9208 -377 9296 373 9384 369 9472 365 9560 361 9648 357 9736 353 9824 349 9912 345 10000 341 10088 -% 1154 0 obj +% 1285 0 obj << /Type /Font /Subtype /Type1 /BaseFont /IKXQUG+PazoMath -/FontDescriptor 1979 0 R +/FontDescriptor 2312 0 R /FirstChar 165 /LastChar 229 -/Widths 1951 0 R +/Widths 2284 0 R +/ToUnicode 2329 0 R >> -% 1027 0 obj +% 1157 0 obj << /Type /Font /Subtype /Type1 /BaseFont /DUJUUF+PazoMath-Italic -/FontDescriptor 1981 0 R +/FontDescriptor 2314 0 R /FirstChar 97 /LastChar 98 -/Widths 1952 0 R +/Widths 2285 0 R +/ToUnicode 2330 0 R >> -% 584 0 obj +% 685 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /TVMKYN+URWPalladioL-Bold 
-/FontDescriptor 1983 0 R +/BaseFont /BDDEWM+URWPalladioL-Bold +/FontDescriptor 2316 0 R /FirstChar 2 /LastChar 151 -/Widths 1961 0 R -/Encoding 1958 0 R +/Widths 2294 0 R +/Encoding 2291 0 R +/ToUnicode 2331 0 R >> -% 586 0 obj +% 687 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /TCRNJT+URWPalladioL-Roma -/FontDescriptor 1985 0 R +/BaseFont /GLTUCO+URWPalladioL-Roma +/FontDescriptor 2318 0 R /FirstChar 2 /LastChar 167 -/Widths 1959 0 R -/Encoding 1958 0 R +/Widths 2292 0 R +/Encoding 2291 0 R +/ToUnicode 2332 0 R >> -% 585 0 obj +% 686 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /RUEFYH+URWPalladioL-Ital -/FontDescriptor 1987 0 R +/BaseFont /ZZXCQL+URWPalladioL-Ital +/FontDescriptor 2320 0 R /FirstChar 2 /LastChar 122 -/Widths 1960 0 R -/Encoding 1958 0 R +/Widths 2293 0 R +/Encoding 2291 0 R +/ToUnicode 2333 0 R >> -% 587 0 obj +% 688 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [577 0 R 590 0 R 636 0 R 694 0 R 740 0 R 761 0 R] +/Parent 2334 0 R +/Kids [678 0 R 691 0 R 737 0 R 793 0 R 841 0 R 884 0 R] >> -% 780 0 obj +% 894 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [778 0 R 797 0 R 809 0 R 822 0 R 835 0 R 840 0 R] +/Parent 2334 0 R +/Kids [892 0 R 906 0 R 924 0 R 937 0 R 950 0 R 962 0 R] >> -% 871 0 obj +% 978 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [853 0 R 874 0 R 885 0 R 893 0 R 904 0 R 920 0 R] +/Parent 2334 0 R +/Kids [967 0 R 982 0 R 1003 0 R 1014 0 R 1022 0 R 1033 0 R] >> -% 927 0 obj +% 1052 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [924 0 R 930 0 R 935 0 R 942 0 R 949 0 R 954 0 R] +/Parent 2334 0 R +/Kids [1049 0 R 1054 0 R 1058 0 R 1063 0 R 1070 0 R 1078 0 R] >> -% 962 0 obj +% 1087 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [959 0 R 964 0 R 968 0 R 972 0 R 976 0 R 982 0 R] +/Parent 2334 0 R +/Kids [1083 0 R 1089 0 R 1093 0 R 1097 0 R 1103 0 R 1107 0 R] >> -% 991 0 obj +% 1117 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [988 0 R 995 0 R 1002 0 R 1008 0 R 1013 0 R 1024 
0 R] +/Parent 2334 0 R +/Kids [1113 0 R 1120 0 R 1126 0 R 1133 0 R 1139 0 R 1143 0 R] >> -% 1034 0 obj +% 1158 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1031 0 R 1041 0 R 1047 0 R 1058 0 R 1063 0 R 1070 0 R] +/Parent 2335 0 R +/Kids [1154 0 R 1162 0 R 1172 0 R 1178 0 R 1189 0 R 1194 0 R] >> -% 1079 0 obj +% 1205 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1075 0 R 1084 0 R 1092 0 R 1097 0 R 1105 0 R 1110 0 R] +/Parent 2335 0 R +/Kids [1201 0 R 1207 0 R 1215 0 R 1223 0 R 1228 0 R 1236 0 R] >> -% 1121 0 obj +% 1244 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1117 0 R 1124 0 R 1137 0 R 1144 0 R 1151 0 R 1162 0 R] +/Parent 2335 0 R +/Kids [1241 0 R 1249 0 R 1254 0 R 1267 0 R 1274 0 R 1282 0 R] >> -% 1180 0 obj +% 1300 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1177 0 R 1184 0 R 1195 0 R 1201 0 R 1212 0 R 1217 0 R] +/Parent 2335 0 R +/Kids [1293 0 R 1309 0 R 1315 0 R 1326 0 R 1332 0 R 1343 0 R] >> -% 1232 0 obj +% 1351 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1228 0 R 1235 0 R 1244 0 R 1250 0 R 1258 0 R 1265 0 R] +/Parent 2335 0 R +/Kids [1348 0 R 1360 0 R 1365 0 R 1374 0 R 1380 0 R 1389 0 R] >> -% 1281 0 obj +% 1400 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1278 0 R 1286 0 R 1295 0 R 1303 0 R 1307 0 R 1322 0 R] +/Parent 2335 0 R +/Kids [1396 0 R 1410 0 R 1417 0 R 1426 0 R 1434 0 R 1438 0 R] >> -% 1330 0 obj +% 1457 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1327 0 R 1334 0 R 1341 0 R 1345 0 R 1351 0 R 1357 0 R] +/Parent 2336 0 R +/Kids [1453 0 R 1459 0 R 1465 0 R 1472 0 R 1476 0 R 1481 0 R] >> -% 1371 0 obj +% 1492 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1363 0 R 1374 0 R 1379 0 R 1388 0 R 1395 0 R 1400 0 R] +/Parent 2336 0 R +/Kids [1487 0 R 1494 0 R 1505 0 R 1510 0 R 1519 0 R 1526 0 R] >> -% 1409 0 obj +% 1534 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1406 0 R 1411 0 R 1419 0 R 1424 0 R 1432 0 R 1438 0 R] +/Parent 2336 0 R +/Kids [1531 0 
R 1538 0 R 1542 0 R 1550 0 R 1555 0 R 1563 0 R] >> -% 1456 0 obj +% 1577 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1447 0 R 1461 0 R 1466 0 R 1479 0 R 1485 0 R 1492 0 R] +/Parent 2336 0 R +/Kids [1569 0 R 1579 0 R 1592 0 R 1596 0 R 1609 0 R 1616 0 R] >> -% 1500 0 obj +% 1626 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1496 0 R 1504 0 R 1508 0 R 1517 0 R 1525 0 R 1529 0 R] +/Parent 2336 0 R +/Kids [1623 0 R 1628 0 R 1635 0 R 1639 0 R 1648 0 R 1656 0 R] >> -% 1537 0 obj +% 1663 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1534 0 R 1539 0 R 1546 0 R 1551 0 R 1557 0 R 1563 0 R] +/Parent 2336 0 R +/Kids [1660 0 R 1666 0 R 1670 0 R 1677 0 R 1682 0 R 1688 0 R] >> -% 1574 0 obj +% 1698 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1570 0 R 1577 0 R 1584 0 R 1591 0 R 1595 0 R 1605 0 R] +/Parent 2337 0 R +/Kids [1694 0 R 1701 0 R 1707 0 R 1714 0 R 1722 0 R 1726 0 R] >> -% 1612 0 obj +% 1739 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1609 0 R 1614 0 R 1627 0 R 1631 0 R 1637 0 R 1643 0 R] +/Parent 2337 0 R +/Kids [1736 0 R 1741 0 R 1745 0 R 1758 0 R 1762 0 R 1768 0 R] >> -% 1653 0 obj +% 1780 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1650 0 R 1655 0 R 1659 0 R 1663 0 R 1667 0 R 1671 0 R] +/Parent 2337 0 R +/Kids [1774 0 R 1782 0 R 1786 0 R 1790 0 R 1794 0 R 1798 0 R] >> -% 1683 0 obj +% 1805 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1676 0 R 1685 0 R 1689 0 R 1696 0 R 1700 0 R 1707 0 R] +/Parent 2337 0 R +/Kids [1802 0 R 1807 0 R 1815 0 R 1819 0 R 1826 0 R 1831 0 R] >> -% 1717 0 obj +% 1841 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1711 0 R 1719 0 R 1723 0 R 1730 0 R 1734 0 R 1741 0 R] +/Parent 2337 0 R +/Kids [1838 0 R 1843 0 R 1850 0 R 1854 0 R 1861 0 R 1865 0 R] >> -% 1754 0 obj +% 1875 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1745 0 R 1756 0 R 1761 0 R 1768 0 R 1774 0 R 1778 0 R] +/Parent 2337 0 R +/Kids [1872 0 R 1877 0 R 1887 0 R 1892 0 R 1899 0 R 
1905 0 R] >> -% 1789 0 obj +% 1914 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1784 0 R 1792 0 R 1798 0 R 1804 0 R 1809 0 R 1816 0 R] +/Parent 2338 0 R +/Kids [1909 0 R 1916 0 R 1922 0 R 1928 0 R 1934 0 R 1940 0 R] >> -% 1828 0 obj +% 1952 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1823 0 R 1831 0 R 1838 0 R 1845 0 R 1851 0 R 1855 0 R] +/Parent 2338 0 R +/Kids [1947 0 R 1955 0 R 1962 0 R 1969 0 R 1976 0 R 1982 0 R] >> -% 1865 0 obj +% 1989 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1861 0 R 1871 0 R 1875 0 R 1883 0 R 1888 0 R 1893 0 R] +/Parent 2338 0 R +/Kids [1986 0 R 1994 0 R 2005 0 R 2009 0 R 2018 0 R 2022 0 R] >> -% 1902 0 obj +% 2034 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1899 0 R 1904 0 R 1911 0 R 1922 0 R 1927 0 R 1931 0 R] +/Parent 2338 0 R +/Kids [2031 0 R 2037 0 R 2041 0 R 2047 0 R 2056 0 R 2063 0 R] >> -% 1942 0 obj +% 2070 0 obj << /Type /Pages -/Count 2 -/Parent 1993 0 R -/Kids [1935 0 R 1944 0 R] +/Count 6 +/Parent 2338 0 R +/Kids [2067 0 R 2075 0 R 2086 0 R 2091 0 R 2099 0 R 2108 0 R] >> -% 1989 0 obj +% 2115 0 obj +<< +/Type /Pages +/Count 6 +/Parent 2338 0 R +/Kids [2112 0 R 2123 0 R 2137 0 R 2146 0 R 2157 0 R 2186 0 R] +>> +% 2211 0 obj +<< +/Type /Pages +/Count 6 +/Parent 2339 0 R +/Kids [2206 0 R 2225 0 R 2241 0 R 2245 0 R 2249 0 R 2254 0 R] +>> +% 2261 0 obj +<< +/Type /Pages +/Count 4 +/Parent 2339 0 R +/Kids [2258 0 R 2263 0 R 2267 0 R 2276 0 R] +>> +% 2334 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [587 0 R 780 0 R 871 0 R 927 0 R 962 0 R 991 0 R] +/Parent 2340 0 R +/Kids [688 0 R 894 0 R 978 0 R 1052 0 R 1087 0 R 1117 0 R] >> -% 1990 0 obj +% 2335 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [1034 0 R 1079 0 R 1121 0 R 1180 0 R 1232 0 R 1281 0 R] +/Parent 2340 0 R +/Kids [1158 0 R 1205 0 R 1244 0 R 1300 0 R 1351 0 R 1400 0 R] >> -% 1991 0 obj +% 2336 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [1330 0 R 1371 0 R 1409 0 R 1456 0 R 1500 0 R 1537 
0 R] +/Parent 2340 0 R +/Kids [1457 0 R 1492 0 R 1534 0 R 1577 0 R 1626 0 R 1663 0 R] >> -% 1992 0 obj +% 2337 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [1574 0 R 1612 0 R 1653 0 R 1683 0 R 1717 0 R 1754 0 R] +/Parent 2340 0 R +/Kids [1698 0 R 1739 0 R 1780 0 R 1805 0 R 1841 0 R 1875 0 R] >> -% 1993 0 obj +% 2338 0 obj << /Type /Pages -/Count 26 -/Parent 1994 0 R -/Kids [1789 0 R 1828 0 R 1865 0 R 1902 0 R 1942 0 R] +/Count 36 +/Parent 2340 0 R +/Kids [1914 0 R 1952 0 R 1989 0 R 2034 0 R 2070 0 R 2115 0 R] >> -% 1994 0 obj + +endstream +endobj +2341 0 obj +<< +/Type /ObjStm +/N 100 +/First 888 +/Length 9739 +>> +stream +2339 0 2340 85 2342 190 675 259 671 333 667 421 663 509 659 597 655 685 651 773 +647 861 643 949 639 1037 635 1125 631 1213 627 1301 623 1389 619 1463 615 1575 611 1649 +607 1737 602 1825 598 1899 594 2024 590 2098 586 2172 582 2297 578 2371 574 2459 570 2547 +566 2635 562 2723 558 2811 554 2899 550 2987 546 3061 542 3186 538 3260 534 3348 530 3436 +526 3524 522 3612 518 3686 514 3811 510 3885 506 3973 502 4061 498 4135 494 4260 490 4334 +486 4422 482 4510 478 4598 474 4686 470 4774 466 4862 462 4950 458 5038 454 5126 450 5214 +446 5302 442 5390 438 5478 434 5566 430 5654 426 5728 422 5854 418 5928 414 6016 410 6104 +406 6192 401 6280 397 6368 393 6456 389 6544 385 6632 381 6720 377 6808 373 6896 369 6984 +365 7072 361 7160 357 7248 353 7336 349 7424 345 7512 341 7600 337 7688 333 7776 329 7864 +325 7952 321 8040 317 8128 313 8202 309 8328 305 8402 301 8490 297 8578 293 8652 289 8777 +% 2339 0 obj << /Type /Pages -/Count 170 -/Kids [1989 0 R 1990 0 R 1991 0 R 1992 0 R 1993 0 R] +/Count 10 +/Parent 2340 0 R +/Kids [2211 0 R 2261 0 R] >> -% 1995 0 obj +% 2340 0 obj +<< +/Type /Pages +/Count 190 +/Kids [2334 0 R 2335 0 R 2336 0 R 2337 0 R 2338 0 R 2339 0 R] +>> +% 2342 0 obj << /Type /Outlines /First 4 0 R /Last 4 0 R /Count 1 >> +% 675 0 obj +<< +/Title 676 0 R +/A 673 0 R +/Parent 619 0 R +/Prev 671 0 R +>> +% 671 0 obj +<< +/Title 672 
0 R +/A 669 0 R +/Parent 619 0 R +/Prev 667 0 R +/Next 675 0 R +>> +% 667 0 obj +<< +/Title 668 0 R +/A 665 0 R +/Parent 619 0 R +/Prev 663 0 R +/Next 671 0 R +>> +% 663 0 obj +<< +/Title 664 0 R +/A 661 0 R +/Parent 619 0 R +/Prev 659 0 R +/Next 667 0 R +>> +% 659 0 obj +<< +/Title 660 0 R +/A 657 0 R +/Parent 619 0 R +/Prev 655 0 R +/Next 663 0 R +>> +% 655 0 obj +<< +/Title 656 0 R +/A 653 0 R +/Parent 619 0 R +/Prev 651 0 R +/Next 659 0 R +>> +% 651 0 obj +<< +/Title 652 0 R +/A 649 0 R +/Parent 619 0 R +/Prev 647 0 R +/Next 655 0 R +>> +% 647 0 obj +<< +/Title 648 0 R +/A 645 0 R +/Parent 619 0 R +/Prev 643 0 R +/Next 651 0 R +>> +% 643 0 obj +<< +/Title 644 0 R +/A 641 0 R +/Parent 619 0 R +/Prev 639 0 R +/Next 647 0 R +>> +% 639 0 obj +<< +/Title 640 0 R +/A 637 0 R +/Parent 619 0 R +/Prev 635 0 R +/Next 643 0 R +>> +% 635 0 obj +<< +/Title 636 0 R +/A 633 0 R +/Parent 619 0 R +/Prev 631 0 R +/Next 639 0 R +>> +% 631 0 obj +<< +/Title 632 0 R +/A 629 0 R +/Parent 619 0 R +/Prev 627 0 R +/Next 635 0 R +>> +% 627 0 obj +<< +/Title 628 0 R +/A 625 0 R +/Parent 619 0 R +/Prev 623 0 R +/Next 631 0 R +>> +% 623 0 obj +<< +/Title 624 0 R +/A 621 0 R +/Parent 619 0 R +/Next 627 0 R +>> +% 619 0 obj +<< +/Title 620 0 R +/A 617 0 R +/Parent 4 0 R +/Prev 598 0 R +/First 623 0 R +/Last 675 0 R +/Count -14 +>> +% 615 0 obj +<< +/Title 616 0 R +/A 613 0 R +/Parent 598 0 R +/Prev 611 0 R +>> +% 611 0 obj +<< +/Title 612 0 R +/A 609 0 R +/Parent 598 0 R +/Prev 607 0 R +/Next 615 0 R +>> +% 607 0 obj +<< +/Title 608 0 R +/A 604 0 R +/Parent 598 0 R +/Prev 602 0 R +/Next 611 0 R +>> +% 602 0 obj +<< +/Title 603 0 R +/A 600 0 R +/Parent 598 0 R +/Next 607 0 R +>> +% 598 0 obj +<< +/Title 599 0 R +/A 596 0 R +/Parent 4 0 R +/Prev 586 0 R +/Next 619 0 R +/First 602 0 R +/Last 615 0 R +/Count -4 +>> +% 594 0 obj +<< +/Title 595 0 R +/A 592 0 R +/Parent 586 0 R +/Prev 590 0 R +>> +% 590 0 obj +<< +/Title 591 0 R +/A 588 0 R +/Parent 586 0 R +/Next 594 0 R +>> +% 586 0 obj +<< 
+/Title 587 0 R +/A 584 0 R +/Parent 4 0 R +/Prev 546 0 R +/Next 598 0 R +/First 590 0 R +/Last 594 0 R +/Count -2 +>> +% 582 0 obj +<< +/Title 583 0 R +/A 580 0 R +/Parent 546 0 R +/Prev 578 0 R +>> +% 578 0 obj +<< +/Title 579 0 R +/A 576 0 R +/Parent 546 0 R +/Prev 574 0 R +/Next 582 0 R +>> % 574 0 obj << /Title 575 0 R /A 572 0 R -/Parent 570 0 R +/Parent 546 0 R +/Prev 570 0 R +/Next 578 0 R >> % 570 0 obj << /Title 571 0 R /A 568 0 R -/Parent 4 0 R -/Prev 542 0 R -/First 574 0 R -/Last 574 0 R -/Count -1 +/Parent 546 0 R +/Prev 566 0 R +/Next 574 0 R >> % 566 0 obj << /Title 567 0 R /A 564 0 R -/Parent 542 0 R +/Parent 546 0 R /Prev 562 0 R +/Next 570 0 R >> % 562 0 obj << /Title 563 0 R /A 560 0 R -/Parent 542 0 R +/Parent 546 0 R /Prev 558 0 R /Next 566 0 R >> @@ -28652,7 +36987,7 @@ stream << /Title 559 0 R /A 556 0 R -/Parent 542 0 R +/Parent 546 0 R /Prev 554 0 R /Next 562 0 R >> @@ -28660,7 +36995,7 @@ stream << /Title 555 0 R /A 552 0 R -/Parent 542 0 R +/Parent 546 0 R /Prev 550 0 R /Next 558 0 R >> @@ -28668,40 +37003,40 @@ stream << /Title 551 0 R /A 548 0 R -/Parent 542 0 R -/Prev 546 0 R +/Parent 546 0 R /Next 554 0 R >> % 546 0 obj << /Title 547 0 R /A 544 0 R -/Parent 542 0 R -/Next 550 0 R +/Parent 4 0 R +/Prev 518 0 R +/Next 586 0 R +/First 550 0 R +/Last 582 0 R +/Count -9 >> % 542 0 obj << /Title 543 0 R /A 540 0 R -/Parent 4 0 R -/Prev 514 0 R -/Next 570 0 R -/First 546 0 R -/Last 566 0 R -/Count -6 +/Parent 518 0 R +/Prev 538 0 R >> % 538 0 obj << /Title 539 0 R /A 536 0 R -/Parent 514 0 R +/Parent 518 0 R /Prev 534 0 R +/Next 542 0 R >> % 534 0 obj << /Title 535 0 R /A 532 0 R -/Parent 514 0 R +/Parent 518 0 R /Prev 530 0 R /Next 538 0 R >> @@ -28709,7 +37044,7 @@ stream << /Title 531 0 R /A 528 0 R -/Parent 514 0 R +/Parent 518 0 R /Prev 526 0 R /Next 534 0 R >> @@ -28717,7 +37052,7 @@ stream << /Title 527 0 R /A 524 0 R -/Parent 514 0 R +/Parent 518 0 R /Prev 522 0 R /Next 530 0 R >> @@ -28725,40 +37060,40 @@ stream << /Title 523 0 R 
/A 520 0 R -/Parent 514 0 R -/Prev 518 0 R +/Parent 518 0 R /Next 526 0 R >> % 518 0 obj << /Title 519 0 R /A 516 0 R -/Parent 514 0 R -/Next 522 0 R +/Parent 4 0 R +/Prev 498 0 R +/Next 546 0 R +/First 522 0 R +/Last 542 0 R +/Count -6 >> % 514 0 obj << /Title 515 0 R /A 512 0 R -/Parent 4 0 R -/Prev 494 0 R -/Next 542 0 R -/First 518 0 R -/Last 538 0 R -/Count -6 +/Parent 498 0 R +/Prev 510 0 R >> % 510 0 obj << /Title 511 0 R /A 508 0 R -/Parent 494 0 R +/Parent 498 0 R /Prev 506 0 R +/Next 514 0 R >> % 506 0 obj << /Title 507 0 R /A 504 0 R -/Parent 494 0 R +/Parent 498 0 R /Prev 502 0 R /Next 510 0 R >> @@ -28766,40 +37101,40 @@ stream << /Title 503 0 R /A 500 0 R -/Parent 494 0 R -/Prev 498 0 R +/Parent 498 0 R /Next 506 0 R >> % 498 0 obj << /Title 499 0 R /A 496 0 R -/Parent 494 0 R -/Next 502 0 R +/Parent 4 0 R +/Prev 426 0 R +/Next 518 0 R +/First 502 0 R +/Last 514 0 R +/Count -4 >> % 494 0 obj << /Title 495 0 R /A 492 0 R -/Parent 4 0 R -/Prev 422 0 R -/Next 514 0 R -/First 498 0 R -/Last 510 0 R -/Count -4 +/Parent 426 0 R +/Prev 490 0 R >> % 490 0 obj << /Title 491 0 R /A 488 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 486 0 R +/Next 494 0 R >> % 486 0 obj << /Title 487 0 R /A 484 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 482 0 R /Next 490 0 R >> @@ -28807,7 +37142,7 @@ stream << /Title 483 0 R /A 480 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 478 0 R /Next 486 0 R >> @@ -28815,7 +37150,7 @@ stream << /Title 479 0 R /A 476 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 474 0 R /Next 482 0 R >> @@ -28823,7 +37158,7 @@ stream << /Title 475 0 R /A 472 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 470 0 R /Next 478 0 R >> @@ -28831,7 +37166,7 @@ stream << /Title 471 0 R /A 468 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 466 0 R /Next 474 0 R >> @@ -28839,7 +37174,7 @@ stream << /Title 467 0 R /A 464 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 462 0 R /Next 470 0 R >> @@ -28847,7 +37182,7 @@ stream << /Title 463 0 R /A 460 0 R -/Parent 422 0 R +/Parent 426 0 
R /Prev 458 0 R /Next 466 0 R >> @@ -28855,7 +37190,7 @@ stream << /Title 459 0 R /A 456 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 454 0 R /Next 462 0 R >> @@ -28863,7 +37198,7 @@ stream << /Title 455 0 R /A 452 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 450 0 R /Next 458 0 R >> @@ -28871,7 +37206,7 @@ stream << /Title 451 0 R /A 448 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 446 0 R /Next 454 0 R >> @@ -28879,7 +37214,7 @@ stream << /Title 447 0 R /A 444 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 442 0 R /Next 450 0 R >> @@ -28887,7 +37222,7 @@ stream << /Title 443 0 R /A 440 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 438 0 R /Next 446 0 R >> @@ -28895,7 +37230,7 @@ stream << /Title 439 0 R /A 436 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 434 0 R /Next 442 0 R >> @@ -28903,7 +37238,7 @@ stream << /Title 435 0 R /A 432 0 R -/Parent 422 0 R +/Parent 426 0 R /Prev 430 0 R /Next 438 0 R >> @@ -28911,40 +37246,40 @@ stream << /Title 431 0 R /A 428 0 R -/Parent 422 0 R -/Prev 426 0 R +/Parent 426 0 R /Next 434 0 R >> % 426 0 obj << /Title 427 0 R /A 424 0 R -/Parent 422 0 R -/Next 430 0 R +/Parent 4 0 R +/Prev 313 0 R +/Next 498 0 R +/First 430 0 R +/Last 494 0 R +/Count -17 >> % 422 0 obj << /Title 423 0 R /A 420 0 R -/Parent 4 0 R -/Prev 309 0 R -/Next 494 0 R -/First 426 0 R -/Last 490 0 R -/Count -17 +/Parent 313 0 R +/Prev 418 0 R >> % 418 0 obj << /Title 419 0 R /A 416 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 414 0 R +/Next 422 0 R >> % 414 0 obj << /Title 415 0 R /A 412 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 410 0 R /Next 418 0 R >> @@ -28952,7 +37287,7 @@ stream << /Title 411 0 R /A 408 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 406 0 R /Next 414 0 R >> @@ -28960,7 +37295,7 @@ stream << /Title 407 0 R /A 403 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 401 0 R /Next 410 0 R >> @@ -28968,7 +37303,7 @@ stream << /Title 402 0 R /A 399 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 397 0 R /Next 406 0 R >> @@ -28976,7 +37311,7 @@ stream << /Title 398 0 
R /A 395 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 393 0 R /Next 401 0 R >> @@ -28984,7 +37319,7 @@ stream << /Title 394 0 R /A 391 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 389 0 R /Next 397 0 R >> @@ -28992,7 +37327,7 @@ stream << /Title 390 0 R /A 387 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 385 0 R /Next 393 0 R >> @@ -29000,7 +37335,7 @@ stream << /Title 386 0 R /A 383 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 381 0 R /Next 389 0 R >> @@ -29008,7 +37343,7 @@ stream << /Title 382 0 R /A 379 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 377 0 R /Next 385 0 R >> @@ -29016,7 +37351,7 @@ stream << /Title 378 0 R /A 375 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 373 0 R /Next 381 0 R >> @@ -29024,7 +37359,7 @@ stream << /Title 374 0 R /A 371 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 369 0 R /Next 377 0 R >> @@ -29032,7 +37367,7 @@ stream << /Title 370 0 R /A 367 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 365 0 R /Next 373 0 R >> @@ -29040,7 +37375,7 @@ stream << /Title 366 0 R /A 363 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 361 0 R /Next 369 0 R >> @@ -29048,7 +37383,7 @@ stream << /Title 362 0 R /A 359 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 357 0 R /Next 365 0 R >> @@ -29056,7 +37391,7 @@ stream << /Title 358 0 R /A 355 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 353 0 R /Next 361 0 R >> @@ -29064,7 +37399,7 @@ stream << /Title 354 0 R /A 351 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 349 0 R /Next 357 0 R >> @@ -29072,7 +37407,7 @@ stream << /Title 350 0 R /A 347 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 345 0 R /Next 353 0 R >> @@ -29080,7 +37415,7 @@ stream << /Title 346 0 R /A 343 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 341 0 R /Next 349 0 R >> @@ -29088,36 +37423,15 @@ stream << /Title 342 0 R /A 339 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 337 0 R /Next 345 0 R >> - -endstream -endobj -1996 0 obj -<< -/Type /ObjStm -/N 100 -/First 875 -/Length 11008 ->> -stream -337 0 333 88 329 176 325 264 321 352 317 440 313 528 309 602 305 728 
301 802 -297 890 293 978 289 1052 285 1177 281 1251 277 1339 273 1427 269 1515 265 1603 261 1691 -257 1779 253 1867 249 1955 245 2043 241 2131 237 2219 233 2307 229 2395 225 2483 221 2557 -217 2682 213 2755 209 2842 205 2916 200 3004 196 3092 192 3180 188 3268 184 3342 180 3468 -176 3542 172 3630 168 3718 164 3806 160 3894 156 3982 152 4070 148 4158 144 4246 140 4334 -136 4422 132 4510 128 4598 124 4686 120 4774 116 4862 112 4950 108 5038 104 5112 100 5238 -96 5309 92 5392 88 5474 84 5556 80 5638 76 5720 72 5802 68 5884 64 5966 60 6048 -56 6130 52 6212 48 6294 44 6376 40 6445 36 6554 32 6674 28 6743 24 6799 20 6918 -16 7000 12 7069 8 7186 4 7251 1997 7344 1998 7540 1999 7713 2000 7893 2001 8070 2002 8247 -2003 8427 2004 8605 2005 8785 2006 8963 2007 9134 2008 9299 2009 9465 2010 9629 2011 9793 2012 9963 % 337 0 obj << /Title 338 0 R /A 335 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 333 0 R /Next 341 0 R >> @@ -29125,7 +37439,7 @@ stream << /Title 334 0 R /A 331 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 329 0 R /Next 337 0 R >> @@ -29133,7 +37447,7 @@ stream << /Title 330 0 R /A 327 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 325 0 R /Next 333 0 R >> @@ -29141,7 +37455,7 @@ stream << /Title 326 0 R /A 323 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 321 0 R /Next 329 0 R >> @@ -29149,7 +37463,7 @@ stream << /Title 322 0 R /A 319 0 R -/Parent 309 0 R +/Parent 313 0 R /Prev 317 0 R /Next 325 0 R >> @@ -29157,40 +37471,40 @@ stream << /Title 318 0 R /A 315 0 R -/Parent 309 0 R -/Prev 313 0 R +/Parent 313 0 R /Next 321 0 R >> % 313 0 obj << /Title 314 0 R /A 311 0 R -/Parent 309 0 R -/Next 317 0 R +/Parent 4 0 R +/Prev 293 0 R +/Next 426 0 R +/First 317 0 R +/Last 422 0 R +/Count -27 >> % 309 0 obj << /Title 310 0 R /A 307 0 R -/Parent 4 0 R -/Prev 289 0 R -/Next 422 0 R -/First 313 0 R -/Last 418 0 R -/Count -27 +/Parent 293 0 R +/Prev 305 0 R >> % 305 0 obj << /Title 306 0 R /A 303 0 R -/Parent 289 0 R +/Parent 293 0 R /Prev 301 0 R +/Next 309 0 R >> % 301 0 obj << 
/Title 302 0 R /A 299 0 R -/Parent 289 0 R +/Parent 293 0 R /Prev 297 0 R /Next 305 0 R >> @@ -29198,40 +37512,61 @@ stream << /Title 298 0 R /A 295 0 R -/Parent 289 0 R -/Prev 293 0 R +/Parent 293 0 R /Next 301 0 R >> % 293 0 obj << /Title 294 0 R /A 291 0 R -/Parent 289 0 R -/Next 297 0 R +/Parent 4 0 R +/Prev 225 0 R +/Next 313 0 R +/First 297 0 R +/Last 309 0 R +/Count -4 >> % 289 0 obj << /Title 290 0 R /A 287 0 R -/Parent 4 0 R -/Prev 221 0 R -/Next 309 0 R -/First 293 0 R -/Last 305 0 R -/Count -4 +/Parent 225 0 R +/Prev 285 0 R +>> + +endstream +endobj +2343 0 obj +<< +/Type /ObjStm +/N 100 +/First 895 +/Length 12279 >> +stream +285 0 281 88 277 176 273 264 269 352 265 440 261 528 257 616 253 704 249 792 +245 880 241 968 237 1056 233 1144 229 1232 225 1306 221 1431 217 1504 213 1591 209 1665 +205 1753 200 1841 196 1929 192 2017 188 2091 184 2217 180 2291 176 2379 172 2467 168 2555 +164 2643 160 2731 156 2819 152 2907 148 2995 144 3083 140 3171 136 3259 132 3347 128 3435 +124 3523 120 3611 116 3699 112 3787 108 3861 104 3987 100 4060 96 4145 92 4228 88 4310 +84 4392 80 4474 76 4556 72 4638 68 4720 64 4802 60 4884 56 4966 52 5048 48 5130 +44 5199 40 5308 36 5428 32 5497 28 5553 24 5672 20 5754 16 5823 12 5941 8 6021 +4 6086 2344 6179 2345 6375 2346 6548 2347 6728 2348 6905 2349 7082 2350 7262 2351 7440 2352 7620 +2353 7798 2354 7978 2355 8149 2356 8315 2357 8487 2358 8656 2359 8825 2360 8997 2361 9167 2362 9339 +2363 9509 2364 9681 2365 9851 2366 10023 2367 10193 2368 10365 2369 10534 2370 10708 2371 10930 2372 11135 % 285 0 obj << /Title 286 0 R /A 283 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 281 0 R +/Next 289 0 R >> % 281 0 obj << /Title 282 0 R /A 279 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 277 0 R /Next 285 0 R >> @@ -29239,7 +37574,7 @@ stream << /Title 278 0 R /A 275 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 273 0 R /Next 281 0 R >> @@ -29247,7 +37582,7 @@ stream << /Title 274 0 R /A 271 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 269 0 R 
/Next 277 0 R >> @@ -29255,7 +37590,7 @@ stream << /Title 270 0 R /A 267 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 265 0 R /Next 273 0 R >> @@ -29263,7 +37598,7 @@ stream << /Title 266 0 R /A 263 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 261 0 R /Next 269 0 R >> @@ -29271,7 +37606,7 @@ stream << /Title 262 0 R /A 259 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 257 0 R /Next 265 0 R >> @@ -29279,7 +37614,7 @@ stream << /Title 258 0 R /A 255 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 253 0 R /Next 261 0 R >> @@ -29287,7 +37622,7 @@ stream << /Title 254 0 R /A 251 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 249 0 R /Next 257 0 R >> @@ -29295,7 +37630,7 @@ stream << /Title 250 0 R /A 247 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 245 0 R /Next 253 0 R >> @@ -29303,7 +37638,7 @@ stream << /Title 246 0 R /A 243 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 241 0 R /Next 249 0 R >> @@ -29311,7 +37646,7 @@ stream << /Title 242 0 R /A 239 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 237 0 R /Next 245 0 R >> @@ -29319,7 +37654,7 @@ stream << /Title 238 0 R /A 235 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 233 0 R /Next 241 0 R >> @@ -29327,7 +37662,7 @@ stream << /Title 234 0 R /A 231 0 R -/Parent 221 0 R +/Parent 225 0 R /Prev 229 0 R /Next 237 0 R >> @@ -29335,55 +37670,55 @@ stream << /Title 230 0 R /A 227 0 R -/Parent 221 0 R -/Prev 225 0 R +/Parent 225 0 R /Next 233 0 R >> % 225 0 obj << /Title 226 0 R /A 223 0 R -/Parent 221 0 R -/Next 229 0 R +/Parent 4 0 R +/Prev 40 0 R +/Next 293 0 R +/First 229 0 R +/Last 289 0 R +/Count -16 >> % 221 0 obj << /Title 222 0 R /A 219 0 R -/Parent 4 0 R -/Prev 36 0 R -/Next 289 0 R -/First 225 0 R -/Last 285 0 R -/Count -16 +/Parent 40 0 R +/Prev 217 0 R >> % 217 0 obj << /Title 218 0 R /A 215 0 R -/Parent 36 0 R -/Prev 213 0 R +/Parent 40 0 R +/Prev 188 0 R +/Next 221 0 R >> % 213 0 obj << /Title 214 0 R /A 211 0 R -/Parent 36 0 R -/Prev 184 0 R -/Next 217 0 R +/Parent 188 0 R +/Prev 209 0 R >> % 209 0 obj << /Title 210 0 R /A 207 
0 R -/Parent 184 0 R +/Parent 188 0 R /Prev 205 0 R +/Next 213 0 R >> % 205 0 obj << /Title 206 0 R /A 202 0 R -/Parent 184 0 R +/Parent 188 0 R /Prev 200 0 R /Next 209 0 R >> @@ -29391,7 +37726,7 @@ stream << /Title 201 0 R /A 198 0 R -/Parent 184 0 R +/Parent 188 0 R /Prev 196 0 R /Next 205 0 R >> @@ -29399,7 +37734,7 @@ stream << /Title 197 0 R /A 194 0 R -/Parent 184 0 R +/Parent 188 0 R /Prev 192 0 R /Next 200 0 R >> @@ -29407,40 +37742,40 @@ stream << /Title 193 0 R /A 190 0 R -/Parent 184 0 R -/Prev 188 0 R +/Parent 188 0 R /Next 196 0 R >> % 188 0 obj << /Title 189 0 R /A 186 0 R -/Parent 184 0 R -/Next 192 0 R +/Parent 40 0 R +/Prev 108 0 R +/Next 217 0 R +/First 192 0 R +/Last 213 0 R +/Count -6 >> % 184 0 obj << /Title 185 0 R /A 182 0 R -/Parent 36 0 R -/Prev 104 0 R -/Next 213 0 R -/First 188 0 R -/Last 209 0 R -/Count -6 +/Parent 108 0 R +/Prev 180 0 R >> % 180 0 obj << /Title 181 0 R /A 178 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 176 0 R +/Next 184 0 R >> % 176 0 obj << /Title 177 0 R /A 174 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 172 0 R /Next 180 0 R >> @@ -29448,7 +37783,7 @@ stream << /Title 173 0 R /A 170 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 168 0 R /Next 176 0 R >> @@ -29456,7 +37791,7 @@ stream << /Title 169 0 R /A 166 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 164 0 R /Next 172 0 R >> @@ -29464,7 +37799,7 @@ stream << /Title 165 0 R /A 162 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 160 0 R /Next 168 0 R >> @@ -29472,7 +37807,7 @@ stream << /Title 161 0 R /A 158 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 156 0 R /Next 164 0 R >> @@ -29480,7 +37815,7 @@ stream << /Title 157 0 R /A 154 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 152 0 R /Next 160 0 R >> @@ -29488,7 +37823,7 @@ stream << /Title 153 0 R /A 150 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 148 0 R /Next 156 0 R >> @@ -29496,7 +37831,7 @@ stream << /Title 149 0 R /A 146 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 144 0 R /Next 152 0 R >> @@ -29504,7 +37839,7 @@ 
stream << /Title 145 0 R /A 142 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 140 0 R /Next 148 0 R >> @@ -29512,7 +37847,7 @@ stream << /Title 141 0 R /A 138 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 136 0 R /Next 144 0 R >> @@ -29520,7 +37855,7 @@ stream << /Title 137 0 R /A 134 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 132 0 R /Next 140 0 R >> @@ -29528,7 +37863,7 @@ stream << /Title 133 0 R /A 130 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 128 0 R /Next 136 0 R >> @@ -29536,7 +37871,7 @@ stream << /Title 129 0 R /A 126 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 124 0 R /Next 132 0 R >> @@ -29544,7 +37879,7 @@ stream << /Title 125 0 R /A 122 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 120 0 R /Next 128 0 R >> @@ -29552,7 +37887,7 @@ stream << /Title 121 0 R /A 118 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 116 0 R /Next 124 0 R >> @@ -29560,7 +37895,7 @@ stream << /Title 117 0 R /A 114 0 R -/Parent 104 0 R +/Parent 108 0 R /Prev 112 0 R /Next 120 0 R >> @@ -29568,40 +37903,40 @@ stream << /Title 113 0 R /A 110 0 R -/Parent 104 0 R -/Prev 108 0 R +/Parent 108 0 R /Next 116 0 R >> % 108 0 obj << /Title 109 0 R /A 106 0 R -/Parent 104 0 R -/Next 112 0 R +/Parent 40 0 R +/Prev 44 0 R +/Next 188 0 R +/First 112 0 R +/Last 184 0 R +/Count -19 >> % 104 0 obj << /Title 105 0 R /A 102 0 R -/Parent 36 0 R -/Prev 40 0 R -/Next 184 0 R -/First 108 0 R -/Last 180 0 R -/Count -19 +/Parent 44 0 R +/Prev 100 0 R >> % 100 0 obj << /Title 101 0 R /A 98 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 96 0 R +/Next 104 0 R >> % 96 0 obj << /Title 97 0 R /A 94 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 92 0 R /Next 100 0 R >> @@ -29609,7 +37944,7 @@ stream << /Title 93 0 R /A 90 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 88 0 R /Next 96 0 R >> @@ -29617,7 +37952,7 @@ stream << /Title 89 0 R /A 86 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 84 0 R /Next 92 0 R >> @@ -29625,7 +37960,7 @@ stream << /Title 85 0 R /A 82 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 80 0 R /Next 88 0 R >> @@ 
-29633,7 +37968,7 @@ stream << /Title 81 0 R /A 78 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 76 0 R /Next 84 0 R >> @@ -29641,7 +37976,7 @@ stream << /Title 77 0 R /A 74 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 72 0 R /Next 80 0 R >> @@ -29649,7 +37984,7 @@ stream << /Title 73 0 R /A 70 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 68 0 R /Next 76 0 R >> @@ -29657,7 +37992,7 @@ stream << /Title 69 0 R /A 66 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 64 0 R /Next 72 0 R >> @@ -29665,7 +38000,7 @@ stream << /Title 65 0 R /A 62 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 60 0 R /Next 68 0 R >> @@ -29673,7 +38008,7 @@ stream << /Title 61 0 R /A 58 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 56 0 R /Next 64 0 R >> @@ -29681,7 +38016,7 @@ stream << /Title 57 0 R /A 54 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 52 0 R /Next 60 0 R >> @@ -29689,7 +38024,7 @@ stream << /Title 53 0 R /A 50 0 R -/Parent 40 0 R +/Parent 44 0 R /Prev 48 0 R /Next 56 0 R >> @@ -29697,8 +38032,7 @@ stream << /Title 49 0 R /A 46 0 R -/Parent 40 0 R -/Prev 44 0 R +/Parent 44 0 R /Next 52 0 R >> % 44 0 obj @@ -29706,67 +38040,71 @@ stream /Title 45 0 R /A 42 0 R /Parent 40 0 R -/Next 48 0 R +/Next 108 0 R +/First 48 0 R +/Last 104 0 R +/Count -15 >> % 40 0 obj << /Title 41 0 R /A 38 0 R -/Parent 36 0 R -/Next 104 0 R +/Parent 4 0 R +/Prev 16 0 R +/Next 225 0 R /First 44 0 R -/Last 100 0 R -/Count -15 +/Last 221 0 R +/Count -5 >> % 36 0 obj << /Title 37 0 R /A 34 0 R -/Parent 4 0 R -/Prev 12 0 R -/Next 221 0 R -/First 40 0 R -/Last 217 0 R -/Count -5 +/Parent 16 0 R +/Prev 28 0 R >> % 32 0 obj << /Title 33 0 R /A 30 0 R -/Parent 12 0 R -/Prev 24 0 R +/Parent 28 0 R >> % 28 0 obj << /Title 29 0 R /A 26 0 R -/Parent 24 0 R +/Parent 16 0 R +/Prev 24 0 R +/Next 36 0 R +/First 32 0 R +/Last 32 0 R +/Count -1 >> % 24 0 obj << /Title 25 0 R /A 22 0 R -/Parent 12 0 R +/Parent 16 0 R /Prev 20 0 R -/Next 32 0 R -/First 28 0 R -/Last 28 0 R -/Count -1 +/Next 28 0 R >> % 20 0 obj << /Title 21 0 R /A 18 0 R -/Parent 12 0 
R -/Prev 16 0 R +/Parent 16 0 R /Next 24 0 R >> % 16 0 obj << /Title 17 0 R /A 14 0 R -/Parent 12 0 R -/Next 20 0 R +/Parent 4 0 R +/Prev 12 0 R +/Next 40 0 R +/First 20 0 R +/Last 36 0 R +/Count -4 >> % 12 0 obj << @@ -29774,10 +38112,7 @@ stream /A 10 0 R /Parent 4 0 R /Prev 8 0 R -/Next 36 0 R -/First 16 0 R -/Last 32 0 R -/Count -4 +/Next 16 0 R >> % 8 0 obj << @@ -29790,652 +38125,730 @@ stream << /Title 5 0 R /A 1 0 R -/Parent 1995 0 R +/Parent 2342 0 R /First 8 0 R -/Last 570 0 R -/Count -11 +/Last 619 0 R +/Count -14 >> -% 1997 0 obj +% 2344 0 obj << -/Names [(Doc-Start) 583 0 R (Hfootnote.1) 815 0 R (Hfootnote.2) 816 0 R (Hfootnote.3) 869 0 R (Hfootnote.4) 1864 0 R (Hfootnote.5) 1917 0 R] +/Names [(Doc-Start) 684 0 R (Hfootnote.1) 943 0 R (Hfootnote.2) 944 0 R (Hfootnote.3) 999 0 R (Hfootnote.4) 1997 0 R (Hfootnote.5) 2081 0 R] /Limits [(Doc-Start) (Hfootnote.5)] >> -% 1998 0 obj +% 2345 0 obj << -/Names [(Item.1) 843 0 R (Item.10) 857 0 R (Item.100) 1598 0 R (Item.101) 1599 0 R (Item.102) 1600 0 R (Item.103) 1617 0 R] +/Names [(Item.1) 970 0 R (Item.10) 986 0 R (Item.100) 1711 0 R (Item.101) 1717 0 R (Item.102) 1718 0 R (Item.103) 1729 0 R] /Limits [(Item.1) (Item.103)] >> -% 1999 0 obj +% 2346 0 obj << -/Names [(Item.104) 1618 0 R (Item.105) 1619 0 R (Item.106) 1620 0 R (Item.107) 1621 0 R (Item.108) 1622 0 R (Item.109) 1623 0 R] +/Names [(Item.104) 1730 0 R (Item.105) 1731 0 R (Item.106) 1748 0 R (Item.107) 1749 0 R (Item.108) 1750 0 R (Item.109) 1751 0 R] /Limits [(Item.104) (Item.109)] >> -% 2000 0 obj +% 2347 0 obj << -/Names [(Item.11) 858 0 R (Item.110) 1624 0 R (Item.111) 1625 0 R (Item.112) 1634 0 R (Item.113) 1635 0 R (Item.114) 1640 0 R] +/Names [(Item.11) 987 0 R (Item.110) 1752 0 R (Item.111) 1753 0 R (Item.112) 1754 0 R (Item.113) 1755 0 R (Item.114) 1756 0 R] /Limits [(Item.11) (Item.114)] >> -% 2001 0 obj +% 2348 0 obj << -/Names [(Item.115) 1641 0 R (Item.116) 1646 0 R (Item.117) 1647 0 R (Item.118) 1648 0 R (Item.119) 1679 0 R (Item.12) 
859 0 R] +/Names [(Item.115) 1765 0 R (Item.116) 1766 0 R (Item.117) 1771 0 R (Item.118) 1772 0 R (Item.119) 1777 0 R (Item.12) 988 0 R] /Limits [(Item.115) (Item.12)] >> -% 2002 0 obj +% 2349 0 obj << -/Names [(Item.120) 1680 0 R (Item.121) 1681 0 R (Item.122) 1692 0 R (Item.123) 1693 0 R (Item.124) 1694 0 R (Item.125) 1703 0 R] +/Names [(Item.120) 1778 0 R (Item.121) 1779 0 R (Item.122) 1810 0 R (Item.123) 1811 0 R (Item.124) 1812 0 R (Item.125) 1822 0 R] /Limits [(Item.120) (Item.125)] >> -% 2003 0 obj +% 2350 0 obj << -/Names [(Item.126) 1704 0 R (Item.127) 1705 0 R (Item.128) 1714 0 R (Item.129) 1715 0 R (Item.13) 860 0 R (Item.130) 1716 0 R] +/Names [(Item.126) 1823 0 R (Item.127) 1824 0 R (Item.128) 1834 0 R (Item.129) 1835 0 R (Item.13) 989 0 R (Item.130) 1836 0 R] /Limits [(Item.126) (Item.130)] >> -% 2004 0 obj +% 2351 0 obj << -/Names [(Item.131) 1726 0 R (Item.132) 1727 0 R (Item.133) 1728 0 R (Item.134) 1737 0 R (Item.135) 1738 0 R (Item.136) 1739 0 R] +/Names [(Item.131) 1846 0 R (Item.132) 1847 0 R (Item.133) 1848 0 R (Item.134) 1857 0 R (Item.135) 1858 0 R (Item.136) 1859 0 R] /Limits [(Item.131) (Item.136)] >> -% 2005 0 obj +% 2352 0 obj << -/Names [(Item.137) 1748 0 R (Item.138) 1749 0 R (Item.139) 1751 0 R (Item.14) 861 0 R (Item.140) 1752 0 R (Item.141) 1753 0 R] +/Names [(Item.137) 1868 0 R (Item.138) 1869 0 R (Item.139) 1870 0 R (Item.14) 990 0 R (Item.140) 1880 0 R (Item.141) 1881 0 R] /Limits [(Item.137) (Item.141)] >> -% 2006 0 obj +% 2353 0 obj << -/Names [(Item.142) 1759 0 R (Item.143) 1764 0 R (Item.15) 862 0 R (Item.16) 863 0 R (Item.17) 864 0 R (Item.18) 865 0 R] -/Limits [(Item.142) (Item.18)] +/Names [(Item.142) 1883 0 R (Item.143) 1884 0 R (Item.144) 1885 0 R (Item.145) 1890 0 R (Item.146) 1895 0 R (Item.147) 2150 0 R] +/Limits [(Item.142) (Item.147)] >> -% 2007 0 obj +% 2354 0 obj +<< +/Names [(Item.148) 2151 0 R (Item.149) 2252 0 R (Item.15) 991 0 R (Item.16) 992 0 R (Item.17) 993 0 R (Item.18) 994 0 R] +/Limits [(Item.148) 
(Item.18)] +>> +% 2355 0 obj << -/Names [(Item.19) 866 0 R (Item.2) 844 0 R (Item.20) 867 0 R (Item.21) 868 0 R (Item.22) 877 0 R (Item.23) 878 0 R] +/Names [(Item.19) 995 0 R (Item.2) 971 0 R (Item.20) 996 0 R (Item.21) 997 0 R (Item.22) 998 0 R (Item.23) 1006 0 R] /Limits [(Item.19) (Item.23)] >> -% 2008 0 obj +% 2356 0 obj << -/Names [(Item.24) 879 0 R (Item.25) 880 0 R (Item.26) 881 0 R (Item.27) 882 0 R (Item.28) 896 0 R (Item.29) 897 0 R] +/Names [(Item.24) 1007 0 R (Item.25) 1008 0 R (Item.26) 1009 0 R (Item.27) 1010 0 R (Item.28) 1011 0 R (Item.29) 1025 0 R] /Limits [(Item.24) (Item.29)] >> -% 2009 0 obj +% 2357 0 obj << -/Names [(Item.3) 845 0 R (Item.30) 898 0 R (Item.31) 899 0 R (Item.32) 900 0 R (Item.33) 907 0 R (Item.34) 908 0 R] +/Names [(Item.3) 972 0 R (Item.30) 1026 0 R (Item.31) 1027 0 R (Item.32) 1028 0 R (Item.33) 1029 0 R (Item.34) 1036 0 R] /Limits [(Item.3) (Item.34)] >> -% 2010 0 obj +% 2358 0 obj << -/Names [(Item.35) 909 0 R (Item.36) 910 0 R (Item.37) 911 0 R (Item.38) 912 0 R (Item.39) 913 0 R (Item.4) 846 0 R] +/Names [(Item.35) 1037 0 R (Item.36) 1038 0 R (Item.37) 1039 0 R (Item.38) 1040 0 R (Item.39) 1041 0 R (Item.4) 973 0 R] /Limits [(Item.35) (Item.4)] >> -% 2011 0 obj +% 2359 0 obj << -/Names [(Item.40) 914 0 R (Item.41) 957 0 R (Item.42) 1050 0 R (Item.43) 1078 0 R (Item.44) 1100 0 R (Item.45) 1127 0 R] +/Names [(Item.40) 1042 0 R (Item.41) 1043 0 R (Item.42) 1086 0 R (Item.43) 1100 0 R (Item.44) 1101 0 R (Item.45) 1181 0 R] /Limits [(Item.40) (Item.45)] >> -% 2012 0 obj +% 2360 0 obj << -/Names [(Item.46) 1298 0 R (Item.47) 1299 0 R (Item.48) 1300 0 R (Item.49) 1354 0 R (Item.5) 847 0 R (Item.50) 1361 0 R] +/Names [(Item.46) 1210 0 R (Item.47) 1231 0 R (Item.48) 1257 0 R (Item.49) 1429 0 R (Item.5) 974 0 R (Item.50) 1430 0 R] /Limits [(Item.46) (Item.50)] >> - -endstream -endobj -2112 0 obj -<< - /Title (Parallel Sparse BLAS V. 
3.8.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) /Producer ($Id$) /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() -/CreationDate (D:20220616091307+02'00') -/ModDate (D:20220616091307+02'00') -/Trapped /False -/PTEX.Fullbanner (This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2) ->> -endobj -2014 0 obj +% 2361 0 obj << -/Type /ObjStm -/N 98 -/First 1015 -/Length 18404 ->> -stream -2013 0 2015 172 2016 342 2017 514 2018 684 2019 856 2020 1026 2021 1198 2022 1367 2023 1544 -2024 1742 2025 1968 2026 2184 2027 2373 2028 2555 2029 2767 2030 2987 2031 3212 2032 3445 2033 3673 -2034 3846 2035 4026 2036 4203 2037 4380 2038 4560 2039 4738 2040 4918 2041 5096 2042 5276 2043 5454 -2044 5634 2045 5812 2046 5980 2047 6146 2048 6316 2049 6488 2050 6658 2051 6830 2052 7000 2053 7172 -2054 7341 2055 7510 2056 7682 2057 7852 2058 8024 2059 8194 2060 8366 2061 8534 2062 8719 2063 8899 -2064 9097 2065 9324 2066 9542 2067 9768 2068 9995 2069 10217 2070 10441 2071 10671 2072 10900 2073 11130 -2074 11352 2075 11579 2076 11806 2077 12028 2078 12250 2079 12485 2080 12749 2081 13005 2082 13269 2083 13539 -2084 13803 2085 14065 2086 14284 2087 14464 2088 14640 2089 14809 2090 14903 2091 15017 2092 15129 2093 15239 -2094 15350 2095 15463 2096 15577 2097 15690 2098 15802 2099 15913 2100 16024 2101 16143 2102 16270 2103 16396 -2104 16527 2105 16651 2106 16724 2107 16838 2108 16958 2109 17057 2110 17141 2111 17176 -% 2013 0 obj -<< -/Names [(Item.51) 1366 0 R (Item.52) 1367 0 R (Item.53) 1368 0 R (Item.54) 1369 0 R (Item.55) 1370 0 R (Item.56) 1382 0 R] +/Names [(Item.51) 1431 0 R (Item.52) 1484 0 R (Item.53) 1491 0 R (Item.54) 1497 0 R (Item.55) 1498 0 R (Item.56) 1499 0 R] /Limits [(Item.51) (Item.56)] >> -% 2015 0 obj +% 2362 0 obj << -/Names [(Item.57) 1383 0 R (Item.58) 1384 0 R 
(Item.59) 1391 0 R (Item.6) 848 0 R (Item.60) 1414 0 R (Item.61) 1415 0 R] +/Names [(Item.57) 1500 0 R (Item.58) 1501 0 R (Item.59) 1513 0 R (Item.6) 975 0 R (Item.60) 1514 0 R (Item.61) 1515 0 R] /Limits [(Item.57) (Item.61)] >> -% 2016 0 obj +% 2363 0 obj << -/Names [(Item.62) 1422 0 R (Item.63) 1427 0 R (Item.64) 1428 0 R (Item.65) 1429 0 R (Item.66) 1441 0 R (Item.67) 1442 0 R] +/Names [(Item.62) 1522 0 R (Item.63) 1545 0 R (Item.64) 1546 0 R (Item.65) 1553 0 R (Item.66) 1558 0 R (Item.67) 1559 0 R] /Limits [(Item.62) (Item.67)] >> -% 2017 0 obj +% 2364 0 obj << -/Names [(Item.68) 1443 0 R (Item.69) 1444 0 R (Item.7) 849 0 R (Item.70) 1445 0 R (Item.71) 1450 0 R (Item.72) 1451 0 R] +/Names [(Item.68) 1560 0 R (Item.69) 1572 0 R (Item.7) 976 0 R (Item.70) 1573 0 R (Item.71) 1574 0 R (Item.72) 1575 0 R] /Limits [(Item.68) (Item.72)] >> -% 2018 0 obj +% 2365 0 obj << -/Names [(Item.73) 1452 0 R (Item.74) 1453 0 R (Item.75) 1454 0 R (Item.76) 1455 0 R (Item.77) 1469 0 R (Item.78) 1470 0 R] +/Names [(Item.73) 1576 0 R (Item.74) 1582 0 R (Item.75) 1583 0 R (Item.76) 1584 0 R (Item.77) 1585 0 R (Item.78) 1586 0 R] /Limits [(Item.73) (Item.78)] >> -% 2019 0 obj +% 2366 0 obj << -/Names [(Item.79) 1471 0 R (Item.8) 850 0 R (Item.80) 1472 0 R (Item.81) 1473 0 R (Item.82) 1474 0 R (Item.83) 1475 0 R] +/Names [(Item.79) 1587 0 R (Item.8) 977 0 R (Item.80) 1599 0 R (Item.81) 1600 0 R (Item.82) 1601 0 R (Item.83) 1602 0 R] /Limits [(Item.79) (Item.83)] >> -% 2020 0 obj +% 2367 0 obj << -/Names [(Item.84) 1488 0 R (Item.85) 1499 0 R (Item.86) 1511 0 R (Item.87) 1512 0 R (Item.88) 1520 0 R (Item.89) 1521 0 R] +/Names [(Item.84) 1603 0 R (Item.85) 1604 0 R (Item.86) 1605 0 R (Item.87) 1619 0 R (Item.88) 1631 0 R (Item.89) 1642 0 R] /Limits [(Item.84) (Item.89)] >> -% 2021 0 obj +% 2368 0 obj << -/Names [(Item.9) 856 0 R (Item.90) 1542 0 R (Item.91) 1543 0 R (Item.92) 1554 0 R (Item.93) 1560 0 R (Item.94) 1566 0 R] +/Names [(Item.9) 985 0 R (Item.90) 1643 0 R (Item.91) 1651 0 R 
(Item.92) 1652 0 R (Item.93) 1673 0 R (Item.94) 1674 0 R] /Limits [(Item.9) (Item.94)] >> -% 2022 0 obj +% 2369 0 obj << -/Names [(Item.95) 1573 0 R (Item.96) 1580 0 R (Item.97) 1581 0 R (Item.98) 1587 0 R (Item.99) 1588 0 R (cite.2007c) 830 0 R] -/Limits [(Item.95) (cite.2007c)] +/Names [(Item.95) 1685 0 R (Item.96) 1691 0 R (Item.97) 1697 0 R (Item.98) 1704 0 R (Item.99) 1710 0 R (algocf.1) 2155 0 R] +/Limits [(Item.95) (algocf.1)] >> -% 2023 0 obj +% 2370 0 obj << -/Names [(cite.2007d) 831 0 R (cite.BLACS) 805 0 R (cite.BLAS1) 788 0 R (cite.BLAS2) 789 0 R (cite.BLAS3) 790 0 R (cite.DesPat:11) 783 0 R] -/Limits [(cite.2007d) (cite.DesPat:11)] +/Names [(algocf.2) 2202 0 R (algocfline.1) 2141 0 R (algocfline.2) 2209 0 R (cite.2007c) 957 0 R (cite.2007d) 958 0 R (cite.BERTACCINIFILIPPONE) 2012 0 R] +/Limits [(algocf.2) (cite.BERTACCINIFILIPPONE)] >> -% 2024 0 obj +% 2371 0 obj << -/Names [(cite.DesignPatterns) 946 0 R (cite.KIVA3PSBLAS) 1941 0 R (cite.METIS) 817 0 R (cite.MPI1) 1947 0 R (cite.PARA04FOREST) 1939 0 R (cite.PSBLAS) 1940 0 R] -/Limits [(cite.DesignPatterns) (cite.PSBLAS)] +/Names [(cite.BLACS) 933 0 R (cite.BLAS1) 915 0 R (cite.BLAS2) 916 0 R (cite.BLAS3) 917 0 R (cite.CaFiRo:2014) 2127 0 R (cite.DesPat:11) 910 0 R] +/Limits [(cite.BLACS) (cite.DesPat:11)] >> -% 2025 0 obj +% 2372 0 obj << -/Names [(cite.RouXiaXu:11) 784 0 R (cite.Sparse03) 782 0 R (cite.machiels) 785 0 R (cite.metcalf) 781 0 R (cite.sblas02) 787 0 R (cite.sblas97) 786 0 R] -/Limits [(cite.RouXiaXu:11) (cite.sblas97)] +/Names [(cite.DesignPatterns) 1074 0 R (cite.Filippone:2017:SMM:3034774.3017994) 2280 0 R (cite.KIVA3PSBLAS) 2273 0 R (cite.METIS) 945 0 R (cite.MPI1) 2279 0 R (cite.MRC:11) 2142 0 R] +/Limits [(cite.DesignPatterns) (cite.MRC:11)] >> -% 2026 0 obj + +endstream +endobj +2374 0 obj << -/Names [(descdata) 888 0 R (equation.4.1) 1165 0 R (equation.4.2) 1166 0 R (equation.4.3) 1167 0 R (figure.1) 799 0 R (figure.2) 825 0 R] -/Limits [(descdata) (figure.2)] +/Type /ObjStm +/N 
100 +/First 1040 +/Length 19083 >> -% 2027 0 obj +stream +2373 0 2375 226 2376 431 2377 617 2378 800 2379 1002 2380 1227 2381 1447 2382 1678 2383 1914 +2384 2148 2385 2376 2386 2564 2387 2744 2388 2923 2389 3103 2390 3281 2391 3459 2392 3639 2393 3818 +2394 3998 2395 4177 2396 4357 2397 4536 2398 4716 2399 4895 2400 5075 2401 5253 2402 5423 2403 5595 +2404 5764 2405 5933 2406 6105 2407 6275 2408 6447 2409 6617 2410 6789 2411 6959 2412 7131 2413 7301 +2414 7473 2415 7643 2416 7812 2417 7992 2418 8190 2419 8386 2420 8580 2421 8768 2422 8948 2423 9148 +2424 9378 2425 9608 2426 9827 2427 10051 2428 10281 2429 10503 2430 10725 2431 10953 2432 11181 2433 11411 +2434 11636 2435 11861 2436 12091 2437 12313 2438 12535 2439 12757 2440 13018 2441 13278 2442 13538 2443 13808 +2444 14074 2445 14336 2446 14575 2447 14755 2448 14934 2449 15109 2450 15276 2451 15390 2452 15502 2453 15613 +2454 15724 2455 15840 2456 15962 2457 16081 2458 16194 2459 16307 2460 16418 2461 16529 2462 16644 2463 16767 +2464 16893 2465 17019 2466 17150 2467 17274 2468 17357 2469 17476 2470 17598 2471 17711 2472 17795 2473 17830 +% 2373 0 obj << -/Names [(figure.3) 1268 0 R (figure.4) 1301 0 R (listing.1) 916 0 R (listing.2) 947 0 R (listing.3) 986 0 R (listing.4) 1006 0 R] -/Limits [(figure.3) (listing.4)] +/Names [(cite.OurTechRep) 2128 0 R (cite.PARA04FOREST) 2271 0 R (cite.PSBLAS) 2272 0 R (cite.RouXiaXu:11) 911 0 R (cite.Sparse03) 909 0 R (cite.machiels) 912 0 R] +/Limits [(cite.OurTechRep) (cite.machiels)] >> -% 2028 0 obj +% 2375 0 obj << -/Names [(listing.5) 1771 0 R (listing.6) 1772 0 R (lstlisting.-1) 1128 0 R (lstlisting.-10) 1841 0 R (lstlisting.-11) 1848 0 R (lstlisting.-12) 1914 0 R] -/Limits [(listing.5) (lstlisting.-12)] +/Names [(cite.metcalf) 908 0 R (cite.sblas02) 914 0 R (cite.sblas97) 913 0 R (descdata) 1017 0 R (equation.4.1) 1296 0 R (equation.4.2) 1297 0 R] +/Limits [(cite.metcalf) (equation.4.2)] >> -% 2029 0 obj +% 2376 0 obj << -/Names [(lstlisting.-2) 1781 0 R (lstlisting.-3) 
1787 0 R (lstlisting.-4) 1795 0 R (lstlisting.-5) 1801 0 R (lstlisting.-6) 1812 0 R (lstlisting.-7) 1819 0 R] -/Limits [(lstlisting.-2) (lstlisting.-7)] +/Names [(equation.4.3) 1298 0 R (figure.1) 927 0 R (figure.2) 953 0 R (figure.3) 1399 0 R (figure.4) 1432 0 R (figure.5) 2149 0 R] +/Limits [(equation.4.3) (figure.5)] >> -% 2030 0 obj +% 2377 0 obj << -/Names [(lstlisting.-8) 1826 0 R (lstlisting.-9) 1834 0 R (lstnumber.-1.1) 1129 0 R (lstnumber.-1.2) 1130 0 R (lstnumber.-1.3) 1131 0 R (lstnumber.-1.4) 1132 0 R] -/Limits [(lstlisting.-8) (lstnumber.-1.4)] +/Names [(figure.6) 2144 0 R (figure.7) 2189 0 R (figure.8) 2201 0 R (figure.9) 2228 0 R (listing.1) 1045 0 R (listing.2) 1075 0 R] +/Limits [(figure.6) (listing.2)] >> -% 2031 0 obj +% 2378 0 obj << -/Names [(lstnumber.-10.1) 1842 0 R (lstnumber.-11.1) 1849 0 R (lstnumber.-12.1) 1915 0 R (lstnumber.-12.2) 1916 0 R (lstnumber.-2.1) 1782 0 R (lstnumber.-3.1) 1788 0 R] -/Limits [(lstnumber.-10.1) (lstnumber.-3.1)] +/Names [(listing.3) 1118 0 R (listing.4) 1137 0 R (listing.5) 1902 0 R (listing.6) 1903 0 R (lstlisting.-1) 1258 0 R (lstlisting.-10) 1972 0 R] +/Limits [(listing.3) (lstlisting.-10)] >> -% 2032 0 obj +% 2379 0 obj << -/Names [(lstnumber.-4.1) 1796 0 R (lstnumber.-5.1) 1802 0 R (lstnumber.-6.1) 1813 0 R (lstnumber.-7.1) 1820 0 R (lstnumber.-8.1) 1827 0 R (lstnumber.-9.1) 1835 0 R] -/Limits [(lstnumber.-4.1) (lstnumber.-9.1)] +/Names [(lstlisting.-11) 1979 0 R (lstlisting.-12) 2078 0 R (lstlisting.-13) 2102 0 R (lstlisting.-14) 2160 0 R (lstlisting.-2) 1912 0 R (lstlisting.-3) 1919 0 R] +/Limits [(lstlisting.-11) (lstlisting.-3)] >> -% 2033 0 obj +% 2380 0 obj << -/Names [(page.1) 582 0 R (page.10) 895 0 R (page.100) 1553 0 R (page.101) 1559 0 R (page.102) 1565 0 R (page.103) 1572 0 R] -/Limits [(page.1) (page.103)] +/Names [(lstlisting.-4) 1925 0 R (lstlisting.-5) 1931 0 R (lstlisting.-6) 1943 0 R (lstlisting.-7) 1950 0 R (lstlisting.-8) 1958 0 R (lstlisting.-9) 1965 0 R] +/Limits [(lstlisting.-4) 
(lstlisting.-9)] >> -% 2034 0 obj +% 2381 0 obj << -/Names [(page.104) 1579 0 R (page.105) 1586 0 R (page.106) 1593 0 R (page.107) 1597 0 R (page.108) 1607 0 R (page.109) 1611 0 R] -/Limits [(page.104) (page.109)] +/Names [(lstnumber.-1.1) 1259 0 R (lstnumber.-1.2) 1260 0 R (lstnumber.-1.3) 1261 0 R (lstnumber.-1.4) 1262 0 R (lstnumber.-10.1) 1973 0 R (lstnumber.-11.1) 1980 0 R] +/Limits [(lstnumber.-1.1) (lstnumber.-11.1)] >> -% 2035 0 obj +% 2382 0 obj << -/Names [(page.11) 906 0 R (page.110) 1616 0 R (page.111) 1629 0 R (page.112) 1633 0 R (page.113) 1639 0 R (page.114) 1645 0 R] -/Limits [(page.11) (page.114)] +/Names [(lstnumber.-12.1) 2079 0 R (lstnumber.-12.2) 2080 0 R (lstnumber.-13.1) 2103 0 R (lstnumber.-13.2) 2104 0 R (lstnumber.-14.1) 2161 0 R (lstnumber.-14.2) 2162 0 R] +/Limits [(lstnumber.-12.1) (lstnumber.-14.2)] >> -% 2036 0 obj +% 2383 0 obj << -/Names [(page.115) 1652 0 R (page.116) 1657 0 R (page.117) 1661 0 R (page.118) 1665 0 R (page.119) 1669 0 R (page.12) 922 0 R] -/Limits [(page.115) (page.12)] +/Names [(lstnumber.-14.3) 2163 0 R (lstnumber.-14.4) 2164 0 R (lstnumber.-14.5) 2165 0 R (lstnumber.-14.6) 2166 0 R (lstnumber.-14.7) 2167 0 R (lstnumber.-2.1) 1913 0 R] +/Limits [(lstnumber.-14.3) (lstnumber.-2.1)] >> -% 2037 0 obj +% 2384 0 obj << -/Names [(page.120) 1673 0 R (page.121) 1678 0 R (page.122) 1687 0 R (page.123) 1691 0 R (page.124) 1698 0 R (page.125) 1702 0 R] -/Limits [(page.120) (page.125)] +/Names [(lstnumber.-3.1) 1920 0 R (lstnumber.-4.1) 1926 0 R (lstnumber.-5.1) 1932 0 R (lstnumber.-6.1) 1944 0 R (lstnumber.-7.1) 1951 0 R (lstnumber.-8.1) 1959 0 R] +/Limits [(lstnumber.-3.1) (lstnumber.-8.1)] >> -% 2038 0 obj +% 2385 0 obj << -/Names [(page.126) 1709 0 R (page.127) 1713 0 R (page.128) 1721 0 R (page.129) 1725 0 R (page.13) 926 0 R (page.130) 1732 0 R] -/Limits [(page.126) (page.130)] +/Names [(lstnumber.-9.1) 1966 0 R (page.1) 683 0 R (page.10) 1016 0 R (page.100) 1679 0 R (page.101) 1684 0 R (page.102) 1690 0 R] +/Limits 
[(lstnumber.-9.1) (page.102)] >> -% 2039 0 obj +% 2386 0 obj << -/Names [(page.131) 1736 0 R (page.132) 1743 0 R (page.133) 1747 0 R (page.134) 1758 0 R (page.135) 1763 0 R (page.136) 1770 0 R] -/Limits [(page.131) (page.136)] +/Names [(page.103) 1696 0 R (page.104) 1703 0 R (page.105) 1709 0 R (page.106) 1716 0 R (page.107) 1724 0 R (page.108) 1728 0 R] +/Limits [(page.103) (page.108)] >> -% 2040 0 obj +% 2387 0 obj << -/Names [(page.137) 1776 0 R (page.138) 1780 0 R (page.139) 1786 0 R (page.14) 932 0 R (page.140) 1794 0 R (page.141) 1800 0 R] -/Limits [(page.137) (page.141)] +/Names [(page.109) 1738 0 R (page.11) 1024 0 R (page.110) 1743 0 R (page.111) 1747 0 R (page.112) 1760 0 R (page.113) 1764 0 R] +/Limits [(page.109) (page.113)] >> -% 2041 0 obj +% 2388 0 obj << -/Names [(page.142) 1806 0 R (page.143) 1811 0 R (page.144) 1818 0 R (page.145) 1825 0 R (page.146) 1833 0 R (page.147) 1840 0 R] -/Limits [(page.142) (page.147)] +/Names [(page.114) 1770 0 R (page.115) 1776 0 R (page.116) 1784 0 R (page.117) 1788 0 R (page.118) 1792 0 R (page.119) 1796 0 R] +/Limits [(page.114) (page.119)] >> -% 2042 0 obj +% 2389 0 obj << -/Names [(page.148) 1847 0 R (page.149) 1853 0 R (page.15) 937 0 R (page.150) 1857 0 R (page.151) 1863 0 R (page.152) 1873 0 R] -/Limits [(page.148) (page.152)] +/Names [(page.12) 1035 0 R (page.120) 1800 0 R (page.121) 1804 0 R (page.122) 1809 0 R (page.123) 1817 0 R (page.124) 1821 0 R] +/Limits [(page.12) (page.124)] >> -% 2043 0 obj +% 2390 0 obj << -/Names [(page.153) 1877 0 R (page.154) 1885 0 R (page.155) 1890 0 R (page.156) 1895 0 R (page.157) 1901 0 R (page.158) 1906 0 R] -/Limits [(page.153) (page.158)] +/Names [(page.125) 1828 0 R (page.126) 1833 0 R (page.127) 1840 0 R (page.128) 1845 0 R (page.129) 1852 0 R (page.13) 1051 0 R] +/Limits [(page.125) (page.13)] >> -% 2044 0 obj +% 2391 0 obj << -/Names [(page.159) 1913 0 R (page.16) 944 0 R (page.160) 1924 0 R (page.161) 1929 0 R (page.162) 1933 0 R (page.163) 1937 0 R] -/Limits 
[(page.159) (page.163)] +/Names [(page.130) 1856 0 R (page.131) 1863 0 R (page.132) 1867 0 R (page.133) 1874 0 R (page.134) 1879 0 R (page.135) 1889 0 R] +/Limits [(page.130) (page.135)] >> -% 2045 0 obj +% 2392 0 obj << -/Names [(page.164) 1946 0 R (page.17) 951 0 R (page.18) 956 0 R (page.19) 961 0 R (page.2) 592 0 R (page.20) 966 0 R] -/Limits [(page.164) (page.20)] +/Names [(page.136) 1894 0 R (page.137) 1901 0 R (page.138) 1907 0 R (page.139) 1911 0 R (page.14) 1056 0 R (page.140) 1918 0 R] +/Limits [(page.136) (page.140)] >> -% 2046 0 obj +% 2393 0 obj << -/Names [(page.21) 970 0 R (page.22) 974 0 R (page.23) 978 0 R (page.24) 984 0 R (page.25) 990 0 R (page.26) 997 0 R] -/Limits [(page.21) (page.26)] +/Names [(page.141) 1924 0 R (page.142) 1930 0 R (page.143) 1936 0 R (page.144) 1942 0 R (page.145) 1949 0 R (page.146) 1957 0 R] +/Limits [(page.141) (page.146)] >> -% 2047 0 obj +% 2394 0 obj << -/Names [(page.27) 1004 0 R (page.28) 1010 0 R (page.29) 1015 0 R (page.3) 811 0 R (page.30) 1026 0 R (page.31) 1033 0 R] -/Limits [(page.27) (page.31)] +/Names [(page.147) 1964 0 R (page.148) 1971 0 R (page.149) 1978 0 R (page.15) 1060 0 R (page.150) 1984 0 R (page.151) 1988 0 R] +/Limits [(page.147) (page.151)] >> -% 2048 0 obj +% 2395 0 obj << -/Names [(page.32) 1043 0 R (page.33) 1049 0 R (page.34) 1060 0 R (page.35) 1065 0 R (page.36) 1072 0 R (page.37) 1077 0 R] -/Limits [(page.32) (page.37)] +/Names [(page.152) 1996 0 R (page.153) 2007 0 R (page.154) 2011 0 R (page.155) 2020 0 R (page.156) 2024 0 R (page.157) 2033 0 R] +/Limits [(page.152) (page.157)] >> -% 2049 0 obj +% 2396 0 obj << -/Names [(page.38) 1086 0 R (page.39) 1094 0 R (page.4) 824 0 R (page.40) 1099 0 R (page.41) 1107 0 R (page.42) 1112 0 R] -/Limits [(page.38) (page.42)] +/Names [(page.158) 2039 0 R (page.159) 2043 0 R (page.16) 1065 0 R (page.160) 2049 0 R (page.161) 2058 0 R (page.162) 2065 0 R] +/Limits [(page.158) (page.162)] >> -% 2050 0 obj +% 2397 0 obj << -/Names [(page.43) 1119 0 R 
(page.44) 1126 0 R (page.45) 1139 0 R (page.46) 1146 0 R (page.47) 1153 0 R (page.48) 1164 0 R] -/Limits [(page.43) (page.48)] +/Names [(page.163) 2069 0 R (page.164) 2077 0 R (page.165) 2088 0 R (page.166) 2093 0 R (page.167) 2101 0 R (page.168) 2110 0 R] +/Limits [(page.163) (page.168)] >> -% 2051 0 obj +% 2398 0 obj << -/Names [(page.49) 1179 0 R (page.5) 837 0 R (page.50) 1186 0 R (page.51) 1197 0 R (page.52) 1203 0 R (page.53) 1214 0 R] -/Limits [(page.49) (page.53)] +/Names [(page.169) 2114 0 R (page.17) 1072 0 R (page.170) 2125 0 R (page.171) 2139 0 R (page.172) 2148 0 R (page.173) 2159 0 R] +/Limits [(page.169) (page.173)] >> -% 2052 0 obj +% 2399 0 obj << -/Names [(page.54) 1219 0 R (page.55) 1230 0 R (page.56) 1237 0 R (page.57) 1246 0 R (page.58) 1252 0 R (page.59) 1260 0 R] -/Limits [(page.54) (page.59)] +/Names [(page.174) 2188 0 R (page.175) 2208 0 R (page.176) 2227 0 R (page.177) 2243 0 R (page.178) 2247 0 R (page.179) 2251 0 R] +/Limits [(page.174) (page.179)] >> -% 2053 0 obj +% 2400 0 obj << -/Names [(page.6) 842 0 R (page.60) 1267 0 R (page.61) 1280 0 R (page.62) 1288 0 R (page.63) 1297 0 R (page.64) 1305 0 R] -/Limits [(page.6) (page.64)] +/Names [(page.18) 1080 0 R (page.180) 2256 0 R (page.181) 2260 0 R (page.182) 2265 0 R (page.183) 2269 0 R (page.184) 2278 0 R] +/Limits [(page.18) (page.184)] >> -% 2054 0 obj +% 2401 0 obj << -/Names [(page.65) 1309 0 R (page.66) 1324 0 R (page.67) 1329 0 R (page.68) 1336 0 R (page.69) 1343 0 R (page.7) 855 0 R] -/Limits [(page.65) (page.7)] +/Names [(page.19) 1085 0 R (page.2) 693 0 R (page.20) 1091 0 R (page.21) 1095 0 R (page.22) 1099 0 R (page.23) 1105 0 R] +/Limits [(page.19) (page.23)] >> -% 2055 0 obj +% 2402 0 obj << -/Names [(page.70) 1347 0 R (page.71) 1353 0 R (page.72) 1359 0 R (page.73) 1365 0 R (page.74) 1376 0 R (page.75) 1381 0 R] -/Limits [(page.70) (page.75)] +/Names [(page.24) 1109 0 R (page.25) 1115 0 R (page.26) 1122 0 R (page.27) 1128 0 R (page.28) 1135 0 R (page.29) 1141 0 R] +/Limits 
[(page.24) (page.29)] >> -% 2056 0 obj +% 2403 0 obj << -/Names [(page.76) 1390 0 R (page.77) 1397 0 R (page.78) 1402 0 R (page.79) 1408 0 R (page.8) 876 0 R (page.80) 1413 0 R] -/Limits [(page.76) (page.80)] +/Names [(page.3) 926 0 R (page.30) 1145 0 R (page.31) 1156 0 R (page.32) 1164 0 R (page.33) 1174 0 R (page.34) 1180 0 R] +/Limits [(page.3) (page.34)] >> -% 2057 0 obj +% 2404 0 obj << -/Names [(page.81) 1421 0 R (page.82) 1426 0 R (page.83) 1434 0 R (page.84) 1440 0 R (page.85) 1449 0 R (page.86) 1463 0 R] -/Limits [(page.81) (page.86)] +/Names [(page.35) 1191 0 R (page.36) 1196 0 R (page.37) 1203 0 R (page.38) 1209 0 R (page.39) 1217 0 R (page.4) 939 0 R] +/Limits [(page.35) (page.4)] >> -% 2058 0 obj +% 2405 0 obj << -/Names [(page.87) 1468 0 R (page.88) 1481 0 R (page.89) 1487 0 R (page.9) 887 0 R (page.90) 1494 0 R (page.91) 1498 0 R] -/Limits [(page.87) (page.91)] +/Names [(page.40) 1225 0 R (page.41) 1230 0 R (page.42) 1238 0 R (page.43) 1243 0 R (page.44) 1251 0 R (page.45) 1256 0 R] +/Limits [(page.40) (page.45)] >> -% 2059 0 obj +% 2406 0 obj << -/Names [(page.92) 1506 0 R (page.93) 1510 0 R (page.94) 1519 0 R (page.95) 1527 0 R (page.96) 1531 0 R (page.97) 1536 0 R] -/Limits [(page.92) (page.97)] +/Names [(page.46) 1269 0 R (page.47) 1276 0 R (page.48) 1284 0 R (page.49) 1295 0 R (page.5) 952 0 R (page.50) 1311 0 R] +/Limits [(page.46) (page.50)] >> -% 2060 0 obj +% 2407 0 obj << -/Names [(page.98) 1541 0 R (page.99) 1548 0 R (page.i) 638 0 R (page.ii) 696 0 R (page.iii) 742 0 R (page.iv) 763 0 R] -/Limits [(page.98) (page.iv)] +/Names [(page.51) 1317 0 R (page.52) 1328 0 R (page.53) 1334 0 R (page.54) 1345 0 R (page.55) 1350 0 R (page.56) 1362 0 R] +/Limits [(page.51) (page.56)] >> -% 2061 0 obj +% 2408 0 obj << -/Names [(precdata) 1005 0 R (section*.1) 639 0 R (section*.2) 1938 0 R (section.1) 7 0 R (section.10) 541 0 R (section.11) 569 0 R] -/Limits [(precdata) (section.11)] +/Names [(page.57) 1367 0 R (page.58) 1376 0 R (page.59) 1382 0 R 
(page.6) 964 0 R (page.60) 1391 0 R (page.61) 1398 0 R] +/Limits [(page.57) (page.61)] >> -% 2062 0 obj +% 2409 0 obj +<< +/Names [(page.62) 1412 0 R (page.63) 1419 0 R (page.64) 1428 0 R (page.65) 1436 0 R (page.66) 1440 0 R (page.67) 1455 0 R] +/Limits [(page.62) (page.67)] +>> +% 2410 0 obj +<< +/Names [(page.68) 1461 0 R (page.69) 1467 0 R (page.7) 969 0 R (page.70) 1474 0 R (page.71) 1478 0 R (page.72) 1483 0 R] +/Limits [(page.68) (page.72)] +>> +% 2411 0 obj +<< +/Names [(page.73) 1489 0 R (page.74) 1496 0 R (page.75) 1507 0 R (page.76) 1512 0 R (page.77) 1521 0 R (page.78) 1528 0 R] +/Limits [(page.73) (page.78)] +>> +% 2412 0 obj +<< +/Names [(page.79) 1533 0 R (page.8) 984 0 R (page.80) 1540 0 R (page.81) 1544 0 R (page.82) 1552 0 R (page.83) 1557 0 R] +/Limits [(page.79) (page.83)] +>> +% 2413 0 obj +<< +/Names [(page.84) 1565 0 R (page.85) 1571 0 R (page.86) 1581 0 R (page.87) 1594 0 R (page.88) 1598 0 R (page.89) 1611 0 R] +/Limits [(page.84) (page.89)] +>> +% 2414 0 obj +<< +/Names [(page.9) 1005 0 R (page.90) 1618 0 R (page.91) 1625 0 R (page.92) 1630 0 R (page.93) 1637 0 R (page.94) 1641 0 R] +/Limits [(page.9) (page.94)] +>> +% 2415 0 obj +<< +/Names [(page.95) 1650 0 R (page.96) 1658 0 R (page.97) 1662 0 R (page.98) 1668 0 R (page.99) 1672 0 R (page.i) 739 0 R] +/Limits [(page.95) (page.i)] +>> +% 2416 0 obj +<< +/Names [(page.ii) 795 0 R (page.iii) 843 0 R (page.iv) 886 0 R (precdata) 1136 0 R (section*.1) 740 0 R (section*.10) 634 0 R] +/Limits [(page.ii) (section*.10)] +>> +% 2417 0 obj +<< +/Names [(section*.11) 638 0 R (section*.12) 642 0 R (section*.13) 646 0 R (section*.14) 650 0 R (section*.15) 654 0 R (section*.16) 658 0 R] +/Limits [(section*.11) (section*.16)] +>> +% 2418 0 obj +<< +/Names [(section*.17) 662 0 R (section*.18) 666 0 R (section*.19) 670 0 R (section*.2) 7 0 R (section*.20) 674 0 R (section*.21) 2270 0 R] +/Limits [(section*.17) (section*.21)] +>> +% 2419 0 obj +<< +/Names [(section*.3) 2140 0 R (section*.4) 2168 0 R 
(section*.5) 2190 0 R (section*.6) 2210 0 R (section*.7) 622 0 R (section*.8) 626 0 R] +/Limits [(section*.3) (section*.8)] +>> +% 2420 0 obj +<< +/Names [(section*.9) 630 0 R (section.1) 11 0 R (section.10) 545 0 R (section.11) 585 0 R (section.12) 597 0 R (section.13) 618 0 R] +/Limits [(section*.9) (section.13)] +>> +% 2421 0 obj << -/Names [(section.2) 11 0 R (section.3) 35 0 R (section.4) 220 0 R (section.5) 288 0 R (section.6) 308 0 R (section.7) 421 0 R] +/Names [(section.2) 15 0 R (section.3) 39 0 R (section.4) 224 0 R (section.5) 292 0 R (section.6) 312 0 R (section.7) 425 0 R] /Limits [(section.2) (section.7)] >> -% 2063 0 obj +% 2422 0 obj << -/Names [(section.8) 493 0 R (section.9) 513 0 R (spbasedata) 952 0 R (spdata) 945 0 R (subsection.10.1) 545 0 R (subsection.10.2) 549 0 R] +/Names [(section.8) 497 0 R (section.9) 517 0 R (spbasedata) 1081 0 R (spdata) 1073 0 R (subsection.10.1) 549 0 R (subsection.10.2) 553 0 R] /Limits [(section.8) (subsection.10.2)] >> -% 2064 0 obj +% 2423 0 obj << -/Names [(subsection.10.3) 553 0 R (subsection.10.4) 557 0 R (subsection.10.5) 561 0 R (subsection.10.6) 565 0 R (subsection.11.1) 573 0 R (subsection.2.1) 15 0 R] -/Limits [(subsection.10.3) (subsection.2.1)] +/Names [(subsection.10.3) 557 0 R (subsection.10.4) 561 0 R (subsection.10.5) 565 0 R (subsection.10.6) 569 0 R (subsection.10.7) 573 0 R (subsection.10.8) 577 0 R] +/Limits [(subsection.10.3) (subsection.10.8)] >> -% 2065 0 obj +% 2424 0 obj << -/Names [(subsection.2.2) 19 0 R (subsection.2.3) 23 0 R (subsection.2.4) 31 0 R (subsection.3.1) 39 0 R (subsection.3.2) 103 0 R (subsection.3.3) 183 0 R] -/Limits [(subsection.2.2) (subsection.3.3)] +/Names [(subsection.10.9) 581 0 R (subsection.11.1) 589 0 R (subsection.11.2) 593 0 R (subsection.12.1) 601 0 R (subsection.12.2) 606 0 R (subsection.12.3) 610 0 R] +/Limits [(subsection.10.9) (subsection.12.3)] >> -% 2066 0 obj +% 2425 0 obj << -/Names [(subsection.3.4) 212 0 R (subsection.3.5) 216 0 R (subsection.4.1) 
224 0 R (subsection.4.10) 260 0 R (subsection.4.11) 264 0 R (subsection.4.12) 268 0 R] -/Limits [(subsection.3.4) (subsection.4.12)] +/Names [(subsection.12.4) 614 0 R (subsection.2.1) 19 0 R (subsection.2.2) 23 0 R (subsection.2.3) 27 0 R (subsection.2.4) 35 0 R (subsection.3.1) 43 0 R] +/Limits [(subsection.12.4) (subsection.3.1)] >> -% 2067 0 obj +% 2426 0 obj << -/Names [(subsection.4.13) 272 0 R (subsection.4.14) 276 0 R (subsection.4.15) 280 0 R (subsection.4.16) 284 0 R (subsection.4.2) 228 0 R (subsection.4.3) 232 0 R] -/Limits [(subsection.4.13) (subsection.4.3)] +/Names [(subsection.3.2) 107 0 R (subsection.3.3) 187 0 R (subsection.3.4) 216 0 R (subsection.3.5) 220 0 R (subsection.4.1) 228 0 R (subsection.4.10) 264 0 R] +/Limits [(subsection.3.2) (subsection.4.10)] >> -% 2068 0 obj +% 2427 0 obj << -/Names [(subsection.4.4) 236 0 R (subsection.4.5) 240 0 R (subsection.4.6) 244 0 R (subsection.4.7) 248 0 R (subsection.4.8) 252 0 R (subsection.4.9) 256 0 R] -/Limits [(subsection.4.4) (subsection.4.9)] +/Names [(subsection.4.11) 268 0 R (subsection.4.12) 272 0 R (subsection.4.13) 276 0 R (subsection.4.14) 280 0 R (subsection.4.15) 284 0 R (subsection.4.16) 288 0 R] +/Limits [(subsection.4.11) (subsection.4.16)] >> -% 2069 0 obj +% 2428 0 obj << -/Names [(subsection.5.1) 292 0 R (subsection.5.2) 296 0 R (subsection.5.3) 300 0 R (subsection.5.4) 304 0 R (subsection.6.1) 312 0 R (subsection.6.10) 348 0 R] -/Limits [(subsection.5.1) (subsection.6.10)] +/Names [(subsection.4.2) 232 0 R (subsection.4.3) 236 0 R (subsection.4.4) 240 0 R (subsection.4.5) 244 0 R (subsection.4.6) 248 0 R (subsection.4.7) 252 0 R] +/Limits [(subsection.4.2) (subsection.4.7)] >> -% 2070 0 obj +% 2429 0 obj << -/Names [(subsection.6.11) 352 0 R (subsection.6.12) 356 0 R (subsection.6.13) 360 0 R (subsection.6.14) 364 0 R (subsection.6.15) 368 0 R (subsection.6.16) 372 0 R] -/Limits [(subsection.6.11) (subsection.6.16)] +/Names [(subsection.4.8) 256 0 R (subsection.4.9) 260 0 R 
(subsection.5.1) 296 0 R (subsection.5.2) 300 0 R (subsection.5.3) 304 0 R (subsection.5.4) 308 0 R] +/Limits [(subsection.4.8) (subsection.5.4)] >> -% 2071 0 obj +% 2430 0 obj << -/Names [(subsection.6.17) 376 0 R (subsection.6.18) 380 0 R (subsection.6.19) 384 0 R (subsection.6.2) 316 0 R (subsection.6.20) 388 0 R (subsection.6.21) 392 0 R] -/Limits [(subsection.6.17) (subsection.6.21)] +/Names [(subsection.6.1) 316 0 R (subsection.6.10) 352 0 R (subsection.6.11) 356 0 R (subsection.6.12) 360 0 R (subsection.6.13) 364 0 R (subsection.6.14) 368 0 R] +/Limits [(subsection.6.1) (subsection.6.14)] >> -% 2072 0 obj +% 2431 0 obj << -/Names [(subsection.6.22) 396 0 R (subsection.6.23) 400 0 R (subsection.6.24) 405 0 R (subsection.6.25) 409 0 R (subsection.6.26) 413 0 R (subsection.6.27) 417 0 R] -/Limits [(subsection.6.22) (subsection.6.27)] +/Names [(subsection.6.15) 372 0 R (subsection.6.16) 376 0 R (subsection.6.17) 380 0 R (subsection.6.18) 384 0 R (subsection.6.19) 388 0 R (subsection.6.2) 320 0 R] +/Limits [(subsection.6.15) (subsection.6.2)] >> -% 2073 0 obj +% 2432 0 obj << -/Names [(subsection.6.3) 320 0 R (subsection.6.4) 324 0 R (subsection.6.5) 328 0 R (subsection.6.6) 332 0 R (subsection.6.7) 336 0 R (subsection.6.8) 340 0 R] -/Limits [(subsection.6.3) (subsection.6.8)] +/Names [(subsection.6.20) 392 0 R (subsection.6.21) 396 0 R (subsection.6.22) 400 0 R (subsection.6.23) 405 0 R (subsection.6.24) 409 0 R (subsection.6.25) 413 0 R] +/Limits [(subsection.6.20) (subsection.6.25)] >> -% 2074 0 obj +% 2433 0 obj << -/Names [(subsection.6.9) 344 0 R (subsection.7.1) 425 0 R (subsection.7.10) 461 0 R (subsection.7.11) 465 0 R (subsection.7.12) 469 0 R (subsection.7.13) 473 0 R] -/Limits [(subsection.6.9) (subsection.7.13)] +/Names [(subsection.6.26) 417 0 R (subsection.6.27) 421 0 R (subsection.6.3) 324 0 R (subsection.6.4) 328 0 R (subsection.6.5) 332 0 R (subsection.6.6) 336 0 R] +/Limits [(subsection.6.26) (subsection.6.6)] >> -% 2075 0 obj +% 2434 0 obj << 
-/Names [(subsection.7.14) 477 0 R (subsection.7.15) 481 0 R (subsection.7.16) 485 0 R (subsection.7.17) 489 0 R (subsection.7.2) 429 0 R (subsection.7.3) 433 0 R] -/Limits [(subsection.7.14) (subsection.7.3)] +/Names [(subsection.6.7) 340 0 R (subsection.6.8) 344 0 R (subsection.6.9) 348 0 R (subsection.7.1) 429 0 R (subsection.7.10) 465 0 R (subsection.7.11) 469 0 R] +/Limits [(subsection.6.7) (subsection.7.11)] >> -% 2076 0 obj +% 2435 0 obj << -/Names [(subsection.7.4) 437 0 R (subsection.7.5) 441 0 R (subsection.7.6) 445 0 R (subsection.7.7) 449 0 R (subsection.7.8) 453 0 R (subsection.7.9) 457 0 R] -/Limits [(subsection.7.4) (subsection.7.9)] +/Names [(subsection.7.12) 473 0 R (subsection.7.13) 477 0 R (subsection.7.14) 481 0 R (subsection.7.15) 485 0 R (subsection.7.16) 489 0 R (subsection.7.17) 493 0 R] +/Limits [(subsection.7.12) (subsection.7.17)] >> -% 2077 0 obj +% 2436 0 obj << -/Names [(subsection.8.1) 497 0 R (subsection.8.2) 501 0 R (subsection.8.3) 505 0 R (subsection.8.4) 509 0 R (subsection.9.1) 517 0 R (subsection.9.2) 521 0 R] -/Limits [(subsection.8.1) (subsection.9.2)] +/Names [(subsection.7.2) 433 0 R (subsection.7.3) 437 0 R (subsection.7.4) 441 0 R (subsection.7.5) 445 0 R (subsection.7.6) 449 0 R (subsection.7.7) 453 0 R] +/Limits [(subsection.7.2) (subsection.7.7)] >> -% 2078 0 obj +% 2437 0 obj << -/Names [(subsection.9.3) 525 0 R (subsection.9.4) 529 0 R (subsection.9.5) 533 0 R (subsection.9.6) 537 0 R (subsubsection.2.3.1) 27 0 R (subsubsection.3.1.1) 43 0 R] -/Limits [(subsection.9.3) (subsubsection.3.1.1)] +/Names [(subsection.7.8) 457 0 R (subsection.7.9) 461 0 R (subsection.8.1) 501 0 R (subsection.8.2) 505 0 R (subsection.8.3) 509 0 R (subsection.8.4) 513 0 R] +/Limits [(subsection.7.8) (subsection.8.4)] >> -% 2079 0 obj +% 2438 0 obj << -/Names [(subsubsection.3.1.10) 79 0 R (subsubsection.3.1.11) 83 0 R (subsubsection.3.1.12) 87 0 R (subsubsection.3.1.13) 91 0 R (subsubsection.3.1.14) 95 0 R (subsubsection.3.1.15) 99 0 R] 
-/Limits [(subsubsection.3.1.10) (subsubsection.3.1.15)] +/Names [(subsection.9.1) 521 0 R (subsection.9.2) 525 0 R (subsection.9.3) 529 0 R (subsection.9.4) 533 0 R (subsection.9.5) 537 0 R (subsection.9.6) 541 0 R] +/Limits [(subsection.9.1) (subsection.9.6)] >> -% 2080 0 obj +% 2439 0 obj << -/Names [(subsubsection.3.1.2) 47 0 R (subsubsection.3.1.3) 51 0 R (subsubsection.3.1.4) 55 0 R (subsubsection.3.1.5) 59 0 R (subsubsection.3.1.6) 63 0 R (subsubsection.3.1.7) 67 0 R] -/Limits [(subsubsection.3.1.2) (subsubsection.3.1.7)] +/Names [(subsubsection.2.3.1) 31 0 R (subsubsection.3.1.1) 47 0 R (subsubsection.3.1.10) 83 0 R (subsubsection.3.1.11) 87 0 R (subsubsection.3.1.12) 91 0 R (subsubsection.3.1.13) 95 0 R] +/Limits [(subsubsection.2.3.1) (subsubsection.3.1.13)] >> -% 2081 0 obj +% 2440 0 obj << -/Names [(subsubsection.3.1.8) 71 0 R (subsubsection.3.1.9) 75 0 R (subsubsection.3.2.1) 107 0 R (subsubsection.3.2.10) 143 0 R (subsubsection.3.2.11) 147 0 R (subsubsection.3.2.12) 151 0 R] -/Limits [(subsubsection.3.1.8) (subsubsection.3.2.12)] +/Names [(subsubsection.3.1.14) 99 0 R (subsubsection.3.1.15) 103 0 R (subsubsection.3.1.2) 51 0 R (subsubsection.3.1.3) 55 0 R (subsubsection.3.1.4) 59 0 R (subsubsection.3.1.5) 63 0 R] +/Limits [(subsubsection.3.1.14) (subsubsection.3.1.5)] >> -% 2082 0 obj +% 2441 0 obj << -/Names [(subsubsection.3.2.13) 155 0 R (subsubsection.3.2.14) 159 0 R (subsubsection.3.2.15) 163 0 R (subsubsection.3.2.16) 167 0 R (subsubsection.3.2.17) 171 0 R (subsubsection.3.2.18) 175 0 R] -/Limits [(subsubsection.3.2.13) (subsubsection.3.2.18)] +/Names [(subsubsection.3.1.6) 67 0 R (subsubsection.3.1.7) 71 0 R (subsubsection.3.1.8) 75 0 R (subsubsection.3.1.9) 79 0 R (subsubsection.3.2.1) 111 0 R (subsubsection.3.2.10) 147 0 R] +/Limits [(subsubsection.3.1.6) (subsubsection.3.2.10)] >> -% 2083 0 obj +% 2442 0 obj << -/Names [(subsubsection.3.2.19) 179 0 R (subsubsection.3.2.2) 111 0 R (subsubsection.3.2.3) 115 0 R (subsubsection.3.2.4) 119 0 R 
(subsubsection.3.2.5) 123 0 R (subsubsection.3.2.6) 127 0 R] -/Limits [(subsubsection.3.2.19) (subsubsection.3.2.6)] +/Names [(subsubsection.3.2.11) 151 0 R (subsubsection.3.2.12) 155 0 R (subsubsection.3.2.13) 159 0 R (subsubsection.3.2.14) 163 0 R (subsubsection.3.2.15) 167 0 R (subsubsection.3.2.16) 171 0 R] +/Limits [(subsubsection.3.2.11) (subsubsection.3.2.16)] >> -% 2084 0 obj +% 2443 0 obj << -/Names [(subsubsection.3.2.7) 131 0 R (subsubsection.3.2.8) 135 0 R (subsubsection.3.2.9) 139 0 R (subsubsection.3.3.1) 187 0 R (subsubsection.3.3.2) 191 0 R (subsubsection.3.3.3) 195 0 R] -/Limits [(subsubsection.3.2.7) (subsubsection.3.3.3)] +/Names [(subsubsection.3.2.17) 175 0 R (subsubsection.3.2.18) 179 0 R (subsubsection.3.2.19) 183 0 R (subsubsection.3.2.2) 115 0 R (subsubsection.3.2.3) 119 0 R (subsubsection.3.2.4) 123 0 R] +/Limits [(subsubsection.3.2.17) (subsubsection.3.2.4)] >> -% 2085 0 obj +% 2444 0 obj << -/Names [(subsubsection.3.3.4) 199 0 R (subsubsection.3.3.5) 204 0 R (subsubsection.3.3.6) 208 0 R (table.1) 998 0 R (table.10) 1147 0 R (table.11) 1155 0 R] -/Limits [(subsubsection.3.3.4) (table.11)] +/Names [(subsubsection.3.2.5) 127 0 R (subsubsection.3.2.6) 131 0 R (subsubsection.3.2.7) 135 0 R (subsubsection.3.2.8) 139 0 R (subsubsection.3.2.9) 143 0 R (subsubsection.3.3.1) 191 0 R] +/Limits [(subsubsection.3.2.5) (subsubsection.3.3.1)] >> -% 2086 0 obj +% 2445 0 obj << -/Names [(table.12) 1168 0 R (table.13) 1187 0 R (table.14) 1215 0 R (table.15) 1231 0 R (table.16) 1247 0 R (table.17) 1261 0 R] -/Limits [(table.12) (table.17)] +/Names [(subsubsection.3.3.2) 195 0 R (subsubsection.3.3.3) 199 0 R (subsubsection.3.3.4) 204 0 R (subsubsection.3.3.5) 208 0 R (subsubsection.3.3.6) 212 0 R (table.1) 1129 0 R] +/Limits [(subsubsection.3.3.2) (table.1)] >> -% 2087 0 obj +% 2446 0 obj << -/Names [(table.18) 1289 0 R (table.19) 1325 0 R (table.2) 1044 0 R (table.20) 1337 0 R (table.3) 1061 0 R (table.4) 1073 0 R] -/Limits [(table.18) (table.4)] +/Names 
[(table.10) 1277 0 R (table.11) 1286 0 R (table.12) 1299 0 R (table.13) 1318 0 R (table.14) 1346 0 R (table.15) 1363 0 R] +/Limits [(table.10) (table.15)] >> -% 2088 0 obj +% 2447 0 obj << -/Names [(table.5) 1087 0 R (table.6) 1095 0 R (table.7) 1108 0 R (table.8) 1120 0 R (table.9) 1140 0 R (title.0) 3 0 R] -/Limits [(table.5) (title.0)] +/Names [(table.16) 1377 0 R (table.17) 1392 0 R (table.18) 1420 0 R (table.19) 1456 0 R (table.2) 1175 0 R (table.20) 1468 0 R] +/Limits [(table.16) (table.20)] >> -% 2089 0 obj +% 2448 0 obj << -/Names [(vbasedata) 933 0 R (vdata) 985 0 R] -/Limits [(vbasedata) (vdata)] +/Names [(table.21) 1998 0 R (table.22) 2143 0 R (table.3) 1192 0 R (table.4) 1204 0 R (table.5) 1218 0 R (table.6) 1226 0 R] +/Limits [(table.21) (table.6)] >> -% 2090 0 obj +% 2449 0 obj +<< +/Names [(table.7) 1239 0 R (table.8) 1252 0 R (table.9) 1270 0 R (title.0) 3 0 R (vbasedata) 1061 0 R (vdata) 1116 0 R] +/Limits [(table.7) (vdata)] +>> +% 2450 0 obj << -/Kids [1997 0 R 1998 0 R 1999 0 R 2000 0 R 2001 0 R 2002 0 R] +/Kids [2344 0 R 2345 0 R 2346 0 R 2347 0 R 2348 0 R 2349 0 R] /Limits [(Doc-Start) (Item.125)] >> -% 2091 0 obj +% 2451 0 obj << -/Kids [2003 0 R 2004 0 R 2005 0 R 2006 0 R 2007 0 R 2008 0 R] -/Limits [(Item.126) (Item.29)] +/Kids [2350 0 R 2351 0 R 2352 0 R 2353 0 R 2354 0 R 2355 0 R] +/Limits [(Item.126) (Item.23)] >> -% 2092 0 obj +% 2452 0 obj << -/Kids [2009 0 R 2010 0 R 2011 0 R 2012 0 R 2013 0 R 2015 0 R] -/Limits [(Item.3) (Item.61)] +/Kids [2356 0 R 2357 0 R 2358 0 R 2359 0 R 2360 0 R 2361 0 R] +/Limits [(Item.24) (Item.56)] >> -% 2093 0 obj +% 2453 0 obj << -/Kids [2016 0 R 2017 0 R 2018 0 R 2019 0 R 2020 0 R 2021 0 R] -/Limits [(Item.62) (Item.94)] +/Kids [2362 0 R 2363 0 R 2364 0 R 2365 0 R 2366 0 R 2367 0 R] +/Limits [(Item.57) (Item.89)] >> -% 2094 0 obj +% 2454 0 obj << -/Kids [2022 0 R 2023 0 R 2024 0 R 2025 0 R 2026 0 R 2027 0 R] -/Limits [(Item.95) (listing.4)] +/Kids [2368 0 R 2369 0 R 2370 0 R 2371 0 R 2372 0 R 2373 0 R] 
+/Limits [(Item.9) (cite.machiels)] >> -% 2095 0 obj +% 2455 0 obj << -/Kids [2028 0 R 2029 0 R 2030 0 R 2031 0 R 2032 0 R 2033 0 R] -/Limits [(listing.5) (page.103)] +/Kids [2375 0 R 2376 0 R 2377 0 R 2378 0 R 2379 0 R 2380 0 R] +/Limits [(cite.metcalf) (lstlisting.-9)] >> -% 2096 0 obj +% 2456 0 obj << -/Kids [2034 0 R 2035 0 R 2036 0 R 2037 0 R 2038 0 R 2039 0 R] -/Limits [(page.104) (page.136)] +/Kids [2381 0 R 2382 0 R 2383 0 R 2384 0 R 2385 0 R 2386 0 R] +/Limits [(lstnumber.-1.1) (page.108)] >> -% 2097 0 obj +% 2457 0 obj << -/Kids [2040 0 R 2041 0 R 2042 0 R 2043 0 R 2044 0 R 2045 0 R] -/Limits [(page.137) (page.20)] +/Kids [2387 0 R 2388 0 R 2389 0 R 2390 0 R 2391 0 R 2392 0 R] +/Limits [(page.109) (page.140)] >> -% 2098 0 obj +% 2458 0 obj << -/Kids [2046 0 R 2047 0 R 2048 0 R 2049 0 R 2050 0 R 2051 0 R] -/Limits [(page.21) (page.53)] +/Kids [2393 0 R 2394 0 R 2395 0 R 2396 0 R 2397 0 R 2398 0 R] +/Limits [(page.141) (page.173)] >> -% 2099 0 obj +% 2459 0 obj << -/Kids [2052 0 R 2053 0 R 2054 0 R 2055 0 R 2056 0 R 2057 0 R] -/Limits [(page.54) (page.86)] +/Kids [2399 0 R 2400 0 R 2401 0 R 2402 0 R 2403 0 R 2404 0 R] +/Limits [(page.174) (page.4)] >> -% 2100 0 obj +% 2460 0 obj << -/Kids [2058 0 R 2059 0 R 2060 0 R 2061 0 R 2062 0 R 2063 0 R] -/Limits [(page.87) (subsection.10.2)] +/Kids [2405 0 R 2406 0 R 2407 0 R 2408 0 R 2409 0 R 2410 0 R] +/Limits [(page.40) (page.72)] >> -% 2101 0 obj +% 2461 0 obj << -/Kids [2064 0 R 2065 0 R 2066 0 R 2067 0 R 2068 0 R 2069 0 R] -/Limits [(subsection.10.3) (subsection.6.10)] +/Kids [2411 0 R 2412 0 R 2413 0 R 2414 0 R 2415 0 R 2416 0 R] +/Limits [(page.73) (section*.10)] >> -% 2102 0 obj +% 2462 0 obj << -/Kids [2070 0 R 2071 0 R 2072 0 R 2073 0 R 2074 0 R 2075 0 R] -/Limits [(subsection.6.11) (subsection.7.3)] +/Kids [2417 0 R 2418 0 R 2419 0 R 2420 0 R 2421 0 R 2422 0 R] +/Limits [(section*.11) (subsection.10.2)] >> -% 2103 0 obj +% 2463 0 obj << -/Kids [2076 0 R 2077 0 R 2078 0 R 2079 0 R 2080 0 R 2081 0 R] 
-/Limits [(subsection.7.4) (subsubsection.3.2.12)] +/Kids [2423 0 R 2424 0 R 2425 0 R 2426 0 R 2427 0 R 2428 0 R] +/Limits [(subsection.10.3) (subsection.4.7)] >> -% 2104 0 obj +% 2464 0 obj << -/Kids [2082 0 R 2083 0 R 2084 0 R 2085 0 R 2086 0 R 2087 0 R] -/Limits [(subsubsection.3.2.13) (table.4)] +/Kids [2429 0 R 2430 0 R 2431 0 R 2432 0 R 2433 0 R 2434 0 R] +/Limits [(subsection.4.8) (subsection.7.11)] >> -% 2105 0 obj +% 2465 0 obj << -/Kids [2088 0 R 2089 0 R] -/Limits [(table.5) (vdata)] +/Kids [2435 0 R 2436 0 R 2437 0 R 2438 0 R 2439 0 R 2440 0 R] +/Limits [(subsection.7.12) (subsubsection.3.1.5)] >> -% 2106 0 obj +% 2466 0 obj << -/Kids [2090 0 R 2091 0 R 2092 0 R 2093 0 R 2094 0 R 2095 0 R] -/Limits [(Doc-Start) (page.103)] +/Kids [2441 0 R 2442 0 R 2443 0 R 2444 0 R 2445 0 R 2446 0 R] +/Limits [(subsubsection.3.1.6) (table.15)] >> -% 2107 0 obj +% 2467 0 obj << -/Kids [2096 0 R 2097 0 R 2098 0 R 2099 0 R 2100 0 R 2101 0 R] -/Limits [(page.104) (subsection.6.10)] +/Kids [2447 0 R 2448 0 R 2449 0 R] +/Limits [(table.16) (vdata)] >> -% 2108 0 obj +% 2468 0 obj +<< +/Kids [2450 0 R 2451 0 R 2452 0 R 2453 0 R 2454 0 R 2455 0 R] +/Limits [(Doc-Start) (lstlisting.-9)] +>> +% 2469 0 obj +<< +/Kids [2456 0 R 2457 0 R 2458 0 R 2459 0 R 2460 0 R 2461 0 R] +/Limits [(lstnumber.-1.1) (section*.10)] +>> +% 2470 0 obj << -/Kids [2102 0 R 2103 0 R 2104 0 R 2105 0 R] -/Limits [(subsection.6.11) (vdata)] +/Kids [2462 0 R 2463 0 R 2464 0 R 2465 0 R 2466 0 R 2467 0 R] +/Limits [(section*.11) (vdata)] >> -% 2109 0 obj +% 2471 0 obj << -/Kids [2106 0 R 2107 0 R 2108 0 R] +/Kids [2468 0 R 2469 0 R 2470 0 R] /Limits [(Doc-Start) (vdata)] >> -% 2110 0 obj +% 2472 0 obj << -/Dests 2109 0 R +/Dests 2471 0 R >> -% 2111 0 obj +% 2473 0 obj << /Type /Catalog -/Pages 1994 0 R -/Outlines 1995 0 R -/Names 2110 0 R +/Pages 2340 0 R +/Outlines 2342 0 R +/Names 2472 0 R /URI (http://ce.uniroma2.it/psblas) /PageMode/UseOutlines/PageLabels<>2<>6<>]>> -/OpenAction 576 0 R +/OpenAction 677 0 
R >> endstream endobj -2113 0 obj +2474 0 obj +<< + /Title (Parallel Sparse BLAS V. 3.9.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) /Producer ($Id$) /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() +/CreationDate (D:20250605185524+02'00') +/ModDate (D:20250605185524+02'00') +/Trapped /False +/PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5) +>> +endobj +2475 0 obj << /Type /XRef -/Index [0 2114] -/Size 2114 +/Index [0 2476] +/Size 2476 /W [1 3 1] -/Root 2111 0 R -/Info 2112 0 R -/ID [<3E2BE59A045B6F63645E5D8EE5B06F10> <3E2BE59A045B6F63645E5D8EE5B06F10>] -/Length 10570 ->> -stream -ÿ”[ÌSìKÌRìUÌQì]ÌP=ÌO  -=ÌN  =.ÌM =5ÌL=:ÌK=;ÌJ=[ÌI=\ÌH=]ÌG=^ÌF=bÌE=cÌD  ÌC!" ÌB#$ ÌA%& Ì@'( Ì?)*  Ì>+,  Ì=-. Ì</0 Ì;12 Ì:34 Ì956 Ì878 "Ì79: #Ì6;< %Ì5=> )Ì4?@ *Ì3AB +Ì2CD /Ì1EF 0Ì0GH 4Ì/IJ 5Ì.KL 9Ì-MN :Ì,OP ;Ì+QR ?Ì*ST EÌ)UV FÌ(WX GÌ'YZ HÌ&[\ NÌ%]^ OÌ$_` PÌ#ab VÌ"cËR [Ì!ËË \Ì ËË ]ÌËËóÌËËóÌË Ë -óÌË Ë óÌË Ëó.ÌËËó9ÌËËóEÌËËóMÌËËóYÌËËbÌËËbÌËËbÌËËbÌËË b(ÌË!Ë"b<ÌË#Ë$bWÌË%Ë&ÑÌË'Ë(ÑÌ Ë)Ë*ÑÌ Ë+Ë,ÑÌ Ë-Ë.Ñ3Ì -Ë/Ë0ÑQÌ Ë1Ë2ÑZÌË3Ë4DÌË5Ë6DÌË7Ë8DÌË9Ë:D&ÌË;Ë<D-ÌË=Ë>D2ÌË?Ë@D8ÌËAËBDCÌËCËDDNÄcËEËF¸ÄbËGËH¸ÄaËIËJ¸Ä`ËKËL¸"Ä_ËMËN¸,Ä^ËOËP¸8Ä]ËQËR¸@Ä\ËSËT¸DÄ[ËUËV¸IÄZËWËX¸SÄYËYËZ¸XÄXË[Ë\¸^ÄWË]Ë^ÄVË_Ë`ÄUËaËb ÄTËc”3ÄS””ÄR””'ÄQ””+ÄP””;ÄO” ” -?ÄN” ” EÄM” ”KÄL””RÄK””VÄJ””ZÄI””^ÄH””bÄG””ŠÄF””Š ÄE””ŠÄD”” Š ÄC”!”"Š*ÄB”#”$Š4ÄA”%”&Š>Ä@”'”(ŠJÄ?”)”*ŠOÄ>”+”,ŠVÄ=”-”.Š_Ä<”/”0þÄ;”1”2þÄ:”3”4þ Ä9”5”6þÄ8”7”8þÄ7”9”:þÄ6”;”<þ&Ä5”=”>þ-Ä4”?”@þ4Ä3”A”Bþ;Ä2”C”DþDÄ1”E”FþJÄ0”G”HþSÄ/”I”Jþ^Ä.”K”LþcÄ-”M”NcÄ,”O”Pc Ä+”Q”Rc Ä*”S”TcÄ)”U”V”W5‘”X”\2}”Y”ZÄÄÄÄF=”_”]öÛ”^”a”b”c………………………… … -… … … ………………………………………!…"…#…$…%…&…'…(…)…*…/…-”`÷e…+…,…………… ×…0…1…2…3…4…5…6…7…8…9…:…;…<…=…>…?…@…A…B…C…D…E…F…G…H…I…J…K…L…M…N…O…P…Q…R…S…T…U…V…W…X…Y…Z…[…\…]…a…_….QX…^…b…cìììììììììì ì -ì ì ì ìììììììììììììììììì 
ì!ì"ì#ì%ì&ì(ì*ì/ì-…`¬ì,ìì$ì'ì)ì+¨àì1ì3ì5ì6ì7ì8ì9ì:ì;ì<ì=ì?ì.Aì>ì0ì2ì4ìAìBìCìDìEìFìGìHìIìJìLì@)ÒÄc=c4c*c>c<c1c2c;c.c/ìRìSìT^KìYìWìMIzìVìNìOìPìQcc0ìZì[ì`ìXn¤ì\cac`c_ì^ì_c:==¢6=ìa==ìbìc=ÇÆc-c,== = =äï= -=== øj= =========+=)=`======= =!="=#=$=%=&='=(cbÄ=,=6=*6Ø=-=/=0=1=2=3=4=8===7S=9=<=?=@=A=H=>sP=B=C=D=E=F=G=J=K=V=I‹B=L=M=N=O=P=Q=R=S=Tcc=U=X=Y=_=W¢â=Z =`¶{=aÄ   ë      -þn       ²  c9   2ê   &  Nž ! $ , ']î (Ä  1 -nO . 6 2‚¢ 3 < 7”[ 8 @ =¦ > B C J A¾„ D I M Q K܃ LÄ - S T W RíÆ Uó Y a _ Xü™ Z ^ có `F bVòóóAKóóóó ó -ó ó óóóB&ó Äóóóó^fóÄ óóóóóó óhóóó"ó%ó!…[ó#ó$ó'ó(ó)ó*ó+ó,ó0ó&”‹ó-ó/ó3ó1µó2ó5ó6ó7ó;ó4·žó8ó:ó?ó<Õó=ó>Ä óAóBóCóGó@ßuóDóFóIóJóKóOóHøóLóNóSóP óQóRóUóVóWó[óTuóXóZó^ó\;>ó]ó`óaóbbó_=ÂócbÄ –¸b b†ˆbbbbbb b -b bbbb —§bbbbbb¯†bbbbb!bÄÌbÄb b#b$b%b&b/b-b"Ú$b'b)b*b+b,b0b1b2b3b4b5b6b8b.ö£b7Äb:b@b>b9db;b=bAbBbCbDbEbFbHb?,“bGbJbKbMbIIObLbObPbQbRbSbTbUbYbN[IbVbXb\bZxnb[b^b_b`babbÑÑÑb]yåbcÑÄ©µÑÑÇ¢ÑÑ Ñ -Ñ Ñ Ñ ÑÑÌçÑÑÑÑÑéfÑÑÑÑÑ&ÑÑìÑÑ/Ñ'Ñ*ÑÑ(Ñ)Ñ Ñ!Ñ"Ñ#Ñ$Ñ%ŠRùÑ-Ñ+€‘Ñ,ÄÑ/Ñ0Ñ1Ñ5Ñ.Œ¶Ñ2Ñ4Ñ7ÏpÑ8Ñ9Ñ>Ñ6ªRÑ:Ñ;Ñ<Ñ=ÑJÑAÑ?¿ÖÑ@ÑKÑBÍòÑIÑCÑDÑEÑFÑGÑHÞ}ÑMÑNÑOÑSÑL?ÑPÑRÑVÑT^ïÑUÄÑXÑ^Ñ\ÑWdµÑYÑ[Ñ_Ñ`ÑbÑ]€üÑaDÑcÉDô}DDDÛ“DDD -D D ô‘D c^D DDÏDDDDDDÄDDD&DDD!DA¢DDDD D#D$D(D"MžD%D'D*D+D.D)e,D,D0D3D/qÉD1D5D6D9D4z}D7ÄD>D:‘D;D<D=D@DADED?—DBDDDKDF­‘DGDHDIDJDQDODL²CDMDRDSDZDPÇDTDUDVDWDXDYDcD[â‰D\D]D^D_D`DaDbĸ¸¸¸¸š¸%Џ¸7\¸¸ ¸ -¸ ¸ ¸ ¸¸¸¸¸¸C㸸¸¸¸O˸¸¸¸ ¸#¸_¸!¸'¸$wm¸%¸&ĸ)¸*¸-¸({G¸+¸2¸.”D¸/¸0¸1¸4¸5¸6¸;¸3–œ¸7¸9¸:¸=¸>¸A¸<®Á¸?¸E¸B»–¸C¸G¸J¸FÈe¸HĸO¸Ká¸L¸M¸N¸Q¸T¸PãݸR¸V¸Z¸Uùl¸W¸Y¸\¸`¸[ -¸]¸_¸b¸a¸cDÑOsÄ - ba  pÝ~œ!”A #$%("¢G&,)²*Ä8-ÈV./01234567<9å:B=åÿ>@AHCûÂDFGOI hJLMNSPùQÄWT'âU[X5(Y_\9è]c`?‘aŠŠjŠ ýŠ ŠìŠŠŠŠc]ÄŠ Š -’ýŠ ŠŠ©ÿŠŠŠŠŠŠ»£ŠŠŠÑŒŠŠŠŠŠ!Šä7ŠŠ'Š"û)Š#Š$Š%Š&ÄŠ+Š( »Š)Š1Š, "èŠ-Š.Š/Š0Š5Š2 5½Š3Š;Š6 KíŠ7Š8Š9Š:Š?Š< ^»Š=ŠGŠ@ uˆŠAŠBŠCc\ŠDŠEŠFÄŠLŠH Œ\ŠIŠKŠQŠM ¡ŠNŠPŠSŠTŠWŠR ¶ªŠUŠZŠ[Š\ŠX ÏHŠYŠbŠ] éwŠ^Š`ŠaþŠc ÷¦þþþÄ"#þ -þ!#7þþþ þþ !)jþ þþþþ!1‡þþþþ!3¿þþþþþ"þ!EÅþþ þ!þ$þ)þ#!YVþ%þ'þ(Äþ+þ0þ*!giþ,þ.þ/þ2þ7þ1!wáþ3þ5þ6þ9þ>þ8!’½þ:þ<þ=þAþ?!­:þ@þEþB!¯ÓþCþGþHþLþF!´eþIþKÄþNþOþPþQþTþM!È:þRþWþU!æ þVþYþZþ[þ\þ_þX!êEþ]þacþ`"«þb%cc"5öcccc -c":cÄ cc "DÌc 
ccccc"G-cccccccccc"g cc!c#c "ƒkc"c&c$"•1c%c6c'"•¿c(c)c+c3c5Ä!c@c7"¯ºc8c?cAcBcCcDcEcFcGcHcIcJc[cKcLcM"»¶cN"Ø•cO"êcP#•cQ#& cR#L~cS#œcT#¬DcU#äcV#ôdcW$ cX$`ÁcY$ócZ%Q1Ä"Ä#Ä$Ä%Ä&Ä'Ä(%|ëÌTÌUÌVÌWÌXÌYÌZÌ[Ì\Ì]Ì^Ì_Ì`ÌaÌbÌcÞ%ªhÞÞÞÞÞÞÞÞÞ Þ -Þ Þ Þ ÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞ Þ!Þ"Þ#Þ$Þ%Þ&Þ'Þ(Þ)Þ*Þ+Þ,Þ-Þ.Þ/Þ0Þ1Þ2Þ3Þ4Þ5Þ6Þ7Þ8Þ9Þ:Þ;Þ<Þ=Þ>Þ?Þ@ÞAÞBÞCÞDÞEÞFÞGÞHÞIÞJÞKÞLÞMÞNÞOÞPÞQÞRÞSÞTÞUÞVÞWÞXÞYÞZÞ[Þ\Þ]Þ^Þ_Þ`Þa%¨H%ò© +/Root 2473 0 R +/Info 2474 0 R +/ID [ ] +/Length 12380 +>> +stream +ÿ]) 'Fz& 'Ez3 'Dz> 'CzF 'B  +zR 'A  z[ '@ Ó '?Ó '>Ó$ '=Ó% '<ÓE ';ÓF ':ÓG '9ÓH '8ÓL '7 ÓM '6!"ÓN '5#$ÓR '4%&ÓS '3'(ÓU '2)*ÓV '1+,ÓZ '0-.Ó[ '//0Ó\ '.12Óc '-344 ',564 '+784 '*9:4  ');<4  '(=>4 ''?@4 '&AB4 '%CD4 '$EF4 '#GH4 '"IJ4 '!KL4 ' MN4% 'OP4& 'QR4' 'ST4+ 'UV41 'WX42 'YZ43 '[\44 ']^4: '_`4; 'ab4< 'cË>\4B 'ËË4G 'ËË4H 'ËË4I 'ËË4P 'Ë Ë +4T 'Ë Ë 4^ 'Ë Ë + ' ËË ' ËË% ' ËË1 ' +ËË9 ' ËËE 'ËËP 'ËËa 'ËËþ 'ËË þ  'Ë!Ë"þ 'Ë#Ë$þ( 'Ë%Ë&þC 'Ë'Ë(þP 'Ë)Ë*þ_ %cË+Ë,g %bË-Ë.g %aË/Ë0g %`Ë1Ë2g= %_Ë3Ë4gF %^Ë5Ë6gQ %]Ë7Ë8gR %\Ë9Ë:Þ %[Ë;Ë<Þ %ZË=Ë>Þ %YË?Ë@Þ %XËAËBÞ$ %WËCËDÞ/ %VËEËFÞ: %UËGËHÞU %TËIËJL %SËKËLL %RËMËNL %QËOËPL %PËQËRL$ %OËSËTL, %NËUËVL0 %MËWËXL5 %LËYËZL? 
%KË[Ë\LD %JË]Ë^LJ %IË_Ë`LP %HËaËbLV %GËc”|YL\ %F””Lc %E””· %D””· %C””· %B” ” +·' %A” ” ·+ %@” ”·1 %?””·7 %>””·> %=””·B %<””·F %;””·J %:””·N %9””·R %8””·\ %7”” % %6”!”"%  %5”#”$% %4”%”&%  %3”'”(%* %2”)”*%6 %1”+”,%; %0”-”.%B %/”/”0%K %.”1”2%Q %-”3”4%W %,”5”6%] %+”7”8%c %*”9”:‘ %)”;”<‘  %(”=”>‘ %'”?”@‘ %&”A”B‘  %%”C”D‘' %$”E”F‘0 %#”G”H‘7 %"”I”J‘@ %!”K”L‘M % ”M”N‘Y %”O”P‘^ %”Q”R‘b %”S”T %”U”V  %”W”X %”Y”Z %”[”\ %”]”^3 %”_”`J %”a”bK %”c]ý{R %]]S %]]~> %]]~B %]]~C %] ] +~E %] ] ~I %] ]~J % ]]~K % ]]~L % ]]~M % +]]~N % ]]~O %]]~P %]]~T %]]~U %]] ~V %]!]"~W %]#]$]%Áæ]&]*¾«]'](â<â>â=â?Ò’]-]+ƒ0],]/]0]1]2]3]4]5]6]7]8]9]:];]<]=]>]?]@]A]B]D]F]G]H]I]J]K]L]M]N]P]R]S]T]U]V]W]X]Y]Z][]`]^].ƒº]\]]]C]E]O]Q]a]b]c  +    !"#$%&'()*.,]_¿L+¶Ñ/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTVWY[`^-4]UXZ\bzzzzzzzzz z +z z z zzzzzzzzzzzzzzzzzzz z!z"z$_}!z#aczÎJz'z%÷Tâ@z)z*z+z,z-z.z/z0z1z2z4z(Åâââââ âââ ~câz:z;z<BÍzBz@z5+£z=z?z6z7z8z9GƒâzCzDzIzAS&zEâ7â6â5zGzHâ zNzOŒ¯zSzJwúzPzQzKzLzM~b~azUzVzXzT“YzWÓÓzY¨^zZz\z]z^z_z`zazbzcâAÓÓÓ4ÓÓÓÓÓÓÓ Ó +Ó Ó Ó ÓÓÓÓÓâ8ÓÓ Ó&ÓÓÓÓÓÓÓÓ"Ó'Ó!EoÓ#Ó&Ó)Ó*Ó+Ó2Ó(gêÓ,Ó-Ó.Ó/Ó0Ó1Ó4Ó5Ó@Ó3:Ó6Ó7Ó8Ó9Ó:Ó;Ó<Ó=Ó>â9Ó?ÓBÓCÓIÓAïÓDâBÓOÓJ²IÓKÓWÓPÄÓQÓTÓ]ÓX×çÓYÓ_Ó`Óa4Ó^îæÓb4â +4^à4 444444 +T54 4âC44d444v44"4Ž44 4!4(4#¢4$4,4)´°4*4.4/464-Ît4045âD494=47í¢484?4@4C4>4A4_4E4M4K4Dí4F4J4O4Q4L$ç4N4U4R6)4S4W4X4Y4Z4[4\4b4`4V74]â;âE4c4aU!°± …F   ¤µ»ؼ!"#' Û>$&âF+(úx)*-./3,ñ02567;4 †8:?<>X=>ABCG@J×DFJHhÏIâGLMNRKkROQ[S…òTUVWXYZ]^_c\—Í`bþþþþÞNþþ÷eþþ þ þóŸþ +â:þ þþþþþþþþþþþþþâHþþþþþ þ!þ"þ$þ(¿þ#þ&þ,þ*þ%C1þ'þ)þ-þ.þ/þ0þ1þ2þ4þ+c5þ3þ6þ7þ9þ5€›þ8þ;þ<þ=þ>þ?þ@þAþEþ:’øþBþDþHþF±^þGâIþJþKþLþMþNþTþUþRþI²ÓþOþQþWþSÑ$þVþYþZþ[þ\þ]þaþXÖ þ^þ`þcgþbô™gHggggg +g*¤gg R[ggg E“ggâJg g gggg`¶”%ggÁ½ggggg!gÍãgg g#Êg$g%g*g"ìÅg&g'g(g)g6g-g+0g,g7g.Lg5g/g0g1g2g3g4"hU×g9g:g;g?g8ƒog<g>âKgBg@¥NgAgDgJgHgC«gEgGgKgLgNgIÉgMgSgOÙDgPgUgXgTó¯gVgWgZg]gYŽg[â4g\âLÞg^<-g_g`gagbgcÞ†fÞÞÞwîÞÞÞ Þ”@Þ Þ +Þ Þ ÞÞÞÞ ˆÞÞÞÞÞÞ¸ïÞÞÞÞÅžÞâMÞ!Þ"Þ%Þ ÎTÞ#Þ*Þ&å×Þ'Þ(Þ)Þ,Þ-Þ1Þ+ìèÞ.Þ0Þ7Þ2dÞ3Þ4Þ5Þ6Þ=Þ;Þ8 
ºÞ9Þ>Þ?ÞFÞ<iÞ@ÞAÞBÞCÞDÞEâNÞOÞGLBLFLA_ëLCLELHLLLGmLILKLNLRLM+LOLQâQLTLXLSŽ3LULWLZL_LY¢OL[L]L^La·L`±GLb··!d···é +·· ·þà· · +· · ····· ê·âR·· ê··$· 5Ø······· ·!·"·#·(·% V·&·.·) W·*·,·-·4·/ qú·0·2·3·;·5 „¹·6·8·9·:âS·?·< –‘·=·C·@  ý·A·G·D ®q·E·K·H ³A·I·O·L ¹8·M·S·P ¾|·QâT·Y·T Öj·U·V·W·Xâ3·]·Z ë·[·c·^!†·_·`·a·b%%!<Š%"¶%% %!Ts%%%%% % +!i % âU%%!F%%%%%%!•…%%%!­ÿ%%%%%!%!Â%%'%"!Ú‹%#%$%%%&%+%(!îÜ%)âV%3%,"ú%-%.%/â2%0%1%2%8%4" æ%5%7%=%9"7%:%<%?%@%C%>"MP%A%F%G%H%D"hc%E%N%I"‘;%J%L%MâW%T%O"Ÿf%P%R%S%Z%U"¥%V%X%Y%`%["«r%\%^%_‘%a"³Ñ%b$ n‘‘‘"ÛK‘‘‘‘ ‘‘"î‘ +‘ ‘ âX‘‘‘#v‘‘‘‘‘‘#‘‘‘‘‘#‘#"]‘‘!‘"‘%‘*‘$#@‘&‘(‘)‘-‘+#]2‘,‘1‘.#_ÿ‘/âY‘3‘4‘5‘9‘2#e³‘6‘8‘E‘;‘<‘=‘>‘C‘A‘:#y­‘?‘F‘B#Šñ‘D~`‘H‘I‘J‘K‘N‘G#À‘L‘Q‘O#Þª‘P‘S‘T‘U‘V‘W‘Z‘R#ãT‘XâZ‘\‘_‘[#û…‘]‘c‘`$ d‘a$8¢%Œt  + $Cz $[’$hIâ[$"$jå !%&')#$Œ4(+-*$¨v,/0197.$º—2456:;=8$Ý <@>$÷Þ?â\BDEFGHLA$ü]ICââNO%SÖP%‚%~ZVM%†QTUâ\~`W%@´[]^_XY%bð~~a%dm~~~~ ~ +~ ~ ~ ~~bc%¼Y~~'p~%½a~%¾W%¿a%Þ;& j~~~~~%çm~~~~~~~~&î~&&&%Á~)~+&dÙ~'~-~ &D—~(~*~,â]~!~"~#~$~%&oâ~&&pÞ&qî&‹‹&¼Å~5~8~.&–¾~6~7~/~0&Ê~1~2~3&Ëq~4&Ìm&Í&ê˜~;~9&÷Ù~:~?~<&ü_~=~F~@']~A~D~Q~G'2n~H~X~R'Gã~Sâ^~[~Y'RÅ~Zâ~\'SS~]~^~_ââ*‰ ââ'žÈâ ââââââââââââ â1â!â"â#'·â$'Óøâ%'÷èâ&(õâ'(5"â((]â)(®ìâ*(¿½â+)Õâ,)2â-)(îâ.)‡¤â/)í»â0*<*@>*F¤*Là*SÙ*\*b\*h›*nÚ*rl*w*}*ƒâ_â`âaâbâc % %*Ñp %*÷Ø 'G 'H 'I 'J 'K 'L 'M 'N 'O 'P 'Q 'R 'S 'T 'U 'V 'W 'X 'Y 'Z '[ '\ '] '^ '_ '` 'a 'b 'c F+(, F F F F F F F F F  F + F  F  F  F F F F F F F F F F F F F F F F F F F  F! F" F# F$ F% F& F' F( F) F* F+ F, F- F. F/ F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F: F; F< F= F> F? 
F@ FA FB FC FD FE FF FG FH FI FJ FK FL FM FN FO FP FQ FR FS FT FU FV FW FX FY FZ F[ F\ F] F^ F_ F` Fa Fb Fc+s+u6 endstream endobj startxref -2486953 +2848054 %%EOF diff --git a/docs/src/Makefile b/docs/src/Makefile index e6ee0c5e..385d9051 100644 --- a/docs/src/Makefile +++ b/docs/src/Makefile @@ -86,7 +86,8 @@ TOPFILE = userguide.tex HTMLFILE = userhtml.tex SECFILE = intro.tex commrout.tex datastruct.tex psbrout.tex toolsrout.tex\ - methods.tex precs.tex penv.tex error.tex util.tex biblio.tex + methods.tex precs.tex penv.tex error.tex util.tex biblio.tex \ + ext-intro.tex cuda.tex FIGDIR = figures XPDFFLAGS = @@ -139,7 +140,7 @@ PDF = $(join $(BASEFILE),.pdf) PS = $(join $(BASEFILE),.ps) GXS = $(join $(BASEFILE),.gxs) GLX = $(join $(BASEFILE),.glx) -TARGETPDF= ../psblas-3.8.pdf +TARGETPDF= ../psblas-3.9.pdf BASEHTML = $(patsubst %.tex,%,$(HTMLFILE)) HTML = $(join $(BASEHTML),.html) HTMLDIR = ../html diff --git a/docs/src/biblio.tex b/docs/src/biblio.tex index 14c6dbdd..ed22734b 100644 --- a/docs/src/biblio.tex +++ b/docs/src/biblio.tex @@ -1,9 +1,5 @@ \begin{thebibliography}{99} -\bibitem{DesPat:11} - D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson -{\em Design Patterns for Scientific Computations on Sparse Matrices}, - HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011 \bibitem{PARA04FOREST} G.~Bella, S.~Filippone, A.~De Maio and M.~Testa, @@ -12,6 +8,11 @@ in J.~Dongarra, K.~Madsen, J.~Wasniewski, editors, Proceedings of PARA~04 Workshop on State of the Art in Scientific Computing, pp.~546--553, Lecture Notes in Computer Science, Springer, 2005. +\bibitem{BERTACCINIFILIPPONE} +D. Bertaccini\ and\ S. Filippone, +{\em Sparse approximate inverse preconditioners on high performance GPU platforms}, +Comput. Math. Appl., 71, (2016), no.~3, 693--711. +% \bibitem{2007d} A. Buttari, D. di Serafino, P. D'Ambra, S. 
Filippone,\newblock 2LEV-D2P4: a package of high-performance preconditioners,\newblock Applicable Algebra in Engineering, Communications and Computing, @@ -150,9 +151,14 @@ Lawson, C., Hanson, R., Kincaid, D. and Krogh, F., of partial differential equations.} {ACM Trans. Math. Softw.} vol.~{23}, 32--49. \bibitem{metcalf} +{Metcalf, M., Reid, J., Cohen, M., Bader, R.} +{\em Modern Fortran explained.} +{Oxford University Press}, 2024. +% +\bibitem{MRC:11} {Metcalf, M., Reid, J. and Cohen, M.} -{\em Fortran 95/2003 explained.} -{Oxford University Press}, 2004. +{\em Modern Fortran explained.} +{Oxford University Press}, 2011. % %% \bibitem{DD2} %% B.~Smith, P.~Bjorstad and W.~Gropp, @@ -169,4 +175,27 @@ M.~Snir, S.~Otto, S.~Huss-Lederman, D.~Walker and J.~Dongarra, {\em MPI: The Complete Reference. Volume 1 - The MPI Core}, second edition, MIT Press, 1998. % + +\bibitem{DesPat:11} + D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson +{\em Design Patterns for Scientific Computations on Sparse Matrices}, + HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011 + +\bibitem{CaFiRo:2014} +{ Cardellini, V.}, { Filippone, S.}, { and} { Rouson, D.} 2014, + Design patterns for sparse-matrix computations on hybrid {CPU/GPU} + platforms, +{\em Scientific Programming\/}~{\em 22,\/}~1, 1--19. +\bibitem{OurTechRep} +D.~Barbieri, V.~Cardellini, A.~Fanfarillo, S.~Filippone, Three storage formats + for sparse matrices on {GPGPUs}, Tech. Rep. DICII RR-15.6, + Universit\`a di + Roma Tor Vergata (February 2015). +\bibitem{Filippone:2017:SMM:3034774.3017994} +S.~Filippone, V.~Cardellini, D.~Barbieri, and A.~Fanfarillo. + Sparse matrix-vector multiplication on {GPGPUs}. + {\em ACM Trans. Math. Softw.}, 43(4):30:1--30:49, 2017. 
+ + + \end{thebibliography} diff --git a/docs/src/commrout.tex b/docs/src/commrout.tex index f2e38965..d1d831a4 100644 --- a/docs/src/commrout.tex +++ b/docs/src/commrout.tex @@ -6,7 +6,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% The routines in this chapter implement various global communication operators on vectors associated with a discretization mesh. For auxiliary communication -routines not tied to a discretization space see~\ref{sec:toolsrout}. +routines not tied to a discretization space see~\ref{sec:parenv}. \clearpage\subsection{psb\_halo --- Halo Data Communication} diff --git a/docs/src/cuda.tex b/docs/src/cuda.tex new file mode 100644 index 00000000..e075ada8 --- /dev/null +++ b/docs/src/cuda.tex @@ -0,0 +1,395 @@ + +\subsection{CUDA-class extensions} +\label{sec:cudastruct} +For computing with CUDA we define a dual memorization strategy in +which each variable on the CPU (``host'') side has a GPU (``device'') +side. When a GPU-type variable is initialized, the data contained is +(usually) the same on both sides. Each operator invoked on the +variable may change the data so that only the host side or the device +side are up-to-date. + +Keeping track of the updates to data in the variables is essential: we want +to perform most computations on the GPU, but we cannot afford the time +needed to move data between the host memory and the device memory +because the bandwidth of the interconnection bus would become the main +bottleneck of the computation. Thus, each and every computational +routine in the library is built according to the following principles: +\begin{itemize} +\item If the data type being handled is {GPU}-enabled, make sure that + its device copy is up to date, perform any arithmetic operation on + the {GPU}, and if the data has been altered as a result, mark + the main-memory copy as outdated. 
+\item The main-memory copy is never updated unless this is requested + by the user either +\begin{description} +\item[explicitly] by invoking a synchronization method; +\item[implicitly] by invoking a method that involves other data items + that are not {GPU}-enabled, e.g., by assignment of a vector to a + normal array. +\end{description} +\end{itemize} +In this way, data items are put on the {GPU} memory ``on demand'' and +remain there as long as ``normal'' computations are carried out. +As an example, the following call to a matrix-vector product +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + call psb_spmm(alpha,a,x,beta,y,desc_a,info) +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + call psb_spmm(alpha,a,x,beta,y,desc_a,info) +\end{verbatim} + \end{minipage} + \end{center} +\fi +will transparently and automatically be performed on the {GPU} whenever +all three data inputs \fortinline|a|, \fortinline|x| and +\fortinline|y| are {GPU}-enabled. If a program makes many such calls +sequentially, then +\begin{itemize} +\item The first kernel invocation will find the data in main memory, + and will copy it to the {GPU} memory, thus incurring a significant + overhead; the result is however \emph{not} copied back, and + therefore: +\item Subsequent kernel invocations involving the same vector will + find the data on the {GPU} side so that they will run at full + speed. +\end{itemize} +For all invocations after the first the only data that will have to be +transferred to/from the main memory will be the scalars \fortinline|alpha| +and \fortinline|beta|, and the return code \fortinline|info|.
+ +\begin{description} +\item[Vectors:] The data type \fortinline|psb_T_vect_gpu| provides a + GPU-enabled extension of the inner type \fortinline|psb_T_base_vect_type|, + and must be used together with the other inner matrix type to make + full use of the GPU computational capabilities; +\item[CSR:] The data type \fortinline|psb_T_csrg_sparse_mat| provides an + interface to the GPU version of CSR available in the NVIDIA CuSPARSE + library; +\item[HYB:] The data type \fortinline|psb_T_hybg_sparse_mat| provides an + interface to the HYB GPU storage available in the NVIDIA CuSPARSE + library. The internal structure is opaque, hence the host side is + just CSR; the HYB data format is only available up to CUDA version + 10. +\item[ELL:] The data type \fortinline|psb_T_elg_sparse_mat| provides an + interface to the ELLPACK implementation from SPGPU; + +\item[HLL:] The data type \fortinline|psb_T_hlg_sparse_mat| provides an + interface to the Hacked ELLPACK implementation from SPGPU; +\item[HDIA:] The data type \fortinline|psb_T_hdiag_sparse_mat| provides an + interface to the Hacked DIAgonals implementation from SPGPU; +\end{description} + + +\section{CUDA Environment Routines} +\label{sec:cudaenv} + +\subsection*{psb\_cuda\_init --- Initializes PSBLAS-CUDA + environment} +\addcontentsline{toc}{subsection}{psb\_cuda\_init} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +call psb_cuda_init(ctxt [, device]) +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +call psb_cuda_init(ctxt [, device]) +\end{verbatim} + \end{minipage} + \end{center} +\fi + +This subroutine initializes the PSBLAS-CUDA environment. +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry ] +\item[device] ID of CUDA device to attach to.\\ +Scope: {\bf local}.\\ +Type: {\bf optional}.\\ +Intent: {\bf in}.\\ +Specified as: an integer value. 
\ +Default: use \fortinline|mod(iam,ngpu)| where \fortinline|iam| is the calling +process index and \fortinline|ngpu| is the total number of CUDA devices +available on the current node. +\end{description} + + +{\par\noindent\large\bfseries Notes} +\begin{enumerate} +\item A call to this routine must precede any other PSBLAS-CUDA call. +\end{enumerate} + +\subsection*{psb\_cuda\_exit --- Exit from PSBLAS-CUDA + environment} +\addcontentsline{toc}{subsection}{psb\_cuda\_exit} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +call psb_cuda_exit(ctxt) +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +call psb_cuda_exit(ctxt) +\end{verbatim} + \end{minipage} + \end{center} +\fi + +This subroutine exits from the PSBLAS CUDA context. +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry ] +\item[ctxt] the communication context identifying the virtual + parallel machine.\\ +Scope: {\bf global}.\\ +Type: {\bf required}.\\ +Intent: {\bf in}.\\ +Specified as: an integer variable. +\end{description} + + + + +\subsection*{psb\_cuda\_DeviceSync --- Synchronize CUDA device} +\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceSync} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +call psb_cuda_DeviceSync() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +call psb_cuda_DeviceSync() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +This subroutine ensures that all previously invoked kernels, i.e. all +invocations of CUDA-side code, have completed.
+ + +\subsection*{psb\_cuda\_getDeviceCount } +\addcontentsline{toc}{subsection}{psb\_cuda\_getDeviceCount} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +ngpus = psb_cuda_getDeviceCount() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +ngpus = psb_cuda_getDeviceCount() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Get number of devices available on current computing node. + +\subsection*{psb\_cuda\_getDevice } +\addcontentsline{toc}{subsection}{psb\_cuda\_getDevice} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +dev = psb_cuda_getDevice() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +dev = psb_cuda_getDevice() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Get device in use by current process. + +\subsection*{psb\_cuda\_setDevice } +\addcontentsline{toc}{subsection}{psb\_cuda\_setDevice} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +info = psb_cuda_setDevice(dev) +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +info = psb_cuda_setDevice(dev) +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Set device to be used by current process. + +\subsection*{psb\_cuda\_DeviceHasUVA } +\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceHasUVA} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +hasUva = psb_cuda_DeviceHasUVA() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +hasUva = psb_cuda_DeviceHasUVA() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns true if device currently in use supports UVA +(Unified Virtual Addressing). 
+ +\subsection*{psb\_cuda\_WarpSize } +\addcontentsline{toc}{subsection}{psb\_cuda\_WarpSize} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nw = psb_cuda_WarpSize() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nw = psb_cuda_WarpSize() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the warp size. + + +\subsection*{psb\_cuda\_MultiProcessors } +\addcontentsline{toc}{subsection}{psb\_cuda\_MultiProcessors} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nmp = psb_cuda_MultiProcessors() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nmp = psb_cuda_MultiProcessors() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the number of multiprocessors in the CUDA device. + +\subsection*{psb\_cuda\_MaxThreadsPerMP } +\addcontentsline{toc}{subsection}{psb\_cuda\_MaxThreadsPerMP} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nt = psb_cuda_MaxThreadsPerMP() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nt = psb_cuda_MaxThreadsPerMP() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the maximum number of threads per multiprocessor. + + +\subsection*{psb\_cuda\_MaxRegistersPerBlock } +\addcontentsline{toc}{subsection}{psb\_cuda\_MaxRegistersPerBlock} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nr = psb_cuda_MaxRegistersPerBlock() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nr = psb_cuda_MaxRegistersPerBlock() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the maximum number of registers per thread block.
+ + +\subsection*{psb\_cuda\_MemoryClockRate } +\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryClockRate} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +cl = psb_cuda_MemoryClockRate() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +cl = psb_cuda_MemoryClockRate() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the memory clock rate in KHz, as an integer. + +\subsection*{psb\_cuda\_MemoryBusWidth } +\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryBusWidth} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nb = psb_cuda_MemoryBusWidth() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nb = psb_cuda_MemoryBusWidth() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the memory bus width in bits. + +\subsection*{psb\_cuda\_MemoryPeakBandwidth } +\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryPeakBandwidth} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +bw = psb_cuda_MemoryPeakBandwidth() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +bw = psb_cuda_MemoryPeakBandwidth() +\end{verbatim} + \end{minipage} + \end{center} +\fi +Returns the peak memory bandwidth in MB/s (real double precision). + + + diff --git a/docs/src/datastruct.tex b/docs/src/datastruct.tex index 07fb6375..65481912 100644 --- a/docs/src/datastruct.tex +++ b/docs/src/datastruct.tex @@ -387,11 +387,11 @@ accelerators. 
-\subsubsection{psb\_cd\_get\_large\_threshold --- Get threshold for +\subsubsection{psb\_cd\_get\_hash\_threshold --- Get threshold for index mapping switch} \begin{verbatim} -ith = psb_cd_get_large_threshold() +ith = psb_cd_get_hash_threshold() \end{verbatim} \begin{description} @@ -403,12 +403,12 @@ ith = psb_cd_get_large_threshold() -\subsubsection{psb\_cd\_set\_large\_threshold --- Set threshold for +\subsubsection{psb\_cd\_set\_hash\_threshold --- Set threshold for index mapping switch} %\addcontentsline{toc}{paragraph}{psb\_cd\_set\_large\_threshold} \begin{verbatim} -call psb_cd_set_large_threshold(ith) +call psb_cd_set_hash_threshold(ith) \end{verbatim} \begin{description} @@ -420,6 +420,11 @@ Type: {\bf required}.\\ Intent: {\bf in}.\\ Specified as: an integer value greater than zero. \end{description} +This threshold guides the library into using a list based or a +hash-table based descriptor for global to local index conversion; +if the size of the global index space is below this threshold, a list +based structure is used, if it is above a hash-table based structure +is used. Note: the threshold value is only queried by the library at the time a call to \fortinline|psb_cdall| is executed, therefore changing the threshold has no effect on communication descriptors that have already been @@ -898,9 +903,7 @@ A variable of type \fortinline|psb_Tspmat_type|. %\addcontentsline{toc}{paragraph}{clean\_zeros} \fortinline|call a%clean_zeros(info)| -Eliminates zero coefficients in the input matrix. Note that depending -on the internal storage format, there may still be some amount of -zero padding in the output. +Eliminates zero coefficients explicitly stored in the input matrix. \begin{description} \item[Type:] Asynchronous. @@ -915,6 +918,13 @@ Scope: {\bf local}.\\ A variable of type \fortinline|psb_Tspmat_type|. \item[info] Return code. 
\end{description} +{\par\noindent\bfseries Notes} +\begin{enumerate} +\item Depending on the internal storage format, there may still be some amount of + zero padding in the output. +\item Any explicit zeros on the main diagonal are always kept in the + data structure. +\end{enumerate} \subsubsection{get\_diag --- Get main diagonal} %\addcontentsline{toc}{paragraph}{get\_diag} @@ -1317,13 +1327,14 @@ like Diagonal Scaling or Block Jacobi with incomplete factorization ILU(0). A preconditioner is held in the \hypertarget{precdata}{{\tt - psb\_prec\_type}} data structure reported in -figure~\ref{fig:prectype}. The \fortinline|psb_prec_type| + psb\_Tprec\_type}} data structure reported in +figure~\ref{fig:prectype}. The \fortinline|psb_Tprec_type| data type may contain a simple preconditioning matrix with the -associated communication descriptor.%% which may be different than the +associated communication descriptor. +%% which may be different from the %% system communication descriptor in the case of parallel %% preconditioners like the Additive Schwarz one. Then the -%% \fortinline|psb_prec_type| may contain more than one preconditioning matrix +%% \fortinline|psb_Tprec_type| may contain more than one preconditioning matrix %% like in the case of Two-Level (in general Multi-Level) preconditioners. %% The user can choose the type of preconditioner to be used by means of %% the \fortinline|psb_precset| subroutine; once the type of preconditioning @@ -1407,8 +1418,8 @@ Given a heap object, the following methods are defined on it: \item[dump] Print on file; \item[free] Release memory. \end{description} -These objects are used in AMG4PSBLAS to implement the factorization -algorithms. +These objects are used to implement the factorization +and approximate inversion algorithms. 
%%% Local Variables: %%% mode: latex diff --git a/docs/src/error.tex b/docs/src/error.tex index e6bb0d2d..07d009d8 100644 --- a/docs/src/error.tex +++ b/docs/src/error.tex @@ -1,6 +1,6 @@ -\section{Error handling} +\section{Error handling\label{sec:errors}} The PSBLAS library error handling policy has been completely rewritten in version 2.0. The idea behind the design of this new error handling diff --git a/docs/src/ext-intro.tex b/docs/src/ext-intro.tex new file mode 100644 index 00000000..44e9915c --- /dev/null +++ b/docs/src/ext-intro.tex @@ -0,0 +1,598 @@ +\section{Extensions}\label{sec:ext-intro} + +The EXT, CUDA and RSB subdirectories contain a set of extensions to the base +library. The extensions provide additional storage formats beyond the +ones already contained in the base library, as well as interfaces +to: +\begin{description} +\item[SPGPU] a CUDA library originally published as + \url{https://code.google.com/p/spgpu/} and now included in the + \verb|cuda| subdir, for computations on NVIDIA GPUs; +\item[LIBRSB] \url{http://sourceforge.net/projects/librsb/}, for + computations on multicore parallel machines. +\end{description} +The infrastructure laid out in the base library to allow for these +extensions is detailed in the references~\cite{DesPat:11,CaFiRo:2014,Sparse03}; +the CUDA-specific data formats are described in~\cite{OurTechRep}. + + +\subsection{Using the extensions} +\label{sec:ext-appstruct} +A sample application using the PSBLAS extensions will contain the +following steps: +\begin{itemize} +\item \verb|USE| the appropriate modules (\verb|psb_ext_mod|, + \verb|psb_cuda_mod|); +\item Declare a \emph{mold} variable of the necessary type + (e.g. \verb|psb_d_ell_sparse_mat|, \verb|psb_d_hlg_sparse_mat|, + \verb|psb_d_vect_cuda|); +\item Pass the mold variable to the base library interface where + needed to ensure the appropriate dynamic type.
+\end{itemize} +Suppose you want to use the CUDA-enabled ELLPACK data structure; you +would use a piece of code like this (and don't forget, you need +CUDA-side vectors along with the matrices): +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} +program my_cuda_test + use psb_base_mod + use psb_util_mod + use psb_ext_mod + use psb_cuda_mod + type(psb_dspmat_type) :: a, agpu + type(psb_d_vect_type) :: x, xg, bg + + real(psb_dpk_), allocatable :: xtmp(:) + type(psb_d_vect_cuda) :: vmold + type(psb_d_elg_sparse_mat) :: aelg + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + call psb_cuda_init(ctxt, iam) + + + ! My own home-grown matrix generator + call gen_matrix(ctxt,idim,desc_a,a,x,info) + if (info /= 0) goto 9999 + + call a%cscnv(agpu,info,mold=aelg) + if (info /= 0) goto 9999 + xtmp = x%get_vect() + call xg%bld(xtmp,mold=vmold) + call bg%bld(size(xtmp),mold=vmold) + + ! Do sparse MV + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + + +9999 continue + if (info == 0) then + write(*,*) '42' + else + write(*,*) 'Something went wrong ',info + end if + + + call psb_cuda_exit() + call psb_exit(ctxt) + stop +end program my_cuda_test +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +program my_cuda_test + use psb_base_mod + use psb_util_mod + use psb_ext_mod + use psb_cuda_mod + type(psb_dspmat_type) :: a, agpu + type(psb_d_vect_type) :: x, xg, bg + + real(psb_dpk_), allocatable :: xtmp(:) + type(psb_d_vect_cuda) :: vmold + type(psb_d_elg_sparse_mat) :: aelg + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + call psb_cuda_init(ctxt, iam) + + + ! 
My own home-grown matrix generator + call gen_matrix(ctxt,idim,desc_a,a,x,info) + if (info /= 0) goto 9999 + + call a%cscnv(agpu,info,mold=aelg) + if (info /= 0) goto 9999 + xtmp = x%get_vect() + call xg%bld(xtmp,mold=vmold) + call bg%bld(size(xtmp),mold=vmold) + + ! Do sparse MV + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + + +9999 continue + if (info == 0) then + write(*,*) '42' + else + write(*,*) 'Something went wrong ',info + end if + + + call psb_cuda_exit() + call psb_exit(ctxt) + stop +end program my_cuda_test +\end{verbatim} + \end{minipage} + \end{center} +\fi + +A full example of this strategy can be seen in the +\texttt{test/ext/kernel} and \texttt{test/\-cuda/\-kernel} subdirectories, +where we provide sample programs +to test the speed of the sparse matrix-vector product with the various +data structures included in the library. + + +\subsection{Extensions' Data Structures} +\label{sec:ext-datastruct} +%\ifthenelse{\boolean{mtc}}{\minitoc}{} + +Access to the facilities provided by the EXT library is mainly +achieved through the data types that are provided within. +The data classes are derived from the base classes in PSBLAS, through +the Fortran~2003 mechanism of \emph{type extension}~\cite{MRC:11}. + +The data classes are divided between the general purpose CPU +extensions, the GPU interfaces and the RSB interfaces. +In the description we will make use of the notation introduced in +Table~\ref{tab:notation}. 
+ +\begin{table}[ht] +\caption{Notation for parameters describing a sparse matrix} +\begin{center} +{\footnotesize +\begin{tabular}{ll} +\hline +Name & Description \\ +\hline +M & Number of rows in matrix \\ +N & Number of columns in matrix \\ +NZ & Number of nonzeros in matrix \\ +AVGNZR & Average number of nonzeros per row \\ +MAXNZR & Maximum number of nonzeros per row \\ +NDIAG & Number of nonzero diagonals\\ +AS & Coefficients array \\ +IA & Row indices array \\ +JA & Column indices array \\ +IRP & Row start pointers array \\ +JCP & Column start pointers array \\ +NZR & Number of nonzeros per row array \\ +OFFSET & Offset for diagonals \\ +\hline +\end{tabular} +} +\end{center} +\label{tab:notation} +\end{table} + +\begin{figure}[ht] + \centering +% \includegraphics[width=5.2cm]{figures/mat.eps} +\ifcase\pdfoutput + \includegraphics[width=5.2cm]{mat.png} +\or + \includegraphics[width=5.2cm]{figures/mat.pdf} +\fi + \caption{Example of sparse matrix} + \label{fig:dense} +\end{figure} + +\subsection{CPU-class extensions} + + +\subsubsection*{ELLPACK} + +The ELLPACK/ITPACK format (shown in Figure~\ref{fig:ell}) +comprises two 2-dimensional arrays \verb|AS| and +\verb|JA| with \verb|M| rows and \verb|MAXNZR| columns, where +\verb|MAXNZR| is the maximum +number of nonzeros in any row~\cite{ELLPACK}. +Each row of the arrays \verb|AS| and \verb|JA| contains the +coefficients and column indices; rows shorter than +\verb|MAXNZR| are padded with zero coefficients and appropriate column +indices, e.g. the last valid one found in the same row.
+ +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{figures/ell.eps} +\ifcase\pdfoutput + \includegraphics[width=8.2cm]{ell.png} +\or + \includegraphics[width=8.2cm]{figures/ell.pdf} +\fi + \caption{ELLPACK compression of matrix in Figure~\ref{fig:dense}} + \label{fig:ell} +\end{figure} + + +\begin{algorithm} +\lstset{language=Fortran} +\small + \begin{lstlisting} + do i=1,n + t=0 + do j=1,maxnzr + t = t + as(i,j)*x(ja(i,j)) + end do + y(i) = t + end do + \end{lstlisting} + \caption{\label{alg:ell} Matrix-Vector product in ELL format} +\end{algorithm} +The matrix-vector product $y=Ax$ can be computed with the code shown in +Alg.~\ref{alg:ell}; it costs one memory write per outer iteration, +plus three memory reads and two floating-point operations per inner +iteration. + +Unless all rows have exactly the same number of nonzeros, some of the +coefficients in the \verb|AS| array will be zeros; therefore this +data structure will have an overhead both in terms of memory space +and redundant operations (multiplications by zero). The overhead can +be acceptable if: +\begin{enumerate} +\item The maximum number of nonzeros per row is not much larger than + the average; +\item The regularity of the data structure allows for faster code, + e.g. by allowing vectorization, thereby offsetting the additional + storage requirements. +\end{enumerate} +In the extreme case where the input matrix has one full row, the +ELLPACK structure would require more memory than the normal 2D array +storage. The ELLPACK storage format was very popular in the vector +computing days; in modern CPUs it is not quite as popular, but it +is the basis for many GPU formats. + +The relevant data type is \verb|psb_T_ell_sparse_mat|: +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! 
+ + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + .... + end type psb_d_ell_sparse_mat +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! + + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + .... + end type psb_d_ell_sparse_mat +\end{verbatim} + \end{minipage} + \end{center} +\fi + + +\subsubsection*{Hacked ELLPACK} + +The \textit{hacked ELLPACK} (\textbf{HLL}) format +alleviates the main problem of the ELLPACK format, that is, +the amount of memory required by padding for sparse matrices in +which the maximum row length is larger than the average. + +The number of elements allocated to padding is $[(m*maxNR) - +(m*avgNR) = m*(maxNR-avgNR)]$ +for both \verb|AS| and \verb|JA| arrays, +where $m$ is equal to the number of rows of the matrix, $maxNR$ is the +maximum number of nonzero elements +in any row and $avgNR$ is the average number of nonzeros. +Therefore a single densely populated row can seriously affect the +total size of the allocation. + +To limit this effect, in the HLL format we break the original matrix +into equally sized groups of rows (called \textit{hacks}), and then store +these groups as independent matrices in ELLPACK format. +The groups can be arranged selecting rows in an arbitrary manner; +indeed, if the rows are sorted by decreasing number of nonzeros we +obtain essentially the JAgged Diagonals format. +If the rows are not in the original order, then an additional vector +\textit{rIdx} is required, storing the actual row index for each row +in the data structure. + +The multiple ELLPACK-like buffers are stacked together inside a +single, one dimensional array; +an additional vector \textit{hackOffsets} is provided to keep track +of the individual submatrices.
+All hacks have the same number of rows \textit{hackSize}; hence, +the \textit{hackOffsets} vector is an array of +$(m/hackSize)+1$ elements, each one pointing to the first index of a +submatrix inside the stacked \textit{cM}/\textit{rP} buffers, plus an +additional element pointing past the end of the last block, where the +next one would begin. +We thus have the property that +the elements of the $k$-th \textit{hack} are stored between +\verb|hackOffsets[k]| and +\verb|hackOffsets[k+1]|, similarly to what happens in the CSR format. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{../figures/hll.eps} +\ifcase\pdfoutput + \includegraphics[width=.72\textwidth]{hll.png} +\or + \includegraphics[width=.72\textwidth]{../figures/hll.pdf} +\fi + \caption{Hacked ELLPACK compression of matrix in Figure~\ref{fig:dense}} + \label{fig:hll} +\end{figure} + +With this data structure a very long row only affects one hack, and +therefore the additional memory is limited to the hack in which the +row appears. + +The relevant data type is \verb|psb_T_hll_sparse_mat|: +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! + integer(psb_ipk_) :: hksz + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + .... + end type +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! + integer(psb_ipk_) :: hksz + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + .... 
+ end type +\end{verbatim} + \end{minipage} + \end{center} +\fi + +\subsubsection*{Diagonal storage} + + +The DIAgonal (DIA) format (shown in Figure~\ref{fig:dia}) +has a 2-dimensional array \verb|AS| containing in each column the +coefficients along a diagonal of the matrix, and an integer array +\verb|OFFSET| that determines where each diagonal starts. The +diagonals in \verb|AS| are padded with zeros as necessary. + +The code to compute the matrix-vector product $y=Ax$ is shown in Alg.~\ref{alg:dia}; +it costs one memory read per outer iteration, +plus three memory reads, one memory write and two floating-point +operations per inner iteration. The accesses to \verb|AS| and +\verb|x| are in strict sequential order, therefore no indirect +addressing is required. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{figures/dia.eps} +\ifcase\pdfoutput + \includegraphics[width=.72\textwidth]{dia.png} +\or + \includegraphics[width=.72\textwidth]{figures/dia.pdf} +\fi + \caption{DIA compression of matrix in Figure~\ref{fig:dense}} + \label{fig:dia} +\end{figure} + + +\begin{algorithm} +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + do j=1,ndiag + if (offset(j) > 0) then + ir1 = 1; ir2 = m - offset(j); + else + ir1 = 1 - offset(j); ir2 = m; + end if + do i=ir1,ir2 + y(i) = y(i) + alpha*as(i,j)*x(i+offset(j)) + end do + end do +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + do j=1,ndiag + if (offset(j) > 0) then + ir1 = 1; ir2 = m - offset(j); + else + ir1 = 1 - offset(j); ir2 = m; + end if + do i=ir1,ir2 + y(i) = y(i) + alpha*as(i,j)*x(i+offset(j)) + end do + end do +\end{verbatim} + \end{minipage} + \end{center} +\fi + \caption{\label{alg:dia} Matrix-Vector product in DIA format} +\end{algorithm} + + +The relevant data type is \verb|psb_T_dia_sparse_mat|: +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type, extends(psb_d_base_sparse_mat) :: 
psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + end type +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + end type +\end{verbatim} + \end{minipage} + \end{center} +\fi + + + +\subsubsection*{Hacked DIA} + +Storage by DIAgonals is an attractive option for matrices whose +coefficients are located on a small set of diagonals, since they do +away with storing explicitly the indices and therefore reduce +significantly memory traffic. However, having a few coefficients +outside of the main set of diagonals may significantly increase the +amount of needed padding; moreover, while the DIA code is easily +vectorized, it does not necessarily make optimal use of the memory +hierarchy. While processing each diagonal we are updating entries in +the output vector \verb|y|, which is then accessed multiple times; if +the vector \verb|y| is too large to remain in the cache memory, the +associated cache miss penalty is paid multiple times. + +The \textit{hacked DIA} (\textbf{HDIA}) format was designed to contain +the amount of padding, by breaking the original matrix +into equally sized groups of rows (\textit{hacks}), and then storing +these groups as independent matrices in DIA format. This approach is +similar to that of HLL, and requires using an offset vector for each +submatrix. Again, similarly to HLL, the various submatrices are +stacked inside a linear array to improve memory management. The fact +that the matrix is accessed in slices helps in reducing cache misses, +especially regarding accesses to the %output +vector \verb|y|. 
+ + +An additional vector \textit{hackOffsets} is provided to complete +the matrix format; given that \textit{hackSize} is the number of rows of each hack, +the \textit{hackOffsets} vector is made by an array of +$(m/hackSize)+1$ elements, pointing to the first diagonal offset of a +submatrix inside the stacked \textit{offsets} buffers, plus an +additional element equal to the number of nonzero diagonals in the whole matrix. +We thus have the property that +the number of diagonals of the $k$-th \textit{hack} is given by +\textit{hackOffsets[k+1] - hackOffsets[k]}. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{../figures/hdia.eps} +\ifcase\pdfoutput + \includegraphics[width=.72\textwidth]{hdia.png} +\or + \includegraphics[width=.72\textwidth]{../figures/hdia.pdf} +\fi + \caption{Hacked DIA compression of matrix in Figure~\ref{fig:dense}} + \label{fig:hdia} +\end{figure} + +The relevant data type is \verb|psb_T_hdia_sparse_mat|: +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type pm + real(psb_dpk_), allocatable :: data(:,:) + end type pm + + type po + integer(psb_ipk_), allocatable :: off(:) + end type po + + type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat + ! + ! HDIA format, extended. + ! + + type(pm), allocatable :: hdia(:) + type(po), allocatable :: offset(:) + integer(psb_ipk_) :: nblocks, nzeros + integer(psb_ipk_) :: hack = 64 + integer(psb_long_int_k_) :: dim=0 + + contains + .... + end type +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type pm + real(psb_dpk_), allocatable :: data(:,:) + end type pm + + type po + integer(psb_ipk_), allocatable :: off(:) + end type po + + type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat + ! + ! HDIA format, extended. + ! 
+ + type(pm), allocatable :: hdia(:) + type(po), allocatable :: offset(:) + integer(psb_ipk_) :: nblocks, nzeros + integer(psb_ipk_) :: hack = 64 + integer(psb_long_int_k_) :: dim=0 + + contains + .... + end type +\end{verbatim} + \end{minipage} + \end{center} +\fi + + diff --git a/docs/src/figures/dia.pdf b/docs/src/figures/dia.pdf new file mode 100644 index 00000000..04b1777d Binary files /dev/null and b/docs/src/figures/dia.pdf differ diff --git a/docs/src/figures/dia.png b/docs/src/figures/dia.png new file mode 100644 index 00000000..de7db919 Binary files /dev/null and b/docs/src/figures/dia.png differ diff --git a/docs/src/figures/ell.pdf b/docs/src/figures/ell.pdf new file mode 100644 index 00000000..b3f2f177 Binary files /dev/null and b/docs/src/figures/ell.pdf differ diff --git a/docs/src/figures/ell.png b/docs/src/figures/ell.png new file mode 100644 index 00000000..31911882 Binary files /dev/null and b/docs/src/figures/ell.png differ diff --git a/docs/src/figures/hdia.pdf b/docs/src/figures/hdia.pdf new file mode 100644 index 00000000..62f570bf Binary files /dev/null and b/docs/src/figures/hdia.pdf differ diff --git a/docs/src/figures/hdia.png b/docs/src/figures/hdia.png new file mode 100644 index 00000000..08bfb5ff Binary files /dev/null and b/docs/src/figures/hdia.png differ diff --git a/docs/src/figures/hll.pdf b/docs/src/figures/hll.pdf new file mode 100644 index 00000000..47267c4f Binary files /dev/null and b/docs/src/figures/hll.pdf differ diff --git a/docs/src/figures/hll.png b/docs/src/figures/hll.png new file mode 100644 index 00000000..219b751a Binary files /dev/null and b/docs/src/figures/hll.png differ diff --git a/docs/src/figures/mat.pdf b/docs/src/figures/mat.pdf new file mode 100644 index 00000000..2c47520d Binary files /dev/null and b/docs/src/figures/mat.pdf differ diff --git a/docs/src/figures/mat.png b/docs/src/figures/mat.png new file mode 100644 index 00000000..d4f5c6f9 Binary files /dev/null and b/docs/src/figures/mat.png differ 
diff --git a/docs/src/figures/psblaslibraryext.png b/docs/src/figures/psblaslibraryext.png new file mode 100644 index 00000000..da3b3a9c Binary files /dev/null and b/docs/src/figures/psblaslibraryext.png differ diff --git a/docs/src/intro.tex b/docs/src/intro.tex index ca0b47ad..a7bef845 100644 --- a/docs/src/intro.tex +++ b/docs/src/intro.tex @@ -1,3 +1,59 @@ +\section*{Preface} +\addcontentsline{toc}{section}{Preface} +This manual describes the main features of PSBLAS, a library for +parallel sparse computations that has been developed over a number of +years. + +Our work has been mainly devoted to providing a foundational toolkit +on which many algorithms can be implemented; the toolkit has proven +its effectiveness and flexibility in many ways. +The PSBLAS component deals mostly with the computational kernels and +environment handling; it supports computations on normal CPUs, +including the usage of OpenMP for parallelizing across multiple +cores. + +This foundational package provides linear solvers and some very +simple preconditioners; the companion package AMG4PSBLAS explores how +to use the base toolkit to build much more sophisticated +preconditioners which can be plugged seamlessly into the base solvers. + +The software architecture allows us to offer support for many +alternatives in the implementation, including usage of +heterogeneous platforms, and computations performed on GPUs through +CUDA. +There is support for GPU computations through OpenACC, but it is at +this time a highly experimental version; we plan to also look at using +accelerators through OpenMP as support from compilers improves.
+ +The project is led by Salvatore Filippone; a number of people have been contributing to this package over the +years; contributors in roughly reverse chronological order: +\begin{obeylines} + Luca Pepè Sciarria + Theophane Loloum + Dimitri Walther + Andea Di Iorio + Stefano Petrilli + Soren Rasmussen + Zaak Beekman + Ambra Abdullahi Hassan + Pasqua D'Ambra + Daniela di Serafino + Michele Martone + Michele Colajanni + Fabio Cerioni + Stefano Maiolatesi + Dario Pascucci +\end{obeylines} + +\begin{flushright} + Salvatore Filippone\\ + Alfredo Buttari\\ + Fabio Durastante +\end{flushright} +\clearpage + + + \section{Introduction}\label{sec:intro} The PSBLAS library, developed with the aim to facilitate the @@ -12,19 +68,20 @@ addresses a distributed memory execution model operating with message passing. The PSBLAS library version 3 is implemented in - the Fortran~2003~\cite{metcalf} programming language, with reuse and/or + the Fortran~2008~\cite{metcalf} programming language, with reuse and/or adaptation of existing Fortran~77 and Fortran~95 software, plus a handful of C routines. -The use of Fortran~2003 offers a number of advantages over Fortran~95, +The use of Fortran~2008 offers a number of advantages over Fortran~95, mostly in the handling of requirements for evolution and adaptation of the library to new computing architectures and integration of new algorithms. For a detailed discussion of our design see~\cite{Sparse03}; other -works discussing advanced programming in Fortran~2003 +works discussing advanced programming in Fortran~2008 include~\cite{DesPat:11,RouXiaXu:11}; sufficient support for -Fortran~2003 is now available from many compilers, including the GNU -Fortran compiler from the Free Software Foundation (as of version 4.8). +Fortran~2008 is now available from many compilers, including recent +versions of the GNU Fortran compiler from the Free Software +Foundation, and the FLANG compiler from the LLVM project.
Previous approaches have been based on mixing Fortran~95, with its @@ -83,7 +140,7 @@ influenced by the structure of the ScaLAPACK parallel library. The layered structure of the PSBLAS library is shown in figure~\ref{fig:psblas}; lower layers of the library indicate an encapsulation relationship with upper layers. The ongoing -discussion focuses on the Fortran~2003 layer immediately below the +discussion focuses on the Fortran~2008 layer immediately below the application layer. The serial parts of the computation on each process are executed through calls to the serial sparse BLAS subroutines. @@ -193,7 +250,7 @@ domain is usually a halo point for some other domain\footnote{This is two variables is reciprocal. If the matrix pattern is non-symmetric we may have one-way interactions, and these could cause a situation in which a boundary point is not a halo point for its neighbour.}; therefore -the cardinality of the boundary points set denotes the amount of data +the cardinality of the boundary points set determines the amount of data sent to other domains. \item[Overlap.] An overlap point is a boundary point assigned to multiple domains. Any operation that involves an overlap point @@ -257,7 +314,7 @@ systems solution for block diagonal matrices; \item Sparse matrix and data distribution preprocessing. 
\end{itemize} \item[Preconditioner routines] -\item[Iterative methods] a subset of Krylov subspace iterative +\item[Iterative methods] a subset of classical and Krylov subspace iterative methods \end{description} The following naming scheme has been adopted for all the symbols @@ -293,7 +350,7 @@ are classified as: To finish our general description, we define a version string with the constant \[ \verb|psb_version_string_|\] -whose current value is \verb|3.8.0| +whose current value is \verb|3.9.0| \subsection{Application structure} \label{sec:appstruct} @@ -344,24 +401,25 @@ A simple application structure will walk through the index space allocation, matrix/vector creation and linear system solution as follows: \begin{enumerate} -\item Initialize parallel environment with \verb|psb_init| -\item Initialize index space with \verb|psb_cdall| +\item Initialize parallel environment with \verb|psb_init|; +\item Initialize index space with \verb|psb_cdall|; \item Allocate sparse matrix and dense vectors with \verb|psb_spall| - and \verb|psb_geall| + and \verb|psb_geall|; \item Loop over all local rows, generate matrix and vector entries, and insert them with \verb|psb_spins| and \verb|psb_geins| \item Assemble the various entities: \begin{enumerate} -\item \verb|psb_cdasb| -\item \verb|psb_spasb| -\item \verb|psb_geasb| +\item \verb|psb_cdasb|, +\item \verb|psb_spasb|, +\item \verb|psb_geasb|; \end{enumerate} \item Choose the preconditioner to be used with \verb|prec%init| and + \verb|prec%set|, and build it with \verb|prec%build|\footnote{The subroutine style {\tt - psb\_precinit} and {\tt psb\_precbl} are still supported for - backward compatibility}. -\item Call the iterative driver \verb|psb_krylov| with the method of - choice, e.g. \verb|bicgstab|. + psb\_precinit} and {\tt psb\_precbld} are still supported for + backward compatibility}; +\item Call one of the iterative drivers with the method of + choice, e.g. \verb|psb_krylov| with \verb|bicgstab|. 
\end{enumerate} This is the structure of the sample programs in the directory \verb|test/pargen/|. @@ -372,21 +430,23 @@ multiple time steps, the following structure may be more appropriate: \item Initialize parallel environment with \verb|psb_init| \item Initialize index space with \verb|psb_cdall| \item Loop over the topology of the discretization mesh and build the - descriptor with \verb|psb_cdins| -\item Assemble the descriptor with \verb|psb_cdasb| -\item Allocate the sparse matrices and dense vectors with - \verb|psb_spall| and \verb|psb_geall| + descriptor with \verb|psb_cdins|; +\item Assemble the descriptor with \verb|psb_cdasb|; +\item Allocate the sparse matrices and dense vectors with + \verb|psb_spall| and \verb|psb_geall|; \item Loop over the time steps: \begin{enumerate} \item If after first time step, reinitialize the sparse matrix with \verb|psb_sprn|; also zero out the dense vectors; \item Loop over the mesh, generate the coefficients and insert/update - them with \verb|psb_spins| and \verb|psb_geins| -\item Assemble with \verb|psb_spasb| and \verb|psb_geasb| -\item Choose and build preconditioner with \verb|prec%init| and - \verb|prec%build| -\item Call the iterative method of choice, e.g. \verb|psb_bicgstab| + them with \verb|psb_spins| and \verb|psb_geins|; +\item Assemble with \verb|psb_spasb| and \verb|psb_geasb|; +\item Choose the preconditioner to be used with \verb|prec%init| and + \verb|prec%set|, and + build it with \verb|prec%build|; +\item Call one of the iterative drivers with the method of + choice, e.g. \verb|psb_krylov| with \verb|bicgstab|. \end{enumerate} \end{enumerate} The insertion routines will be called as many times as needed; diff --git a/docs/src/methods.tex b/docs/src/methods.tex index 4b2f8b66..55b7b4e0 100644 --- a/docs/src/methods.tex +++ b/docs/src/methods.tex @@ -2,8 +2,8 @@ \label{sec:methods} In this chapter we provide routines for preconditioners and iterative -methods.
The interfaces for Krylov subspace methods are available in -the module \verb|psb_krylov_mod|. +methods. The interfaces for iterative methods are available in +the module \verb|psb_linsolve_mod|. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % @@ -146,6 +146,119 @@ An integer value; 0 means no error has been detected. \end{description} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% Richardson driver routine +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\clearpage\subsection{psb\_richardson \label{richardson} --- + Richardson Iteration Driver Routine} + +This subroutine is a driver implementing a Richardson iteration +\[ x_{k+1} = M^{-1} (b-Ax_k) +x_k,\] +with the preconditioner operator $M$ defined in the previous section. + +The stopping criterion can take the following values: +\begin{description} +\item[1] normwise backward error in the infinity +norm; the iteration is stopped when +\[ err = \frac{\|r_i\|}{(\|A\|\|x_i\|+\|b\|)} < eps \] +\item[2] Relative residual in the 2-norm; the iteration is stopped +when +\[ err = \frac{\|r_i\|}{\|b\|_2} < eps \] +\item[3] Relative residual reduction in the 2-norm; the iteration is stopped +when +\[ err = \frac{\|r_i\|}{\|r_0\|_2} < eps \] +\end{description} +The behaviour is controlled by the istop argument (see +later). In the above formulae, $x_i$ is the tentative solution and +$r_i=b-Ax_i$ the corresponding residual at the $i$-th iteration. + + +\begin{lstlisting} +call psb_richardson(a,prec,b,x,eps,desc_a,info,& + & itmax,iter,err,itrace,istop) +\end{lstlisting} + +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry] +\item[a] the local portion of global sparse matrix +$A$. \\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: a structured data of type \spdata. +\item[prec] The data structure containing the preconditioner.\\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: a structured data of type \precdata.
+\item[b] The RHS vector. \\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: a rank one array or an object of type \vdata. +\item[x] The initial guess. \\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a rank one array or an object of type \vdata. +\item[eps] The stopping tolerance. \\ +Scope: {\bf global} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: a real number. +\item[desc\_a] contains data structures for communications.\\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf in}.\\ +Specified as: a structured data of type \descdata. +\item[itmax] The maximum number of iterations to perform.\\ +Scope: {\bf global} \\ +Type: {\bf optional}\\ +Intent: {\bf in}.\\ +Default: $itmax = 1000$.\\ +Specified as: an integer variable $itmax \ge 1$. +\item[itrace] If $>0$ print out an informational message about + convergence every $itrace$ iterations. If $=0$ print a message in + case of convergence failure.\\ +Scope: {\bf global} \\ +Type: {\bf optional}\\ +Intent: {\bf in}.\\ +Default: $itrace = -1$.\\ + +\item[istop] An integer specifying the stopping criterion.\\ +Scope: {\bf global} \\ +Type: {\bf optional}.\\ +Intent: {\bf in}.\\ +Values: 1: use the normwise backward error, 2: use the scaled 2-norm +of the residual, 3: use the residual reduction in the 2-norm. Default: 2. +\item[\bf On Return] +\item[x] The computed solution. \\ +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a rank one array or an object of type \vdata. +\item[iter] The number of iterations performed.\\ +Scope: {\bf global} \\ +Type: {\bf optional}\\ +Intent: {\bf out}.\\ +Returned as: an integer variable. +\item[err] The convergence estimate on exit.\\ +Scope: {\bf global} \\ +Type: {\bf optional}\\ +Intent: {\bf out}.\\ +Returned as: a real number. 
+\item[info] Error code.\\ +Scope: {\bf local} \\ +Type: {\bf required} \\ +Intent: {\bf out}.\\ +An integer value; 0 means no error has been detected. +\end{description} + + %%% Local Variables: %%% mode: latex %%% TeX-master: "userguide" diff --git a/docs/src/penv.tex b/docs/src/penv.tex index 10dbeb0f..31c2fc2e 100644 --- a/docs/src/penv.tex +++ b/docs/src/penv.tex @@ -7,7 +7,7 @@ environment} \begin{verbatim} -call psb_init(ctxt, np, basectxt, ids) +call psb_init(ctxt, np, basectxt, ids, extcomm) \end{verbatim} This subroutine initializes the PSBLAS parallel environment, defining @@ -21,7 +21,7 @@ Type: {\bf optional}.\\ Intent: {\bf in}.\\ Specified as: an integer value. \ Default: use all available processes. -\item[basectxt] the initial communication context. The new context +\item[basectxt] the initial PSBLAS communication context. The new context will be defined from the processes participating in the initial one.\\ Scope: {\bf global}.\\ Type: {\bf optional}.\\ @@ -37,6 +37,13 @@ Type: {\bf optional}.\\ Intent: {\bf in}.\\ Specified as: an integer array. \ Default: use the indices $(0\dots np-1)$. +\item[extcomm] an alternative initial MPI communicator. The new context + will be defined from the processes participating in the initial one.\\ + Scope: {\bf global}.\\ +Type: {\bf optional}.\\ +Intent: {\bf in}.\\ +Specified as: an integer value. \ +Default: use MPI\_COMM\_WORLD. \end{description} \begin{description} @@ -320,7 +327,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\ Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -338,7 +345,7 @@ Type, kind, rank and size must agree on all processes. 
Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -435,7 +442,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\ Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -452,7 +459,7 @@ Type, kind, rank and size must agree on all processes. Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -548,7 +555,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\ Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -566,7 +573,7 @@ Type, kind, rank and size must agree on all processes. Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -661,7 +668,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\ Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -679,7 +686,7 @@ Type, kind, rank and size must agree on all processes. 
Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -774,7 +781,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\ Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -792,7 +799,7 @@ Type, kind, rank and size must agree on all processes. Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -887,7 +894,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\ Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -905,7 +912,7 @@ Type, kind, rank and size must agree on all processes. Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -1000,7 +1007,7 @@ Default: both fields are selected (i.e. require synchronous completion).\\ Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} @@ -1018,7 +1025,7 @@ Kind, rank and size must agree on all processes. 
Scope: {\bf local}.\\ Type: {\bf optional}.\\ Intent: {\bf inout}.\\ -If \verb|mode| does not specify synchronous completion, then this +If \verb|mode| specifies non-blocking action, then this variable must be present. \end{description} diff --git a/docs/src/precs.tex b/docs/src/precs.tex index 26492be9..767f29df 100644 --- a/docs/src/precs.tex +++ b/docs/src/precs.tex @@ -3,11 +3,12 @@ % \section{Preconditioners} \label{sec:psprecs} -The base PSBLAS library contains the implementation of two simple +The base PSBLAS library contains the implementation of some simple preconditioning techniques: \begin{itemize} \item Diagonal Scaling \item Block Jacobi with ILU(0) factorization +\item Block Jacobi with an approximate inverse %% \item Additive Schwarz with the Restricted Additive Schwarz and %% Additive Schwarz with Harmonic extensions; \end{itemize} @@ -76,14 +77,125 @@ $ptype$ string as follows\footnote{The string is case-insensitive}: \item[DIAG] Diagonal scaling; each entry of the input vector is multiplied by the reciprocal of the sum of the absolute values of the coefficients in the corresponding row of matrix $A$; -\item[BJAC] Precondition by a factorization of the - block-diagonal of matrix $A$, where block boundaries are determined - by the data allocation boundaries for each process; requires no - communication. Only the incomplete factorization $ILU(0)$ is - currently implemented. +\item[BJAC] Precondition by a factorization or an approximate inverse + of the block-diagonal of matrix $A$, where block boundaries are + determined by the data allocation boundaries for each process; + requires no communication. See also Table-\ref{tab:p_subsolve_1}. \end{description} + +\clearpage + +\subsection{Set\label{sec:precset} --- set preconditioner parameters} + +\begin{center} +\fortinline|call p%set(what,val,info)| +\end{center} + +\noindent +This method sets the parameters defining the subdomain solver when the +preconditioner type is \verb|BJAC|.
More precisely, the parameter +identified by \fortinline|what| is assigned the value +contained in \fortinline|val|. + +{\vskip1.5\baselineskip\noindent\large\bfseries Arguments} \smallskip + +\begin{tabular}{p{1.2cm}p{12cm}} +\fortinline|what| & \fortinline|character(len=*)|. \\ + & The parameter to be set. It can be specified through its name; + the string is case-insensitive. See + Table~\ref{tab:p_subsolve_1}.\\ +\fortinline|val | & \fortinline|integer| \emph{or} \fortinline|character(len=*)| \emph{or} + \fortinline|real(psb_spk_)| \emph{or} \fortinline|real(psb_dpk_)|, + \fortinline|intent(in)|.\\ + & The value of the parameter to be set. The list of allowed + values and the corresponding data types is given in + Table~\ref{tab:p_subsolve_1}. + When the value is of type \fortinline|character(len=*)|, + it is also treated as case insensitive.\\ +\fortinline|info| & \fortinline|integer, intent(out)|.\\ + & Error code. If no error, 0 is returned. See Section~\ref{sec:errors} + for details. 
+\end{tabular} + + +\noindent +A number of subdomain solvers can be chosen with this method; +a list of the parameters that can be set, along with their allowed and +default values, is given in Table-\ref{tab:p_subsolve_1}.\\ + + +\bsideways +\begin{center} +\small +% \begin{tabular}{|p{3.6cm}|l|p{1.9cm}|p{3.6cm}|p{6.5cm}|} +\begin{tabular}{|p{3.2cm}|l|p{2.6cm}|p{2.6cm}|p{6.7cm}|} +\hline +\fortinline|what| & \textsc{data type} & \fortinline|val| & \textsc{default} & +\textsc{comments} \\ \hline + +\fortinline|'SUB_SOLVE'| & \fortinline|character(len=*)| + & \fortinline|'ILU'| \par + \fortinline|'ILUT'| \par + \par \fortinline|'INVT'| \par \fortinline|'INVK'| \par \fortinline|'AINV'| + & + & The local solver to be used with the smoother or one-level + preconditioner ILU($p$), ILU($p,t$), + Approximate Inverses INVK($p,q$), + INVT($p_1,p_2,t_1,t_2$) and + AINV($t$); note that approximate inverses + are specifically suited for GPUs since they + do not employ triangular system solve + kernels, + see~\cite{BERTACCINIFILIPPONE}.\\ \hline +\fortinline|'SUB_FILLIN'| & \fortinline|integer| + & Any integer \par number~$\ge 0$ + & 0 + & Fill-in level $p$ of the incomplete LU factorizations. \\ \hline +\fortinline|'SUB_ILUTHRS'| & \fortinline|real(kind_parameter)| + & Any real number~$\ge 0$ + & 0 + & Drop tolerance $t$ in the ILU($p,t$) factorization. \\ \hline +\fortinline|'ILU_ALG'| & \fortinline|character(len=*)| + & \fortinline|'MILU'| + & \fortinline|'NONE'| + & ILU algorithmic variant \\ \hline +\fortinline|'ILUT_SCALE'| & \fortinline|character(len=*)| + & \fortinline|'MAXVAL'| \par + \fortinline|'DIAG'| \par + \fortinline|'ARWSUM'| \par + \fortinline|'ARCSUM'| \par + \fortinline|'ACLSUM'| \par + \fortinline|'NONE'| + & \fortinline|'NONE'| + & ILU scaling strategy \\ \hline +\fortinline|'INV_FILLIN'| & \fortinline|integer| + & Any integer \par number~$\ge 0$ + & 0 + & Second fill-in level $q$ of the INVK($p,q$) + approximate inverse. 
\\ \hline +\fortinline|'INV_ILUTHRS'| & \fortinline|real(kind_parameter)| + & Any real number~$\ge 0$ + & 0 + & Second drop tolerance $s$ in the + INVT($t,s$) approximate inverse. \\ \hline +\fortinline|'AINV_ALG'| & \fortinline|character(len=*)| + & \fortinline|'LLK'| \par + \fortinline|'SYM-LLK'| \par + \fortinline|'STAB-LLK'| \par + \fortinline|'MLK,LMX'| + & \fortinline|'LLK'| + & AINV algorithmic strategy. \\ \hline +\end{tabular} +\end{center} +\caption{Parameters defining the solver of the BJAC + preconditioner.\label{tab:p_subsolve_1}} + \esideways + + + + \clearpage\subsection{build --- Builds a preconditioner} \begin{verbatim} @@ -200,7 +312,10 @@ Type: {\bf required} \\ Intent: {\bf out}.\\ An integer value; 0 means no error has been detected. \end{description} - +{\par\noindent\large\bfseries Notes} +This method is almost always called by the iterative methods of +Sec.~\ref{sec:methods}; it is extremely unlikely to be needed directly +by the application developer. \clearpage\subsection{descr --- Prints a description of current @@ -306,6 +421,99 @@ Error code: if no error, 0 is returned. %% installed; see~\cite{SUPERLU,UMFPACK}. Releases all internal storage. + +\clearpage\subsection{allocate\_wrk --- preconditioner} +\label{sec:allocatewrk} +\begin{verbatim} +call prec%allocate_wrk(info[,vmold]) +\end{verbatim} + +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry] +\item[prec] the preconditioner.\\ +Scope: {\bf local}.\\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a preconditioner data structure \precdata. +\item[vmold] The desired dynamic type for the internal vector storage.\\ +Scope: {\bf local}.\\ +Type: {\bf optional}.\\ +Intent: {\bf in}.\\ +Specified as: an object of a class derived from \vbasedata. +\item[\bf On Exit] + +\item[prec] +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a preconditioner data structure \precdata. 
+\item[info] +Scope: {\bf global} \\ +Type: {\bf required}\\ +Intent: {\bf out}.\\ +Error code: if no error, 0 is returned. +\end{description} +{\par\noindent\large\bfseries Notes} +%% The PSBLAS 2.0 contains a number of preconditioners, ranging from a +%% simple diagonal scaling to 2-level domain decomposition. These +%% preconditioners may use the SuperLU or the UMFPACK software, if +%% installed; see~\cite{SUPERLU,UMFPACK}. +Preconditioners often need internal work storage during their +application at each iteration of a linear solver method: in many +situations this can be accomplished by allocating and releasing +memory ``on the fly''. However, when running on an accelerator through +e.g. the CUDA enabled data structures of Sec.~\ref{sec:cudastruct} and +~\ref{sec:cudaenv}, memory allocation and deallocation usually have a +much larger overhead, significantly affecting performance. To +alleviate this problem we define this method that preallocates +internal storage; it is intended to be invoked prior to the iterative +solver method, so that the necessary internal work storage is +available throughout the iterative method application. + +When using GPUs or other specialized devices, the \fortinline|vmold| +argument is also necessary to ensure the internal work vectors are of +the appropriate dynamic type to exploit the accelerator hardware. + + +\clearpage\subsection{deallocate\_wrk --- preconditioner} + +\begin{verbatim} +call prec%allocate_wrk(info) +call prec%free_wrk(info) +\end{verbatim} + +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry] +\item[prec] the preconditioner.\\ +Scope: {\bf local}.\\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a preconditioner data structure \precdata. +\item[\bf On Exit] + +\item[prec] +Scope: {\bf local} \\ +Type: {\bf required}\\ +Intent: {\bf inout}.\\ +Specified as: a preconditioner data structure \precdata. 
+\item[info] +Scope: {\bf global} \\ +Type: {\bf required}\\ +Intent: {\bf out}.\\ +Error code: if no error, 0 is returned. +\end{description} +{\par\noindent\large\bfseries Notes} +%% The PSBLAS 2.0 contains a number of preconditioners, ranging from a +%% simple diagonal scaling to 2-level domain decomposition. These +%% preconditioners may use the SuperLU or the UMFPACK software, if +%% installed; see~\cite{SUPERLU,UMFPACK}. +Deallocates preconditioner internal work storage; to be invoked after an +iterative solver has completed execution, see the discussion in +Sec.~\ref{sec:allocatewrk}. + + %%% Local Variables: %%% mode: latex %%% TeX-master: "userguide" diff --git a/docs/src/userguide.tex b/docs/src/userguide.tex index 9d4e1814..2b1e89f3 100644 --- a/docs/src/userguide.tex +++ b/docs/src/userguide.tex @@ -17,6 +17,9 @@ \newtheorem{theorem}{Theorem} \newtheorem{corollary}{Corollary} \usepackage{listings} +\usepackage{rotating} +\usepackage{microtype} +\usepackage{algorithm2e} \usepackage{minted} \usemintedstyle{friendly} \definecolor{bg}{rgb}{0.95,0.95,0.95} @@ -36,7 +39,7 @@ \relax \pdfcompresslevel=0 %-- 0 = none, 9 = best \pdfinfo{ %-- Info dictionary of PDF output /Author (Alfredo Buttari) - /Title (Parallel Sparse BLAS V. 3.8.0) + /Title (Parallel Sparse BLAS V. 
3.9.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) @@ -90,16 +93,18 @@ \newcommand{\example}{\stepcounter{example}% \section*{\examplename~\theexample}} -\newcommand{\precdata}{\hyperlink{precdata}{{\tt psb\_prec\_type}}} +\newcommand{\precdata}{\hyperlink{precdata}{{\tt psb\_Tprec\_type}}} \newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}} \newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_Tspmat\_type}}} \newcommand{\vdata}{\hyperlink{vdata}{{\tt psb\_T\_vect\_type}}} \newcommand{\spbasedata}{\hypertarget{spbasedata}{{\tt psb\_T\_base\_sparse\_mat}}} \newcommand{\vbasedata}{\hypertarget{vbasedata}{{\tt psb\_T\_base\_vect\_type}}} +\def\bsideways{\begin{sidewaystable}} +\def\esideways{\end{sidewaystable}} \begin{document} { -\pdfbookmark{PSBLAS-v3.8.0 User's Guide}{title} +\pdfbookmark{PSBLAS-v3.9.0 User's Guide}{title} \lstset{language=Fortran} \newlength{\centeroffset} \setlength{\centeroffset}{-0.5\oddsidemargin} @@ -109,7 +114,7 @@ \vspace*{\stretch{1}} \noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth} \flushright -{\Huge\bfseries PSBLAS 3.8.0 User's guide +{\Huge\bfseries PSBLAS 3.9.0 User's guide } \noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex] \hfill\emph{\Large A reference guide for the Parallel Sparse BLAS library} @@ -129,8 +134,9 @@ \flushright {\bfseries by Salvatore Filippone\\ -and Alfredo Buttari}\\ -May 1st, 2022 +Alfredo Buttari \\ +Fabio Durastante}\\ +June 9th, 2025 \end{minipage}} } %\addtolength{\textwidth}{\centeroffset} @@ -159,7 +165,8 @@ May 1st, 2022 \include{util} \include{precs} \include{methods} - +\include{ext-intro} +\include{cuda} \cleardoublepage \input{biblio} diff --git a/docs/src/userhtml.tex b/docs/src/userhtml.tex index 5dc0e6cf..2ac85f59 100644 --- a/docs/src/userhtml.tex +++ b/docs/src/userhtml.tex @@ -17,7 +17,14 @@ 
\newtheorem{theorem}{Theorem} \newtheorem{corollary}{Corollary} \usepackage{listings} +\usepackage{rotating} \usepackage{microtype} +\usepackage{algorithm2e} + +\definecolor{bg}{rgb}{0.95,0.95,0.95} +\usepackage{breakurl} +\usepackage{mathpazo} +\usepackage[english]{babel} \ifpdf \newmintinline[fortinline]{fortran}{} \else% @@ -77,12 +84,14 @@ \newcommand{\example}{\stepcounter{example}% \section*{\examplename~\theexample}} -\newcommand{\precdata}{\hyperlink{precdata}{{\tt psb\_prec\_type}}} +\newcommand{\precdata}{\hyperlink{precdata}{{\tt psb\_Tprec\_type}}} \newcommand{\descdata}{\hyperlink{descdata}{{\tt psb\_desc\_type}}} \newcommand{\spdata}{\hyperlink{spdata}{{\tt psb\_Tspmat\_type}}} \newcommand{\vdata}{\hyperlink{vdata}{{\tt psb\_T\_vect\_type}}} \newcommand{\spbasedata}{\hypertarget{spbasedata}{{\tt psb\_T\_base\_sparse\_mat}}} \newcommand{\vbasedata}{\hypertarget{vbasedata}{{\tt psb\_T\_base\_vect\_type}}} +\def\bsideways{\begin{table}} +\def\esideways{\end{table}} \begin{document} \lstset{language=Fortran} @@ -90,13 +99,14 @@ {\LARGE\bfseries PSBLAS\\[.8ex] User's and Reference Guide}\\[\baselineskip] \emph{\large A reference guide for the Parallel Sparse BLAS library}\\[3ex] -{\bfseries Salvatore Filippone\\ - Alfredo Buttari } \\ +{\bfseries by Salvatore Filippone\\ +Alfredo Buttari \\ +Fabio Durastante } \\ %\\[10ex] %\today -Software version: 3.8.0\\ +Software version: 3.9.0\\ %\today -May 1st, 2022 +June 9th, 2025 \cleardoublepage \begingroup \renewcommand*{\thepage}{toc} @@ -120,7 +130,8 @@ May 1st, 2022 \include{util} \include{precs} \include{methods} - +\include{ext-intro} +\include{cuda} \cleardoublepage \input{biblio} diff --git a/ext/CMakeLists.txt b/ext/CMakeLists.txt new file mode 100644 index 00000000..c678d01e --- /dev/null +++ b/ext/CMakeLists.txt @@ -0,0 +1,428 @@ +set(PSB_ext_source_files + psb_s_hdia_mat_mod.f90 + impl/psb_d_ell_reinit.f90 + impl/psb_d_hll_reallocate_nz.f90 + impl/psb_z_dia_arwsum.f90 + impl/psb_d_mv_hdia_from_coo.f90 
+ impl/psb_z_hll_csgetptn.f90 + impl/psb_d_mv_ell_to_coo.f90 + impl/psb_d_ell_csgetblk.f90 + impl/psb_d_ell_reallocate_nz.f90 + impl/psb_d_dia_csgetptn.f90 + impl/psb_c_ell_rowsum.f90 + impl/psb_d_dia_reallocate_nz.f90 + impl/psb_z_mv_ell_to_fmt.f90 + impl/psi_d_xtr_ell_from_coo.f90 + impl/psb_s_mv_ell_to_fmt.f90 + impl/psb_z_mv_ell_from_coo.f90 + impl/psb_s_hll_scals.f90 + impl/psb_d_dia_get_diag.f90 + impl/psb_s_dia_csgetrow.f90 + impl/psb_d_hll_colsum.f90 + impl/psb_s_mv_ell_to_coo.f90 + impl/psb_z_cp_ell_from_fmt.f90 + impl/psb_c_hll_csnm1.f90 + impl/psb_c_hll_maxval.f90 + impl/psb_c_mv_hll_to_fmt.f90 + impl/psb_c_ell_scal.f90 + impl/psb_d_dia_allocate_mnnz.f90 + impl/psb_s_hll_reallocate_nz.f90 + impl/psb_s_hll_csgetptn.f90 + impl/psb_s_hdia_print.f90 + impl/psb_s_mv_hll_to_coo.f90 + impl/psb_c_hll_scal.f90 + impl/psb_s_cp_hll_to_fmt.f90 + impl/psb_s_hll_csnm1.f90 + impl/psb_z_hll_scals.f90 + impl/psb_z_dia_get_diag.f90 + impl/psb_c_cp_hll_from_coo.f90 + impl/psb_d_hll_arwsum.f90 + impl/psb_c_cp_dia_from_coo.f90 + impl/psb_c_ell_reallocate_nz.f90 + impl/psb_z_cp_hll_from_fmt.f90 + impl/psi_s_xtr_dia_from_coo.f90 + impl/psb_d_cp_hdia_from_coo.f90 + impl/psb_s_ell_csgetrow.f90 + impl/psb_s_mv_dia_to_coo.f90 + impl/psb_c_mv_hdia_to_coo.f90 + impl/psb_c_cp_dia_to_coo.f90 + impl/psb_s_hdia_allocate_mnnz.f90 + impl/psb_s_hll_print.f90 + impl/psb_d_ell_aclsum.f90 + impl/psb_c_cp_ell_to_coo.f90 + impl/psb_s_dia_mold.f90 + impl/psi_d_convert_dia_from_coo.f90 + impl/psb_d_hll_allocate_mnnz.f90 + impl/psb_d_dia_mold.f90 + impl/psi_z_convert_ell_from_coo.f90 + impl/psb_s_mv_dia_from_coo.f90 + impl/psb_d_hll_cssv.f90 + impl/psb_c_hll_rowsum.f90 + impl/psb_d_ell_mold.f90 + impl/psb_z_hll_csput.f90 + impl/psb_d_ell_colsum.f90 + impl/psb_s_ell_arwsum.f90 + impl/psb_c_hll_cssv.f90 + impl/psb_c_dia_reinit.f90 + impl/psi_z_xtr_dia_from_coo.f90 + impl/psb_z_hll_reallocate_nz.f90 + impl/psb_d_mv_ell_from_coo.f90 + impl/psb_d_ell_print.f90 + impl/psb_c_mv_ell_from_fmt.f90 + 
impl/psb_z_hll_csnmi.f90 + impl/psb_d_hll_maxval.f90 + impl/psb_z_ell_csmv.f90 + impl/psb_c_hdia_print.f90 + impl/psb_d_mv_hll_to_fmt.f90 + impl/psb_z_ell_cssm.f90 + impl/psb_s_dia_maxval.f90 + impl/psi_c_convert_dia_from_coo.f90 + impl/psb_c_cp_hdia_from_coo.f90 + impl/psb_s_dia_reallocate_nz.f90 + impl/psb_s_hll_csnmi.f90 + impl/psb_z_dia_scals.f90 + impl/psb_c_hll_csmm.f90 + impl/psb_z_ell_csgetptn.f90 + impl/psi_s_convert_hll_from_coo.f90 + impl/psb_d_cp_ell_from_fmt.f90 + impl/psb_z_hll_maxval.f90 + impl/psb_c_hll_reallocate_nz.f90 + impl/psb_c_mv_hdia_from_coo.f90 + impl/psb_c_ell_get_diag.f90 + impl/psb_s_cp_hll_to_coo.f90 + impl/psb_z_cp_hll_from_coo.f90 + impl/psb_s_dia_csmm.f90 + impl/psb_z_cp_hll_to_coo.f90 + impl/psi_c_xtr_ell_from_coo.f90 + impl/psb_z_hll_csmm.f90 + impl/psb_z_dia_reallocate_nz.f90 + impl/psb_d_dia_scal.f90 + impl/psb_s_mv_hll_to_fmt.f90 + impl/psb_d_hdia_mold.f90 + impl/psb_c_ell_maxval.f90 + impl/psb_z_hll_rowsum.f90 + impl/psb_z_ell_aclsum.f90 + impl/psb_d_cp_ell_to_coo.f90 + impl/psb_z_ell_print.f90 + impl/psb_d_ell_cssv.f90 + impl/psi_c_xtr_coo_from_dia.f90 + impl/psb_d_dia_csmm.f90 + impl/psi_s_convert_dia_from_coo.f90 + impl/psb_c_hll_csput.f90 + impl/psb_d_cp_hll_to_coo.f90 + impl/psb_s_ell_scals.f90 + impl/psb_s_ell_print.f90 + impl/psb_z_cp_hdia_to_coo.f90 + impl/psb_c_hll_mold.f90 + impl/psb_z_hll_print.f90 + impl/psb_s_cp_ell_from_coo.f90 + impl/psb_c_dns_mat_impl.f90 + impl/psb_c_mv_hll_from_fmt.f90 + impl/psb_z_hll_get_diag.f90 + impl/psb_z_cp_dia_from_coo.f90 + impl/psb_s_mv_hdia_from_coo.f90 + impl/psb_s_dia_colsum.f90 + impl/psb_z_cp_dia_to_coo.f90 + impl/psb_z_ell_allocate_mnnz.f90 + impl/psb_c_hll_colsum.f90 + impl/psb_s_ell_cssv.f90 + impl/psb_z_hll_csgetrow.f90 + impl/psb_d_ell_scals.f90 + impl/psb_c_dia_csmv.f90 + impl/psb_z_dia_csmm.f90 + impl/psb_s_ell_rowsum.f90 + impl/psb_c_cp_ell_from_fmt.f90 + impl/psb_z_dia_colsum.f90 + impl/psb_c_ell_mold.f90 + impl/psb_z_ell_maxval.f90 + impl/psb_z_ell_csgetblk.f90 + 
impl/psb_c_mv_ell_from_coo.f90 + impl/psb_c_mv_dia_from_coo.f90 + impl/psb_d_dia_csmv.f90 + impl/psb_z_hll_csgetblk.f90 + impl/psb_s_cp_hll_from_coo.f90 + impl/psb_d_mv_ell_to_fmt.f90 + impl/psb_c_cp_ell_to_fmt.f90 + impl/psb_z_ell_reinit.f90 + impl/psb_z_cp_hdia_from_coo.f90 + impl/psi_d_xtr_dia_from_coo.f90 + impl/psb_s_ell_scal.f90 + impl/psb_s_hll_rowsum.f90 + impl/psb_d_mv_hll_from_fmt.f90 + impl/psb_c_hdia_allocate_mnnz.f90 + impl/psb_s_ell_csmv.f90 + impl/psb_z_ell_scals.f90 + impl/psi_s_xtr_ell_from_coo.f90 + impl/psb_z_hdia_mold.f90 + impl/psb_s_cp_hdia_to_coo.f90 + impl/psb_s_hll_csput.f90 + impl/psb_s_hll_allocate_mnnz.f90 + impl/psb_z_ell_csmm.f90 + impl/psb_d_cp_dia_from_coo.f90 + impl/psb_s_ell_csgetptn.f90 + impl/psb_c_dia_csmm.f90 + impl/psb_z_ell_csput.f90 + impl/psb_s_cp_dia_to_coo.f90 + impl/psb_c_dia_scal.f90 + impl/psb_c_ell_print.f90 + impl/psb_z_hdia_print.f90 + impl/psb_d_ell_csnmi.f90 + impl/psb_d_mv_hdia_to_coo.f90 + impl/psb_c_mv_dia_to_coo.f90 + impl/psb_s_mv_hll_from_coo.f90 + impl/psb_d_hdia_print.f90 + impl/psb_s_hll_cssm.f90 + impl/psb_d_dia_colsum.f90 + impl/psb_d_mv_ell_from_fmt.f90 + impl/psb_d_hll_get_diag.f90 + impl/psb_z_dia_mold.f90 + impl/psb_z_dia_scal.f90 + impl/psb_d_hll_csgetptn.f90 + impl/psi_z_convert_dia_from_coo.f90 + impl/psb_s_dia_print.f90 + impl/psb_z_ell_arwsum.f90 + impl/psb_d_mv_dia_from_coo.f90 + impl/psi_c_xtr_dia_from_coo.f90 + impl/psb_d_hll_mold.f90 + impl/psi_s_xtr_coo_from_dia.f90 + impl/psb_z_dia_allocate_mnnz.f90 + impl/psb_z_cp_ell_from_coo.f90 + impl/psb_d_ell_allocate_mnnz.f90 + impl/psb_z_dia_csgetrow.f90 + impl/psb_s_ell_csmm.f90 + impl/psi_s_convert_ell_from_coo.f90 + impl/psb_c_dia_get_diag.f90 + impl/psb_c_hll_csgetptn.f90 + impl/psb_s_ell_colsum.f90 + impl/psb_d_dia_print.f90 + impl/psb_c_hll_cssm.f90 + impl/psb_s_dia_csmv.f90 + impl/psb_z_hdia_allocate_mnnz.f90 + impl/psb_z_ell_reallocate_nz.f90 + impl/psb_s_ell_cssm.f90 + impl/psb_c_hll_csgetrow.f90 + impl/psb_s_mv_hll_from_fmt.f90 + 
impl/psb_c_ell_colsum.f90 + impl/psb_c_ell_reinit.f90 + impl/psb_c_ell_cssm.f90 + impl/psb_c_ell_csput.f90 + impl/psb_s_hll_aclsum.f90 + impl/psb_s_hll_get_diag.f90 + impl/psb_z_ell_csnmi.f90 + impl/psb_s_hll_mold.f90 + impl/psb_c_ell_csnm1.f90 + impl/psb_s_dia_scal.f90 + impl/psb_s_hdia_mold.f90 + impl/psb_d_cp_hdia_to_coo.f90 + impl/psb_c_dia_csgetptn.f90 + impl/psb_z_mv_hll_from_fmt.f90 + impl/psb_c_ell_scals.f90 + impl/psb_z_hll_cssv.f90 + impl/psb_d_mv_dia_to_coo.f90 + impl/psb_z_mv_hdia_from_coo.f90 + impl/psb_d_dia_arwsum.f90 + impl/psb_d_hll_aclsum.f90 + impl/psb_c_hll_allocate_mnnz.f90 + impl/psb_z_hll_reinit.f90 + impl/psb_c_dia_scals.f90 + impl/psb_s_mv_ell_from_fmt.f90 + impl/psb_c_dia_allocate_mnnz.f90 + impl/psb_c_ell_csmm.f90 + impl/psb_d_ell_csmm.f90 + impl/psb_z_mv_dia_from_coo.f90 + impl/psb_c_ell_arwsum.f90 + impl/psb_s_ell_maxval.f90 + impl/psb_s_ell_reinit.f90 + impl/psb_z_ell_mold.f90 + impl/psb_s_cp_hdia_from_coo.f90 + impl/psb_s_hdia_csmv.f90 + impl/psb_s_ell_get_diag.f90 + impl/psb_s_ell_reallocate_nz.f90 + impl/psb_d_ell_csgetptn.f90 + impl/psb_c_hll_csgetblk.f90 + impl/psb_z_cp_hll_to_fmt.f90 + impl/psb_z_mv_hll_from_coo.f90 + impl/psb_c_hdia_mold.f90 + impl/psb_c_hdia_csmv.f90 + impl/psb_c_ell_csgetrow.f90 + impl/psb_d_hll_rowsum.f90 + impl/psb_z_ell_trim.f90 + impl/psb_d_hll_reinit.f90 + impl/psb_c_dia_rowsum.f90 + impl/psb_z_cp_ell_to_coo.f90 + impl/psb_d_dia_csgetrow.f90 + impl/psb_c_mv_hll_to_coo.f90 + impl/psb_d_hll_csnm1.f90 + impl/psb_z_hll_allocate_mnnz.f90 + impl/psb_d_ell_trim.f90 + impl/psi_z_xtr_coo_from_dia.f90 + impl/psb_z_ell_colsum.f90 + impl/psb_c_hll_get_diag.f90 + impl/psb_z_hll_colsum.f90 + impl/psb_d_dia_reinit.f90 + impl/psb_z_ell_rowsum.f90 + impl/psb_c_dia_mold.f90 + impl/psb_c_mv_ell_to_fmt.f90 + impl/psb_z_dia_csmv.f90 + impl/psb_d_ell_rowsum.f90 + impl/psb_s_ell_allocate_mnnz.f90 + impl/psb_z_ell_scal.f90 + impl/psb_d_hdia_allocate_mnnz.f90 + impl/psb_c_ell_trim.f90 + impl/psb_d_hdia_csmv.f90 + 
impl/psb_s_ell_mold.f90 + impl/psb_z_hll_mold.f90 + impl/psb_z_dia_reinit.f90 + impl/psb_c_ell_csgetblk.f90 + impl/psb_s_ell_trim.f90 + impl/psb_s_cp_dia_from_coo.f90 + impl/psb_s_hll_csmv.f90 + impl/psb_d_ell_arwsum.f90 + impl/psb_z_ell_cssv.f90 + impl/psb_c_dia_reallocate_nz.f90 + impl/psb_z_cp_ell_to_fmt.f90 + impl/psb_s_ell_csgetblk.f90 + impl/psb_d_mv_hll_from_coo.f90 + impl/psb_d_dns_mat_impl.f90 + impl/psb_z_dia_maxval.f90 + impl/psb_z_dns_mat_impl.f90 + impl/psi_d_xtr_coo_from_dia.f90 + impl/psi_c_convert_hll_from_coo.f90 + impl/psi_z_convert_hll_from_coo.f90 + impl/psb_z_mv_hdia_to_coo.f90 + impl/psb_d_cp_hll_from_fmt.f90 + impl/psb_c_ell_csnmi.f90 + impl/psb_d_ell_maxval.f90 + impl/psb_d_cp_ell_to_fmt.f90 + impl/psb_c_hll_aclsum.f90 + impl/psb_d_cp_dia_to_coo.f90 + impl/psb_s_dia_csgetptn.f90 + impl/psb_d_ell_get_diag.f90 + impl/psb_z_hll_scal.f90 + impl/psb_d_hll_csnmi.f90 + impl/psb_c_dia_csgetrow.f90 + impl/psb_z_mv_ell_to_coo.f90 + impl/psb_d_ell_csgetrow.f90 + impl/psb_s_dia_rowsum.f90 + impl/psb_z_ell_csnm1.f90 + impl/psb_s_dia_get_diag.f90 + impl/psb_z_mv_hll_to_fmt.f90 + impl/psb_d_dia_maxval.f90 + impl/psb_z_mv_hll_to_coo.f90 + impl/psb_z_dia_aclsum.f90 + impl/psb_c_hll_arwsum.f90 + impl/psb_c_ell_cssv.f90 + impl/psb_s_dia_scals.f90 + impl/psb_c_hll_csnmi.f90 + impl/psb_d_dia_scals.f90 + impl/psb_d_cp_hll_to_fmt.f90 + impl/psb_d_ell_csmv.f90 + impl/psb_z_dia_print.f90 + impl/psb_d_hll_scals.f90 + impl/psb_d_ell_csnm1.f90 + impl/psb_d_mv_hll_to_coo.f90 + impl/psb_z_hdia_csmv.f90 + impl/psb_d_cp_hll_from_coo.f90 + impl/psb_s_cp_hll_from_fmt.f90 + impl/psi_z_xtr_ell_from_coo.f90 + impl/psb_s_cp_ell_to_coo.f90 + impl/psb_d_ell_scal.f90 + impl/psb_c_mv_ell_to_coo.f90 + impl/psb_c_hll_csmv.f90 + impl/psb_s_hll_reinit.f90 + impl/psb_c_hll_scals.f90 + impl/psb_s_hll_csgetrow.f90 + impl/psb_s_cp_ell_from_fmt.f90 + impl/psb_d_hll_csgetrow.f90 + impl/psb_c_ell_allocate_mnnz.f90 + impl/psb_s_ell_csput.f90 + impl/psb_z_ell_csgetrow.f90 + 
impl/psb_s_hll_maxval.f90 + impl/psb_d_hll_print.f90 + impl/psb_s_ell_csnmi.f90 + impl/psb_s_dia_reinit.f90 + impl/psb_s_mv_hdia_to_coo.f90 + impl/psb_d_dia_aclsum.f90 + impl/psb_s_dia_aclsum.f90 + impl/psb_d_hll_csmv.f90 + impl/psb_z_dia_csgetptn.f90 + impl/psb_c_dia_aclsum.f90 + impl/psb_d_cp_ell_from_coo.f90 + impl/psb_s_ell_aclsum.f90 + impl/psb_c_ell_csgetptn.f90 + impl/psb_c_hll_print.f90 + impl/psb_s_hll_csgetblk.f90 + impl/psb_c_cp_hll_to_coo.f90 + impl/psb_z_hll_csmv.f90 + impl/psb_c_cp_hll_to_fmt.f90 + impl/psb_s_hll_cssv.f90 + impl/psb_s_dia_allocate_mnnz.f90 + impl/psb_s_dia_arwsum.f90 + impl/psb_d_hll_csput.f90 + impl/psb_s_hll_csmm.f90 + impl/psb_s_dns_mat_impl.f90 + impl/psb_c_ell_csmv.f90 + impl/psb_c_dia_arwsum.f90 + impl/psb_s_hll_scal.f90 + impl/psb_c_cp_hll_from_fmt.f90 + impl/psb_c_dia_colsum.f90 + impl/psb_z_hll_csnm1.f90 + impl/psi_d_convert_ell_from_coo.f90 + impl/psb_d_ell_cssm.f90 + impl/psb_c_dia_maxval.f90 + impl/psb_d_dia_rowsum.f90 + impl/psi_c_convert_ell_from_coo.f90 + impl/psb_d_hll_cssm.f90 + impl/psb_s_mv_ell_from_coo.f90 + impl/psi_d_convert_hll_from_coo.f90 + impl/psb_c_dia_print.f90 + impl/psb_c_ell_aclsum.f90 + impl/psb_z_hll_cssm.f90 + impl/psb_s_hll_colsum.f90 + impl/psb_z_hll_aclsum.f90 + impl/psb_z_dia_rowsum.f90 + impl/psb_d_ell_csput.f90 + impl/psb_d_hll_scal.f90 + impl/psb_z_mv_ell_from_fmt.f90 + impl/psb_z_hll_arwsum.f90 + impl/psb_c_mv_hll_from_coo.f90 + impl/psb_z_mv_dia_to_coo.f90 + impl/psb_d_hll_csgetblk.f90 + impl/psb_z_ell_get_diag.f90 + impl/psb_s_ell_csnm1.f90 + impl/psb_s_cp_ell_to_fmt.f90 + impl/psb_d_hll_csmm.f90 + impl/psb_c_cp_ell_from_coo.f90 + impl/psb_c_hll_reinit.f90 + impl/psb_c_cp_hdia_to_coo.f90 + impl/psb_s_hll_arwsum.f90 + psb_c_hdia_mat_mod.f90 + psi_ext_util_mod.f90 + psb_z_hdia_mat_mod.f90 + psi_z_ext_util_mod.f90 + psb_c_dns_mat_mod.f90 + psb_c_hll_mat_mod.f90 + psi_s_ext_util_mod.f90 + psb_c_dia_mat_mod.f90 + psi_c_ext_util_mod.f90 + psb_d_hdia_mat_mod.f90 + psi_i_ext_util_mod.f90 + 
psb_s_dia_mat_mod.f90 + psb_s_dns_mat_mod.f90 + psb_d_ell_mat_mod.f90 + psb_d_dia_mat_mod.f90 + psb_d_hll_mat_mod.f90 + psb_d_dns_mat_mod.f90 + psb_ext_mod.F90 + psb_c_ell_mat_mod.f90 + psb_s_ell_mat_mod.f90 + psb_s_hll_mat_mod.f90 + psi_d_ext_util_mod.f90 + psb_z_dia_mat_mod.f90 + psb_z_hll_mat_mod.f90 + psb_z_dns_mat_mod.f90 + psb_z_ell_mat_mod.f90 + ) +foreach(file IN LISTS PSB_ext_source_files) + list(APPEND ext_source_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() diff --git a/ext/Makefile b/ext/Makefile new file mode 100755 index 00000000..6ef24cba --- /dev/null +++ b/ext/Makefile @@ -0,0 +1,84 @@ +include ../Make.inc +# +# Libraries used +# +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). +CINCLUDES= +LIBNAME=libpsb_ext.a + + +FOBJS= psb_d_ell_mat_mod.o psb_d_hll_mat_mod.o \ + psb_s_hll_mat_mod.o psb_s_ell_mat_mod.o \ + psb_c_hll_mat_mod.o psb_c_ell_mat_mod.o \ + psb_z_hll_mat_mod.o psb_z_ell_mat_mod.o \ + psb_d_dia_mat_mod.o psb_d_hdia_mat_mod.o \ + psb_s_dia_mat_mod.o psb_s_hdia_mat_mod.o \ + psb_c_dia_mat_mod.o psb_c_hdia_mat_mod.o \ + psb_z_dia_mat_mod.o psb_z_hdia_mat_mod.o \ + psb_s_dns_mat_mod.o psb_d_dns_mat_mod.o \ + psb_c_dns_mat_mod.o psb_z_dns_mat_mod.o \ + psi_ext_util_mod.o psi_i_ext_util_mod.o \ + psi_s_ext_util_mod.o psi_c_ext_util_mod.o \ + psi_d_ext_util_mod.o psi_z_ext_util_mod.o \ + psb_ext_mod.o + +COBJS= + +OBJS=$(COBJS) $(FOBJS) + +lib: objs ilib + $(AR) $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +objs: $(OBJS) iobjs + /bin/cp -p *$(.mod) $(MODDIR) + + + +psb_ext_mod.o: psb_s_dia_mat_mod.o psb_d_dia_mat_mod.o \ + psb_c_dia_mat_mod.o psb_z_dia_mat_mod.o \ + psb_d_ell_mat_mod.o psb_d_hll_mat_mod.o \ + psb_s_hll_mat_mod.o psb_s_ell_mat_mod.o \ + psb_c_hll_mat_mod.o psb_c_ell_mat_mod.o \ + psb_z_hll_mat_mod.o psb_z_ell_mat_mod.o \ + psb_s_hdia_mat_mod.o psb_d_hdia_mat_mod.o \ + psb_c_hdia_mat_mod.o 
psb_z_hdia_mat_mod.o \ + psb_s_dns_mat_mod.o psb_d_dns_mat_mod.o \ + psb_c_dns_mat_mod.o psb_z_dns_mat_mod.o + +# psb_d_rsb_mat_mod.o psb_d_hdia_mat_mod.o +psi_ext_util_mod.o: psi_i_ext_util_mod.o \ + psi_s_ext_util_mod.o psi_c_ext_util_mod.o \ + psi_d_ext_util_mod.o psi_z_ext_util_mod.o + +psb_s_dia_mat_mod.o psb_c_dia_mat_mod.o psb_d_dia_mat_mod.o psb_z_dia_mat_mod.o: psi_ext_util_mod.o +psb_s_hdia_mat_mod.o psb_c_hdia_mat_mod.o psb_d_hdia_mat_mod.o psb_z_hdia_mat_mod.o: psi_ext_util_mod.o +psb_s_hll_mat_mod.o psb_c_hll_mat_mod.o psb_d_hll_mat_mod.o psb_z_hll_mat_mod.o: psi_ext_util_mod.o + +ilib: objs + $(MAKE) -C impl lib LIBNAME=$(LIBNAME) + +iobjs: $(OBJS) + $(MAKE) -C impl objs + +clean: cclean iclean + /bin/rm -f $(FOBJS) *$(.mod) *.a + +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean + +veryclean: clean + /bin/rm -f $(HERE)/$(LIBNAME) $(LIBMOD) *$(.mod) + + + + diff --git a/ext/impl/Makefile b/ext/impl/Makefile new file mode 100755 index 00000000..4b952ed3 --- /dev/null +++ b/ext/impl/Makefile @@ -0,0 +1,412 @@ +include ../../Make.inc +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. 
+LIBNAME=libpsb_ext.a + +OBJS= \ +psb_s_cp_dia_from_coo.o \ +psb_s_cp_dia_to_coo.o \ +psb_s_cp_ell_from_coo.o \ +psb_s_cp_ell_from_fmt.o \ +psb_s_cp_ell_to_coo.o \ +psb_s_cp_ell_to_fmt.o \ +psb_s_cp_hdia_from_coo.o \ +psb_s_cp_hdia_to_coo.o \ +psb_s_cp_hll_from_coo.o \ +psb_s_cp_hll_from_fmt.o \ +psb_s_cp_hll_to_coo.o \ +psb_s_cp_hll_to_fmt.o \ +psb_s_dia_aclsum.o \ +psb_s_dia_allocate_mnnz.o \ +psb_s_dia_arwsum.o \ +psb_s_dia_colsum.o \ +psb_s_dia_csgetptn.o \ +psb_s_dia_csgetrow.o \ +psb_s_dia_csmm.o \ +psb_s_dia_csmv.o \ +psb_s_dia_get_diag.o \ +psb_s_dia_maxval.o \ +psb_s_dia_mold.o \ +psb_s_dia_print.o \ +psb_s_dia_reallocate_nz.o \ +psb_s_dia_reinit.o \ +psb_s_dia_rowsum.o \ +psb_s_dia_scal.o \ +psb_s_dia_scals.o \ +psb_s_ell_aclsum.o \ +psb_s_ell_allocate_mnnz.o \ +psb_s_ell_arwsum.o \ +psb_s_ell_colsum.o \ +psb_s_ell_csgetblk.o \ +psb_s_ell_csgetptn.o \ +psb_s_ell_csgetrow.o \ +psb_s_ell_csmm.o \ +psb_s_ell_csmv.o \ +psb_s_ell_csnm1.o \ +psb_s_ell_csnmi.o \ +psb_s_ell_csput.o \ +psb_s_ell_cssm.o \ +psb_s_ell_cssv.o \ +psb_s_ell_get_diag.o \ +psb_s_ell_maxval.o \ +psb_s_ell_mold.o \ +psb_s_ell_print.o \ +psb_s_ell_reallocate_nz.o \ +psb_s_ell_reinit.o \ +psb_s_ell_rowsum.o \ +psb_s_ell_scal.o \ +psb_s_ell_scals.o \ +psb_s_ell_trim.o \ +psb_s_hdia_allocate_mnnz.o \ +psb_s_hdia_csmv.o \ +psb_s_hdia_mold.o \ +psb_s_hdia_print.o \ +psb_s_hll_aclsum.o \ +psb_s_hll_allocate_mnnz.o \ +psb_s_hll_arwsum.o \ +psb_s_hll_colsum.o \ +psb_s_hll_csgetblk.o \ +psb_s_hll_csgetptn.o \ +psb_s_hll_csgetrow.o \ +psb_s_hll_csmm.o \ +psb_s_hll_csmv.o \ +psb_s_hll_csnm1.o \ +psb_s_hll_csnmi.o \ +psb_s_hll_csput.o \ +psb_s_hll_cssm.o \ +psb_s_hll_cssv.o \ +psb_s_hll_get_diag.o \ +psb_s_hll_maxval.o \ +psb_s_hll_mold.o \ +psb_s_hll_print.o \ +psb_s_hll_reallocate_nz.o \ +psb_s_hll_reinit.o \ +psb_s_hll_rowsum.o \ +psb_s_hll_scal.o \ +psb_s_hll_scals.o \ +psb_s_mv_dia_from_coo.o \ +psb_s_mv_ell_from_coo.o \ +psb_s_mv_ell_from_fmt.o \ +psb_s_mv_ell_to_coo.o \ +psb_s_mv_ell_to_fmt.o \ 
+psb_s_mv_hdia_from_coo.o \ +psb_s_mv_hdia_to_coo.o \ +psb_s_mv_hll_from_coo.o \ +psb_s_mv_hll_from_fmt.o \ +psb_s_mv_hll_to_coo.o \ +psb_s_mv_hll_to_fmt.o \ +psb_c_cp_dia_from_coo.o \ +psb_c_cp_dia_to_coo.o \ +psb_c_cp_ell_from_coo.o \ +psb_c_cp_ell_from_fmt.o \ +psb_c_cp_ell_to_coo.o \ +psb_c_cp_ell_to_fmt.o \ +psb_c_cp_hdia_from_coo.o \ +psb_c_cp_hdia_to_coo.o \ +psb_c_cp_hll_from_coo.o \ +psb_c_cp_hll_from_fmt.o \ +psb_c_cp_hll_to_coo.o \ +psb_c_cp_hll_to_fmt.o \ +psb_c_dia_aclsum.o \ +psb_c_dia_allocate_mnnz.o \ +psb_c_dia_arwsum.o \ +psb_c_dia_colsum.o \ +psb_c_dia_csgetptn.o \ +psb_c_dia_csgetrow.o \ +psb_c_dia_csmm.o \ +psb_c_dia_csmv.o \ +psb_c_dia_get_diag.o \ +psb_c_dia_maxval.o \ +psb_c_dia_mold.o \ +psb_c_dia_print.o \ +psb_c_dia_reallocate_nz.o \ +psb_c_dia_reinit.o \ +psb_c_dia_rowsum.o \ +psb_c_dia_scal.o \ +psb_c_dia_scals.o \ +psb_c_ell_aclsum.o \ +psb_c_ell_allocate_mnnz.o \ +psb_c_ell_arwsum.o \ +psb_c_ell_colsum.o \ +psb_c_ell_csgetblk.o \ +psb_c_ell_csgetptn.o \ +psb_c_ell_csgetrow.o \ +psb_c_ell_csmm.o \ +psb_c_ell_csmv.o \ +psb_c_ell_csnm1.o \ +psb_c_ell_csnmi.o \ +psb_c_ell_csput.o \ +psb_c_ell_cssm.o \ +psb_c_ell_cssv.o \ +psb_c_ell_get_diag.o \ +psb_c_ell_maxval.o \ +psb_c_ell_mold.o \ +psb_c_ell_print.o \ +psb_c_ell_reallocate_nz.o \ +psb_c_ell_reinit.o \ +psb_c_ell_rowsum.o \ +psb_c_ell_scal.o \ +psb_c_ell_scals.o \ +psb_c_ell_trim.o \ +psb_c_hdia_allocate_mnnz.o \ +psb_c_hdia_csmv.o \ +psb_c_hdia_mold.o \ +psb_c_hdia_print.o \ +psb_c_hll_aclsum.o \ +psb_c_hll_allocate_mnnz.o \ +psb_c_hll_arwsum.o \ +psb_c_hll_colsum.o \ +psb_c_hll_csgetblk.o \ +psb_c_hll_csgetptn.o \ +psb_c_hll_csgetrow.o \ +psb_c_hll_csmm.o \ +psb_c_hll_csmv.o \ +psb_c_hll_csnm1.o \ +psb_c_hll_csnmi.o \ +psb_c_hll_csput.o \ +psb_c_hll_cssm.o \ +psb_c_hll_cssv.o \ +psb_c_hll_get_diag.o \ +psb_c_hll_maxval.o \ +psb_c_hll_mold.o \ +psb_c_hll_print.o \ +psb_c_hll_reallocate_nz.o \ +psb_c_hll_reinit.o \ +psb_c_hll_rowsum.o \ +psb_c_hll_scal.o \ +psb_c_hll_scals.o \ 
+psb_c_mv_dia_from_coo.o \ +psb_c_mv_ell_from_coo.o \ +psb_c_mv_ell_from_fmt.o \ +psb_c_mv_ell_to_coo.o \ +psb_c_mv_ell_to_fmt.o \ +psb_c_mv_hdia_from_coo.o \ +psb_c_mv_hdia_to_coo.o \ +psb_c_mv_hll_from_coo.o \ +psb_c_mv_hll_from_fmt.o \ +psb_c_mv_hll_to_coo.o \ +psb_c_mv_hll_to_fmt.o \ +psb_d_cp_dia_from_coo.o \ +psb_d_cp_dia_to_coo.o \ +psb_d_cp_ell_from_coo.o \ +psb_d_cp_ell_from_fmt.o \ +psb_d_cp_ell_to_coo.o \ +psb_d_cp_ell_to_fmt.o \ +psb_d_cp_hdia_from_coo.o \ +psb_d_cp_hdia_to_coo.o \ +psb_d_cp_hll_from_coo.o \ +psb_d_cp_hll_from_fmt.o \ +psb_d_cp_hll_to_coo.o \ +psb_d_cp_hll_to_fmt.o \ +psb_d_dia_aclsum.o \ +psb_d_dia_allocate_mnnz.o \ +psb_d_dia_arwsum.o \ +psb_d_dia_colsum.o \ +psb_d_dia_csgetptn.o \ +psb_d_dia_csgetrow.o \ +psb_d_dia_csmm.o \ +psb_d_dia_csmv.o \ +psb_d_dia_get_diag.o \ +psb_d_dia_maxval.o \ +psb_d_dia_mold.o \ +psb_d_dia_print.o \ +psb_d_dia_reallocate_nz.o \ +psb_d_dia_reinit.o \ +psb_d_dia_rowsum.o \ +psb_d_dia_scal.o \ +psb_d_dia_scals.o \ +psb_d_ell_aclsum.o \ +psb_d_ell_allocate_mnnz.o \ +psb_d_ell_arwsum.o \ +psb_d_ell_colsum.o \ +psb_d_ell_csgetblk.o \ +psb_d_ell_csgetptn.o \ +psb_d_ell_csgetrow.o \ +psb_d_ell_csmm.o \ +psb_d_ell_csmv.o \ +psb_d_ell_csnm1.o \ +psb_d_ell_csnmi.o \ +psb_d_ell_csput.o \ +psb_d_ell_cssm.o \ +psb_d_ell_cssv.o \ +psb_d_ell_get_diag.o \ +psb_d_ell_maxval.o \ +psb_d_ell_mold.o \ +psb_d_ell_print.o \ +psb_d_ell_reallocate_nz.o \ +psb_d_ell_reinit.o \ +psb_d_ell_rowsum.o \ +psb_d_ell_scal.o \ +psb_d_ell_scals.o \ +psb_d_ell_trim.o \ +psb_d_hdia_allocate_mnnz.o \ +psb_d_hdia_csmv.o \ +psb_d_hdia_mold.o \ +psb_d_hdia_print.o \ +psb_d_hll_aclsum.o \ +psb_d_hll_allocate_mnnz.o \ +psb_d_hll_arwsum.o \ +psb_d_hll_colsum.o \ +psb_d_hll_csgetblk.o \ +psb_d_hll_csgetptn.o \ +psb_d_hll_csgetrow.o \ +psb_d_hll_csmm.o \ +psb_d_hll_csmv.o \ +psb_d_hll_csnm1.o \ +psb_d_hll_csnmi.o \ +psb_d_hll_csput.o \ +psb_d_hll_cssm.o \ +psb_d_hll_cssv.o \ +psb_d_hll_get_diag.o \ +psb_d_hll_maxval.o \ +psb_d_hll_mold.o \ 
+psb_d_hll_print.o \ +psb_d_hll_reallocate_nz.o \ +psb_d_hll_reinit.o \ +psb_d_hll_rowsum.o \ +psb_d_hll_scal.o \ +psb_d_hll_scals.o \ +psb_d_mv_dia_from_coo.o \ +psb_d_mv_ell_from_coo.o \ +psb_d_mv_ell_from_fmt.o \ +psb_d_mv_ell_to_coo.o \ +psb_d_mv_ell_to_fmt.o \ +psb_d_mv_hdia_from_coo.o \ +psb_d_mv_hdia_to_coo.o \ +psb_d_mv_hll_from_coo.o \ +psb_d_mv_hll_from_fmt.o \ +psb_d_mv_hll_to_coo.o \ +psb_d_mv_hll_to_fmt.o \ +psb_z_cp_dia_from_coo.o \ +psb_z_cp_dia_to_coo.o \ +psb_z_cp_ell_from_coo.o \ +psb_z_cp_ell_from_fmt.o \ +psb_z_cp_ell_to_coo.o \ +psb_z_cp_ell_to_fmt.o \ +psb_z_cp_hdia_from_coo.o \ +psb_z_cp_hdia_to_coo.o \ +psb_z_cp_hll_from_coo.o \ +psb_z_cp_hll_from_fmt.o \ +psb_z_cp_hll_to_coo.o \ +psb_z_cp_hll_to_fmt.o \ +psb_z_dia_aclsum.o \ +psb_z_dia_allocate_mnnz.o \ +psb_z_dia_arwsum.o \ +psb_z_dia_colsum.o \ +psb_z_dia_csgetptn.o \ +psb_z_dia_csgetrow.o \ +psb_z_dia_csmm.o \ +psb_z_dia_csmv.o \ +psb_z_dia_get_diag.o \ +psb_z_dia_maxval.o \ +psb_z_dia_mold.o \ +psb_z_dia_print.o \ +psb_z_dia_reallocate_nz.o \ +psb_z_dia_reinit.o \ +psb_z_dia_rowsum.o \ +psb_z_dia_scal.o \ +psb_z_dia_scals.o \ +psb_z_ell_aclsum.o \ +psb_z_ell_allocate_mnnz.o \ +psb_z_ell_arwsum.o \ +psb_z_ell_colsum.o \ +psb_z_ell_csgetblk.o \ +psb_z_ell_csgetptn.o \ +psb_z_ell_csgetrow.o \ +psb_z_ell_csmm.o \ +psb_z_ell_csmv.o \ +psb_z_ell_csnm1.o \ +psb_z_ell_csnmi.o \ +psb_z_ell_csput.o \ +psb_z_ell_cssm.o \ +psb_z_ell_cssv.o \ +psb_z_ell_get_diag.o \ +psb_z_ell_maxval.o \ +psb_z_ell_mold.o \ +psb_z_ell_print.o \ +psb_z_ell_reallocate_nz.o \ +psb_z_ell_reinit.o \ +psb_z_ell_rowsum.o \ +psb_z_ell_scal.o \ +psb_z_ell_scals.o \ +psb_z_ell_trim.o \ +psb_z_hdia_allocate_mnnz.o \ +psb_z_hdia_csmv.o \ +psb_z_hdia_mold.o \ +psb_z_hdia_print.o \ +psb_z_hll_aclsum.o \ +psb_z_hll_allocate_mnnz.o \ +psb_z_hll_arwsum.o \ +psb_z_hll_colsum.o \ +psb_z_hll_csgetblk.o \ +psb_z_hll_csgetptn.o \ +psb_z_hll_csgetrow.o \ +psb_z_hll_csmm.o \ +psb_z_hll_csmv.o \ +psb_z_hll_csnm1.o \ +psb_z_hll_csnmi.o \ 
+psb_z_hll_csput.o \ +psb_z_hll_cssm.o \ +psb_z_hll_cssv.o \ +psb_z_hll_get_diag.o \ +psb_z_hll_maxval.o \ +psb_z_hll_mold.o \ +psb_z_hll_print.o \ +psb_z_hll_reallocate_nz.o \ +psb_z_hll_reinit.o \ +psb_z_hll_rowsum.o \ +psb_z_hll_scal.o \ +psb_z_hll_scals.o \ +psb_z_mv_dia_from_coo.o \ +psb_z_mv_ell_from_coo.o \ +psb_z_mv_ell_from_fmt.o \ +psb_z_mv_ell_to_coo.o \ +psb_z_mv_ell_to_fmt.o \ +psb_z_mv_hdia_from_coo.o \ +psb_z_mv_hdia_to_coo.o \ +psb_z_mv_hll_from_coo.o \ +psb_z_mv_hll_from_fmt.o \ +psb_z_mv_hll_to_coo.o \ +psb_z_mv_hll_to_fmt.o \ +psi_s_xtr_ell_from_coo.o \ +psi_c_xtr_ell_from_coo.o \ +psi_d_xtr_ell_from_coo.o \ +psi_z_xtr_ell_from_coo.o \ +psi_s_convert_ell_from_coo.o \ +psi_c_convert_ell_from_coo.o \ +psi_d_convert_ell_from_coo.o \ +psi_z_convert_ell_from_coo.o \ +psi_s_convert_hll_from_coo.o \ +psi_c_convert_hll_from_coo.o \ +psi_d_convert_hll_from_coo.o \ +psi_z_convert_hll_from_coo.o \ +psi_s_xtr_dia_from_coo.o \ +psi_c_xtr_dia_from_coo.o \ +psi_d_xtr_dia_from_coo.o \ +psi_z_xtr_dia_from_coo.o \ +psi_s_xtr_coo_from_dia.o \ +psi_d_xtr_coo_from_dia.o \ +psi_c_xtr_coo_from_dia.o \ +psi_z_xtr_coo_from_dia.o \ +psi_s_convert_dia_from_coo.o \ +psi_c_convert_dia_from_coo.o \ +psi_d_convert_dia_from_coo.o \ +psi_z_convert_dia_from_coo.o \ +psb_s_dns_mat_impl.o \ +psb_d_dns_mat_impl.o \ +psb_c_dns_mat_impl.o \ +psb_z_dns_mat_impl.o + +objs: $(OBJS) + +lib: objs + $(AR) ../$(LIBNAME) $(OBJS) + +clean: + /bin/rm -f $(OBJS) diff --git a/ext/impl/psb_c_cp_dia_from_coo.f90 b/ext/impl/psb_c_cp_dia_from_coo.f90 new file mode 100644 index 00000000..2d2b1caa --- /dev/null +++ b/ext/impl/psb_c_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! 
notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psb_c_cp_dia_from_coo: fill the DIA matrix a from the COO matrix b,
+! leaving b itself unchanged.
+!
+! Arguments:
+!   a    - (inout) DIA matrix receiving the converted data; marked as
+!                  host-resident on success.
+!   b    - (in)    COO source; synced first when b%is_dev() is true.
+!   info - (out)   psb_success_ on success. When building the row-ordered
+!                  copy fails, the code of the failing step is returned
+!                  as is; when the conversion itself fails the code is
+!                  replaced by psb_err_alloc_dealloc_.
+!
+subroutine psb_c_cp_dia_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_cp_dia_from_coo
+  implicit none
+
+  class(psb_c_dia_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  type(psb_c_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+  if (b%is_by_rows()) then
+    ! Source is already row-ordered: convert it directly.
+    call psi_convert_dia_from_coo(a,b,info)
+  else
+    ! Work on a private copy so that it can be fixed, which is what
+    ! guarantees tmp%is_by_rows().
+    call b%cp_to_coo(tmp,info)
+    ! Do not let the fix overwrite (and so hide) a failed copy.
+    if (info == psb_success_) call tmp%fix(info)
+
+    if (info /= psb_success_) return
+    call psi_convert_dia_from_coo(a,tmp,info)
+
+    call tmp%free()
+  end if
+  if (info /= 0) goto 9999
+  call a%set_host()
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_c_cp_dia_from_coo
diff --git a/ext/impl/psb_c_cp_dia_to_coo.f90 b/ext/impl/psb_c_cp_dia_to_coo.f90
new file mode 100644
index 00000000..9975bec0
--- /dev/null
+++ b/ext/impl/psb_c_cp_dia_to_coo.f90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_c_cp_dia_to_coo: copy the DIA matrix a into the COO matrix b.
+!
+! b is allocated for nr x nc with room for a%get_nzeros() entries, inherits
+! the base-class state of a, is filled by psi_c_xtr_coo_from_dia and is
+! finally passed through b%fix.
+!
+! Arguments:
+!   a    - (in)    DIA source; synced first when a%is_dev() is true.
+!   b    - (inout) COO destination.
+!   info - (out)   psb_success_ on success; otherwise the code reported
+!                  by the extraction helper or by b%fix.
+!
+subroutine psb_c_cp_dia_to_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_cp_dia_to_coo
+  implicit none
+
+  class(psb_c_dia_sparse_mat), intent(in)    :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  ! nzd is written by the extraction helper; the count stored in b is nza.
+  integer(psb_ipk_) :: nr,nza,nc, nzd
+
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
+
+  call psi_c_xtr_coo_from_dia(nr,nc,&
+       & b%ia, b%ja, b%val, nzd, &
+       & size(a%data,1),size(a%data,2),&
+       & a%data,a%offset,info)
+  ! Stop here on failure: b%fix below would otherwise overwrite info and
+  ! hide the error.
+  if (info /= psb_success_) return
+
+  call b%set_nzeros(nza)
+  call b%set_host()
+  call b%fix(info)
+
+end subroutine psb_c_cp_dia_to_coo
diff --git a/ext/impl/psb_c_cp_ell_from_coo.f90 b/ext/impl/psb_c_cp_ell_from_coo.f90
new file mode 100644
index 00000000..28d7d242
--- /dev/null
+++ b/ext/impl/psb_c_cp_ell_from_coo.f90
@@ -0,0 +1,71 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_c_cp_ell_from_coo: fill the ELL matrix a from the COO matrix b,
+! leaving b itself unchanged.
+!
+! Arguments:
+!   a    - (inout) ELL matrix receiving the converted data; marked as
+!                  host-resident on success.
+!   b    - (in)    COO source; synced first when b%is_dev() is true.
+!   info - (out)   psb_success_ on success, psb_err_alloc_dealloc_ on any
+!                  failure.
+!
+subroutine psb_c_cp_ell_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_from_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  type(psb_c_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+  if (b%is_by_rows()) then
+    ! Source is already row-ordered: convert it directly.
+    call psi_c_convert_ell_from_coo(a,b,info)
+  else
+    ! The direct path above is only taken for row-ordered input, so make
+    ! sure the private copy is row-ordered too. The original relied on
+    ! cp_to_coo having fix_coo called behind the scenes; the guard below
+    ! is a no-op whenever that already happened.
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) then
+      if (.not.tmp%is_by_rows()) call tmp%fix(info)
+    end if
+    if (info == psb_success_) call psi_c_convert_ell_from_coo(a,tmp,info)
+    if (info == psb_success_) call tmp%free()
+  end if
+  if (info /= psb_success_) goto 9999
+  call a%set_host()
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+
+end subroutine psb_c_cp_ell_from_coo
diff --git a/ext/impl/psb_c_cp_ell_from_fmt.f90 b/ext/impl/psb_c_cp_ell_from_fmt.f90
new file mode 100644
index 00000000..309063b9
--- /dev/null
+++ b/ext/impl/psb_c_cp_ell_from_fmt.f90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_c_cp_ell_from_fmt: fill the ELL matrix a from a matrix b of any
+! storage format, dispatching on the dynamic type of b.
+!
+! Arguments:
+!   a    - (inout) ELL destination.
+!   b    - (in)    source matrix.
+!   info - (out)   psb_success_ on entry to the dispatch; afterwards
+!                  whatever the selected copy path reports.
+!
+subroutine psb_c_cp_ell_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_from_fmt
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(inout) :: a
+  class(psb_c_base_sparse_mat), intent(in)   :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! Staging matrix for sources that are neither ELL nor COO
+  type(psb_c_coo_sparse_mat) :: coo_stage
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_c_ell_sparse_mat)
+    ! Same format: copy base state and the four component arrays,
+    ! stopping at the first copy that reports an error.
+    if (b%is_dev()) then
+      call b%sync()
+    end if
+    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+    if (info == 0) then
+      call psb_safe_cpy( b%irn, a%irn , info)
+    end if
+    if (info == 0) then
+      call psb_safe_cpy( b%idiag, a%idiag, info)
+    end if
+    if (info == 0) then
+      call psb_safe_cpy( b%ja , a%ja , info)
+    end if
+    if (info == 0) then
+      call psb_safe_cpy( b%val, a%val , info)
+    end if
+    call a%set_host()
+
+  type is (psb_c_coo_sparse_mat)
+    ! COO source: use the dedicated COO -> ELL copy.
+    call a%cp_from_coo(b,info)
+
+  class default
+    ! Any other format: go through a temporary COO copy and move it in.
+    call b%cp_to_coo(coo_stage,info)
+    if (info == psb_success_) then
+      call a%mv_from_coo(coo_stage,info)
+    end if
+  end select
+end subroutine psb_c_cp_ell_from_fmt
diff --git a/ext/impl/psb_c_cp_ell_to_coo.f90 b/ext/impl/psb_c_cp_ell_to_coo.f90
new file mode 100644
index 00000000..ec6bcff5
--- /dev/null
+++ b/ext/impl/psb_c_cp_ell_to_coo.f90
@@ -0,0 +1,69 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+!
+! psb_c_cp_ell_to_coo: copy the ELL matrix a into the COO matrix b.
+!
+! Every row i of a contributes its first a%irn(i) entries of a%ja / a%val
+! as COO triples, in row order; b is then passed through b%fix.
+!
+! Arguments:
+!   a    - (in)    ELL source; synced first when a%is_dev() is true.
+!   b    - (inout) COO destination, allocated here.
+!   info - (out)   psb_success_ initially, then the status set by b%fix.
+!
+subroutine psb_c_cp_ell_to_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_to_coo
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(in)    :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! irow/jpos walk the ELL storage, pos is the running COO write index
+  Integer(Psb_ipk_) :: irow, jpos, pos, nrows, ncols, nnz
+
+  info = psb_success_
+
+  if (a%is_dev()) then
+    call a%sync()
+  end if
+  nrows = a%get_nrows()
+  ncols = a%get_ncols()
+  nnz   = a%get_nzeros()
+
+  call b%allocate(nrows,ncols,nnz)
+  b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
+
+  pos = 0
+  do irow = 1, nrows
+    do jpos = 1, a%irn(irow)
+      pos        = pos + 1
+      b%ia(pos)  = irow
+      b%ja(pos)  = a%ja(irow,jpos)
+      b%val(pos) = a%val(irow,jpos)
+    end do
+  end do
+  call b%set_nzeros(a%get_nzeros())
+  call b%fix(info)
+  call b%set_host()
+
+end subroutine psb_c_cp_ell_to_coo
diff --git a/ext/impl/psb_c_cp_ell_to_fmt.f90 b/ext/impl/psb_c_cp_ell_to_fmt.f90
new file mode 100644
index 00000000..0c6a6903
--- /dev/null
+++ b/ext/impl/psb_c_cp_ell_to_fmt.f90
@@ -0,0 +1,67 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_c_cp_ell_to_fmt: copy the ELL matrix a into a matrix b of any
+! storage format, dispatching on the dynamic type of b.
+!
+! Arguments:
+!   a    - (in)    ELL source.
+!   b    - (inout) destination matrix.
+!   info - (out)   psb_success_ on entry to the dispatch; afterwards
+!                  whatever the selected copy path reports.
+!
+subroutine psb_c_cp_ell_to_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_to_fmt
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(in)     :: a
+  class(psb_c_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  ! Staging matrix for destinations that are neither ELL nor COO
+  type(psb_c_coo_sparse_mat) :: coo_stage
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_c_ell_sparse_mat)
+    ! Same format: copy base state and the four component arrays,
+    ! stopping at the first copy that reports an error.
+    if (a%is_dev()) then
+      call a%sync()
+    end if
+
+    b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
+    if (info == 0) then
+      call psb_safe_cpy( a%idiag, b%idiag , info)
+    end if
+    if (info == 0) then
+      call psb_safe_cpy( a%irn, b%irn , info)
+    end if
+    if (info == 0) then
+      call psb_safe_cpy( a%ja , b%ja , info)
+    end if
+    if (info == 0) then
+      call psb_safe_cpy( a%val, b%val , info)
+    end if
+    call b%set_host()
+
+  type is (psb_c_coo_sparse_mat)
+    ! COO destination: use the dedicated ELL -> COO copy.
+    call a%cp_to_coo(b,info)
+
+  class default
+    ! Any other format: build a temporary COO copy and move it into b.
+    call a%cp_to_coo(coo_stage,info)
+    if (info == psb_success_) then
+      call b%mv_from_coo(coo_stage,info)
+    end if
+  end select
+
+end subroutine psb_c_cp_ell_to_fmt
diff --git a/ext/impl/psb_c_cp_hdia_from_coo.f90 b/ext/impl/psb_c_cp_hdia_from_coo.f90
new file mode 100644
index 00000000..a9e1ca21
--- /dev/null
+++ b/ext/impl/psb_c_cp_hdia_from_coo.f90
@@ -0,0 +1,222 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! 
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+!
+! psb_c_cp_hdia_from_coo: fill the HDIA matrix a from the COO matrix b,
+! leaving b itself unchanged.
+!
+! The real work is done by the contained routine inner_cp_hdia_from_coo,
+! which is handed either b (when b%is_by_rows()) or a fixed private copy.
+!
+! Arguments:
+!   a    - (inout) HDIA destination; marked as host-resident on success.
+!   b    - (in)    COO source; synced first when b%is_dev() is true.
+!   info - (out)   psb_success_ on success, psb_err_alloc_dealloc_ on any
+!                  failure.
+!
+subroutine psb_c_cp_hdia_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_hdia_mat_mod, psb_protect_name => psb_c_cp_hdia_from_coo
+  implicit none
+
+  class(psb_c_hdia_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)     :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  !locals
+  type(psb_c_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+  if (b%is_by_rows()) then
+    call inner_cp_hdia_from_coo(a,b,info)
+    if (info /= psb_success_) goto 9999
+  else
+    call b%cp_to_coo(tmp,info)
+    if (info /= psb_success_) goto 9999
+    if (.not.tmp%is_by_rows()) call tmp%fix(info)
+    if (info /= psb_success_) goto 9999
+    call inner_cp_hdia_from_coo(a,tmp,info)
+    if (info /= psb_success_) goto 9999
+    call tmp%free()
+  end if
+  call a%set_host()
+
+  return
+
+9999 continue
+
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+
+  !
+  ! Build a from the COO matrix tmp in two passes over the "hacks"
+  ! (groups of a%hacksize consecutive rows):
+  !   1. count the entries of every row, then for every hack ask
+  !      psi_dia_offset_from_coo for its number of diagonals nd and
+  !      accumulate the running totals into a%hackOffsets;
+  !   2. size a%diaOffsets and a%val from that total, then for every hack
+  !      recompute its offsets in place and extract its values with
+  !      psi_c_xtr_dia_from_coo, adding the count from countnz to a%nzeros.
+  ! The caller only passes matrices for which is_by_rows() holds.
+  !
+  subroutine inner_cp_hdia_from_coo(a,tmp,info)
+    use psb_base_mod
+    use psi_ext_util_mod
+
+    implicit none
+    class(psb_c_hdia_sparse_mat), intent(inout) :: a
+    class(psb_c_coo_sparse_mat), intent(in)     :: tmp
+    integer(psb_ipk_), intent(out)              :: info
+
+    !locals
+    integer(psb_ipk_) :: ndiag
+    integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:)
+    integer(psb_ipk_) :: k,i,j,nc,nr,nza,nd,hacksize,nhacks,iszd,&
+         & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout
+    logical, parameter :: debug=.false.
+
+    ! info is intent(out), hence undefined on entry: it must be set
+    ! before the "if (info == psb_success_)" chain below reads it.
+    info = psb_success_
+    nr = tmp%get_nrows()
+    nc = tmp%get_ncols()
+    nza = tmp%get_nzeros()
+    ! If it is sorted then we can lessen memory impact
+    a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat
+
+    hacksize = a%hacksize
+    a%nhacks = (nr+hacksize-1)/hacksize
+    nhacks = a%nhacks
+
+    ! An nr x nc matrix has nr+nc-1 diagonals.
+    ndiag = nr+nc-1
+    if (info == psb_success_) call psb_realloc(nr,irsz,info)
+    if (info == psb_success_) call psb_realloc(ndiag,d,info)
+    if (info == psb_success_) call psb_realloc(ndiag,offset,info)
+    if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info)
+    if (info /= psb_success_) return
+
+    ! Number of entries in every row
+    irsz = 0
+    do k=1,nza
+      ir = tmp%ia(k)
+      irsz(ir) = irsz(ir)+1
+    end do
+
+    a%nzeros = 0
+    d = 0
+    iszd = 0
+    a%hackOffsets(1)=0
+    klast1 = 1
+    !
+    ! First run: count the diagonals of every hack
+    !
+    do k=1, nhacks
+      i = (k-1)*hacksize + 1
+      ib = min(hacksize,nr-i+1)
+      kfirst = klast1
+      klast1 = kfirst + sum(irsz(i:i+ib-1))
+      ! klast1 points to last element of chunk plus 1
+      if (debug) then
+        write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr
+        write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1)
+        write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1)
+      end if
+      call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & nd, d, offset, info, initd=.false., cleard=.true.)
+      ! Do not size the output arrays from the result of a failed call.
+      if (info /= psb_success_) return
+      iszd = iszd + nd
+      a%hackOffsets(k+1)=iszd
+      if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd
+      if (debug) write(*,*) 'offset ', offset(1:nd)
+    end do
+    if (debug) then
+      write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd
+      write(*,*) 'Hackoffsets ',a%hackOffsets(:)
+    end if
+    if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info)
+    if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info)
+    if (info /= psb_success_) return
+    klast1 = 1
+    !
+    ! Second run: copy elements
+    !
+    do k=1, nhacks
+      i = (k-1)*hacksize + 1
+      ib = min(hacksize,nr-i+1)
+      kfirst = klast1
+      klast1 = kfirst + sum(irsz(i:i+ib-1))
+      ! klast1 points to last element of chunk plus 1
+      hackfirst = a%hackoffsets(k)
+      hacknext = a%hackoffsets(k+1)
+      call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, &
+           & initd=.false., cleard=.false.)
+      if (info /= psb_success_) return
+      if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext)
+      call psi_c_xtr_dia_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & tmp%val(kfirst:klast1-1), &
+           & d,hacksize,(hacknext-hackfirst),&
+           & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,&
+           & initdata=.true.,rdisp=(i-1))
+      if (info /= psb_success_) return
+
+      call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),&
+           & a%diaOffsets(hackfirst+1:hacknext),nzout)
+      a%nzeros = a%nzeros + nzout
+      ! Reset the entries of d touched by this hack before the next one.
+      call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext))
+
+    end do
+    if (debug) then
+      write(*,*) 'NZEROS: ',a%nzeros, nza
+      write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd)
+      write(*,*) 'values: '
+      j=0
+      do k=1,nhacks
+        write(*,*) 'Hack No. ',k
+        do i=1,hacksize*(iszd/nhacks)
+          j = j + 1
+          write(*,*) j, a%val(j)
+        end do
+      end do
+    end if
+  end subroutine inner_cp_hdia_from_coo
+
+  !
+  ! Return in nz the sum, over the ncd diagonals listed in offsets, of the
+  ! length (ir2-ir1+1) of the row range each diagonal covers inside a block
+  ! of nrd rows displaced by rdisp rows in an nr x nc matrix.
+  !
+  subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz)
+    implicit none
+    integer(psb_ipk_), intent(in)  :: nr,nc,nrd,ncd,rdisp,offsets(:)
+    integer(psb_ipk_), intent(out) :: nz
+    !
+    integer(psb_ipk_) :: j, ir1, ir2, nrcmdisp, rdisp1
+    nz = 0
+    nrcmdisp = min(nr-rdisp,nc-rdisp)
+    rdisp1 = 1-rdisp
+    do j=1, ncd
+      if (offsets(j)>=0) then
+        ir1 = 1
+        ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_)
+        ir2 = min(nrd, nrcmdisp - offsets(j))
+      else
+        ! ir1 = max(1,1-offsets(j)-rdisp_)
+        ir1 = max(1, rdisp1 - offsets(j))
+        ir2 = min(nrd, nrcmdisp)
+      end if
+      nz = nz + (ir2-ir1+1)
+    end do
+  end subroutine countnz
+
+  !
+  ! Zero the entries of d indexed by offset(i)+nr for i = 1..nd.
+  !
+  subroutine cleand(nr,nd,d,offset)
+    implicit none
+    integer(psb_ipk_), intent(in)    :: nr,nd,offset(:)
+    integer(psb_ipk_), intent(inout) :: d(:)
+    integer(psb_ipk_) :: i,id
+
+    do i=1,nd
+      id = offset(i) + nr
+      d(id) = 0
+    end do
+  end subroutine cleand
+
+end subroutine psb_c_cp_hdia_from_coo
diff --git a/ext/impl/psb_c_cp_hdia_to_coo.f90 b/ext/impl/psb_c_cp_hdia_to_coo.f90
new file mode 100644
index 00000000..32801653
--- /dev/null
+++ b/ext/impl/psb_c_cp_hdia_to_coo.f90
@@ -0,0 +1,84 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!   1. Redistributions of source code must retain the above copyright
+!      notice, this list of conditions and the following disclaimer.
+!   2. Redistributions in binary form must reproduce the above copyright
+!      notice, this list of conditions, and the following disclaimer in the
+!      documentation and/or other materials provided with the distribution.
+!   3. The name of the PSBLAS group or the names of its contributors may
+!      not be used to endorse or promote products derived from this
+!      software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_c_cp_hdia_to_coo: copy the HDIA matrix a into the COO matrix b.
+!
+! The hacks of a are extracted one after the other with
+! psi_c_xtr_coo_from_dia, each writing right after the entries produced
+! by the previous ones; b is marked unsorted and finally passed through
+! b%fix.
+!
+! Arguments:
+!   a    - (in)    HDIA source; synced first when a%is_dev() is true.
+!   b    - (inout) COO destination, allocated here.
+!   info - (out)   psb_success_ on success; the code of a failing
+!                  extraction; -8 when the number of extracted entries
+!                  differs from a%get_nzeros(); otherwise the status
+!                  of b%fix.
+!
+subroutine psb_c_cp_hdia_to_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_hdia_mat_mod, psb_protect_name => psb_c_cp_hdia_to_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_c_hdia_sparse_mat), intent(in)   :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,hacksize,nhacks,&
+       & hackfirst, hacknext
+
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
+  call b%set_nzeros(nza)
+  call b%set_sort_status(psb_unsorted_)
+  nhacks   = a%nhacks
+  hacksize = a%hacksize
+  ! j counts the COO entries written so far
+  j = 0
+  do k=1, nhacks
+    ! i is the first row of hack k
+    i = (k-1)*hacksize + 1
+    hackfirst = a%hackoffsets(k)
+    hacknext  = a%hackoffsets(k+1)
+    call psi_c_xtr_coo_from_dia(nr,nc,&
+         & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, &
+         & hacksize,(hacknext-hackfirst),&
+         & a%val((hacksize*hackfirst)+1:hacksize*hacknext),&
+         & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1))
+    ! Stop at the first failing hack: nzd cannot be trusted after an
+    ! error and b%fix below would overwrite info.
+    if (info /= psb_success_) return
+    j = j + nzd
+  end do
+  if (nza /= j) then
+    write(*,*) 'Wrong counts in hdia_to_coo',j,nza
+    info = -8
+    return
+  end if
+  call b%set_host()
+  call b%fix(info)
+
+end subroutine psb_c_cp_hdia_to_coo
diff --git a/ext/impl/psb_c_cp_hll_from_coo.f90 b/ext/impl/psb_c_cp_hll_from_coo.f90
new file mode 100644
index 00000000..506196c2
--- /dev/null
+++ b/ext/impl/psb_c_cp_hll_from_coo.f90
@@ -0,0 +1,74 @@
+! 
Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
!
! Build an HLL (hacked ELLPACK) matrix from a COO matrix, leaving the
! input untouched.
!
! Arguments:
!   a    - output HLL matrix.
!   b    - input COO matrix; synchronised first when b%is_dev() is true.
!   info - psb_success_ on success. A failure while copying or fixing the
!          temporary COO copy is returned unchanged; a failure of
!          psi_convert_hll_from_coo is reported as psb_err_alloc_dealloc_.
!
subroutine psb_c_cp_hll_from_coo(a,b,info)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_from_coo
  implicit none

  class(psb_c_hll_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)    :: b
  integer(psb_ipk_), intent(out)             :: info

  !locals
  type(psb_c_coo_sparse_mat) :: tmp
  integer(psb_ipk_)          :: debug_level, debug_unit, hksz
  character(len=20)          :: name='hll_from_coo'

  info = psb_success_
  debug_unit  = psb_get_debug_unit()
  debug_level = psb_get_debug_level()
  if (b%is_dev()) call b%sync()
  hksz = psi_get_hksz()

  if (b%is_by_rows()) then
    call psi_convert_hll_from_coo(a,hksz,b,info)
  else
    ! The conversion is only fed row-ordered input here: work on a
    ! fixed copy so that tmp%is_by_rows() is guaranteed.
    call b%cp_to_coo(tmp,info)
    ! Do not let fix() overwrite an error reported by the copy.
    if (info == psb_success_) call tmp%fix(info)

    if (info /= psb_success_) return
    call psi_convert_hll_from_coo(a,hksz,tmp,info)

    call tmp%free()
  end if
  if (info /= 0) goto 9999
  call a%set_host()

  return

9999 continue
  info = psb_err_alloc_dealloc_
  return

end subroutine psb_c_cp_hll_from_coo
!
! Copy a matrix in any storage format into an HLL matrix.
!
! Dispatch on the dynamic type of b:
!   - COO input goes through a%cp_from_coo;
!   - HLL input is duplicated component by component;
!   - anything else is copied to a temporary COO matrix, which is then
!     moved into a.
!
! info is psb_success_ on success, otherwise the code reported by the
! first failing step.
!
subroutine psb_c_cp_hll_from_fmt(a,b,info)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_from_fmt
  implicit none

  class(psb_c_hll_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(in)   :: b
  integer(psb_ipk_), intent(out)             :: info

  ! Staging area used only by the generic (class default) path.
  type(psb_c_coo_sparse_mat) :: coo_stage

  info = psb_success_

  select type (b)
  class is (psb_c_coo_sparse_mat)
    call a%cp_from_coo(b,info)

  class is (psb_c_hll_sparse_mat)
    if (b%is_dev()) call b%sync()

    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
    ! Each copy runs only if everything before it succeeded.
    if (info == 0) call psb_safe_cpy(b%irn,    a%irn,    info)
    if (info == 0) call psb_safe_cpy(b%hkoffs, a%hkoffs, info)
    if (info == 0) call psb_safe_cpy(b%idiag,  a%idiag,  info)
    if (info == 0) call psb_safe_cpy(b%ja,     a%ja,     info)
    if (info == 0) call psb_safe_cpy(b%val,    a%val,    info)
    if (info == 0) then
      a%hksz = b%hksz
      a%nzt  = b%nzt
    end if
    call a%set_host()

  class default
    call b%cp_to_coo(coo_stage,info)
    if (info == psb_success_) call a%mv_from_coo(coo_stage,info)
  end select

end subroutine psb_c_cp_hll_from_fmt
!
! Copy an HLL (hacked ELLPACK) matrix into COO format.
!
! The rows are visited one hack (group of a%hksz consecutive rows) at a
! time. For every hack the slice of a%ja / a%val delimited by a%hkoffs
! is handed to an internal routine that views it as a two-dimensional
! array with leading dimension hksz and emits, for each row, the first
! a%irn(row) entries.
!
! Arguments:
!   a    - input HLL matrix; synchronised first when a%is_dev() is true.
!   b    - output COO matrix, allocated here for a%get_nzeros() entries.
!   info - psb_success_ at entry, then whatever b%fix reports.
!
subroutine psb_c_cp_hll_to_coo(a,b,info)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_to_coo
  implicit none

  class(psb_c_hll_sparse_mat), intent(in)    :: a
  class(psb_c_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  !locals
  integer(psb_ipk_) :: nrows, ncols, nnz, hksz
  integer(psb_ipk_) :: row0, blk_rows, hack, hack_len, hack_nz
  integer(psb_ipk_) :: src, dst

  info = psb_success_

  if (a%is_dev()) call a%sync()
  nrows = a%get_nrows()
  ncols = a%get_ncols()
  nnz   = a%get_nzeros()

  call b%allocate(nrows,ncols,nnz)
  b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat

  hksz = a%hksz
  src  = 1          ! next unread position in a%ja / a%val
  dst  = 1          ! next free position in b%ia / b%ja / b%val
  do row0 = 1, nrows, hksz
    ! The last hack may hold fewer than hksz rows.
    blk_rows = min(hksz, nrows-row0+1)
    hack     = (row0-1)/hksz + 1
    hack_len = a%hkoffs(hack+1) - a%hkoffs(hack)
    hack_nz  = sum(a%irn(row0:row0+blk_rows-1))
    call unpack_hack(row0, blk_rows, hksz, &
         & a%irn(row0:row0+blk_rows-1), &
         & a%ja(src:src+hack_len-1), a%val(src:src+hack_len-1), &
         & b%ia(dst:dst+hack_nz-1), b%ja(dst:dst+hack_nz-1), &
         & b%val(dst:dst+hack_nz-1))
    src = src + hack_len
    dst = dst + hack_nz
  end do

  call b%set_nzeros(nnz)
  call b%set_host()
  call b%fix(info)

contains

  ! Emit the entries of one hack in row-major order.
  !   first_row - global index of the hack's first row
  !   nrw       - number of rows in the hack
  !   ld        - leading dimension of hja / hval
  !   rowlen    - number of stored entries for each row of the hack
  !   hja, hval - column indices and coefficients of the hack
  !   cia, cja, cval - COO output (row index, column index, value)
  subroutine unpack_hack(first_row, nrw, ld, rowlen, hja, hval, cia, cja, cval)
    integer(psb_ipk_) :: first_row, nrw, ld
    integer(psb_ipk_) :: rowlen(*), hja(ld,*), cia(*), cja(*)
    complex(psb_spk_) :: hval(ld,*), cval(*)

    integer(psb_ipk_) :: r, c, pos

    pos = 0
    do r = 1, nrw
      do c = 1, rowlen(r)
        pos       = pos + 1
        cia(pos)  = first_row + r - 1
        cja(pos)  = hja(r,c)
        cval(pos) = hval(r,c)
      end do
    end do

  end subroutine unpack_hack

end subroutine psb_c_cp_hll_to_coo
!
! Copy an HLL matrix into a matrix of any storage format.
!
! Dispatch on the dynamic type of b:
!   - an exact COO target goes through a%cp_to_coo;
!   - an exact HLL target receives a component-by-component duplicate;
!   - any other type is filled by way of a temporary COO matrix.
!
! Arguments:
!   a    - input HLL matrix; synchronised first when a%is_dev() is true
!          (HLL target branch).
!   b    - output matrix.
!   info - psb_success_ on success, otherwise the code reported by the
!          first failing step.
!
subroutine psb_c_cp_hll_to_fmt(a,b,info)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_to_fmt
  implicit none

  class(psb_c_hll_sparse_mat), intent(in)      :: a
  class(psb_c_base_sparse_mat), intent(inout)  :: b
  integer(psb_ipk_), intent(out)               :: info

  !locals
  type(psb_c_coo_sparse_mat) :: tmp

  info = psb_success_

  select type (b)
  type is (psb_c_coo_sparse_mat)
    call a%cp_to_coo(b,info)

  type is (psb_c_hll_sparse_mat)
    if (a%is_dev()) call a%sync()
    b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
    if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info)
    if (info == 0) call psb_safe_cpy( a%idiag,  b%idiag  , info)
    if (info == 0) call psb_safe_cpy( a%irn,    b%irn    , info)
    if (info == 0) call psb_safe_cpy( a%ja ,    b%ja     , info)
    if (info == 0) call psb_safe_cpy( a%val,    b%val    , info)
    if (info == 0) b%hksz = a%hksz
    ! Keep the stored nonzero count in step with the copied arrays,
    ! as the mirror routine psb_c_cp_hll_from_fmt does.
    if (info == 0) b%nzt  = a%nzt
    call b%set_host()

  class default
    call a%cp_to_coo(tmp,info)
    if (info == psb_success_) call b%mv_from_coo(tmp,info)
  end select

end subroutine psb_c_cp_hll_to_fmt
!
! Absolute column sums of a DIA matrix.
!
! On return d(c) holds the sum of |a%data(i,j)| over all stored entries
! that fall in column c = i + a%offset(j), plus one if a%is_unit().
!
! Arguments:
!   d - output vector; must have at least a%get_ncols() entries,
!       otherwise psb_err_input_asize_small_i_ is pushed on the error
!       stack and the error handler is invoked.
!   a - input DIA matrix; synchronised first when a%is_dev() is true.
!
subroutine psb_c_dia_aclsum(d,a)

  use psb_base_mod
  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_aclsum
  implicit none
  class(psb_c_dia_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)             :: d(:)

  integer(psb_ipk_) :: row, idiag, shift, first_row, last_row
  integer(psb_ipk_) :: nrow_st, ndiag, ncol
  integer(psb_ipk_) :: err_act, info, int_err(5)
  character(len=20) :: name='aclsum'

  call psb_erractionsave(err_act)
  if (a%is_dev()) call a%sync()

  ncol = a%get_ncols()
  if (size(d) < ncol) then
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = ncol
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  ! A unit-diagonal matrix contributes an implicit one to every sum.
  d = merge(sone, szero, a%is_unit())

  nrow_st = size(a%data,1)
  ndiag   = size(a%data,2)
  do idiag = 1, ndiag
    shift = a%offset(idiag)
    ! Row range scanned for this diagonal: 1 .. nrow_st-shift when
    ! shift > 0, and 1-shift .. nrow_st otherwise.
    first_row = max(ione, ione - shift)
    last_row  = min(nrow_st, nrow_st - shift)
    do row = first_row, last_row
      d(row+shift) = d(row+shift) + abs(a%data(row,idiag))
    end do
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_dia_aclsum
!
! Allocate the storage of a DIA matrix with m rows and n columns.
!
! a%data is sized m x nz_, where nz_ is the requested number of nonzeros
! (or max(7*m,7*n,1) when nz is absent) divided by the row count and
! rounded up. a%offset is sized m+n. Both are zeroed, and the matrix is
! put in the build state with default duplicate handling.
!
! Arguments:
!   m, n - number of rows and columns; a negative value pushes
!          psb_err_iarg_neg_ and invokes the error handler.
!   a    - matrix to be allocated.
!   nz   - optional estimate of the number of nonzeros.
!
subroutine psb_c_dia_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)              :: m,n
  class(psb_c_dia_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional    :: nz
  integer(psb_ipk_) :: err_act, info, nz_, mdiv
  character(len=20) :: name='allocate_mnz'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/ione/))
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/2*ione/))
    goto 9999
  endif

  ! m == 0 passes the checks above; clamp the divisor so that the
  ! ceiling division below cannot divide by zero. For m >= 1 the result
  ! is unchanged.
  mdiv = max(m,ione)
  if (present(nz)) then
    nz_ = (max(nz,ione) + mdiv - ione)/mdiv
  else
    nz_ = (max(7*m,7*n,ione) + mdiv - ione)/mdiv
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/3*ione/))
    goto 9999
  endif

  if (info == psb_success_) call psb_realloc(m,nz_,a%data,info)
  if (info == psb_success_) call psb_realloc(m+n,a%offset,info)
  if (info == psb_success_) then
    a%data   = 0
    a%offset = 0
    call a%set_nrows(m)
    call a%set_ncols(n)
    call a%set_bld()
    call a%set_triangle(.false.)
    call a%set_unit(.false.)
    call a%set_dupl(psb_dupl_def_)
  end if
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_dia_allocate_mnnz
!
! Absolute row sums of a DIA matrix.
!
! On return d(i) holds the sum of |a%data(i,j)| over the stored entries
! of row i, plus one if a%is_unit().
!
! Arguments:
!   d - output vector, indexed by row; must have at least
!       a%get_nrows() entries, otherwise psb_err_input_asize_small_i_
!       is pushed on the error stack and the error handler is invoked.
!   a - input DIA matrix; synchronised first when a%is_dev() is true.
!
subroutine psb_c_dia_arwsum(d,a)

  use psb_base_mod
  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_arwsum
  implicit none
  class(psb_c_dia_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)             :: d(:)

  integer(psb_ipk_) :: i,j,m, jc, nc, ir1,ir2, nr
  integer(psb_ipk_) :: err_act, info, int_err(5)
  character(len=20) :: name='arwsum'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  ! d is addressed by row index below, so it must be checked against
  ! the number of rows, not the number of columns.
  if (size(d) < m) then
    info=psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = m
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (a%is_unit()) then
    d = sone
  else
    d = szero
  end if

  nr = size(a%data,1)
  nc = size(a%data,2)
  do j=1,nc
    jc = a%offset(j)
    ! Row range scanned for diagonal j, chosen from the sign of its
    ! offset.
    if (jc > 0) then
      ir1 = 1
      ir2 = nr - jc
    else
      ir1 = 1 - jc
      ir2 = nr
    end if
    do i=ir1, ir2
      d(i) = d(i) + abs(a%data(i,j))
    enddo
  enddo

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_dia_arwsum
!
! Column sums of a DIA matrix.
!
! On return d(c) holds the sum of a%data(i,j) over all stored entries
! that fall in column c = i + a%offset(j), plus one if a%is_unit().
!
! Arguments:
!   d - output vector; must have at least a%get_ncols() entries,
!       otherwise psb_err_input_asize_small_i_ is pushed on the error
!       stack and the error handler is invoked.
!   a - input DIA matrix; synchronised first when a%is_dev() is true.
!
subroutine psb_c_dia_colsum(d,a)

  use psb_base_mod
  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_colsum
  implicit none
  class(psb_c_dia_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(out)          :: d(:)

  integer(psb_ipk_) :: row, idiag, shift, first_row, last_row
  integer(psb_ipk_) :: nrow_st, ndiag, ncol
  integer(psb_ipk_) :: err_act, info, int_err(5)
  character(len=20) :: name='colsum'

  call psb_erractionsave(err_act)
  if (a%is_dev()) call a%sync()

  ncol = a%get_ncols()
  if (size(d) < ncol) then
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = ncol
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  ! A unit-diagonal matrix contributes an implicit one to every sum.
  d = merge(cone, czero, a%is_unit())

  nrow_st = size(a%data,1)
  ndiag   = size(a%data,2)
  do idiag = 1, ndiag
    shift = a%offset(idiag)
    ! Row range scanned for this diagonal: 1 .. nrow_st-shift when
    ! shift > 0, and 1-shift .. nrow_st otherwise.
    first_row = max(ione, ione - shift)
    last_row  = min(nrow_st, nrow_st - shift)
    do row = first_row, last_row
      d(row+shift) = d(row+shift) + a%data(row,idiag)
    end do
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_dia_colsum
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csgetptn + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_c_dia_csgetptn diff --git a/ext/impl/psb_c_dia_csgetrow.f90 b/ext/impl/psb_c_dia_csgetrow.f90 new file mode 100644 index 00000000..2989b20f --- /dev/null +++ b/ext/impl/psb_c_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csgetrow + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_c_dia_csgetrow diff --git a/ext/impl/psb_c_dia_csmm.f90 b/ext/impl/psb_c_dia_csmm.f90 new file mode 100644 index 00000000..b65c4651 --- /dev/null +++ b/ext/impl/psb_c_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csmm + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dia_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_c_dia_csmm_inner + +end subroutine psb_c_dia_csmm diff --git a/ext/impl/psb_c_dia_csmv.f90 b/ext/impl/psb_c_dia_csmv.f90 new file mode 100644 index 00000000..cf1ef677 --- /dev/null +++ b/ext/impl/psb_c_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csmv + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dia_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_c_dia_csmv_inner + +end subroutine psb_c_dia_csmv diff --git a/ext/impl/psb_c_dia_get_diag.f90 b/ext/impl/psb_c_dia_get_diag.f90 new file mode 100644 index 00000000..d868b62d --- /dev/null +++ b/ext/impl/psb_c_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_c_dia_get_diag: copy the main diagonal of a DIA matrix into d.
+!
+!   a     input matrix; synchronised with a%sync() when a%is_dev() holds
+!   d(:)  output; with mnm = min(nrows,ncols):
+!           d(1:mnm)  = cone              when a%is_unit() holds
+!                     = a%data(1:mnm,k)   for the first k with a%offset(k) == 0
+!                     = czero             when no stored diagonal has offset 0
+!           d(mnm+1:) = czero
+!   info  psb_success_, or psb_err_input_asize_invalid_i_ if size(d) < mnm
+!
+subroutine psb_c_dia_get_diag(a,d,info)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_get_diag
+  implicit none
+  class(psb_c_dia_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(out) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: err_act, mnm, i
+  character(len=20) :: name='get_diag'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  mnm = min(a%get_nrows(),a%get_ncols())
+  if (size(d) < mnm) then
+    info=psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d(1:mnm) = cone
+  else
+    ! d is intent(out): give it a defined value first, so that a matrix
+    ! with no stored offset-0 diagonal yields zeros instead of garbage.
+    d(1:mnm) = czero
+    do i=1, size(a%offset)
+      if (a%offset(i) == 0) then
+        d(1:mnm) = a%data(1:mnm,i)
+        exit
+      end if
+    end do
+  end if
+  ! Entries past the diagonal length are always cleared.
+  d(mnm+1:) = czero
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dia_get_diag
diff --git a/ext/impl/psb_c_dia_maxval.f90 b/ext/impl/psb_c_dia_maxval.f90
new file mode 100644
index 00000000..03a2be82
--- /dev/null
+++ b/ext/impl/psb_c_dia_maxval.f90
@@ -0,0 +1,54 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+!
modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_c_dia_maxval: largest modulus found in the stored coefficient array.
+!
+!   a    input matrix; synchronised with a%sync() when a%is_dev() holds
+!   res  max( base, maxval(abs(a%data)) ), where base is sone when
+!        a%is_unit() holds and szero otherwise
+!
+function psb_c_dia_maxval(a) result(res)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_maxval
+  implicit none
+  class(psb_c_dia_sparse_mat), intent(in) :: a
+  real(psb_spk_) :: res
+
+  if (a%is_dev()) call a%sync()
+
+  ! Lower bound of the result: one for a unit matrix, zero otherwise.
+  res = merge(sone, szero, a%is_unit())
+  res = max(res, maxval(abs(a%data)))
+
+end function psb_c_dia_maxval
diff --git a/ext/impl/psb_c_dia_mold.f90 b/ext/impl/psb_c_dia_mold.f90
new file mode 100644
index 00000000..1d694828
--- /dev/null
+++ b/ext/impl/psb_c_dia_mold.f90
@@ -0,0 +1,61 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_c_dia_mold: make b an empty object of dynamic type
+! psb_c_dia_sparse_mat.
+!
+!   a     only supplies the type; it is not read
+!   b     freed and deallocated first if already allocated, then
+!         allocated as psb_c_dia_sparse_mat
+!   info  0 on success, psb_err_alloc_dealloc_ if the deallocate or the
+!         allocate reported a non-zero stat
+!
+subroutine psb_c_dia_mold(a,b,info)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_mold
+  implicit none
+  class(psb_c_dia_sparse_mat), intent(in) :: a
+  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='dia_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  ! Release whatever b currently holds; a failed deallocate suppresses
+  ! the allocation that follows.
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_c_dia_sparse_mat :: b, stat=info)
+
+  ! Success path leaves straight away.
+  if (info == psb_success_) return
+
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dia_mold
diff --git a/ext/impl/psb_c_dia_print.f90 b/ext/impl/psb_c_dia_print.f90
new file mode 100644
index 00000000..f3233366
--- /dev/null
+++ b/ext/impl/psb_c_dia_print.f90
@@ -0,0 +1,148 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_c_dia_print: write a DIA matrix to unit iout as a MatrixMarket
+! coordinate listing.
+!
+!   iout      output unit
+!   a         matrix to print; synchronised with a%sync() when on device
+!   iv        optional map applied to both row and column indices;
+!             takes precedence over ivr/ivc when present
+!   head      optional text for a '% ' comment line
+!   ivr, ivc  optional separate maps for row and column indices
+!
+! Entries are emitted diagonal by diagonal; for a diagonal with offset
+! jc the column of row i is i+jc.
+!
+subroutine psb_c_dia_print(iout,a,iv,head,ivr,ivc)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_print
+  implicit none
+
+  integer(psb_ipk_), intent(in) :: iout
+  class(psb_c_dia_sparse_mat), intent(in) :: a
+  integer(psb_lpk_), intent(in), optional :: iv(:)
+  character(len=*), optional :: head
+  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+
+  character(len=80) :: frmt
+  integer(psb_ipk_) :: row, diag, ndiag, nr, nc, nz, shift, row_lo, row_hi
+
+  ! File header.
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% COO'
+
+  if (a%is_dev()) call a%sync()
+
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nz = a%get_nzeros()
+  frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
+  write(iout,*) nr, nc, nz
+
+  ndiag = size(a%data,2)
+
+  do diag = 1, ndiag
+    shift = a%offset(diag)
+    ! Row window of this diagonal.
+    if (shift <= 0) then
+      row_lo = 1 - shift
+      row_hi = nr
+    else
+      row_lo = 1
+      row_hi = nr - shift
+    end if
+
+    ! The index maps only change how the two indices are printed.
+    if (present(iv)) then
+      do row = row_lo, row_hi
+        write(iout,frmt) iv(row),iv(row+shift),a%data(row,diag)
+      end do
+    else if (present(ivr) .and. present(ivc)) then
+      do row = row_lo, row_hi
+        write(iout,frmt) ivr(row),ivc(row+shift),a%data(row,diag)
+      end do
+    else if (present(ivr)) then
+      do row = row_lo, row_hi
+        write(iout,frmt) ivr(row),(row+shift),a%data(row,diag)
+      end do
+    else if (present(ivc)) then
+      do row = row_lo, row_hi
+        write(iout,frmt) (row),ivc(row+shift),a%data(row,diag)
+      end do
+    else
+      do row = row_lo, row_hi
+        write(iout,frmt) (row),(row+shift),a%data(row,diag)
+      end do
+    end if
+  end do
+
+end subroutine psb_c_dia_print
diff --git a/ext/impl/psb_c_dia_reallocate_nz.f90 b/ext/impl/psb_c_dia_reallocate_nz.f90
new file mode 100644
index 00000000..c46cd465
--- /dev/null
+++ b/ext/impl/psb_c_dia_reallocate_nz.f90
@@ -0,0
+1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_c_dia_reallocate_nz: deliberately does nothing for the DIA format.
+!
+!   nz  requested number of nonzeros; not read
+!   a   the matrix; not modified
+!
+subroutine psb_c_dia_reallocate_nz(nz,a)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_c_dia_sparse_mat), intent(inout) :: a
+
+  integer(psb_ipk_) :: saved_act
+
+  ! The only work is a matched save/restore of the error action;
+  ! no storage is resized.
+  call psb_erractionsave(saved_act)
+  call psb_erractionrestore(saved_act)
+  return
+
+end subroutine psb_c_dia_reallocate_nz
diff --git a/ext/impl/psb_c_dia_reinit.f90 b/ext/impl/psb_c_dia_reinit.f90
new file mode 100644
index 00000000..04a345eb
--- /dev/null
+++ b/ext/impl/psb_c_dia_reinit.f90
@@ -0,0 +1,78 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!
POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_c_dia_reinit: put an assembled DIA matrix back into update state.
+!
+!   a      the matrix
+!            - build or update state: left untouched
+!            - assembled state: synchronised from the device if needed,
+!              optionally zeroed, then marked as update/host
+!            - any other state: psb_err_invalid_mat_state_ is pushed
+!   clear  optional, default .true.; when true a%data is set to czero
+!
+subroutine psb_c_dia_reinit(a,clear)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_reinit
+  implicit none
+
+  class(psb_c_dia_sparse_mat), intent(inout) :: a
+  logical, intent(in), optional :: clear
+
+  Integer(Psb_ipk_) :: err_act, info
+  character(len=20) :: name='reinit'
+  logical :: clear_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  clear_ = .true.
+  if (present(clear)) clear_ = clear
+
+  if (a%is_bld() .or. a%is_upd()) then
+    ! Nothing to do. Fall through to the common exit (instead of
+    ! returning here) so that psb_erractionsave stays paired with
+    ! psb_erractionrestore, as on every other normal exit.
+  else if (a%is_asb()) then
+    if (a%is_dev()) call a%sync()
+    if (clear_) a%data(:,:) = czero
+    call a%set_upd()
+    call a%set_host()
+  else
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dia_reinit
diff --git a/ext/impl/psb_c_dia_rowsum.f90 b/ext/impl/psb_c_dia_rowsum.f90
new file mode 100644
index 00000000..1f36dab4
--- /dev/null
+++ b/ext/impl/psb_c_dia_rowsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_c_dia_rowsum: d(i) = sum over the stored diagonals of a%data(i,:).
+!
+!   d(:)  output; must hold at least a%get_nrows() entries because it is
+!         indexed by row. Initialised to sone when a%is_unit() holds,
+!         szero otherwise, then the stored coefficients are accumulated.
+!   a     input matrix; synchronised with a%sync() when on device
+!
+! On a too-short d, psb_err_input_asize_small_i_ is pushed with
+! (argument position, actual size, required size).
+!
+subroutine psb_c_dia_rowsum(d,a)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_rowsum
+  implicit none
+  class(psb_c_dia_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(out) :: d(:)
+
+  integer(psb_ipk_) :: i, j, m, jc, nc, ir1, ir2, nr
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='rowsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  ! d is addressed by row index below, so its length is checked against
+  ! the number of rows (the column count is not the relevant bound).
+  m = a%get_nrows()
+  if (size(d) < m) then
+    info=psb_err_input_asize_small_i_
+    int_err = 0      ! do not hand uninitialised entries to psb_errpush
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = m
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = sone
+  else
+    d = szero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    jc = a%offset(j)
+    ! Rows on which diagonal j has an entry.
+    if (jc > 0) then
+      ir1 = 1
+      ir2 = nr - jc
+    else
+      ir1 = 1 - jc
+      ir2 = nr
+    end if
+    do i=ir1, ir2
+      d(i) = d(i) + a%data(i,j)
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dia_rowsum
diff --git a/ext/impl/psb_c_dia_scal.f90 b/ext/impl/psb_c_dia_scal.f90
new file mode 100644
index 00000000..8f35b7c1
--- /dev/null
+++ b/ext/impl/psb_c_dia_scal.f90
@@ -0,0 +1,108 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_c_dia_scal: scale a DIA matrix by a vector, in place.
+!
+!   d(:)  scaling factors
+!   a     the matrix; made non-unit first if a%is_unit() holds, and
+!         marked as host-resident afterwards
+!   info  psb_success_, or psb_err_input_asize_invalid_i_ when d is
+!         shorter than the number of rows (left) or columns (right)
+!   side  optional, default 'L'. 'L' (any case) scales row i by d(i);
+!         any other value scales the entry in column i+offset by
+!         d(i+offset).
+!
+subroutine psb_c_dia_scal(d,a,info,side)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_scal
+  implicit none
+  class(psb_c_dia_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, intent(in), optional :: side
+
+  Integer(Psb_ipk_) :: err_act, i, j, m, n, nc, jc, nr, ir1, ir2
+  character(len=20) :: name='scal'
+  character :: side_
+  logical :: left
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (a%is_dev()) call a%sync()
+
+  if (a%is_unit()) then
+    call a%make_nonunit()
+  end if
+
+  side_ = 'L'
+  if (present(side)) then
+    side_ = psb_toupper(side)
+  end if
+
+  left = (side_ == 'L')
+
+  if (left) then
+    m = a%get_nrows()
+    if (size(d) < m) then
+      info=psb_err_input_asize_invalid_i_
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    do i=1, m
+      a%data(i,:) = a%data(i,:) * d(i)
+    enddo
+  else
+    n = a%get_ncols()
+    if (size(d) < n) then
+      info=psb_err_input_asize_invalid_i_
+      ! Same fully-defined error payload as the left-scaling branch
+      ! (a partly initialised work array is no longer passed).
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    nr=size(a%data,1)
+    nc=size(a%data,2)
+    do j=1,nc
+      jc = a%offset(j)
+      ! Rows on which diagonal j has an entry; its column is i+jc.
+      if (jc > 0) then
+        ir1 = 1
+        ir2 = nr - jc
+      else
+        ir1 = 1 - jc
+        ir2 = nr
+      end if
+      do i=ir1, ir2
+        a%data(i,j) = a%data(i,j) * d(i+jc)
+      enddo
+    enddo
+
+  end if
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dia_scal
diff --git a/ext/impl/psb_c_dia_scals.f90 b/ext/impl/psb_c_dia_scals.f90
new file mode 100644
index 00000000..a9ca5db1
--- /dev/null
+++ b/ext/impl/psb_c_dia_scals.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_c_dia_scals: multiply every stored coefficient by the scalar d.
+!
+!   d     scalar factor
+!   a     the matrix; synchronised from the device if needed, made
+!         non-unit if a%is_unit() holds, marked host-resident on exit
+!   info  always psb_success_
+!
+subroutine psb_c_dia_scals(d,a,info)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_scals
+  implicit none
+  class(psb_c_dia_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in) :: d
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: saved_act
+
+  info = psb_success_
+  call psb_erractionsave(saved_act)
+
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) call a%make_nonunit()
+
+  a%data(:,:) = a%data(:,:) * d
+  call a%set_host()
+
+  call psb_erractionrestore(saved_act)
+  return
+
+end subroutine psb_c_dia_scals
diff --git a/ext/impl/psb_c_dns_mat_impl.f90 b/ext/impl/psb_c_dns_mat_impl.f90
new file mode 100644
index 00000000..8e99af8b
--- /dev/null
+++ b/ext/impl/psb_c_dns_mat_impl.f90
@@ -0,0 +1,724 @@
+
+!> Function csmv:
+!! \memberof psb_c_dns_sparse_mat
+!! \brief Product by a dense rank 1 array.
+!!
+!! Compute
+!! Y = alpha*op(A)*X + beta*Y
+!!
+!!
\param alpha Scaling factor for Ax
+!! \param A the input sparse matrix
+!! \param x(:) the input dense X; must hold at least as many entries
+!! as op(A) has columns
+!! \param beta Scaling factor for y
+!! \param y(:) the input/output dense Y; must hold at least as many
+!! entries as op(A) has rows
+!! \param info return code: psb_success_, psb_err_invalid_mat_state_
+!! if A is not assembled, psb_err_input_asize_small_i_
+!! if x or y is too short
+!! \param trans [N] Whether to use A (N), its transpose (T)
+!! or its conjugate transpose (C)
+!!
+!
+subroutine psb_c_dns_csmv(alpha,a,x,beta,y,info,trans)
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csmv
+  implicit none
+  class(psb_c_dns_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in) :: alpha, beta, x(:)
+  complex(psb_spk_), intent(inout) :: y(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  !
+  character :: trans_
+  integer(psb_ipk_) :: err_act, m, n
+  character(len=20) :: name='c_dns_csmv'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(trans)) then
+    trans_ = psb_toupper(trans)
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  if (a%is_dev()) call a%sync()
+
+  ! m x n is the shape of op(A): y needs m entries, x needs n.
+  if (trans_ == 'N') then
+    m=a%get_nrows()
+    n=a%get_ncols()
+  else
+    n=a%get_nrows()
+    m=a%get_ncols()
+  end if
+
+  ! cgemv does no bounds checking of its own, so reject short vectors
+  ! here. The payload is (argument position, actual size, required size).
+  if (size(x) < n) then
+    info = psb_err_input_asize_small_i_
+    call psb_errpush(info,name,i_err=(/3_psb_ipk_,size(x,kind=psb_ipk_),n/))
+    goto 9999
+  end if
+  if (size(y) < m) then
+    info = psb_err_input_asize_small_i_
+    call psb_errpush(info,name,i_err=(/5_psb_ipk_,size(y,kind=psb_ipk_),m/))
+    goto 9999
+  end if
+
+  ! cgemv always takes the dimensions of A itself, not of op(A).
+  call cgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,&
+       & a%val,size(a%val,1),x,1,beta,y,1)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dns_csmv
+
+
+!> Function csmm:
+!! \memberof psb_c_dns_sparse_mat
+!! \brief Product by a dense rank 2 array.
+!!
+!! Compute
+!! Y = alpha*op(A)*X + beta*Y
+!!
+!! \param alpha Scaling factor for Ax
+!! \param A the input sparse matrix
+!! \param x(:,:) the input dense X
+!! \param beta Scaling factor for y
+!! \param y(:,:) the input/output dense Y
+!! \param info return code
+!! \param trans [N] Whether to use A (N), its transpose (T)
+!! or its conjugate transpose (C)
+!!
+!
+subroutine psb_c_dns_csmm(alpha,a,x,beta,y,info,trans)
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csmm
+  implicit none
+  class(psb_c_dns_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+  complex(psb_spk_), intent(inout) :: y(:,:)
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+  !
+  character :: trans_
+  integer(psb_ipk_) :: err_act, m, n, k, lda, ldx, ldy
+  character(len=20) :: name='c_dns_csmm'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  trans_ = 'N'
+  if (present(trans)) trans_ = trans
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  if (a%is_dev()) call a%sync()
+
+  ! Number of right-hand sides: limited by the narrower of x and y.
+  n = min(size(y,2),size(x,2))
+  ! op(A) is m x k.
+  if (psb_toupper(trans_)=='N') then
+    m = a%get_nrows()
+    k = a%get_ncols()
+  else
+    m = a%get_ncols()
+    k = a%get_nrows()
+  end if
+
+  ! Leading dimensions of the three arrays handed to cgemm.
+  lda = size(a%val,1)
+  ldx = size(x,1)
+  ldy = size(y,1)
+  call cgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dns_csmm
+
+
+
+!
+!
+!> Function csnmi:
+!! \memberof psb_c_dns_sparse_mat
+!! \brief Operator infinity norm
+!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2))
+!!
+!
+function psb_c_dns_csnmi(a) result(res)
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csnmi
+  implicit none
+  class(psb_c_dns_sparse_mat), intent(in) :: a
+  real(psb_spk_) :: res
+  !
+  integer(psb_ipk_) :: irow
+
+  res = szero
+  if (a%is_dev()) call a%sync()
+
+  ! Largest row-wise sum of moduli; stays at szero when there are no rows.
+  do irow = 1, a%get_nrows()
+    res = max(res, sum(abs(a%val(irow,:))))
+  end do
+
+end function psb_c_dns_csnmi
+
+
+!
+!> Function get_diag:
+!! \memberof psb_c_dns_sparse_mat
+!! \brief Extract the diagonal of A.
+!!
+!!
D(i) = A(i,i), i=1:min(nrows,ncols)
+!!
+!! \param d(:) The output diagonal; entries past min(nrows,ncols)
+!! are set to zero
+!! \param info return code.
+!
+subroutine psb_c_dns_get_diag(a,d,info)
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_get_diag
+  implicit none
+  class(psb_c_dns_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(out) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  !
+  integer(psb_ipk_) :: err_act, ndiag, idx
+  character(len=20) :: name='get_diag'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  ndiag = min(a%get_nrows(),a%get_ncols())
+  if (size(d) < ndiag) then
+    info=psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/))
+    goto 9999
+  end if
+
+  ! Copy the diagonal, then clear whatever is left of d.
+  do idx = 1, ndiag
+    d(idx) = a%val(idx,idx)
+  end do
+  d(ndiag+1:) = czero
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dns_get_diag
+
+
+!
+!
+!> Function reallocate_nz
+!! \memberof psb_c_dns_sparse_mat
+!! \brief One--parameters version of (re)allocate
+!!
+!! \param nz number of nonzeros to allocate for
+!! i.e. makes sure that the internal storage
+!! allows for NZ coefficients and their indices.
+!
+subroutine psb_c_dns_reallocate_nz(nz,a)
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_c_dns_sparse_mat), intent(inout) :: a
+  !
+  integer(psb_ipk_) :: saved_act
+
+  call psb_erractionsave(saved_act)
+
+  ! The dense storage has a fixed size, so nz is not read; the only
+  ! effect is the device-to-host synchronisation.
+  if (a%is_dev()) call a%sync()
+
+  call psb_erractionrestore(saved_act)
+  return
+
+end subroutine psb_c_dns_reallocate_nz
+
+!
+!> Function mold:
+!! \memberof psb_c_dns_sparse_mat
+!!
\brief Allocate a class(psb_c_dns_sparse_mat) with the
+!!     same dynamic type as the input.
+!!     This is equivalent to allocate(  mold=  ) and is provided
+!!     for those compilers not yet supporting mold.
+!!   \param b The output variable
+!!   \param info return code
+!
+subroutine psb_c_dns_mold(a,b,info)
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_mold
+  implicit none
+  class(psb_c_dns_sparse_mat), intent(in)                  :: a
+  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+  !
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='dns_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  ! INFO receives the allocation status; any non-zero value is mapped
+  ! onto psb_err_alloc_dealloc_ before being reported.
+  allocate(psb_c_dns_sparse_mat :: b, stat=info)
+
+  if (info /= 0) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dns_mold
+
+!
+!
+!> Function  allocate_mnnz
+!! \memberof  psb_c_dns_sparse_mat
+!! \brief Three-parameters version of allocate
+!!
+!!  \param m  number of rows
+!!  \param n  number of cols
+!!  \param nz [estimated internally] number of nonzeros to allocate for;
+!!            it is ignored here because the storage is always m x n.
+!!
+!!  A negative M or N, or a failure to (re)allocate the M x N storage,
+!!  is pushed on the error stack and handed to the error handler.
+!
+subroutine psb_c_dns_allocate_mnnz(m,n,a,nz)
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_c_dns_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional    :: nz
+  !
+  integer(psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2_psb_ipk_/))
+    goto 9999
+  endif
+
+
+  ! Basic stuff common to all formats
+  call a%set_nrows(m)
+  call a%set_ncols(n)
+  call a%set_triangle(.false.)
+  call a%set_unit(.false.)
+  call a%set_dupl(psb_dupl_def_)
+  call a%set_bld()
+  call a%set_host()
+
+  ! We ignore NZ in this case.
+
+  call psb_realloc(m,n,a%val,info)
+  if (info /= psb_success_) then
+    ! This routine has no INFO argument, so an allocation failure must be
+    ! reported through the error stack; otherwise the caller would go on
+    ! with a matrix whose storage was never set up.
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  a%val = czero
+  a%nnz = 0
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_dns_allocate_mnnz
+
+
+!
+!
+!
+!> Function  csgetrow:
+!! \memberof  psb_c_dns_sparse_mat
+!! \brief Get a (subset of) row(s)
+!!
+!!        getrow is the basic method by which the other (getblk, clip) can
+!!        be implemented.
+!!
+!!        Returns the set
+!!           NZ, IA(1:nz), JA(1:nz), VAL(1:NZ)
+!!         each identifying the position of a nonzero in A
+!!         i.e.
+!!           VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ))
+!!         with IMIN<=IA(:)<=IMAX
+!!         with JMIN<=JA(:)<=JMAX
+!!         IA,JA are reallocated as necessary.
+!!
+!!  \param imin  the minimum row index we are interested in
+!!  \param imax  the maximum row index we are interested in
+!!  \param nz the number of output coefficients
+!!  \param ia(:)  the output row indices
+!!  \param ja(:)  the output col indices
+!!  \param val(:)  the output coefficients
+!!  \param info  return code
+!!  \param jmin [1] minimum col index
+!!  \param jmax [a\%get_ncols()] maximum col index
+!!  \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
+!!  \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
+!!  \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
+!!          ( iren cannot be specified with rscale/cscale)
+!!  \param append [false] append to ia,ja
+!!  \param nzin [none]  if append, then first new entry should go in entry nzin+1
+!!
+!
+subroutine psb_c_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csgetrow + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_c_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_c_dns_trim(a) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_trim + implicit none + class(psb_c_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Copy and convert from psb_c_coo_sparse_mat +!! 
Invoked from the target object.
+!!   \param b The input variable
+!!   \param info return code
+!
+
+subroutine psb_c_cp_dns_from_coo(a,b,info)
+  !
+  ! Fill the dense array a%val from the COO matrix B: the array is
+  ! (re)allocated to nrows x ncols, set to czero, and every COO entry
+  ! (ia,ja,val) is stored at a%val(ia,ja). a%nnz is set to the number of
+  ! COO entries copied.
+  !
+  ! On any failure INFO is non-zero and control goes through the error
+  ! handler with the error action saved on entry.
+  !
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_cp_dns_from_coo
+  implicit none
+
+  class(psb_c_dns_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+  !
+  type(psb_c_coo_sparse_mat) :: tmp
+  integer(psb_ipk_)  :: nza, nr, i, err_act, nc
+  character(len=20)  :: name='c_cp_dns_from_coo'
+
+  info = psb_success_
+  ! ERR_ACT must be defined before label 9999 can be reached.
+  call psb_erractionsave(err_act)
+
+  if (.not.b%is_by_rows()) then
+    ! This is to have fix_coo called behind the scenes
+    call b%cp_to_coo(tmp,info)
+    ! Do not let fix() overwrite a failure reported by the copy.
+    if (info == psb_success_) call tmp%fix(info)
+    if (info /= psb_success_) goto 9999
+
+    nr  = tmp%get_nrows()
+    nc  = tmp%get_ncols()
+    nza = tmp%get_nzeros()
+    a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat
+
+    call psb_realloc(nr,nc,a%val,info)
+    if (info /= psb_success_) goto 9999
+    a%val = czero
+    do i=1, nza
+      a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i)
+    end do
+    a%nnz = nza
+    call tmp%free()
+  else
+    if (b%is_dev()) call b%sync()
+    nr  = b%get_nrows()
+    nc  = b%get_ncols()
+    nza = b%get_nzeros()
+    ! If it is sorted then we can lessen memory impact
+    ! (no temporary copy of B is made on this path).
+    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+
+    call psb_realloc(nr,nc,a%val,info)
+    if (info /= psb_success_) goto 9999
+    a%val = czero
+    do i=1, nza
+      a%val(b%ia(i),b%ja(i)) = b%val(i)
+    end do
+    a%nnz = nza
+  end if
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_cp_dns_from_coo
+
+
+
+!
+!> Function  cp_to_coo:
+!! \memberof  psb_c_dns_sparse_mat
+!! \brief Copy and convert to psb_c_coo_sparse_mat
+!!        Invoked from the source object.
+!!   \param b The output variable
+!!   \param info return code
+!
+
+subroutine psb_c_cp_dns_to_coo(a,b,info)
+  !
+  ! Scan the dense array row by row and copy every entry different from
+  ! czero into the COO matrix B, which is then marked as row-major
+  ! sorted, assembled and resident on the host.
+  !
+  ! B is allocated for a%get_nzeros() entries, but the number of entries
+  ! recorded in B is the number actually written by the scan.
+  ! NOTE(review): this assumes the scan never finds more non-zeros than
+  ! a%get_nzeros() reports - confirm against the routines that update a%nnz.
+  !
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_cp_dns_to_coo
+  implicit none
+
+  class(psb_c_dns_sparse_mat), intent(in)    :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  Integer(Psb_Ipk_) :: nza, nr, nc, i, j, k
+
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
+
+  k = 0
+  do i=1,nr
+    do j=1,nc
+      if (a%val(i,j) /= czero) then
+        k = k + 1
+        b%ia(k)  = i
+        b%ja(k)  = j
+        b%val(k) = a%val(i,j)
+      end if
+    end do
+  end do
+
+  ! Only the first K slots hold valid data; recording NZA here would
+  ! expose uninitialised entries whenever K < NZA.
+  call b%set_nzeros(k)
+  call b%set_sort_status(psb_row_major_)
+  call b%set_asb()
+  call b%set_host()
+
+end subroutine psb_c_cp_dns_to_coo
+
+
+
+!
+!> Function  mv_to_coo:
+!! \memberof  psb_c_dns_sparse_mat
+!! \brief Convert to psb_c_coo_sparse_mat, freeing the source.
+!!        Invoked from the source object.
+!!   \param b The output variable
+!!   \param info return code; when it is not psb_success_ the source
+!!               is left untouched.
+!
+subroutine psb_c_mv_dns_to_coo(a,b,info)
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_mv_dns_to_coo
+  implicit none
+
+  class(psb_c_dns_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  call a%cp_to_coo(b,info)
+  ! Release the source only once the copy is known to be good, so that a
+  ! failed conversion does not also destroy the original data.
+  if (info == psb_success_) call a%free()
+  return
+
+end subroutine psb_c_mv_dns_to_coo
+
+
+!
+!> Function  mv_from_coo:
+!! \memberof  psb_c_dns_sparse_mat
+!! \brief Convert from psb_c_coo_sparse_mat, freeing the source.
+!!        Invoked from the target object.
+!!   \param b The input variable
+!!   \param info return code
+!
+!
+subroutine psb_c_mv_dns_from_coo(a,b,info)
+  !
+  ! Convert the COO matrix B into the dense matrix A through
+  ! a%cp_from_coo and then release B.
+  ! B is released only when the conversion succeeded: on failure INFO is
+  ! non-zero and B still holds its data.
+  !
+  use psb_base_mod
+  use psb_c_dns_mat_mod, psb_protect_name => psb_c_mv_dns_from_coo
+  implicit none
+
+  class(psb_c_dns_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  call a%cp_from_coo(b,info)
+  ! Freeing B after a failed copy would lose the only valid copy.
+  if (info == psb_success_) call b%free()
+
+  return
+
+end subroutine psb_c_mv_dns_from_coo
+
diff --git a/ext/impl/psb_c_ell_aclsum.f90 b/ext/impl/psb_c_ell_aclsum.f90
new file mode 100644
index 00000000..3d5a292a
--- /dev/null
+++ b/ext/impl/psb_c_ell_aclsum.f90
@@ -0,0 +1,82 @@
+!                Parallel Sparse BLAS   GPU plugin
+!      (C) Copyright 2013
+!
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_c_ell_aclsum(d,a)
+  !
+  ! Column sums of the absolute values of A: every stored coefficient
+  ! a%val(i,j), j=1..a%irn(i), adds abs(a%val(i,j)) to d(a%ja(i,j)).
+  ! All of D starts from sone when A has a unit diagonal, from szero
+  ! otherwise. D must hold at least a%get_ncols() entries.
+  !
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_aclsum
+  implicit none
+  class(psb_c_ell_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_)  :: irow, jpos, jcol, nrows, ncols
+  Integer(Psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='aclsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  nrows = a%get_nrows()
+  ncols = a%get_ncols()
+  if (size(d) < ncols) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = ncols
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  d = szero
+  if (a%is_unit()) d = sone
+
+  do irow = 1, nrows
+    do jpos = 1, a%irn(irow)
+      jcol    = a%ja(irow,jpos)
+      d(jcol) = d(jcol) + abs(a%val(irow,jpos))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_aclsum
diff --git a/ext/impl/psb_c_ell_allocate_mnnz.f90 b/ext/impl/psb_c_ell_allocate_mnnz.f90
new file mode 100644
index 00000000..b137eb04
--- /dev/null
+++ b/ext/impl/psb_c_ell_allocate_mnnz.f90
@@ -0,0 +1,91 @@
+!                Parallel Sparse BLAS   GPU plugin
+!      (C) Copyright 2013
+!
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_c_ell_allocate_mnnz(m,n,a,nz)
+  !
+  ! (Re)allocate the ELL storage of A for an M x N matrix and put it in
+  ! the build state.
+  !   m, n  number of rows / columns; a negative value is an error.
+  !   nz    optional total number of coefficients to make room for. The
+  !         per-row width of a%ja / a%val is NZ divided by the number of
+  !         rows, rounded up; when NZ is absent max(7*m,7*n,1) is used in
+  !         its place.
+  ! Errors are pushed on the error stack and handed to the error handler.
+  !
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_c_ell_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional    :: nz
+  Integer(Psb_ipk_)  :: err_act, info, nz_, mdiv
+  character(len=20)  :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione/))
+    goto 9999
+  endif
+  ! Row count used as divisor, clamped to 1: M == 0 passes the checks
+  ! above and would otherwise cause an integer division by zero.
+  ! For M > 0 the result is unchanged.
+  mdiv = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + mdiv - 1)/mdiv
+  else
+    nz_ = (max(7*m,7*n,ione) + mdiv - 1)/mdiv
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione/))
+    goto 9999
+  endif
+
+  if (info == psb_success_) call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info)
+  if (info == psb_success_) call psb_realloc(m,nz_,a%val,info)
+  if (info == psb_success_) then
+    a%irn   = 0
+    a%idiag = 0
+    a%nzt   = -1
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+    call a%set_bld()
+    call a%set_triangle(.false.)
+    call a%set_unit(.false.)
+    call a%set_dupl(psb_dupl_def_)
+  end if
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_allocate_mnnz
diff --git a/ext/impl/psb_c_ell_arwsum.f90 b/ext/impl/psb_c_ell_arwsum.f90
new file mode 100644
index 00000000..c047c742
--- /dev/null
+++ b/ext/impl/psb_c_ell_arwsum.f90
@@ -0,0 +1,78 @@
+!                Parallel Sparse BLAS   GPU plugin
+!      (C) Copyright 2013
+!
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3.
The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_c_ell_arwsum(d,a)
+  !
+  ! Row sums of the absolute values of A: d(i) is the sum of
+  ! abs(a%val(i,j)) for j=1..a%irn(i), started from sone when A has a
+  ! unit diagonal and from szero otherwise.
+  ! D must hold at least a%get_nrows() entries; only those are written.
+  !
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_arwsum
+  implicit none
+  class(psb_c_ell_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_)  :: irow, jpos, nrows
+  real(psb_spk_)     :: start_val, rowsum
+  Integer(Psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='rowsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  nrows = a%get_nrows()
+  if (size(d) < nrows) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = nrows
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! Every row starts from the same value.
+  if (a%is_unit()) then
+    start_val = sone
+  else
+    start_val = szero
+  end if
+
+  do irow = 1, nrows
+    rowsum = start_val
+    do jpos = 1, a%irn(irow)
+      rowsum = rowsum + abs(a%val(irow,jpos))
+    end do
+    d(irow) = rowsum
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_arwsum
diff --git a/ext/impl/psb_c_ell_colsum.f90 b/ext/impl/psb_c_ell_colsum.f90
new file mode 100644
index 00000000..6d06b589
--- /dev/null
+++ b/ext/impl/psb_c_ell_colsum.f90
@@ -0,0 +1,80 @@
+!                Parallel Sparse BLAS   GPU plugin
+!      (C) Copyright 2013
+!
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_c_ell_colsum(d,a)
+  !
+  ! Column sums of A: every stored coefficient a%val(i,j),
+  ! j=1..a%irn(i), is added to d(a%ja(i,j)).
+  ! All of D starts from cone when A has a unit diagonal, from czero
+  ! otherwise. D must hold at least a%get_ncols() entries.
+  !
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_colsum
+  implicit none
+  class(psb_c_ell_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(out)          :: d(:)
+
+  integer(psb_ipk_)  :: irow, jpos, jcol, nrows, ncols
+  Integer(Psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='colsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  nrows = a%get_nrows()
+  ncols = a%get_ncols()
+  if (size(d) < ncols) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = ncols
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  d = czero
+  if (a%is_unit()) d = cone
+
+  do irow = 1, nrows
+    do jpos = 1, a%irn(irow)
+      jcol    = a%ja(irow,jpos)
+      d(jcol) = d(jcol) + (a%val(irow,jpos))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_colsum
diff --git a/ext/impl/psb_c_ell_csgetblk.f90 b/ext/impl/psb_c_ell_csgetblk.f90
new file mode 100644
index 00000000..deb07c25
--- /dev/null
+++ b/ext/impl/psb_c_ell_csgetblk.f90
@@ -0,0 +1,83 @@
+!                Parallel Sparse BLAS   GPU plugin
+!      (C) Copyright 2013
+!
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_c_ell_csgetblk(imin,imax,a,b,info,&
+     & jmin,jmax,iren,append,rscale,cscale)
+  !
+  ! Extract rows IMIN:IMAX of A into the COO matrix B by calling
+  ! a%csget on the arrays b%ia, b%ja, b%val, then record the new entry
+  ! count in B and call b%fix.
+  !   append  [false] when true the extracted entries are placed after
+  !           the entries already stored in B.
+  !   jmin, jmax, iren, rscale, cscale are passed unchanged to a%csget.
+  !   info    return code.
+  !
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csgetblk
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(in)    :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(in)              :: imin,imax
+  integer(psb_ipk_),intent(out)              :: info
+  logical, intent(in), optional              :: append
+  integer(psb_ipk_), intent(in), optional    :: iren(:)
+  integer(psb_ipk_), intent(in), optional    :: jmin,jmax
+  logical, intent(in), optional              :: rscale,cscale
+  Integer(Psb_ipk_)  :: err_act, nzin, nzout
+  character(len=20)  :: name='ell_getblk'
+  logical            :: append_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(append)) then
+    append_ = append
+  else
+    append_ = .false.
+  endif
+  if (append_) then
+    ! New entries go after what the destination B already holds; the
+    ! size of the source A has nothing to do with the insertion point.
+    nzin = b%get_nzeros()
+  else
+    nzin = 0
+  endif
+
+  call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,&
+       & jmin=jmin, jmax=jmax, iren=iren, append=append_, &
+       & nzin=nzin, rscale=rscale, cscale=cscale)
+
+  if (info /= psb_success_) goto 9999
+
+  call b%set_nzeros(nzin+nzout)
+  call b%set_host()
+  call b%fix(info)
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_csgetblk
diff --git a/ext/impl/psb_c_ell_csgetptn.f90 b/ext/impl/psb_c_ell_csgetptn.f90
new file mode 100644
index 00000000..821daa89
--- /dev/null
+++ b/ext/impl/psb_c_ell_csgetptn.f90
@@ -0,0 +1,189 @@
+!                Parallel Sparse BLAS   GPU plugin
+!      (C) Copyright 2013
+!
+!                         Salvatore Filippone
+!                         Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csgetptn + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_ell_csgetrow + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_ell_csmm + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ell_csmv + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ell_csnm1 + + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_spk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csnm1' + logical, parameter :: debug=.false. 
+ + + if (a%is_dev()) call a%sync() + res = szero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = sone + else + vt(:) = szero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_c_ell_csnm1 diff --git a/ext/impl/psb_c_ell_csnmi.f90 b/ext/impl/psb_c_ell_csnmi.f90 new file mode 100644 index 00000000..6dc9cfa4 --- /dev/null +++ b/ext/impl/psb_c_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+
+
+!
+! psb_c_ell_csnmi: maximum absolute row sum of an ELL matrix.
+!
+! For every row the moduli of all stored slots a%val(i,:) are summed
+! (one is added when the matrix is flagged as unit-diagonal) and the
+! largest such sum is returned.
+! NOTE(review): the sum spans the whole row of a%val, padding slots
+! included; presumably padding holds zero -- confirm against the builder.
+!
+function psb_c_ell_csnmi(a) result(res)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csnmi
+  implicit none
+  class(psb_c_ell_sparse_mat), intent(in) :: a
+  real(psb_spk_)         :: res
+
+  integer(psb_ipk_)  :: irow, nrows
+  real(psb_spk_)     :: unit_shift, row_total
+
+  ! Bring the host copy up to date before reading it
+  if (a%is_dev()) call a%sync()
+
+  ! Contribution of the implicit unit diagonal, if any
+  if (a%is_unit()) then
+    unit_shift = sone
+  else
+    unit_shift = szero
+  end if
+
+  res   = szero
+  nrows = a%get_nrows()
+  do irow = 1, nrows
+    row_total = sum(abs(a%val(irow,:))) + unit_shift
+    res       = max(res, row_total)
+  end do
+
+end function psb_c_ell_csnmi
diff --git a/ext/impl/psb_c_ell_csput.f90 b/ext/impl/psb_c_ell_csput.f90
new file mode 100644
index 00000000..e0b0f47f
--- /dev/null
+++ b/ext/impl/psb_c_ell_csput.f90
@@ -0,0 +1,208 @@
+! Parallel Sparse BLAS GPU plugin 
+! (C) Copyright 2013
+! 
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+! 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! 
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+
+
+!
+! psb_c_ell_csput_a: update coefficients of an ELL matrix in place.
+!
+! Arguments:
+!   nz      - number of entries supplied in ia/ja/val; nz == 0 is a
+!             successful no-op, nz < 0 is an error
+!   ia, ja  - row / column indices of the entries (at least nz long)
+!   val     - coefficient values (at least nz long)
+!   a       - matrix to update; it must be in the update state
+!   imin, imax, jmin, jmax - part of the interface, not used by this
+!             implementation
+!   info    - psb_success_ on success, otherwise the error code that
+!             was pushed on the error stack
+!
+! Entries whose row is out of range or whose column is not already
+! present in the row are discarded (a debug message is emitted when
+! the debug level is at least psb_debug_serial_).
+!
+subroutine psb_c_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) 
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csput_a
+  implicit none 
+
+  class(psb_c_ell_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in)      :: val(:)
+  integer(psb_ipk_), intent(in)      :: nz, ia(:), ja(:), imin,imax,jmin,jmax
+  integer(psb_ipk_), intent(out)     :: info
+
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='c_ell_csput_a'
+  integer(psb_ipk_)  :: int_err(5), debug_level, debug_unit
+
+
+  call psb_erractionsave(err_act)
+  info        = psb_success_
+  ! Every element is handed to psb_errpush, so none may stay undefined
+  int_err(:)  = 0
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  ! Only a negative count is an error; the original test (nz <= 0)
+  ! made the explicit nz == 0 early exit below unreachable.
+  if (nz < 0) then 
+    info = psb_err_iarg_neg_
+    int_err(1)=1
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (size(ia) < nz) then 
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=2
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (size(ja) < nz) then 
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=3
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (size(val) < nz) then 
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=4
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! Nothing to insert: succeed without touching the matrix
+  if (nz == 0) then
+    call psb_erractionrestore(err_act)
+    return
+  end if
+
+  if (a%is_bld()) then 
+    ! Build phase should only ever be in COO
+    info = psb_err_invalid_mat_state_
+
+  else  if (a%is_upd()) then 
+    if (a%is_dev())   call a%sync()
+    call  psb_c_ell_srch_upd(nz,ia,ja,val,a,&
+         & imin,imax,jmin,jmax,info)
+
+    if (info < 0) then 
+      ! Negative codes from the search routine are internal failures
+      info = psb_err_internal_error_
+    else if (info > 0) then 
+      ! Positive codes only flag discarded entries: not an error
+      if (debug_level >= psb_debug_serial_) &
+           & write(debug_unit,*) trim(name),&
+           & ': Discarded entries not  belonging to us.'
+      info = psb_success_
+    end if
+    ! The host copy is now the up-to-date one
+    call a%set_host()
+  else 
+    ! State is wrong.
+    info = psb_err_invalid_mat_state_
+  end if
+  if (info /= psb_success_) then
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+
+contains
+
+  !
+  ! psb_c_ell_srch_upd: locate each (ia(i),ja(i)) in the stored pattern
+  ! with a binary search on the row and overwrite or accumulate val(i).
+  !
+  ! info on exit:
+  !    0  every entry was applied
+  !    2  at least one row index was outside 1..nrows
+  !    3  at least one column was not found in its row
+  !   -3  unsupported duplicate-handling mode
+  !   -4  matrix is not sorted, binary search not applicable
+  !
+  subroutine psb_c_ell_srch_upd(nz,ia,ja,val,a,&
+       & imin,imax,jmin,jmax,info)
+
+    implicit none 
+
+    class(psb_c_ell_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax
+    integer(psb_ipk_), intent(in) :: ia(:),ja(:)
+    complex(psb_spk_), intent(in) :: val(:)
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: i, ir, ic, ip, nr, nrl, dupl
+    integer(psb_ipk_) :: debug_level, debug_unit
+    character(len=20) :: name='c_ell_srch_upd'
+
+    info        = psb_success_
+    debug_unit  = psb_get_debug_unit()
+    debug_level = psb_get_debug_level()
+
+    dupl = a%get_dupl()
+
+    if (.not.a%is_sorted()) then 
+      info = -4
+      return
+    end if
+
+    nr = a%get_nrows()
+
+    select case(dupl)
+    case(psb_dupl_ovwrt_,psb_dupl_err_)
+      ! Overwrite.
+      ! Cannot test for error, should have been caught earlier.
+      do i=1, nz
+        ir = ia(i)
+        ic = ja(i) 
+
+        if ((ir > 0).and.(ir <= nr)) then 
+          ! nrl: number of entries actually stored in row ir
+          nrl = a%irn(ir)
+          ip  = psb_bsrch(ic,nrl,a%ja(ir,1:nrl))    
+          if (ip>0) then 
+            a%val(ir,ip) = val(i)
+          else
+            info = max(info,3)
+          end if
+        else
+          info = max(info,2)
+        end if
+
+      end do
+
+    case(psb_dupl_add_)
+      ! Add
+      do i=1, nz
+        ir = ia(i)
+        ic = ja(i) 
+        if ((ir > 0).and.(ir <= nr)) then 
+          nrl = a%irn(ir)
+          ip  = psb_bsrch(ic,nrl,a%ja(ir,1:nrl))    
+          if (ip>0) then 
+            a%val(ir,ip) = a%val(ir,ip) + val(i)
+          else
+            info = max(info,3)
+          end if
+        else
+          info = max(info,2)
+        end if
+      end do
+
+    case default
+      info = -3
+      if (debug_level >= psb_debug_serial_) &
+           & write(debug_unit,*) trim(name),&
+           & ': Duplicate handling: ',dupl
+    end select
+
+  end subroutine psb_c_ell_srch_upd
+end subroutine psb_c_ell_csput_a
diff --git a/ext/impl/psb_c_ell_cssm.f90 b/ext/impl/psb_c_ell_cssm.f90
new file mode 100644
index 00000000..26e76030
--- /dev/null
+++ b/ext/impl/psb_c_ell_cssm.f90
@@ -0,0 +1,375 @@
+! Parallel Sparse BLAS GPU plugin 
+! (C) Copyright 2013
+! 
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+! 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_cssm + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. 
(a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_c_ell_cssv + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + complex(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_c_ell_get_diag + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = cone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = czero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_get_diag diff --git a/ext/impl/psb_c_ell_maxval.f90 b/ext/impl/psb_c_ell_maxval.f90 new file mode 100644 index 00000000..4de58b11 --- /dev/null +++ b/ext/impl/psb_c_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+
+
+!
+! psb_c_ell_maxval: largest modulus among the stored coefficients.
+!
+! The running maximum starts from one when the matrix is flagged as
+! unit-diagonal and from zero otherwise; every slot of a%val(i,:) is
+! then inspected for each row.
+!
+function psb_c_ell_maxval(a) result(res)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_maxval
+  implicit none
+  class(psb_c_ell_sparse_mat), intent(in) :: a
+  real(psb_spk_)         :: res
+
+  integer(psb_ipk_)  :: irow
+
+  ! Bring the host copy up to date before reading it
+  if (a%is_dev()) call a%sync()
+
+  ! Floor of the result: the implicit unit diagonal, when present
+  res = merge(sone, szero, a%is_unit())
+
+  do irow = 1, a%get_nrows()
+    res = max(res, maxval(abs(a%val(irow,:))))
+  end do
+
+end function psb_c_ell_maxval
diff --git a/ext/impl/psb_c_ell_mold.f90 b/ext/impl/psb_c_ell_mold.f90
new file mode 100644
index 00000000..c7c5d621
--- /dev/null
+++ b/ext/impl/psb_c_ell_mold.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin 
+! (C) Copyright 2013
+! 
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! 
not be used to endorse or promote products derived from this
+! software without specific written permission.
+! 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+
+
+!
+! psb_c_ell_mold: make b an empty matrix of dynamic type
+! psb_c_ell_sparse_mat.
+!
+! Any object already held by b is freed and deallocated first.
+! info is zero on success and psb_err_alloc_dealloc_ when either
+! the deallocation or the allocation fails. The argument a is only
+! used for type-bound dispatch; its contents are not read.
+!
+subroutine psb_c_ell_mold(a,b,info)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_mold
+  implicit none
+  class(psb_c_ell_sparse_mat), intent(in)                  :: a
+  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                    :: info
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='ell_mold'
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    ! Release whatever b currently holds before retyping it
+    call b%free()
+    deallocate(b, stat=info)
+  end if
+  if (info == 0) then
+    allocate(psb_c_ell_sparse_mat :: b, stat=info)
+  end if
+
+  ! Success: nothing else to do
+  if (info == psb_success_) return
+
+  ! Either the deallocate or the allocate above failed
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_mold
diff --git a/ext/impl/psb_c_ell_print.f90 b/ext/impl/psb_c_ell_print.f90
new file mode 100644
index 00000000..1b8117a8
--- /dev/null
+++ b/ext/impl/psb_c_ell_print.f90
@@ -0,0 +1,99 @@
+! Parallel Sparse BLAS GPU plugin 
+! (C) Copyright 2013
+! 
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+! 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+
+
+!
+! psb_c_ell_print: write an ELL matrix to unit iout as a MatrixMarket
+! "coordinate complex general" listing.
+!
+! Arguments:
+!   iout     - output unit
+!   a        - matrix to print
+!   iv       - optional renumbering applied to both row and column
+!              indices; when present, ivr and ivc are ignored for the
+!              index mapping
+!   head     - optional text emitted as a comment line
+!   ivr, ivc - optional separate renumberings for rows / columns
+!
+subroutine psb_c_ell_print(iout,a,iv,head,ivr,ivc)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_print
+  implicit none 
+
+  integer(psb_ipk_), intent(in)               :: iout
+  class(psb_c_ell_sparse_mat), intent(in) :: a   
+  integer(psb_lpk_), intent(in), optional     :: iv(:)
+  character(len=*), optional        :: head
+  integer(psb_lpk_), intent(in), optional     :: ivr(:), ivc(:)
+
+  character(len=80)  :: frmt 
+  integer(psb_ipk_)  :: irow, jslot, nr, nc, nz
+  logical            :: map_rows, map_cols
+
+  ! Header section
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general'
+  if (present(head)) write(iout,'(a,a)') '% ',head 
+  write(iout,'(a)') '%'    
+  write(iout,'(a,a)') '% ELL'
+
+  ! Bring the host copy up to date before reading it
+  if (a%is_dev()) call a%sync()
+  nr   = a%get_nrows()
+  nc   = a%get_ncols()
+  nz   = a%get_nzeros()
+  frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
+
+  write(iout,*) nr, nc, nz
+
+  map_rows = present(ivr)
+  map_cols = present(ivc)
+
+  if (present(iv)) then 
+    ! One map for both rows and columns
+    do irow = 1, nr
+      do jslot = 1, a%irn(irow)
+        write(iout,frmt) iv(irow),iv(a%ja(irow,jslot)),a%val(irow,jslot)
+      end do
+    end do
+  else if (map_rows .and. map_cols) then 
+    do irow = 1, nr
+      do jslot = 1, a%irn(irow)
+        write(iout,frmt) ivr(irow),ivc(a%ja(irow,jslot)),a%val(irow,jslot)
+      end do
+    end do
+  else if (map_rows) then 
+    ! Rows renumbered, columns printed as stored
+    do irow = 1, nr
+      do jslot = 1, a%irn(irow)
+        write(iout,frmt) ivr(irow),(a%ja(irow,jslot)),a%val(irow,jslot)
+      end do
+    end do
+  else if (map_cols) then 
+    ! Columns renumbered, rows printed as stored
+    do irow = 1, nr
+      do jslot = 1, a%irn(irow)
+        write(iout,frmt) (irow),ivc(a%ja(irow,jslot)),a%val(irow,jslot)
+      end do
+    end do
+  else
+    ! No renumbering at all
+    do irow = 1, nr
+      do jslot = 1, a%irn(irow)
+        write(iout,frmt) (irow),(a%ja(irow,jslot)),a%val(irow,jslot)
+      end do
+    end do
+  end if
+
+end subroutine psb_c_ell_print
diff --git a/ext/impl/psb_c_ell_reallocate_nz.f90 b/ext/impl/psb_c_ell_reallocate_nz.f90
new file mode 100644
index 00000000..b0d77568
--- /dev/null
+++ b/ext/impl/psb_c_ell_reallocate_nz.f90
@@ -0,0 +1,66 @@
+! Parallel Sparse BLAS GPU plugin 
+! (C) Copyright 2013
+! 
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! 
documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+! 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+
+
+!
+! psb_c_ell_reallocate_nz: resize the ja/val arrays so that they can
+! hold at least nz entries.
+!
+! The leading dimension size(a%ja,1) is kept; the second dimension
+! becomes ceiling(max(nz,1)/nrows). When the matrix has no rows the
+! divisor is clamped to one so the computation cannot divide by zero.
+!
+! Arguments:
+!   nz - requested number of entries
+!   a  - matrix whose storage is resized
+!
+subroutine  psb_c_ell_reallocate_nz(nz,a) 
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_reallocate_nz
+  implicit none 
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_c_ell_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_)  :: m, mdiv, nzrm, ld
+  Integer(Psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='c_ell_reallocate_nz'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  !
+  ! What should this really do??? 
+  ! 
+  m    = a%get_nrows()
+  ! Clamp the divisor: with m == 0 the original expression divided by zero
+  mdiv = max(m,ione)
+  nzrm = (max(nz,ione)+mdiv-1)/mdiv
+  ld   = size(a%ja,1)
+  call psb_realloc(ld,nzrm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
+  if (info /= psb_success_) then 
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_reallocate_nz
diff --git a/ext/impl/psb_c_ell_reinit.f90 b/ext/impl/psb_c_ell_reinit.f90
new file mode 100644
index 00000000..2b15dfea
--- /dev/null
+++ b/ext/impl/psb_c_ell_reinit.f90
@@ -0,0 +1,77 @@
+! Parallel Sparse BLAS GPU plugin 
+! (C) Copyright 2013
+! 
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+! 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+
+
+!
+! psb_c_ell_reinit: put an assembled matrix back into the update state.
+!
+! Arguments:
+!   a     - matrix to reinitialise
+!   clear - optional, default .true.: zero all coefficients
+!
+! A matrix that is already in the build or update state is left
+! untouched. Any state other than build/update/assembled raises
+! psb_err_invalid_mat_state_.
+!
+subroutine psb_c_ell_reinit(a,clear)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_reinit
+  implicit none 
+
+  class(psb_c_ell_sparse_mat), intent(inout) :: a   
+  logical, intent(in), optional :: clear
+
+  Integer(Psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='reinit'
+  logical            :: clear_
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+
+  if (present(clear)) then 
+    clear_ = clear
+  else
+    clear_ = .true.
+  end if
+
+  ! Build/update states need no work. They now fall through to the
+  ! common exit so that psb_erractionrestore is always paired with
+  ! the psb_erractionsave above (the original returned early here).
+  if (.not.(a%is_bld() .or. a%is_upd())) then 
+    if (a%is_asb()) then 
+      if (a%is_dev())   call a%sync()
+      if (clear_) a%val(:,:) = czero
+      call a%set_upd()
+      ! The host copy is now the up-to-date one
+      call a%set_host()
+    else
+      info = psb_err_invalid_mat_state_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_reinit
diff --git a/ext/impl/psb_c_ell_rowsum.f90 b/ext/impl/psb_c_ell_rowsum.f90
new file mode 100644
index 00000000..5ae7d42c
--- /dev/null
+++ b/ext/impl/psb_c_ell_rowsum.f90
@@ -0,0 +1,77 @@
+! Parallel Sparse BLAS GPU plugin 
+! (C) Copyright 2013
+! 
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. 
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+! 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+
+
+!
+! psb_c_ell_rowsum: d(i) = sum of the coefficients stored in row i
+! (plus one when the matrix is flagged as unit-diagonal).
+!
+! Arguments:
+!   d - output vector; must have at least get_nrows() elements,
+!       otherwise psb_err_input_asize_small_i_ is raised. Elements
+!       beyond the number of rows are not assigned.
+!   a - matrix whose rows are summed
+!
+subroutine psb_c_ell_rowsum(d,a) 
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_rowsum
+  implicit none 
+  class(psb_c_ell_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(out)              :: d(:)
+
+  integer(psb_ipk_)  :: i, j, m
+  Integer(Psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='rowsum'
+  logical            :: is_unit
+
+  call psb_erractionsave(err_act)
+  info       = psb_success_
+  ! Every element is handed to psb_errpush, so none may stay undefined
+  int_err(:) = 0
+
+  ! Bring the host copy up to date before reading it
+  if (a%is_dev()) call a%sync()
+  m = a%get_nrows()
+  if (size(d) < m) then 
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = m
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  is_unit = a%is_unit()
+  do i = 1, m
+    ! Start from the implicit unit diagonal, when present
+    if (is_unit) then 
+      d(i) = cone
+    else
+      d(i) = czero
+    end if
+    do j=1,a%irn(i)
+      d(i) = d(i) + (a%val(i,j))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return  
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_rowsum
diff --git a/ext/impl/psb_c_ell_scal.f90 b/ext/impl/psb_c_ell_scal.f90
new file mode 100644
index 00000000..63150f32
--- /dev/null
+++ b/ext/impl/psb_c_ell_scal.f90
@@ -0,0 +1,99 @@
+! Parallel Sparse BLAS GPU plugin 
+! (C) Copyright 2013
+! 
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+! 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! 
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+
+
+!
+! psb_c_ell_scal: scale an ELL matrix by a diagonal, in place.
+!
+! Arguments:
+!   d    - scaling vector
+!   a    - matrix to scale
+!   info - psb_success_, or psb_err_input_asize_invalid_i_ when d is
+!          shorter than the number of rows (left) / columns (right)
+!   side - optional, 'L' (default) multiplies row i by d(i);
+!          any other value multiplies entry (i,j) by d(ja(i,j))
+!
+! A unit-diagonal matrix is first converted with make_nonunit().
+!
+subroutine psb_c_ell_scal(d,a,info,side) 
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_scal
+  implicit none 
+  class(psb_c_ell_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in)      :: d(:)
+  integer(psb_ipk_), intent(out)            :: info
+  character, intent(in), optional :: side
+
+  Integer(Psb_ipk_)  :: err_act, i, j, m, n, ierr(5)
+  character(len=20)  :: name='scal'
+  character          :: side_
+  logical            :: left 
+
+  info  = psb_success_
+  call psb_erractionsave(err_act)
+  ! Every element is handed to psb_errpush, so none may stay undefined
+  ierr(:) = 0
+
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) then 
+    call a%make_nonunit()
+  end if
+
+  side_ = 'L'
+  if (present(side)) then 
+    side_ = psb_toupper(side)
+  end if
+
+  left = (side_ == 'L')
+
+  ! The row count bounds the loops of both branches. The original
+  ! assigned m only in the left branch, so right scaling iterated
+  ! over an undefined bound.
+  m = a%get_nrows()
+
+  if (left) then 
+    if (size(d) < m) then 
+      info=psb_err_input_asize_invalid_i_
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    do i=1, m 
+      a%val(i,:) = a%val(i,:) * d(i)
+    enddo
+  else
+    n = a%get_ncols()
+    if (size(d) < n) then 
+      info=psb_err_input_asize_invalid_i_
+      ierr(1) = 2; ierr(2) = size(d); 
+      call psb_errpush(info,name,i_err=ierr)
+      goto 9999
+    end if
+
+    do i=1, m 
+      do j=1, a%irn(i)
+        a%val(i,j) = a%val(i,j) * d(a%ja(i,j))
+      end do
+    enddo
+
+  end if
+
+  ! The host copy is now the up-to-date one
+  call a%set_host()
+  call psb_erractionrestore(err_act)
+  return  
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_ell_scal
diff --git a/ext/impl/psb_c_ell_scals.f90 b/ext/impl/psb_c_ell_scals.f90
new file mode 100644
index 00000000..3e4cd92a
--- /dev/null
+++ b/ext/impl/psb_c_ell_scals.f90
!
! psb_c_ell_scals: multiply the whole coefficient array a%val of the
! ELL matrix by the scalar d, in place.
!
! Arguments:
!   d     - input, the scalar factor.
!   a     - input/output, the matrix to be scaled.
!   info  - output, always psb_success_ (no failure path in this routine).
!
subroutine psb_c_ell_scals(d,a,info)

  use psb_base_mod
  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_scals
  implicit none
  class(psb_c_ell_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)      :: d
  integer(psb_ipk_), intent(out)     :: info

  integer(psb_ipk_) :: err_act

  call psb_erractionsave(err_act)
  info = psb_success_

  ! Sync first when the matrix is flagged as being on the device.
  if (a%is_dev())  call a%sync()
  ! A matrix flagged as unit is turned into a non-unit one before scaling.
  if (a%is_unit()) call a%make_nonunit()

  a%val(:,:) = a%val(:,:) * d

  ! Flag the host copy as the one just modified.
  call a%set_host()

  call psb_erractionrestore(err_act)
  return

end subroutine psb_c_ell_scals
!
! psb_c_ell_trim: resize the ELL arrays to the space actually needed.
!
! irn and idiag are reallocated to max(1,nrows) entries; ja and val to
! max(1,nrows) x max(1,longest row), where the longest row is the
! maximum of a%irn over the first nrows entries.
!
! Arguments:
!   a  - input/output, the matrix whose arrays are reallocated.
!
! On a reallocation failure psb_err_alloc_dealloc_ is pushed on the
! error stack and the error handler is invoked.
!
subroutine psb_c_ell_trim(a)

  use psb_base_mod
  use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_trim
  implicit none
  class(psb_c_ell_sparse_mat), intent(inout) :: a

  integer(psb_ipk_)  :: err_act, info, nr, m, nzm
  character(len=20)  :: name='trim'

  call psb_erractionsave(err_act)
  info = psb_success_

  nr  = a%get_nrows()
  m   = max(1_psb_ipk_,nr)
  ! Only look at a%irn when there is at least one row: with zero rows
  ! the section irn(1:1) would be read out of bounds.
  nzm = 1_psb_ipk_
  if (nr > 0) nzm = max(1_psb_ipk_,maxval(a%irn(1:nr)))

  call psb_realloc(m,a%irn,info)
  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
  if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info)
  if (info == psb_success_) call psb_realloc(m,nzm,a%val,info)

  if (info /= psb_success_) then
    ! Push an error before calling the handler, as the other routines
    ! of this family do on allocation failures.
    info = psb_err_alloc_dealloc_
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_ell_trim
!
! psb_c_hdia_allocate_mnnz: argument validation for an m x n HDIA matrix.
!
! As written, this routine only checks its arguments: it pushes
! psb_err_iarg_neg_ (with the position of the offending argument) when
! m, n or the derived per-row size is negative. No array of a is
! allocated and a itself is not modified here.
!
! Arguments:
!   m, n  - input, requested number of rows / columns (must be >= 0).
!   a     - input/output, the matrix (untouched by the visible code).
!   nz    - optional input, requested number of nonzeros.
!
subroutine psb_c_hdia_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)      :: m,n
  class(psb_c_hdia_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional :: nz

  integer(psb_ipk_)  :: err_act, info, nz_, mdiv
  character(len=20)  :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/ione/))
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/2*ione/))
    goto 9999
  endif

  ! m == 0 is a legal size: never divide by it. For m > 0 the result
  ! is the same ceiling division as before.
  mdiv = max(m,ione)
  if (present(nz)) then
    nz_ = (max(nz,ione) + m - 1)/mdiv
  else
    nz_ = (max(7*m,7*n,ione) + m - 1)/mdiv
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/3*ione/))
    goto 9999
  endif

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_hdia_allocate_mnnz
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_csmv + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! 
max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_c_inner_dia_csmv + +end subroutine psb_c_hdia_csmv diff --git a/ext/impl/psb_c_hdia_mold.f90 b/ext/impl/psb_c_hdia_mold.f90 new file mode 100644 index 00000000..d9f85ec9 --- /dev/null +++ b/ext/impl/psb_c_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! 
!
! psb_c_hdia_mold: give b the dynamic type psb_c_hdia_sparse_mat.
!
! If b is already allocated it is freed (b%free()) and deallocated
! first; then a new psb_c_hdia_sparse_mat object is allocated into it.
!
! Arguments:
!   a     - input, the matrix providing the type (its data is not read).
!   b     - input/output, the object to be (re)allocated.
!   info  - output, 0 on success, psb_err_alloc_dealloc_ when either the
!           deallocation or the allocation fails.
!
subroutine psb_c_hdia_mold(a,b,info)

  use psb_base_mod
  use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_mold
  implicit none
  class(psb_c_hdia_sparse_mat), intent(in)  :: a
  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)    :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='hdia_mold'

  call psb_get_erraction(err_act)

  info = 0
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if
  ! Allocate only if the clean-up above did not fail.
  if (info == 0) allocate(psb_c_hdia_sparse_mat :: b, stat=info)

  ! Fast path: nothing went wrong.
  if (info == psb_success_) return

  ! Any failure is reported as a generic allocation/deallocation error.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, name)
  call psb_error_handler(err_act)
  return

end subroutine psb_c_hdia_mold
!
! psb_c_hdia_print: write the HDIA matrix to unit iout in coordinate
! (MatrixMarket-style) text form.
!
! A banner is written first, then "nr nc nz", then one line per entry.
! The entries are obtained one hack (block of hacksize rows) at a time
! through psi_c_xtr_coo_from_dia.
!
! Arguments:
!   iout  - input, the output unit.
!   a     - input, the matrix to be printed.
!   iv    - optional, index map applied to both row and column indices;
!           when present, ivr and ivc are ignored.
!   head  - optional, extra comment line for the banner.
!   ivr   - optional, index map applied to row indices only.
!   ivc   - optional, index map applied to column indices only.
!
! If the work arrays cannot be allocated the routine returns silently
! after having written the banner.
!
subroutine psb_c_hdia_print(iout,a,iv,head,ivr,ivc)

  use psb_base_mod
  use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_print
  use psi_ext_util_mod
  implicit none

  integer(psb_ipk_), intent(in)            :: iout
  class(psb_c_hdia_sparse_mat), intent(in) :: a
  integer(psb_lpk_), intent(in), optional  :: iv(:)
  character(len=*), optional               :: head
  integer(psb_lpk_), intent(in), optional  :: ivr(:), ivc(:)

  character(len=80)  :: frmt
  integer(psb_ipk_)  :: nr, nc, nz, info
  integer(psb_ipk_)  :: ih, p, nhk, hksize, first, last, ndiag, nzblk, bufsz
  integer(psb_ipk_), allocatable :: ia(:), ja(:)
  complex(psb_spk_), allocatable :: val(:)

  ! Banner
  write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general'
  if (present(head)) write(iout,'(a,a)') '% ',head
  write(iout,'(a)') '%'
  write(iout,'(a,a)') '% HDIA'

  if (a%is_dev()) call a%sync()
  nr   = a%get_nrows()
  nc   = a%get_ncols()
  nz   = a%get_nzeros()
  frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc)

  nhk    = a%nhacks
  hksize = a%hacksize

  ! Work space: hacksize times the largest number of diagonals found
  ! in any single hack.
  bufsz = 0
  do ih = 1, nhk
    bufsz = max(bufsz,(a%hackoffsets(ih+1)-a%hackoffsets(ih)))
  end do
  bufsz = hksize*bufsz
  allocate(ia(bufsz),ja(bufsz),val(bufsz),stat=info)
  if (info /= 0) return

  write(iout,*) nr, nc, nz

  do ih = 1, nhk
    first = a%hackoffsets(ih)
    last  = a%hackoffsets(ih+1)
    ndiag = last - first

    ! Extract the entries of this hack; the row displacement of hack
    ! ih is (ih-1)*hacksize.
    call psi_c_xtr_coo_from_dia(nr,nc,&
         & ia, ja, val, nzblk,&
         & hksize,ndiag,&
         & a%val((hksize*first)+1:hksize*last),&
         & a%diaOffsets(first+1:last),info,rdisp=((ih-1)*hksize))

    if (present(iv)) then
      do p = 1, nzblk
        write(iout,frmt) iv(ia(p)),iv(ja(p)),val(p)
      end do
    else if (present(ivr)) then
      if (present(ivc)) then
        do p = 1, nzblk
          write(iout,frmt) ivr(ia(p)),ivc(ja(p)),val(p)
        end do
      else
        do p = 1, nzblk
          write(iout,frmt) ivr(ia(p)),ja(p),val(p)
        end do
      end if
    else if (present(ivc)) then
      do p = 1, nzblk
        write(iout,frmt) ia(p),ivc(ja(p)),val(p)
      end do
    else
      do p = 1, nzblk
        write(iout,frmt) ia(p),ja(p),val(p)
      end do
    end if

  end do

end subroutine psb_c_hdia_print
!
! psb_c_hll_aclsum: absolute column sums of an HLL matrix,
!     d(jc) = sum over the stored entries in column jc of abs(val).
!
! Every entry of d starts from sone when the matrix is flagged as unit,
! from szero otherwise.
!
! Arguments:
!   d(:)  - output, needs at least a%get_ncols() entries; otherwise
!           psb_err_input_asize_small_i_ is pushed on the error stack.
!   a     - input, the matrix.
!
subroutine psb_c_hll_aclsum(d,a)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_aclsum
  implicit none
  class(psb_c_hll_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)             :: d(:)

  integer(psb_ipk_)  :: row1, hk, first, nrblk, width, m, n, hksz
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  character(len=20)  :: name='aclsum'

  call psb_erractionsave(err_act)
  info = 0
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < n) then
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = n
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  d = merge(sone,szero,a%is_unit())

  ! Walk the matrix one hack (block of hksz rows) at a time; hack hk
  ! starts at row row1 and its data starts at a%hkoffs(hk)+1.
  hksz = a%get_hksz()
  do row1 = 1, m, hksz
    hk    = (row1-1)/hksz + 1
    nrblk = min(hksz,m-row1+1)
    width = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz
    first = a%hkoffs(hk) + 1
    call c_hll_aclsum(row1,nrblk,width,a%irn(row1),&
         & a%ja(first),hksz,a%val(first),hksz,&
         & d,info)
    if (info /= psb_success_) goto 9999
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Accumulate one hack: colidx and coef are addressed as
  ! (leading dimension, *) arrays, rowlen(i) is the number of entries
  ! visited in local row i. rowoff and width are not used here.
  subroutine c_hll_aclsum(rowoff,nrows,width,rowlen,colidx,ldc,coef,ldv,&
       & acc,info)
    integer(psb_ipk_), intent(in)   :: rowoff, nrows, width, ldc, ldv
    integer(psb_ipk_), intent(in)   :: colidx(ldc,*), rowlen(*)
    complex(psb_spk_), intent(in)   :: coef(ldv,*)
    real(psb_spk_), intent(inout)   :: acc(*)
    integer(psb_ipk_), intent(out)  :: info

    integer(psb_ipk_) :: ii, jj, col

    info = psb_success_
    do ii = 1, nrows
      do jj = 1, rowlen(ii)
        col      = colidx(ii,jj)
        acc(col) = acc(col) + abs(coef(ii,jj))
      end do
    end do

  end subroutine c_hll_aclsum

end subroutine psb_c_hll_aclsum
!
! psb_c_hll_allocate_mnnz: set up an empty m x n HLL matrix.
!
! The arrays irn, idiag (m entries), hkoffs (m+1 entries), ja and val
! (m*nz_ entries, with nz_ the per-row size derived from nz) are
! (re)allocated; irn and idiag are zeroed; the matrix is then given its
! sizes and flagged through set_bld, set_triangle(.false.),
! set_unit(.false.), set_dupl(psb_dupl_def_), set_hksz(psb_hksz_def_)
! and set_host.
!
! Arguments:
!   m, n  - input, number of rows / columns (must be >= 0).
!   a     - input/output, the matrix being set up.
!   nz    - optional input, requested number of nonzeros; when absent
!           max(7*m,7*n,1) is used.
!
! psb_err_iarg_neg_ is pushed (with the argument position) when m, n or
! the derived per-row size is negative.
!
subroutine psb_c_hll_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)      :: m,n
  class(psb_c_hll_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional :: nz

  integer(psb_ipk_)  :: err_act, info, nz_, mdiv
  character(len=20)  :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/ione/))
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/2*ione/))
    goto 9999
  endif

  ! m == 0 is a legal size: never divide by it. For m > 0 the result
  ! is the same ceiling division as before.
  mdiv = max(m,ione)
  if (present(nz)) then
    nz_ = (max(nz,ione) + m - 1)/mdiv
  else
    nz_ = (max(7*m,7*n,ione) + m - 1)/mdiv
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/3*ione/))
    goto 9999
  endif

  if (info == psb_success_) call psb_realloc(m,a%irn,info)
  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
  if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info)
  if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info)
  if (info == psb_success_) call psb_realloc(m*nz_,a%val,info)
  if (info == psb_success_) then
    a%irn   = 0
    a%idiag = 0
    call a%set_nrows(m)
    call a%set_ncols(n)
    call a%set_bld()
    call a%set_triangle(.false.)
    call a%set_unit(.false.)
    call a%set_dupl(psb_dupl_def_)
    call a%set_hksz(psb_hksz_def_)
    call a%set_host()
  end if
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_hll_allocate_mnnz
!
! psb_c_hll_arwsum: absolute row sums of an HLL matrix,
!     d(i) = sum over the stored entries of row i of abs(val).
!
! Every entry of d starts from sone when the matrix is flagged as unit,
! from szero otherwise.
!
! Arguments:
!   d(:)  - output, needs at least a%get_nrows() entries; otherwise
!           psb_err_input_asize_small_i_ is pushed on the error stack.
!   a     - input, the matrix.
!
subroutine psb_c_hll_arwsum(d,a)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_arwsum
  implicit none
  class(psb_c_hll_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)             :: d(:)

  integer(psb_ipk_)  :: row1, hk, first, nrblk, width, m, n, hksz
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  character(len=20)  :: name='arwsum'

  call psb_erractionsave(err_act)
  info = 0
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < m) then
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = m
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  d = merge(sone,szero,a%is_unit())

  ! Walk the matrix one hack (block of hksz rows) at a time; hack hk
  ! starts at row row1 and its data starts at a%hkoffs(hk)+1.
  hksz = a%get_hksz()
  do row1 = 1, m, hksz
    hk    = (row1-1)/hksz + 1
    nrblk = min(hksz,m-row1+1)
    width = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz
    first = a%hkoffs(hk) + 1
    call c_hll_arwsum(row1,nrblk,width,a%irn(row1),&
         & a%ja(first),hksz,a%val(first),hksz,&
         & d,info)
    if (info /= psb_success_) goto 9999
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Accumulate one hack: local row i maps to entry rowoff+i-1 of acc,
  ! rowlen(i) is the number of entries visited in that row. colidx and
  ! width are not used here.
  subroutine c_hll_arwsum(rowoff,nrows,width,rowlen,colidx,ldc,coef,ldv,&
       & acc,info)
    integer(psb_ipk_), intent(in)   :: rowoff, nrows, width, ldc, ldv
    integer(psb_ipk_), intent(in)   :: colidx(ldc,*), rowlen(*)
    complex(psb_spk_), intent(in)   :: coef(ldv,*)
    real(psb_spk_), intent(inout)   :: acc(*)
    integer(psb_ipk_), intent(out)  :: info

    integer(psb_ipk_) :: ii, jj, grow

    info = psb_success_
    do ii = 1, nrows
      grow = rowoff + ii - 1
      do jj = 1, rowlen(ii)
        acc(grow) = acc(grow) + abs(coef(ii,jj))
      end do
    end do

  end subroutine c_hll_arwsum

end subroutine psb_c_hll_arwsum
!
! psb_c_hll_colsum: column sums of an HLL matrix,
!     d(jc) = sum over the stored entries in column jc of val.
!
! This is the plain (signed, complex) sum; the absolute-value variant
! is psb_c_hll_aclsum. Every entry of d starts from cone when the
! matrix is flagged as unit, from czero otherwise.
!
! Arguments:
!   d(:)  - output, needs at least a%get_ncols() entries; otherwise
!           psb_err_input_asize_small_i_ is pushed on the error stack.
!   a     - input, the matrix.
!
subroutine psb_c_hll_colsum(d,a)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_colsum
  implicit none
  class(psb_c_hll_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(out)          :: d(:)

  integer(psb_ipk_)  :: i, j, k, m, n, ir, hksz, mxrwl
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  character(len=20)  :: name='colsum'

  call psb_erractionsave(err_act)
  info = 0
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < n) then
    info=psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = n
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (a%is_unit()) then
    d = cone
  else
    d = czero
  end if

  ! Walk the matrix one hack (block of hksz rows) at a time; hack j
  ! starts at row i and its data starts at a%hkoffs(j)+1.
  hksz = a%get_hksz()
  j    = 1
  do i=1,m,hksz
    ir    = min(hksz,m-i+1)
    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
    k     = a%hkoffs(j) + 1
    call c_hll_colsum(i,ir,mxrwl,a%irn(i),&
         & a%ja(k),hksz,a%val(k),hksz, &
         & d,info)
    if (info /= psb_success_) goto 9999
    j = j + 1
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Accumulate one hack: ja and val are addressed as
  ! (leading dimension, *) arrays, irn(i) is the number of entries
  ! visited in local row i. ir and n are not used here.
  subroutine c_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,&
       & d,info)
    integer(psb_ipk_), intent(in)    :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    complex(psb_spk_), intent(in)    :: val(ldv,*)
    complex(psb_spk_), intent(inout) :: d(*)
    integer(psb_ipk_), intent(out)   :: info

    integer(psb_ipk_) :: i,j, jc

    info = psb_success_
    do i=1,m
      do j=1, irn(i)
        jc    = ja(i,j)
        ! Signed sum: taking abs() here would turn this routine into a
        ! duplicate of aclsum.
        d(jc) = d(jc) + val(i,j)
      end do
    end do

  end subroutine c_hll_colsum

end subroutine psb_c_hll_colsum
!
! psb_c_hll_csgetblk: extract rows imin:imax of the HLL matrix a into the
! COO matrix b by delegating to a%csget on b's ia/ja/val arrays.
!
! Arguments:
!   imin,imax - row range to extract.
!   a         - source HLL matrix.
!   b         - destination COO matrix; on success its nonzero count is
!               set to nzin+nzout, it is flagged via set_host() and
!               b%fix(info) is called.
!   info      - psb_success_ on success; otherwise whatever a%csget or
!               b%fix returned.
!   jmin,jmax,iren,rscale,cscale - optional, forwarded unchanged to
!               a%csget.
!   append    - optional; when .true. the extracted entries are placed
!               after the entries already stored in b.
!
! When appending, the starting offset nzin has to be the number of entries
! currently held by the destination b: that is the quantity which, added
! to nzout, gives b's new size in the set_nzeros call below.
!
subroutine psb_c_hll_csgetblk(imin,imax,a,b,info,&
     & jmin,jmax,iren,append,rscale,cscale)
  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csgetblk
  implicit none

  class(psb_c_hll_sparse_mat), intent(in)    :: a
  class(psb_c_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(in)              :: imin,imax
  integer(psb_ipk_), intent(out)             :: info
  logical, intent(in), optional              :: append
  integer(psb_ipk_), intent(in), optional    :: iren(:)
  integer(psb_ipk_), intent(in), optional    :: jmin,jmax
  logical, intent(in), optional              :: rscale,cscale

  integer(psb_ipk_) :: err_act, nzin, nzout
  character(len=20) :: name='hll_getblk'
  logical           :: append_

  call psb_erractionsave(err_act)
  info = psb_success_

  append_ = .false.
  if (present(append)) append_ = append

  if (append_) then
    ! Keep what b already contains and write after it.
    nzin = b%get_nzeros()
  else
    nzin = 0
  end if

  ! Absent optional arguments are passed through as absent.
  call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,&
       & jmin=jmin, jmax=jmax, iren=iren, append=append_, &
       & nzin=nzin, rscale=rscale, cscale=cscale)

  if (info /= psb_success_) goto 9999

  call b%set_nzeros(nzin+nzout)
  call b%set_host()
  call b%fix(info)
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_hll_csgetblk
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csgetptn + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_hll_csgetrow + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_hll_csmm + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + complex(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_c_hll_csmv + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + 
hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_c_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & 
a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == cone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == cone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*conjg(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if 
(alpha == czero) then + if (beta == czero) then + do i=1,m + y(ir+i-1) = czero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == czero) then + do i=1,m + tmp = czero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = czero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_c_hll_csmv_inner + + subroutine psb_c_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == czero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_8 + + subroutine psb_c_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: 
is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == czero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_24 + + subroutine psb_c_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == czero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_16 + + subroutine psb_c_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) 
:: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == czero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_32 + + subroutine psb_c_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == czero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_4 + +end subroutine psb_c_hll_csmv diff --git a/ext/impl/psb_c_hll_csnm1.f90 b/ext/impl/psb_c_hll_csnm1.f90 new file mode 100644 index 00000000..25daa75d --- /dev/null +++ b/ext/impl/psb_c_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! 
!
! psb_c_hll_csnm1: 1-norm (maximum absolute column sum) of a complex
! sparse matrix stored in HLL format.
!
! Arguments:
!   a    - the HLL matrix.
! Result:
!   res  - real; the largest entry of the vector of column sums of
!          abs(val). Each column sum starts from sone when a%is_unit() is
!          true and from szero otherwise. res is szero when the matrix
!          has no columns or when the work vector cannot be allocated (in
!          which case psb_err_alloc_dealloc_ is pushed).
!
function psb_c_hll_csnm1(a) result(res)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csnm1

  implicit none
  class(psb_c_hll_sparse_mat), intent(in) :: a
  real(psb_spk_)                          :: res

  integer(psb_ipk_) :: i, j, k, m, n, ir, info, hksz, mxrwl
  integer(psb_ipk_) :: err_act
  real(psb_spk_), allocatable :: vt(:)
  character(len=20) :: name='c_hll_csnm1'

  call psb_erractionsave(err_act)
  info = psb_success_

  res = szero
  if (a%is_dev()) call a%sync()
  n = a%get_ncols()
  m = a%get_nrows()

  ! vt(jc) accumulates the absolute sum of column jc.
  allocate(vt(n),stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info,name)
    goto 9999
  end if
  if (a%is_unit()) then
    vt = sone
  else
    vt = szero
  end if

  ! One kernel call per hack (group of up to hksz consecutive rows); hack
  ! j starts at position hkoffs(j)+1 of ja/val and is addressed by the
  ! kernel as a 2-D array with leading dimension hksz.
  hksz = a%get_hksz()
  j    = 1
  do i=1,m,hksz
    ir    = min(hksz,m-i+1)
    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
    k     = a%hkoffs(j) + 1
    call psb_c_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),&
         & a%ja(k),hksz,a%val(k),hksz,&
         & vt,info)
    if (info /= psb_success_) goto 9999
    j = j + 1
  end do

  ! maxval of a zero-size array is -huge(res); keep the szero set above
  ! in that case so the norm is never negative.
  if (n > 0) res = maxval(vt)

  call psb_erractionrestore(err_act)
  return


9999 call psb_error_handler(err_act)
  return

contains

  !
  ! Add abs(val(i,j)) of every stored entry of one hack to vt(ja(i,j)).
  !   ir, n   - first global row / hack width from the caller (unused here)
  !   m       - number of rows in the hack
  !   irn(i)  - number of stored entries in local row i
  !   info    - always psb_success_ on return
  !
  subroutine psb_c_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,&
       &  vt,info)
    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    complex(psb_spk_), intent(in)  :: val(ldv,*)
    real(psb_spk_), intent(inout)  :: vt(*)
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: i, j, jc

    info = psb_success_
    do i=1,m
      do j=1, irn(i)
        jc     = ja(i,j)
        vt(jc) = vt(jc) + abs(val(i,j))
      end do
    end do
  end subroutine psb_c_hll_csnm1_inner

end function psb_c_hll_csnm1
!
! psb_c_hll_csnmi: infinity norm (maximum absolute row sum) of a complex
! sparse matrix stored in HLL format.
!
! Result:
!   res - real; the largest, over all rows, of the sum of abs(val) of the
!         stored entries of the row, each sum starting from sone when
!         a%is_unit() is true and from szero otherwise. szero for a
!         matrix without rows.
!
function psb_c_hll_csnmi(a) result(res)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csnmi
  implicit none
  class(psb_c_hll_sparse_mat), intent(in) :: a
  real(psb_spk_)                          :: res

  integer(psb_ipk_) :: row0, hack, nrows, nrows_hack, hack_size
  integer(psb_ipk_) :: offset, width, info, err_act
  logical           :: unit_diag
  character(len=20) :: name='c_csnmi'

  call psb_erractionsave(err_act)

  info = 0
  res  = szero
  if (a%is_dev()) call a%sync()

  nrows     = a%get_nrows()
  unit_diag = a%is_unit()
  hack_size = a%get_hksz()

  ! Visit the hacks in order; row0 is the first global row of the hack and
  ! its ordinal follows from row0 because every hack starts a multiple of
  ! hack_size rows after row 1.
  do row0 = 1, nrows, hack_size
    hack       = (row0-1)/hack_size + 1
    nrows_hack = min(hack_size, nrows-row0+1)
    width      = (a%hkoffs(hack+1) - a%hkoffs(hack))/hack_size
    offset     = a%hkoffs(hack) + 1
    call psb_c_hll_csnmi_inner(row0,nrows_hack,width,a%irn(row0),&
         & a%ja(offset),hack_size,a%val(offset),hack_size,&
         & res,unit_diag,info)
    if (info /= psb_success_) goto 9999
  end do

  call psb_erractionrestore(err_act)
  return


9999 call psb_error_handler(err_act)
  return

contains

  !
  ! Fold the absolute row sums of one hack into the running maximum res.
  !   ir, n, ja, ldj - passed by the caller but not referenced here
  !   m       - number of rows in the hack
  !   irn(i)  - number of stored entries in local row i
  !   is_unit - when true each row sum starts from sone instead of szero
  !   info    - always psb_success_ on return
  !
  subroutine psb_c_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,&
       &  res,is_unit,info)
    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    complex(psb_spk_), intent(in)  :: val(ldv,*)
    real(psb_spk_), intent(inout)  :: res
    logical, intent(in)            :: is_unit
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: row, col
    real(psb_spk_)    :: start_value, row_sum

    info        = psb_success_
    start_value = merge(sone, szero, is_unit)

    do row = 1, m
      row_sum = start_value
      do col = 1, irn(row)
        row_sum = row_sum + abs(val(row,col))
      end do
      res = max(row_sum,res)
    end do
  end subroutine psb_c_hll_csnmi_inner

end function psb_c_hll_csnmi
!
! psb_c_hll_csput_a: update coefficients of an HLL matrix that is in the
! update state.
!
! Arguments:
!   nz            - number of coefficients supplied. nz == 0 is a no-op,
!                   nz < 0 raises psb_err_iarg_neg_.
!   ia, ja, val   - row indices, column indices and values; each must hold
!                   at least nz entries (psb_err_input_asize_invalid_i_
!                   otherwise, with int_err(1) giving the argument number).
!   a             - the matrix. Only a%val is modified; the sparsity
!                   pattern is fixed.
!   imin,imax,jmin,jmax - accepted for interface compatibility; not used
!                   by this implementation.
!   info          - psb_success_, or psb_err_invalid_mat_state_ when the
!                   matrix is in the build state, is in neither the build
!                   nor the update state, or when the search/update step
!                   fails (unsorted rows, unsupported duplicate policy, or
!                   a coefficient that is not in the pattern).
!
subroutine psb_c_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)

  use psb_base_mod
  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csput_a
  implicit none

  class(psb_c_hll_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)      :: val(:)
  integer(psb_ipk_), intent(in)      :: nz, ia(:), ja(:), imin,imax,jmin,jmax
  integer(psb_ipk_), intent(out)     :: info

  integer(psb_ipk_) :: err_act, int_err(5)
  character(len=20) :: name='c_hll_csput_a'

  call psb_erractionsave(err_act)
  info    = psb_success_
  int_err = 0

  if (nz < 0) then
    info = psb_err_iarg_neg_
    int_err(1)=1
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (size(ia) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=2
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (size(ja) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=3
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (size(val) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=4
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  ! Nothing to insert: leave the matrix untouched, but still undo the
  ! psb_erractionsave above before returning.
  if (nz == 0) then
    call psb_erractionrestore(err_act)
    return
  end if

  if (a%is_bld()) then
    ! Build phase should only ever be in COO
    info = psb_err_invalid_mat_state_

  else if (a%is_upd()) then
    if (a%is_dev()) call a%sync()

    call psb_c_hll_srch_upd(nz,ia,ja,val,a,&
         & imin,imax,jmin,jmax,info)

    ! Any failure of the search is reported as an invalid matrix state.
    if (info /= psb_success_) info = psb_err_invalid_mat_state_
    call a%set_host()

  else
    ! State is wrong.
    info = psb_err_invalid_mat_state_
  end if
  if (info /= psb_success_) then
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  !
  ! Locate each (ia(i),ja(i)) in the stored pattern and update the matching
  ! coefficient according to the duplicate policy a%get_dupl():
  !   psb_dupl_ovwrt_, psb_dupl_err_ : a%val(pos) = val(i)
  !   psb_dupl_add_                  : a%val(pos) = a%val(pos) + val(i)
  ! Entries whose row index is outside 1..a%get_nrows() are skipped.
  !
  ! info on return:
  !   psb_success_ - all entries processed
  !   -4           - a%is_sorted() is false (the binary search needs
  !                  sorted column indices within each row)
  !   -3           - unsupported duplicate policy
  !   i > 0        - entry i was not found in the pattern
  !
  subroutine psb_c_hll_srch_upd(nz,ia,ja,val,a,&
       & imin,imax,jmin,jmax,info)

    implicit none

    class(psb_c_hll_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in)  :: nz, imin,imax,jmin,jmax
    integer(psb_ipk_), intent(in)  :: ia(:),ja(:)
    complex(psb_spk_), intent(in)  :: val(:)
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: i, ir, ic, ip, ipos, nr, rwl, dupl, &
         & hksz, hk, hkzpnt, ihkr, lastrow
    logical           :: accumulate
    integer(psb_ipk_) :: debug_level, debug_unit
    character(len=20) :: name='c_hll_srch_upd'

    info        = psb_success_
    debug_unit  = psb_get_debug_unit()
    debug_level = psb_get_debug_level()

    dupl = a%get_dupl()

    if (.not.a%is_sorted()) then
      info = -4
      return
    end if

    select case(dupl)
    case(psb_dupl_ovwrt_,psb_dupl_err_)
      ! Overwrite.
      ! Cannot test for error, should have been caught earlier.
      accumulate = .false.
    case(psb_dupl_add_)
      accumulate = .true.
    case default
      info = -3
      if (debug_level >= psb_debug_serial_) &
           & write(debug_unit,*) trim(name),&
           & ': Duplicate handling: ',dupl
      return
    end select

    lastrow = -1
    rwl     = 0
    hkzpnt  = 0
    ihkr    = 0
    nr      = a%get_nrows()
    hksz    = a%get_hksz()

    do i=1, nz
      ir = ia(i)
      ic = ja(i)

      if ((ir > 0).and.(ir <= nr)) then
        if (ir /= lastrow) then
          ! New row: find its hack (hk), its position inside the hack
          ! (ihkr, 1-based), the hack's offset in ja/val and the number
          ! of stored entries of the row.
          hk      = (ir-1)/hksz
          lastrow = ir
          ihkr    = ir - hk*hksz
          hk      = hk + 1
          hkzpnt  = a%hkoffs(hk)
          rwl     = a%irn(ir)
        end if

        ! Consecutive entries of a row are hksz positions apart, hence
        ! the strided section handed to the binary search.
        ip = psb_bsrch(ic,rwl,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(rwl-1)*hksz:hksz))
        if (ip>0) then
          ipos = hkzpnt+ihkr+(ip-1)*hksz
          if (accumulate) then
            a%val(ipos) = a%val(ipos) + val(i)
          else
            a%val(ipos) = val(i)
          end if
        else
          if (debug_level >= psb_debug_serial_) &
               & write(debug_unit,*) trim(name),&
               & ': Was searching ',ic,' in: ',rwl,&
               & ' : ',a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(rwl-1)*hksz:hksz)
          info = i
          return
        end if

      else
        if (debug_level >= psb_debug_serial_) &
             & write(debug_unit,*) trim(name),&
             & ': Discarding row that does not belong to us.'
      end if

    end do

  end subroutine psb_c_hll_srch_upd

end subroutine psb_c_hll_csput_a
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_cssm + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + complex(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. 
(a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_c_hll_cssv + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + complex(psb_spk_) :: acc + complex(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_c_hll_get_diag + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = cone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_c_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = czero + end if + end do + + end subroutine psb_c_hll_get_diag_inner + +end subroutine psb_c_hll_get_diag diff --git a/ext/impl/psb_c_hll_maxval.f90 b/ext/impl/psb_c_hll_maxval.f90 new file mode 100644 index 00000000..ff82bb40 --- /dev/null +++ b/ext/impl/psb_c_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. 
Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Largest modulus found in the coefficient array a%val.
+! When the matrix is flagged as unit the result is at least sone.
+!
+function psb_c_hll_maxval(a) result(res)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_maxval
+  implicit none
+  class(psb_c_hll_sparse_mat), intent(in) :: a
+  real(psb_spk_)                          :: res
+
+  ! a%sync() is invoked first whenever a%is_dev() reports true.
+  if (a%is_dev()) then
+    call a%sync()
+  end if
+
+  res = maxval(abs(a%val))
+
+  if (a%is_unit()) then
+    res = max(res,sone)
+  end if
+
+end function psb_c_hll_maxval
diff --git a/ext/impl/psb_c_hll_mold.f90 b/ext/impl/psb_c_hll_mold.f90
new file mode 100644
index 00000000..4a6204b0
--- /dev/null
+++ b/ext/impl/psb_c_hll_mold.f90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1.
Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Make b a freshly allocated object of dynamic type psb_c_hll_sparse_mat.
+! A previously allocated b is freed (b%free) and deallocated first.
+! On any allocate/deallocate failure info is set to
+! psb_err_alloc_dealloc_, the error is pushed and the handler is invoked.
+! The argument a only selects this type-bound implementation; it is not
+! read.
+!
+subroutine psb_c_hll_mold(a,b,info)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_mold
+  implicit none
+  class(psb_c_hll_sparse_mat), intent(in)                  :: a
+  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='hll_mold'
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  ! Allocate only if releasing the old target (when any) succeeded.
+  if (info == 0) then
+    allocate(psb_c_hll_sparse_mat :: b, stat=info)
+  end if
+
+  if (info == psb_success_) return
+
+  ! Failure path.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_hll_mold
diff --git a/ext/impl/psb_c_hll_print.f90 b/ext/impl/psb_c_hll_print.f90
new file mode 100644
index 00000000..a5eec378
--- /dev/null
+++ b/ext/impl/psb_c_hll_print.f90
@@ -0,0 +1,134 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Write the matrix on unit iout in MatrixMarket coordinate form.
+!
+!   iout      output unit
+!   a         matrix to be printed
+!   iv        optional relabelling applied to both row and column indices
+!   head      optional text written as a '% ' comment line after the banner
+!   ivr, ivc  optional separate relabellings for row / column indices;
+!             consulted only when iv is absent
+!
+! The banner declares a complex matrix, matching the complex(psb_spk_)
+! coefficients that are written. After the banner comes a line with
+! nrows, ncols, nzeros, then one line per stored entry: row i contributes
+! a%irn(i) entries read from a%ja / a%val starting at
+! a%hkoffs(hk)+(position of the row inside its group of hksz rows),
+! with stride hksz.
+!
+subroutine psb_c_hll_print(iout,a,iv,head,ivr,ivc)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_print
+  implicit none
+
+  integer(psb_ipk_), intent(in)           :: iout
+  class(psb_c_hll_sparse_mat), intent(in) :: a
+  integer(psb_lpk_), intent(in), optional :: iv(:)
+  character(len=*), optional              :: head
+  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+
+  character(len=80) :: frmt
+  integer(psb_ipk_) :: irs, i, j, nr, nc, nz, k, hksz, hk
+  integer(psb_lpk_) :: lrow, lcol
+
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% COO'
+
+  if (a%is_dev()) call a%sync()
+
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nz = a%get_nzeros()
+  frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
+
+  hksz = a%get_hksz()
+
+  write(iout,*) nr, nc, nz
+  do i=1, nr
+    ! Locate the first stored entry of row i.
+    irs = (i-1)/hksz
+    hk  = irs + 1
+    k   = a%hkoffs(hk) + (i-(irs*hksz))
+    do j=1,a%irn(i)
+      ! Pick the labels to print: iv overrides everything, otherwise
+      ! ivr / ivc are applied independently when present.
+      if (present(iv)) then
+        lrow = iv(i)
+        lcol = iv(a%ja(k))
+      else
+        lrow = i
+        lcol = a%ja(k)
+        if (present(ivr)) lrow = ivr(i)
+        if (present(ivc)) lcol = ivc(a%ja(k))
+      end if
+      write(iout,frmt) lrow,lcol,a%val(k)
+      k = k + hksz
+    end do
+  end do
+
+end subroutine psb_c_hll_print
diff --git a/ext/impl/psb_c_hll_reallocate_nz.f90 b/ext/impl/psb_c_hll_reallocate_nz.f90
new file mode 100644
index 00000000..44d9cfc9
--- /dev/null
+++ b/ext/impl/psb_c_hll_reallocate_nz.f90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Resize the index array a%ja and the coefficient array a%val to
+! max(nz,ione) elements through psb_realloc.
+! If either reallocation fails, psb_err_alloc_dealloc_ is pushed on the
+! error stack and the error handler is invoked.
+!
+subroutine psb_c_hll_reallocate_nz(nz,a)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in)              :: nz
+  class(psb_c_hll_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: newsz
+  Integer(Psb_ipk_) :: err_act, info
+  character(len=20) :: name='c_hll_reallocate_nz'
+
+  call psb_erractionsave(err_act)
+
+  ! Open question inherited from the original author: it is not settled
+  ! what this operation should really do for this storage format.
+  newsz = max(nz,ione)
+
+  call psb_realloc(newsz,a%ja,info)
+  if (info == psb_success_) then
+    call psb_realloc(newsz,a%val,info)
+  end if
+
+  if (info == psb_success_) then
+    call psb_erractionrestore(err_act)
+  else
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    call psb_error_handler(err_act)
+  end if
+  return
+
+end subroutine psb_c_hll_reallocate_nz
diff --git a/ext/impl/psb_c_hll_reinit.f90 b/ext/impl/psb_c_hll_reinit.f90
new file mode 100644
index 00000000..82d5cb16
--- /dev/null
+++ b/ext/impl/psb_c_hll_reinit.f90
@@ -0,0 +1,77 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Put an assembled matrix back into the update state.
+!
+!   a      matrix to be re-initialised
+!   clear  optional; when absent or .true. all stored coefficients are set
+!          to czero, when .false. they are left as they are
+!
+! Behaviour by state:
+!   build or update : nothing is changed;
+!   assembled       : a%sync() is called if a%is_dev(), coefficients are
+!                     optionally zeroed, then set_upd() and set_host();
+!   anything else   : psb_err_invalid_mat_state_ is pushed and the error
+!                     handler is invoked.
+!
+! Every normal exit goes through psb_erractionrestore so that the error
+! action saved on entry is always put back, including the "nothing to
+! do" case.
+!
+subroutine psb_c_hll_reinit(a,clear)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_reinit
+  implicit none
+
+  class(psb_c_hll_sparse_mat), intent(inout) :: a
+  logical, intent(in), optional :: clear
+
+  Integer(Psb_ipk_) :: err_act, info
+  character(len=20) :: name='reinit'
+  logical :: clear_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+
+  if (present(clear)) then
+    clear_ = clear
+  else
+    clear_ = .true.
+  end if
+
+  if (a%is_bld() .or. a%is_upd()) then
+    ! do nothing, but fall through to the common exit so that the
+    ! error action saved above is restored
+    continue
+  else if (a%is_asb()) then
+    if (a%is_dev()) call a%sync()
+    if (clear_) a%val(:) = czero
+    call a%set_upd()
+    call a%set_host()
+  else
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_c_hll_reinit
diff --git a/ext/impl/psb_c_hll_rowsum.f90 b/ext/impl/psb_c_hll_rowsum.f90
new file mode 100644
index 00000000..e6eea227
--- /dev/null
+++ b/ext/impl/psb_c_hll_rowsum.f90
@@ -0,0 +1,110 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Sum of the stored coefficients of each row.
+!
+!   d  output vector; all of d is first set to cone when the matrix is
+!      flagged as unit and to czero otherwise, then the a%irn(i) stored
+!      coefficients of row i are added to d(i) for i = 1..nrows
+!   a  input matrix
+!
+! If size(d) is smaller than the number of rows,
+! psb_err_input_asize_small_i_ is pushed and the error handler invoked.
+!
+subroutine psb_c_hll_rowsum(d,a)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_rowsum
+  implicit none
+  class(psb_c_hll_sparse_mat), intent(in) :: a
+  complex(psb_spk_), intent(out)          :: d(:)
+
+  integer(psb_ipk_) :: first, blk, offs, nrows, nrblk, width, hksz
+  Integer(Psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='rowsum'
+
+  call psb_erractionsave(err_act)
+  info = 0
+  if (a%is_dev()) then
+    call a%sync()
+  end if
+
+  nrows = a%get_nrows()
+  if (size(d) < nrows) then
+    info       = psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = nrows
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! Starting value of every sum.
+  d = merge(cone,czero,a%is_unit())
+
+  ! Walk the rows in groups of hksz; blk counts the groups.
+  hksz = a%get_hksz()
+  blk  = 0
+  do first=1,nrows,hksz
+    blk   = blk + 1
+    nrblk = min(hksz,nrows-first+1)
+    width = (a%hkoffs(blk+1) - a%hkoffs(blk))/hksz
+    offs  = a%hkoffs(blk) + 1
+    call c_hll_rowsum(first,nrblk,width,a%irn(first),&
+         & a%ja(offs),hksz,a%val(offs),hksz, &
+         & d,info)
+    if (info /= psb_success_) goto 9999
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  !
+  ! Accumulate into d(ir:ir+m-1) the row sums of one group of m rows,
+  ! viewed as val(ldv,*); local row i holds irn(i) coefficients.
+  ! n, ja and ldj are part of the interface but are not referenced.
+  !
+  subroutine c_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,&
+       & d,info)
+    integer(psb_ipk_), intent(in)    :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    complex(psb_spk_), intent(in)    :: val(ldv,*)
+    complex(psb_spk_), intent(inout) :: d(*)
+    integer(psb_ipk_), intent(out)   :: info
+
+    integer(psb_ipk_) :: lrow, jcol, grow
+
+    info = psb_success_
+    grow = ir
+    do lrow=1,m
+      do jcol=1, irn(lrow)
+        d(grow) = d(grow) + (val(lrow,jcol))
+      end do
+      grow = grow + 1
+    end do
+
+  end subroutine c_hll_rowsum
+
+end subroutine psb_c_hll_rowsum
diff --git a/ext/impl/psb_c_hll_scal.f90 b/ext/impl/psb_c_hll_scal.f90
new file mode 100644
index
00000000..0fd59f15 --- /dev/null +++ b/ext/impl/psb_c_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+!
+! Scaling of the matrix by a vector d (side = 'L' or anything else).
+!
+! As written this routine ALWAYS fails: right after the optional
+! a%sync() it sets info = psb_err_missing_override_method_, pushes the
+! error and jumps to the error exit. Everything between that jump and
+! the normal exit is therefore unreachable.
+!
+!   d     scaling vector
+!   a     matrix to be scaled
+!   info  on exit psb_err_missing_override_method_ (see above)
+!   side  optional; not examined on the path that is actually executed
+!
+subroutine psb_c_hll_scal(d,a,info,side)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_scal
+  implicit none
+  class(psb_c_hll_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, intent(in), optional :: side
+
+  Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir
+  character(len=20) :: name='scal'
+  character :: side_
+  logical :: left
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (a%is_dev()) call a%sync()
+
+  ! Unconditional "not implemented" exit.
+  ! NOTE(review): ierr has not been assigned at this point.
+  info = psb_err_missing_override_method_
+  call psb_errpush(info,name,i_err=ierr)
+  goto 9999
+
+  ! ---- unreachable from here to the normal exit ----
+  side_ = 'L'
+  if (present(side)) then
+    side_ = psb_toupper(side)
+  end if
+
+  left = (side_ == 'L')
+
+  ld = size(d)
+  if (left) then
+    m = a%get_nrows()
+    ! NOTE(review): unlike the branch below, info is not set to a size
+    ! error code here before the push.
+    if (ld < m) then
+      ierr(1) = 2; ierr(2) = ld;
+      call psb_errpush(info,name,i_err=ierr)
+      goto 9999
+    end if
+  else
+    n = a%get_ncols()
+    if (ld < n) then
+      info=psb_err_input_asize_invalid_i_
+      ierr(1) = 2; ierr(2) = ld;
+      call psb_errpush(info,name,i_err=ierr)
+      goto 9999
+    end if
+  end if
+
+  ! NOTE(review): m is assigned only in the left branch above, yet it
+  ! bounds this loop in both cases; to be fixed before enabling the code.
+  hksz = a%get_hksz()
+  j = 1
+  do i=1,m,hksz
+    ir = min(hksz,m-i+1)
+    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
+    k = a%hkoffs(j) + 1
+    call psb_c_hll_scal_inner(i,ir,mxrwl,a%irn(i),&
+         & a%ja(k),hksz,a%val(k),hksz,&
+         & left,d,info)
+    if (info /= psb_success_) goto 9999
+    j = j + 1
+  end do
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  !
+  ! Scale one group of m rows, viewed as val(ldv,*) / ja(ldj,*), where
+  ! local row i holds irn(i) coefficients.
+  !   left = .true.  : val(i,j) is multiplied by d(ir+i-1)
+  !   left = .false. : val(i,j) is multiplied by d(ja(i,j))
+  ! n is part of the interface but is not referenced.
+  !
+  subroutine psb_c_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info)
+    integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    complex(psb_spk_), intent(in) :: d(*)
+    complex(psb_spk_), intent(inout) :: val(ldv,*)
+    logical, intent(in) :: left
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: i,j,k, m4, jc
+
+    info = psb_success_
+
+    if (left) then
+      do i=1,m
+        do j=1, irn(i)
+          val(i,j) = val(i,j)*d(ir+i-1)
+        end do
+      end do
+    else
+      do i=1,m
+        do j=1, irn(i)
+          jc = ja(i,j)
+          val(i,j) = val(i,j)*d(jc)
+        end do
+      end do
+
+    end if
+
+  end subroutine psb_c_hll_scal_inner
+
+
+end subroutine psb_c_hll_scal
diff --git a/ext/impl/psb_c_hll_scals.f90 b/ext/impl/psb_c_hll_scals.f90
new file mode 100644
index 00000000..13a03a22
--- /dev/null
+++ b/ext/impl/psb_c_hll_scals.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Multiply every stored coefficient of the matrix by the scalar d.
+!
+!   d     scalar factor
+!   a     matrix, scaled in place; if it is flagged as unit,
+!         make_nonunit() is called before scaling
+!   info  always psb_success_ on exit
+!
+! The matrix is marked with set_host() after the update.
+!
+subroutine psb_c_hll_scals(d,a,info)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_scals
+  implicit none
+  class(psb_c_hll_sparse_mat), intent(inout) :: a
+  complex(psb_spk_), intent(in)              :: d
+  integer(psb_ipk_), intent(out)             :: info
+
+  Integer(Psb_ipk_) :: err_act, idx
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (a%is_dev()) then
+    call a%sync()
+  end if
+  if (a%is_unit()) call a%make_nonunit()
+
+  do idx = lbound(a%val,1), ubound(a%val,1)
+    a%val(idx) = a%val(idx) * d
+  end do
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+end subroutine psb_c_hll_scals
diff --git a/ext/impl/psb_c_mv_dia_from_coo.f90 b/ext/impl/psb_c_mv_dia_from_coo.f90
new file mode 100644
index 00000000..99871348
--- /dev/null
+++ b/ext/impl/psb_c_mv_dia_from_coo.f90
@@ -0,0 +1,62 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Move the contents of the COO matrix b into the DIA matrix a.
+!
+! b is first fixed (b%fix) unless it is already flagged as ordered by
+! rows; a failure there is returned to the caller as is. The data are
+! then copied with a%cp_from_coo and, on success, b is freed. Any
+! failure of the copy is reported as psb_err_alloc_dealloc_ and b is
+! left allocated.
+!
+subroutine psb_c_mv_dia_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_mv_dia_from_coo
+  implicit none
+
+  class(psb_c_dia_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) then
+    call b%fix(info)
+    if (info /= psb_success_) return
+  end if
+
+  call a%cp_from_coo(b,info)
+  if (info == 0) then
+    call b%free()
+  else
+    info = psb_err_alloc_dealloc_
+  end if
+  return
+
+end subroutine psb_c_mv_dia_from_coo
diff --git a/ext/impl/psb_c_mv_dia_to_coo.f90 b/ext/impl/psb_c_mv_dia_to_coo.f90
new file mode 100644
index 00000000..1382cec3
--- /dev/null
+++ b/ext/impl/psb_c_mv_dia_to_coo.f90
@@ -0,0 +1,55 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_c_mv_dia_to_coo(a,b,info)
+  ! Move a DIA matrix into COO storage: a is copied into b and then released.
+  ! If the copy fails a is kept and info is set to psb_err_alloc_dealloc_.
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psb_c_mv_dia_to_coo
+  implicit none
+
+  class(psb_c_dia_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  call a%cp_to_coo(b,info)
+  if (info == 0) then
+    ! Copy succeeded: release the source storage.
+    call a%free()
+  else
+    ! Any failure of the copy is reported as an allocation error.
+    info = psb_err_alloc_dealloc_
+  end if
+
+  return
+
+end subroutine psb_c_mv_dia_to_coo
diff --git a/ext/impl/psb_c_mv_ell_from_coo.f90 b/ext/impl/psb_c_mv_ell_from_coo.f90
new file mode 100644
index 00000000..64da3e8d
--- /dev/null
+++ b/ext/impl/psb_c_mv_ell_from_coo.f90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+!
are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+! Move a COO matrix into ELL storage: b is converted into a and released
+! only if the conversion succeeded; on failure b is kept and info returned.
+subroutine psb_c_mv_ell_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_from_coo
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  ! b%fix is invoked only when b is not flagged as ordered by rows.
+  if (.not.b%is_by_rows()) call b%fix(info)
+  if (info /= psb_success_) return
+
+  call a%cp_from_coo(b,info)
+  ! Do not destroy the source when the copy failed: in that case b is
+  ! the only valid copy of the data the caller still has.
+  if (info /= psb_success_) return
+  call b%free()
+
+  return
+end subroutine psb_c_mv_ell_from_coo
diff --git a/ext/impl/psb_c_mv_ell_from_fmt.f90 b/ext/impl/psb_c_mv_ell_from_fmt.f90
new file mode 100644
index 00000000..d0fa9bc4
--- /dev/null
+++ b/ext/impl/psb_c_mv_ell_from_fmt.f90
@@ -0,0 +1,67 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Move a matrix b of any storage format into the ELL matrix a.
+! ELL input is moved component by component; other formats go through COO.
+subroutine psb_c_mv_ell_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_from_fmt
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(inout)  :: a
+  class(psb_c_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_c_coo_sparse_mat) :: coo_buf
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_c_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+  type is (psb_c_ell_sparse_mat)
+    ! Same format: synchronise b if flagged as on device, then take its arrays.
+    if (b%is_dev()) then
+      call b%sync()
+    end if
+    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+    call move_alloc(b%val,   a%val)
+    call move_alloc(b%ja,    a%ja)
+    call move_alloc(b%idiag, a%idiag)
+    call move_alloc(b%irn,   a%irn)
+    call b%free()
+    call a%set_host()
+  class default
+    ! Generic path: b is moved to a temporary COO, which is then moved to a.
+    call b%mv_to_coo(coo_buf,info)
+    if (info == psb_success_) call a%mv_from_coo(coo_buf,info)
+  end select
+
+end subroutine psb_c_mv_ell_from_fmt
diff --git a/ext/impl/psb_c_mv_ell_to_coo.f90 b/ext/impl/psb_c_mv_ell_to_coo.f90
new file mode 100644
index 00000000..a49e2e3c
--- /dev/null
+++ b/ext/impl/psb_c_mv_ell_to_coo.f90
@@ -0,0 +1,89 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Move an ELL matrix into COO storage: the entries of a are transferred to b
+! one array at a time, then a is released.  info = psb_err_alloc_dealloc_ on
+subroutine psb_c_mv_ell_to_coo(a,b,info)
+  ! (continued) any allocation or deallocation failure.
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_to_coo
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_) :: nza, nr, nc, i, j, k
+
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  ! Taking a path slightly slower but with less memory footprint.
+  ! Guarded: deallocating an unallocated array without stat= is fatal.
+  if (allocated(a%idiag)) deallocate(a%idiag)
+  b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
+  call psb_realloc(nza,b%ia,info)
+  if (info == 0) call psb_realloc(nza,b%ja,info)
+  if (info /= 0) goto 9999
+  k=0
+  do i=1, nr
+    do j=1,a%irn(i)
+      k = k + 1
+      b%ia(k) = i
+      b%ja(k) = a%ja(i,j)
+    end do
+  end do
+  ! The column indices of a are released before b%val is allocated.
+  deallocate(a%ja, stat=info)
+  if (info == 0) call psb_realloc(nza,b%val,info)
+  if (info /= 0) goto 9999
+
+  k=0
+  do i=1, nr
+    do j=1,a%irn(i)
+      k = k + 1
+      b%val(k) = a%val(i,j)
+    end do
+  end do
+  call a%free()
+  call b%set_nzeros(nza)
+  call b%set_host()
+  call b%fix(info)
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+end subroutine psb_c_mv_ell_to_coo
diff --git a/ext/impl/psb_c_mv_ell_to_fmt.f90 b/ext/impl/psb_c_mv_ell_to_fmt.f90
new file mode 100644
index 00000000..3ea02d6b
--- /dev/null
+++ b/ext/impl/psb_c_mv_ell_to_fmt.f90
@@ -0,0 +1,67 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Move the ELL matrix a into a matrix b of any storage format.
+! ELL output is moved component by component; other formats go through COO.
+subroutine psb_c_mv_ell_to_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_to_fmt
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(inout)  :: a
+  class(psb_c_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_c_coo_sparse_mat) :: coo_buf
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_c_coo_sparse_mat)
+    call a%mv_to_coo(b,info)
+    ! Need to fix trivial copies!
+  type is (psb_c_ell_sparse_mat)
+    if (a%is_dev()) then
+      call a%sync()
+    end if
+    b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
+    call move_alloc(a%val,   b%val)
+    call move_alloc(a%ja,    b%ja)
+    call move_alloc(a%idiag, b%idiag)
+    call move_alloc(a%irn,   b%irn)
+    call a%free()
+    call b%set_host()
+  class default
+    ! Generic path: a is moved to a temporary COO, which is then moved to b.
+    call a%mv_to_coo(coo_buf,info)
+    if (info == psb_success_) call b%mv_from_coo(coo_buf,info)
+  end select
+
+end subroutine psb_c_mv_ell_to_fmt
diff --git a/ext/impl/psb_c_mv_hdia_from_coo.f90 b/ext/impl/psb_c_mv_hdia_from_coo.f90
new file mode 100644
index 00000000..4247fdf8
--- /dev/null
+++ b/ext/impl/psb_c_mv_hdia_from_coo.f90
@@ -0,0 +1,60 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+!
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+subroutine psb_c_mv_hdia_from_coo(a,b,info)
+  ! Move a COO matrix into HDIA storage: b is converted into a, then released.
+  ! If the conversion fails b is kept and info is set to psb_err_alloc_dealloc_.
+  use psb_base_mod
+  use psb_c_hdia_mat_mod, psb_protect_name => psb_c_mv_hdia_from_coo
+  implicit none
+
+  class(psb_c_hdia_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout)  :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  info = psb_success_
+
+  if (.not.b%is_by_rows()) then
+    call b%fix(info)
+    if (info /= psb_success_) return
+  end if
+
+  call a%cp_from_coo(b,info)
+  if (info == 0) then
+    ! Conversion succeeded: the source storage is no longer needed.
+    call b%free()
+  else
+    ! Any failure of the copy is reported as an allocation error.
+    info = psb_err_alloc_dealloc_
+  end if
+
+  return
+
+end subroutine psb_c_mv_hdia_from_coo
diff --git a/ext/impl/psb_c_mv_hdia_to_coo.f90 b/ext/impl/psb_c_mv_hdia_to_coo.f90
new file mode 100644
index 00000000..3a91917a
--- /dev/null
+++ b/ext/impl/psb_c_mv_hdia_to_coo.f90
@@ -0,0 +1,55 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_c_mv_hdia_to_coo(a,b,info)
+  ! Move an HDIA matrix into COO storage: a is copied into b and then released.
+  ! If the copy fails a is kept and info is set to psb_err_alloc_dealloc_.
+  use psb_base_mod
+  use psb_c_hdia_mat_mod, psb_protect_name => psb_c_mv_hdia_to_coo
+  implicit none
+
+  class(psb_c_hdia_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout)  :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  info = psb_success_
+
+  call a%cp_to_coo(b,info)
+  if (info == 0) then
+    ! Copy succeeded: release the source storage.
+    call a%free()
+  else
+    ! Any failure of the copy is reported as an allocation error.
+    info = psb_err_alloc_dealloc_
+  end if
+
+  return
+
+end subroutine psb_c_mv_hdia_to_coo
diff --git a/ext/impl/psb_c_mv_hll_from_coo.f90 b/ext/impl/psb_c_mv_hll_from_coo.f90
new file mode 100644
index 00000000..b78bdd80
--- /dev/null
+++ b/ext/impl/psb_c_mv_hll_from_coo.f90
@@ -0,0 +1,58 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+!
software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Move a COO matrix into HLL storage: b is converted into a and then released.
+! If the conversion fails b is kept and info is set to psb_err_alloc_dealloc_.
+subroutine psb_c_mv_hll_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_from_coo
+  implicit none
+
+  class(psb_c_hll_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  integer(psb_ipk_) :: hksz
+
+  info = psb_success_
+  if (.not.b%is_by_rows()) call b%fix(info)
+  ! Stop here if b%fix reported an error instead of converting anyway.
+  if (info /= psb_success_) return
+  hksz = psi_get_hksz()
+  call psi_convert_hll_from_coo(a,hksz,b,info)
+  if (info /= 0) goto 9999
+  call b%free()
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+end subroutine psb_c_mv_hll_from_coo
diff --git a/ext/impl/psb_c_mv_hll_from_fmt.f90 b/ext/impl/psb_c_mv_hll_from_fmt.f90
new file mode 100644
index 00000000..add90355
--- /dev/null
+++ b/ext/impl/psb_c_mv_hll_from_fmt.f90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1.
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+! Move a matrix b of any storage format into the HLL matrix a.
+! HLL input is moved component by component; other formats go through COO.
+subroutine psb_c_mv_hll_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_from_fmt
+  implicit none
+
+  class(psb_c_hll_sparse_mat), intent(inout)  :: a
+  class(psb_c_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_c_coo_sparse_mat) :: coo_buf
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_c_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+  type is (psb_c_hll_sparse_mat)
+    ! Same format: synchronise b if flagged as on device, then take its data.
+    if (b%is_dev()) then
+      call b%sync()
+    end if
+    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
+    call move_alloc(b%val,    a%val)
+    call move_alloc(b%ja,     a%ja)
+    call move_alloc(b%hkoffs, a%hkoffs)
+    call move_alloc(b%idiag,  a%idiag)
+    call move_alloc(b%irn,    a%irn)
+    a%hksz = b%hksz
+    a%nzt  = b%nzt
+    call b%free()
+    call a%set_host()
+  class default
+    ! Generic path: b is moved to a temporary COO, which is then moved to a.
+    call b%mv_to_coo(coo_buf,info)
+    if (info == psb_success_) call a%mv_from_coo(coo_buf,info)
+  end select
+end subroutine psb_c_mv_hll_from_fmt
diff --git a/ext/impl/psb_c_mv_hll_to_coo.f90 b/ext/impl/psb_c_mv_hll_to_coo.f90
new file mode 100644
index 00000000..fbf5dfcd
--- /dev/null
+++ b/ext/impl/psb_c_mv_hll_to_coo.f90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Move an HLL matrix into COO storage: a is copied into b and then released.
+! If the copy fails a is kept and info is set to psb_err_alloc_dealloc_.
+subroutine psb_c_mv_hll_to_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_to_coo
+  implicit none
+
+  class(psb_c_hll_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  call a%cp_to_coo(b,info)
+  if (info == psb_success_) then
+    ! Copy succeeded: release the source storage.
+    call a%free()
+  else
+    ! Any failure of the copy is reported as an allocation error.
+    info = psb_err_alloc_dealloc_
+  end if
+
+  return
+
+end subroutine psb_c_mv_hll_to_coo
diff --git a/ext/impl/psb_c_mv_hll_to_fmt.f90 b/ext/impl/psb_c_mv_hll_to_fmt.f90
new file mode 100644
index 00000000..37d77e85
--- /dev/null
+++ b/ext/impl/psb_c_mv_hll_to_fmt.f90
@@ -0,0 +1,69 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Move the HLL matrix a into a matrix b of any storage format.
+! HLL output is moved component by component; other formats go through COO.
+subroutine psb_c_mv_hll_to_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_to_fmt
+  implicit none
+
+  class(psb_c_hll_sparse_mat), intent(inout)  :: a
+  class(psb_c_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  !locals
+  type(psb_c_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_c_coo_sparse_mat)
+    call a%mv_to_coo(b,info)
+    ! Need to fix trivial copies!
+  type is (psb_c_hll_sparse_mat)
+    if (a%is_dev()) call a%sync()
+    b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
+    call move_alloc(a%irn,    b%irn)
+    call move_alloc(a%hkoffs, b%hkoffs)
+    call move_alloc(a%idiag,  b%idiag)
+    call move_alloc(a%ja,     b%ja)
+    call move_alloc(a%val,    b%val)
+    ! Carry over both scalar components (hksz and nzt) before a is freed.
+    b%hksz = a%hksz
+    b%nzt  = a%nzt
+    call a%free()
+    call b%set_host()
+  class default
+    call a%mv_to_coo(tmp,info)
+    if (info == psb_success_) call b%mv_from_coo(tmp,info)
+  end select
+end subroutine psb_c_mv_hll_to_fmt
diff --git a/ext/impl/psb_d_cp_dia_from_coo.f90 b/ext/impl/psb_d_cp_dia_from_coo.f90
new file mode 100644
index 00000000..b640565f
--- /dev/null
+++ b/ext/impl/psb_d_cp_dia_from_coo.f90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Copy the COO matrix b into the DIA matrix a.
+subroutine psb_d_cp_dia_from_coo(a,b,info)
+  ! A row-ordered b is converted directly; otherwise a row-ordered temporary
+  ! copy is converted.  A failed conversion yields psb_err_alloc_dealloc_.
+
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_cp_dia_from_coo
+  implicit none
+
+  class(psb_d_dia_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  type(psb_d_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+  if (b%is_by_rows()) then
+    call psi_convert_dia_from_coo(a,b,info)
+  else
+    ! This is to guarantee tmp%is_by_rows()
+    call b%cp_to_coo(tmp,info)
+    ! Keep an error raised by the copy: do not let tmp%fix overwrite it.
+    if (info == psb_success_) call tmp%fix(info)
+    if (info /= psb_success_) return
+
+    call psi_convert_dia_from_coo(a,tmp,info)
+    call tmp%free()
+  end if
+  if (info /= 0) goto 9999
+  call a%set_host()
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_d_cp_dia_from_coo
diff --git a/ext/impl/psb_d_cp_dia_to_coo.f90 b/ext/impl/psb_d_cp_dia_to_coo.f90
new file mode 100644
index 00000000..527c96d0
--- /dev/null
+++ b/ext/impl/psb_d_cp_dia_to_coo.f90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+!
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+! Copy the DIA matrix a into the COO matrix b.
+! info reports a failure of the extraction step or of the final b%fix.
+subroutine psb_d_cp_dia_to_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_cp_dia_to_coo
+  implicit none
+
+  class(psb_d_dia_sparse_mat), intent(in)    :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_) :: nr, nza, nc, nzd
+
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+
+  call psi_d_xtr_coo_from_dia(nr,nc,&
+       & b%ia, b%ja, b%val, nzd, &
+       & size(a%data,1),size(a%data,2),&
+       & a%data,a%offset,info)
+  ! Report an extraction error instead of letting b%fix overwrite it.
+  if (info /= psb_success_) return
+  call b%set_nzeros(nza)
+  call b%set_host()
+  call b%fix(info)
+end subroutine psb_d_cp_dia_to_coo
diff --git a/ext/impl/psb_d_cp_ell_from_coo.f90 b/ext/impl/psb_d_cp_ell_from_coo.f90
new file mode 100644
index 00000000..cf23a0e0
--- /dev/null
+++ b/ext/impl/psb_d_cp_ell_from_coo.f90
@@ -0,0 +1,71 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cp_ell_from_coo(a,b,info)
+  ! Build the ELL matrix A from the COO matrix B, leaving B untouched.
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_from_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_d_ell_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! Scratch copy of B, used only when B is not ordered by rows
+  type(psb_d_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  if (.not. b%is_by_rows()) then
+    ! Original note: "This is to have fix_coo called behind the scenes";
+    ! presumably cp_to_coo leaves tmp ordered by rows -- TODO confirm.
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call psi_d_convert_ell_from_coo(a,tmp,info)
+    if (info == psb_success_) call tmp%free()
+  else
+    ! B is already ordered by rows: convert it directly
+    call psi_d_convert_ell_from_coo(a,b,info)
+  end if
+
+  if (info == psb_success_) then
+    ! Conversion succeeded
+    call a%set_host()
+  else
+    ! Every failure is reported to the caller with this single code
+    info = psb_err_alloc_dealloc_
+  end if
+
+  return
+
+end subroutine psb_d_cp_ell_from_coo
diff --git a/ext/impl/psb_d_cp_ell_from_fmt.f90 b/ext/impl/psb_d_cp_ell_from_fmt.f90
new file mode 100644
index 00000000..ce8a8d7e
--- /dev/null
+++ b/ext/impl/psb_d_cp_ell_from_fmt.f90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+subroutine psb_d_cp_ell_from_fmt(a,b,info)
+  ! Fill the ELL matrix A with a copy of B, whatever the storage format of B.
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_from_fmt
+  implicit none
+
+  class(psb_d_ell_sparse_mat), intent(inout) :: a
+  class(psb_d_base_sparse_mat), intent(in)   :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! COO staging copy for formats without a dedicated branch
+  type(psb_d_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_d_ell_sparse_mat)
+    ! Same format: duplicate the base component and each array, in order,
+    ! skipping the remaining copies as soon as one of them fails.
+    if (b%is_dev()) call b%sync()
+    a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+    if (info == 0) call psb_safe_cpy( b%irn,   a%irn ,  info)
+    if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info)
+    if (info == 0) call psb_safe_cpy( b%ja ,   a%ja ,   info)
+    if (info == 0) call psb_safe_cpy( b%val,   a%val ,  info)
+    call a%set_host()
+  type is (psb_d_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+  class default
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+end subroutine psb_d_cp_ell_from_fmt
diff --git a/ext/impl/psb_d_cp_ell_to_coo.f90 b/ext/impl/psb_d_cp_ell_to_coo.f90
new file mode 100644
index 00000000..8e7ad735
--- /dev/null
+++ b/ext/impl/psb_d_cp_ell_to_coo.f90
@@ -0,0 +1,69 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cp_ell_to_coo(a,b,info)
+  ! Copy the ELL matrix A into the COO matrix B, one row at a time.
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_to_coo
+  implicit none
+
+  class(psb_d_ell_sparse_mat), intent(in)    :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! row: current row; nrw: entries taken from it; pos: entries written so far
+  integer(psb_ipk_) :: row, nrw, pos, nr, nc, nza
+
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+
+  pos = 0
+  do row = 1, nr
+    ! Row "row" contributes the first a%irn(row) columns of a%ja / a%val
+    nrw = a%irn(row)
+    if (nrw > 0) then
+      b%ia(pos+1:pos+nrw)  = row
+      b%ja(pos+1:pos+nrw)  = a%ja(row,1:nrw)
+      b%val(pos+1:pos+nrw) = a%val(row,1:nrw)
+      pos = pos + nrw
+    end if
+  end do
+  call b%set_nzeros(nza)
+  call b%fix(info)
+  call b%set_host()
+end subroutine psb_d_cp_ell_to_coo
diff --git a/ext/impl/psb_d_cp_ell_to_fmt.f90 b/ext/impl/psb_d_cp_ell_to_fmt.f90
new file mode 100644
index 00000000..fd05d0fd
--- /dev/null
+++ b/ext/impl/psb_d_cp_ell_to_fmt.f90
@@ -0,0 +1,67 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! 
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+subroutine psb_d_cp_ell_to_fmt(a,b,info)
+  ! Copy the ELL matrix A into B, whatever the storage format of B.
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_to_fmt
+  implicit none
+
+  class(psb_d_ell_sparse_mat), intent(in)     :: a
+  class(psb_d_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  ! COO staging copy for formats without a dedicated branch
+  type(psb_d_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_d_ell_sparse_mat)
+    ! Same format: duplicate the base component and each array, in order,
+    ! skipping the remaining copies as soon as one of them fails.
+    if (a%is_dev()) call a%sync()
+    b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+    if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info)
+    if (info == 0) call psb_safe_cpy( a%irn,   b%irn ,   info)
+    if (info == 0) call psb_safe_cpy( a%ja ,   b%ja ,    info)
+    if (info == 0) call psb_safe_cpy( a%val,   b%val ,   info)
+    call b%set_host()
+
+  type is (psb_d_coo_sparse_mat)
+    call a%cp_to_coo(b,info)
+
+  class default
+    call a%cp_to_coo(tmp,info)
+    if (info == psb_success_) call b%mv_from_coo(tmp,info)
+  end select
+end subroutine psb_d_cp_ell_to_fmt
diff --git a/ext/impl/psb_d_cp_hdia_from_coo.f90 b/ext/impl/psb_d_cp_hdia_from_coo.f90
new file mode 100644
index 00000000..bbc34195
--- /dev/null
+++ b/ext/impl/psb_d_cp_hdia_from_coo.f90
@@ -0,0 +1,222 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cp_hdia_from_coo(a,b,info)
+  ! Build the HDIA matrix A from the COO matrix B, leaving B untouched.
+  use psb_base_mod
+  use psb_d_hdia_mat_mod, psb_protect_name => psb_d_cp_hdia_from_coo
+  implicit none
+
+  class(psb_d_hdia_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)     :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  ! Scratch copy of B, used only when B is not ordered by rows
+  type(psb_d_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+  if (b%is_by_rows()) then
+    call inner_cp_hdia_from_coo(a,b,info)
+    if (info /= psb_success_) goto 9999
+  else
+    call b%cp_to_coo(tmp,info)
+    if (info /= psb_success_) goto 9999
+    if (.not.tmp%is_by_rows()) call tmp%fix(info)
+    if (info /= psb_success_) goto 9999
+    call inner_cp_hdia_from_coo(a,tmp,info)
+    if (info /= psb_success_) goto 9999
+    call tmp%free()
+  end if
+  call a%set_host()
+
+  return
+
+9999 continue
+  ! Every failure is reported to the caller with this single code
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+
+  subroutine inner_cp_hdia_from_coo(a,tmp,info)
+    use psb_base_mod
+    use psi_ext_util_mod
+    ! Fill A from the COO matrix TMP, one hack of a%hacksize rows at a time.
+    implicit none
+    class(psb_d_hdia_sparse_mat), intent(inout) :: a
+    class(psb_d_coo_sparse_mat), intent(in)    :: tmp
+    integer(psb_ipk_), intent(out)             :: info
+
+    !locals
+    integer(psb_ipk_) :: ndiag
+    integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:)
+    integer(psb_ipk_) :: k,i,j,nc,nr,nza,nd,hacksize,nhacks,iszd,&
+         & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout
+    logical, parameter :: debug=.false.
+    ! info is intent(out): define it before the first test on it below
+    info = psb_success_
+    nr = tmp%get_nrows()
+    nc = tmp%get_ncols()
+    nza = tmp%get_nzeros()
+    ! If it is sorted then we can lessen memory impact
+    a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat
+
+    hacksize = a%hacksize
+    a%nhacks = (nr+hacksize-1)/hacksize
+    nhacks = a%nhacks
+
+    ndiag = nr+nc-1
+    if (info == psb_success_) call psb_realloc(nr,irsz,info)
+    if (info == psb_success_) call psb_realloc(ndiag,d,info)
+    if (info == psb_success_) call psb_realloc(ndiag,offset,info)
+    if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info)
+    if (info /= psb_success_) return
+    ! Count the entries of each row
+    irsz = 0
+    do k=1,nza
+      ir = tmp%ia(k)
+      irsz(ir) = irsz(ir)+1
+    end do
+    ! First run: accumulate the per-hack counts nd into a%hackOffsets
+    a%nzeros = 0
+    d = 0
+    iszd = 0
+    a%hackOffsets(1)=0
+    klast1 = 1
+    do k=1, nhacks
+      i = (k-1)*hacksize + 1
+      ib = min(hacksize,nr-i+1)
+      kfirst = klast1
+      klast1 = kfirst + sum(irsz(i:i+ib-1))
+      ! klast1 points to last element of chunk plus 1
+      if (debug) then
+        write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr
+        write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1)
+        write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1)
+      end if
+      call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & nd, d, offset, info, initd=.false., cleard=.true.)
+      iszd = iszd + nd
+      a%hackOffsets(k+1)=iszd
+      if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd
+      if (debug) write(*,*) 'offset ', offset(1:nd)
+    end do
+    if (debug) then
+      write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd
+      write(*,*) 'Hackoffsets ',a%hackOffsets(:)
+    end if
+    if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info)
+    if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info)
+    if (info /= psb_success_) return
+    klast1 = 1
+    !
+    ! Second run: copy elements
+    !
+    do k=1, nhacks
+      i = (k-1)*hacksize + 1
+      ib = min(hacksize,nr-i+1)
+      kfirst = klast1
+      klast1 = kfirst + sum(irsz(i:i+ib-1))
+      ! klast1 points to last element of chunk plus 1
+      hackfirst = a%hackoffsets(k)
+      hacknext = a%hackoffsets(k+1)
+      call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, &
+           & initd=.false., cleard=.false.)
+      if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext)
+      call psi_d_xtr_dia_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & tmp%val(kfirst:klast1-1), &
+           & d,hacksize,(hacknext-hackfirst),&
+           & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,&
+           & initdata=.true.,rdisp=(i-1))
+
+      call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),&
+           & a%diaOffsets(hackfirst+1:hacknext),nzout)
+      a%nzeros = a%nzeros + nzout
+      call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext))
+
+    end do
+    if (debug) then
+      write(*,*) 'NZEROS: ',a%nzeros, nza
+      write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd)
+      write(*,*) 'values: '
+      j=0
+      do k=1,nhacks
+        write(*,*) 'Hack No. ',k
+        do i=1,hacksize*(iszd/nhacks)
+          j = j + 1
+          write(*,*) j, a%val(j)
+        end do
+      end do
+    end if
+  end subroutine inner_cp_hdia_from_coo
+
+  subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz)
+    implicit none
+    integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:)
+    integer(psb_ipk_), intent(out) :: nz
+    ! nz = sum over the ncd offsets of the row-range length ir2-ir1+1
+    integer(psb_ipk_) :: j, ir1, ir2, nrcmdisp, rdisp1
+    nz = 0
+    nrcmdisp = min(nr-rdisp,nc-rdisp)
+    rdisp1 = 1-rdisp
+    do j=1, ncd
+      if (offsets(j)>=0) then
+        ir1 = 1
+        ! same as min(nrd, nr-offsets(j)-rdisp, nc-offsets(j)-rdisp)
+        ir2 = min(nrd, nrcmdisp - offsets(j))
+      else
+        ! same as max(1, 1-offsets(j)-rdisp)
+        ir1 = max(1, rdisp1 - offsets(j))
+        ir2 = min(nrd, nrcmdisp)
+      end if
+      nz = nz + (ir2-ir1+1)
+    end do
+  end subroutine countnz
+
+  subroutine cleand(nr,nd,d,offset)
+    implicit none
+    integer(psb_ipk_), intent(in) :: nr,nd,offset(:)
+    integer(psb_ipk_), intent(inout) :: d(:)
+    integer(psb_ipk_) :: i,id
+    ! Zero the entries of d at positions offset(i)+nr, i=1..nd
+    do i=1,nd
+      id = offset(i) + nr
+      d(id) = 0
+    end do
+  end subroutine cleand
+
+end subroutine psb_d_cp_hdia_from_coo
diff --git a/ext/impl/psb_d_cp_hdia_to_coo.f90 b/ext/impl/psb_d_cp_hdia_to_coo.f90
new file mode 100644
index 00000000..bfa77b08
--- /dev/null
+++ b/ext/impl/psb_d_cp_hdia_to_coo.f90
@@ -0,0 +1,84 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! 
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cp_hdia_to_coo(a,b,info)
+  ! Copy the HDIA matrix A into the COO matrix B, one hack at a time.
+  use psb_base_mod
+  use psb_d_hdia_mat_mod, psb_protect_name => psb_d_cp_hdia_to_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_d_hdia_sparse_mat), intent(in)   :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! j: entries written to B so far; nzd: count returned for the current hack
+  integer(psb_ipk_) :: k,i,j,nc,nr,nza,nzd,hacksize,nhacks,&
+       & hackfirst, hacknext
+
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+  call b%set_nzeros(nza)
+  call b%set_sort_status(psb_unsorted_)
+  nhacks = a%nhacks
+  hacksize = a%hacksize
+  j = 0
+  do k=1, nhacks
+    i = (k-1)*hacksize + 1
+    hackfirst = a%hackoffsets(k)
+    hacknext = a%hackoffsets(k+1)
+    call psi_d_xtr_coo_from_dia(nr,nc,&
+         & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, &
+         & hacksize,(hacknext-hackfirst),&
+         & a%val((hacksize*hackfirst)+1:hacksize*hacknext),&
+         & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1))
+    ! Stop at the first failing hack: the next call would write info again,
+    ! and nzd may not be meaningful after an error.
+    if (info /= psb_success_) return
+    j = j + nzd
+  end do
+  if (nza /= j) then
+    write(*,*) 'Wrong counts in hdia_to_coo',j,nza
+    info = -8
+    return
+  end if
+  call b%set_host()
+  call b%fix(info)
+
+end subroutine psb_d_cp_hdia_to_coo
diff --git a/ext/impl/psb_d_cp_hll_from_coo.f90 b/ext/impl/psb_d_cp_hll_from_coo.f90
new file mode 100644
index 00000000..03028d20
--- /dev/null
+++ b/ext/impl/psb_d_cp_hll_from_coo.f90
@@ -0,0 +1,74 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cp_hll_from_coo(a,b,info)
+  ! Build the HLL matrix A from the COO matrix B, leaving B untouched.
+  use psb_base_mod
+  use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_from_coo
+  implicit none
+
+  class(psb_d_hll_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! tmp: scratch copy of B for the unordered case; hksz: value of psi_get_hksz()
+  type(psb_d_coo_sparse_mat) :: tmp
+  integer(psb_ipk_) :: debug_level, debug_unit, hksz
+
+  info = psb_success_
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  if (b%is_dev()) call b%sync()
+  hksz = psi_get_hksz()
+  if (b%is_by_rows()) then
+    call psi_convert_hll_from_coo(a,hksz,b,info)
+  else
+    ! This is to guarantee tmp%is_by_rows(). Each step runs only if the
+    ! previous one succeeded, so the first error is the one that reaches
+    ! the common exit below.
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call tmp%fix(info)
+    if (info == psb_success_) call psi_convert_hll_from_coo(a,hksz,tmp,info)
+    if (info == psb_success_) call tmp%free()
+  end if
+  if (info /= 0) goto 9999
+  call a%set_host()
+
+  return
+
+9999 continue
+  ! Every failure is reported to the caller with this single code
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_d_cp_hll_from_coo
diff --git a/ext/impl/psb_d_cp_hll_from_fmt.f90 b/ext/impl/psb_d_cp_hll_from_fmt.f90
new file mode 100644
index 00000000..785b23ac
--- /dev/null
+++ b/ext/impl/psb_d_cp_hll_from_fmt.f90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cp_hll_from_fmt(a,b,info)
+  ! Fill the HLL matrix A with a copy of B, whatever the storage format of B.
+  use psb_base_mod
+  use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_from_fmt
+  implicit none
+
+  class(psb_d_hll_sparse_mat), intent(inout) :: a
+  class(psb_d_base_sparse_mat), intent(in)   :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! COO staging copy for formats without a dedicated branch
+  type(psb_d_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+
+  select type (b)
+  class is (psb_d_hll_sparse_mat)
+    ! HLL source (or a type extending it): duplicate the base component, then
+    ! each array and scalar in order, skipping the rest after a failure.
+    if (b%is_dev()) call b%sync()
+    a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+    if (info == 0) call psb_safe_cpy( b%irn,    a%irn ,   info)
+    if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info)
+    if (info == 0) call psb_safe_cpy( b%idiag,  a%idiag,  info)
+    if (info == 0) call psb_safe_cpy( b%ja ,    a%ja ,    info)
+    if (info == 0) call psb_safe_cpy( b%val,    a%val ,   info)
+    if (info == 0) a%hksz = b%hksz
+    if (info == 0) a%nzt  = b%nzt
+    call a%set_host()
+
+  class is (psb_d_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+  class default
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call a%mv_from_coo(tmp,info)
+  end select
+end subroutine psb_d_cp_hll_from_fmt
diff --git a/ext/impl/psb_d_cp_hll_to_coo.f90 b/ext/impl/psb_d_cp_hll_to_coo.f90
new file mode 100644
index 00000000..b20144c5
--- /dev/null
+++ b/ext/impl/psb_d_cp_hll_to_coo.f90
@@ -0,0 +1,104 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cp_hll_to_coo(a,b,info)
+  ! Copy the HLL matrix A into the COO matrix B, one hack at a time.
+  use psb_base_mod
+  use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_to_coo
+  implicit none
+
+  class(psb_d_hll_sparse_mat), intent(in)    :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! row1/nrw : first row and row count of the current hack
+  ! src/dst  : next position in the HLL arrays of A / in the COO arrays of B
+  ! hlen/nzh : a%hkoffs extent / sum of a%irn for the current hack
+  integer(psb_ipk_) :: nza, nr, nc, row1, nrw, hk, hksz, hlen, nzh, src, dst
+
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+
+  src  = 1
+  dst  = 1
+  hksz = a%hksz
+  do row1 = 1, nr, hksz
+    nrw  = min(hksz, nr-row1+1)
+    hk   = (row1-1)/hksz + 1
+    hlen = a%hkoffs(hk+1) - a%hkoffs(hk)
+    nzh  = sum(a%irn(row1:row1+nrw-1))
+    call inner_copy(row1, nrw, &
+         & b%ia(dst:dst+nzh-1), b%ja(dst:dst+nzh-1), b%val(dst:dst+nzh-1), &
+         & a%ja(src:src+hlen-1), a%val(src:src+hlen-1), &
+         & a%irn(row1:row1+nrw-1), hksz)
+    src = src + hlen
+    dst = dst + nzh
+  end do
+
+  call b%set_nzeros(nza)
+  call b%set_host()
+  call b%fix(info)
+
+contains
+
+  ! Append the entries of the ir rows starting at global row i to the COO
+  ! triplets: local row ii contributes its first irn(ii) columns of ja/val,
+  ! which are addressed here as (ld,*) arrays.
+  subroutine inner_copy(i,ir,iac,&
+       & jac,valc,ja,val,irn,ld)
+    integer(psb_ipk_) :: i,ir,ld
+    integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*)
+    real(psb_dpk_)    :: valc(*), val(ld,*)
+
+    integer(psb_ipk_) :: lrow, col, pos
+
+    pos = 0
+    do lrow = 1, ir
+      do col = 1, irn(lrow)
+        pos = pos + 1
+        iac(pos)  = i+lrow-1
+        jac(pos)  = ja(lrow,col)
+        valc(pos) = val(lrow,col)
+      end do
+    end do
+
+  end subroutine inner_copy
+
+
+end subroutine psb_d_cp_hll_to_coo
diff --git a/ext/impl/psb_d_cp_hll_to_fmt.f90 b/ext/impl/psb_d_cp_hll_to_fmt.f90
new file mode 100644
index 00000000..6c60c5b5
--- /dev/null
+++ b/ext/impl/psb_d_cp_hll_to_fmt.f90
@@ -0,0 +1,68 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cp_hll_to_fmt(a,b,info)
+  ! Copy the HLL matrix A into B, whatever the storage format of B.
+  use psb_base_mod
+  use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_to_fmt
+  implicit none
+
+  class(psb_d_hll_sparse_mat), intent(in)     :: a
+  class(psb_d_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  ! COO staging copy for formats without a dedicated branch
+  type(psb_d_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_d_coo_sparse_mat)
+    call a%cp_to_coo(b,info)
+
+  type is (psb_d_hll_sparse_mat)
+    if (a%is_dev()) call a%sync()
+    b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+    if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info)
+    if (info == 0) call psb_safe_cpy( a%idiag,  b%idiag ,  info)
+    if (info == 0) call psb_safe_cpy( a%irn,    b%irn ,    info)
+    if (info == 0) call psb_safe_cpy( a%ja ,    b%ja ,     info)
+    if (info == 0) call psb_safe_cpy( a%val,    b%val ,    info)
+    if (info == 0) b%hksz = a%hksz
+    if (info == 0) b%nzt  = a%nzt   ! also copied by psb_d_cp_hll_from_fmt
+    call b%set_host()
+  class default
+    ! Any other format: go through a temporary COO copy
+    call a%cp_to_coo(tmp,info)
+    if (info == psb_success_) call b%mv_from_coo(tmp,info)
+  end select
+end subroutine psb_d_cp_hll_to_fmt
diff --git a/ext/impl/psb_d_dia_aclsum.f90 b/ext/impl/psb_d_dia_aclsum.f90
new file mode 100644
index 00000000..0f4df6ca
--- /dev/null
+++ b/ext/impl/psb_d_dia_aclsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! 
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_d_dia_aclsum: absolute column sums of a DIA matrix.
+!
+!   d - output: d(c) receives the sum of |a%data(i,j)| over every stored
+!       entry whose column i+a%offset(j) equals c. Must hold at least
+!       a%get_ncols() elements, otherwise psb_err_input_asize_small_i_
+!       is pushed and the error handler is invoked.
+!   a - input: matrix in DIA storage.
+!
+! When a%is_unit() is true every element of d starts from done instead of
+! dzero.
+!
+subroutine psb_d_dia_aclsum(d,a)
+
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_aclsum
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i, j, n, jc, nc, ir1, ir2, nr
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='aclsum'
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info       = psb_err_input_asize_small_i_
+    ! Define the whole array so that psb_errpush never receives
+    ! undefined elements (only entries 1..3 carry information).
+    int_err    = 0
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = done
+  else
+    d = dzero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    ! jc is the offset of diagonal j: entry (i,j) of a%data sits in
+    ! row i, column i+jc.
+    jc = a%offset(j)
+    if (jc > 0) then
+      ir1 = 1
+      ir2 = nr - jc
+    else
+      ir1 = 1 - jc
+      ir2 = nr
+    end if
+    do i=ir1, ir2
+      d(i+jc) = d(i+jc) + abs(a%data(i,j))
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dia_aclsum
diff --git a/ext/impl/psb_d_dia_allocate_mnnz.f90 b/ext/impl/psb_d_dia_allocate_mnnz.f90
new file mode 100644
index 00000000..309b7d4a
--- /dev/null
+++ b/ext/impl/psb_d_dia_allocate_mnnz.f90
@@ -0,0 +1,88 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_d_dia_allocate_mnnz: allocate the storage of an M x N DIA matrix
+! and put it in the build state.
+!
+!   m, n - input: number of rows and columns; a negative value pushes
+!          psb_err_iarg_neg_ and invokes the error handler.
+!   a    - in/out: matrix to be (re)allocated. On success a%data is
+!          m x nz_ and a%offset has m+n entries, both zeroed.
+!   nz   - optional input: requested number of entries. The second
+!          extent of a%data is ceiling(max(nz,1)/m); when nz is absent
+!          max(7*m,7*n,1) is used in place of max(nz,1).
+!
+subroutine psb_d_dia_allocate_mnnz(m,n,a,nz)
+
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in)              :: m,n
+  class(psb_d_dia_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional    :: nz
+
+  integer(psb_ipk_) :: err_act, info, nz_, mdiv
+  character(len=20) :: name='allocate_mnz'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione/))
+    goto 9999
+  endif
+
+  ! m == 0 passes the checks above, so the divisor of the ceiling
+  ! divisions is clamped to 1 to avoid an integer division by zero.
+  ! For m >= 1 the result is unchanged.
+  mdiv = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + mdiv - ione)/mdiv
+  else
+    nz_ = (max(7*m,7*n,ione) + mdiv - ione)/mdiv
+  end if
+  ! Defensive check kept from the original: both formulas start from a
+  ! value >= 1, so this is only reachable if the arithmetic overflows.
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione/))
+    goto 9999
+  endif
+
+  if (info == psb_success_) call psb_realloc(m,nz_,a%data,info)
+  if (info == psb_success_) call psb_realloc(m+n,a%offset,info)
+  if (info == psb_success_) then
+    a%data   = 0
+    a%offset = 0
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+    call a%set_bld()
+    call a%set_triangle(.false.)
+    call a%set_unit(.false.)
+    call a%set_dupl(psb_dupl_def_)
+  end if
+  ! A reallocation failure reaches the handler with the info value set
+  ! by psb_realloc; no additional error is pushed here.
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dia_allocate_mnnz
diff --git a/ext/impl/psb_d_dia_arwsum.f90 b/ext/impl/psb_d_dia_arwsum.f90
new file mode 100644
index 00000000..98eefc44
--- /dev/null
+++ b/ext/impl/psb_d_dia_arwsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_d_dia_arwsum: absolute row sums of a DIA matrix.
+!
+!   d - output: d(i) receives the sum of |a%data(i,j)| over every stored
+!       entry of row i. Must hold at least a%get_nrows() elements,
+!       otherwise psb_err_input_asize_small_i_ is pushed and the error
+!       handler is invoked.
+!   a - input: matrix in DIA storage.
+!
+! When a%is_unit() is true every element of d starts from done instead of
+! dzero.
+!
+subroutine psb_d_dia_arwsum(d,a)
+
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_arwsum
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i, j, m, jc, nc, ir1, ir2, nr
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='arwsum'
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  ! d is indexed by row below, so its length is validated against the
+  ! number of rows (the column count was checked here before).
+  m = a%get_nrows()
+  if (size(d) < m) then
+    info       = psb_err_input_asize_small_i_
+    ! Define the whole array so that psb_errpush never receives
+    ! undefined elements (only entries 1..3 carry information).
+    int_err    = 0
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = m
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = done
+  else
+    d = dzero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    ! jc is the offset of diagonal j: entry (i,j) of a%data sits in
+    ! row i, column i+jc.
+    jc = a%offset(j)
+    if (jc > 0) then
+      ir1 = 1
+      ir2 = nr - jc
+    else
+      ir1 = 1 - jc
+      ir2 = nr
+    end if
+    do i=ir1, ir2
+      d(i) = d(i) + abs(a%data(i,j))
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dia_arwsum
diff --git a/ext/impl/psb_d_dia_colsum.f90 b/ext/impl/psb_d_dia_colsum.f90
new file mode 100644
index 00000000..6a6eb81c
--- /dev/null
+++ b/ext/impl/psb_d_dia_colsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_d_dia_colsum: (signed) column sums of a DIA matrix.
+!
+!   d - output: d(c) receives the sum of a%data(i,j) over every stored
+!       entry whose column i+a%offset(j) equals c. Must hold at least
+!       a%get_ncols() elements, otherwise psb_err_input_asize_small_i_
+!       is pushed and the error handler is invoked.
+!   a - input: matrix in DIA storage.
+!
+! When a%is_unit() is true every element of d starts from done instead of
+! dzero.
+!
+subroutine psb_d_dia_colsum(d,a)
+
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_colsum
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i, j, n, jc, nc, ir1, ir2, nr
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='colsum'
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info       = psb_err_input_asize_small_i_
+    ! Define the whole array so that psb_errpush never receives
+    ! undefined elements (only entries 1..3 carry information).
+    int_err    = 0
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = done
+  else
+    d = dzero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    ! jc is the offset of diagonal j: entry (i,j) of a%data sits in
+    ! row i, column i+jc.
+    jc = a%offset(j)
+    if (jc > 0) then
+      ir1 = 1
+      ir2 = nr - jc
+    else
+      ir1 = 1 - jc
+      ir2 = nr
+    end if
+    do i=ir1, ir2
+      d(i+jc) = d(i+jc) + a%data(i,j)
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dia_colsum
diff --git a/ext/impl/psb_d_dia_csgetptn.f90 b/ext/impl/psb_d_dia_csgetptn.f90
new file mode 100644
index 00000000..ad0e040a
--- /dev/null
+++ b/ext/impl/psb_d_dia_csgetptn.f90
@@ -0,0 +1,188 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csgetptn + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_d_dia_csgetptn diff --git a/ext/impl/psb_d_dia_csgetrow.f90 b/ext/impl/psb_d_dia_csgetrow.f90 new file mode 100644 index 00000000..7e05a26e --- /dev/null +++ b/ext/impl/psb_d_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csgetrow + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_d_dia_csgetrow diff --git a/ext/impl/psb_d_dia_csmm.f90 b/ext/impl/psb_d_dia_csmm.f90 new file mode 100644 index 00000000..81ad967d --- /dev/null +++ b/ext/impl/psb_d_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csmm + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dia_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_d_dia_csmm_inner + +end subroutine psb_d_dia_csmm diff --git a/ext/impl/psb_d_dia_csmv.f90 b/ext/impl/psb_d_dia_csmv.f90 new file mode 100644 index 00000000..166b4c58 --- /dev/null +++ b/ext/impl/psb_d_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csmv + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dia_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_d_dia_csmv_inner + +end subroutine psb_d_dia_csmv diff --git a/ext/impl/psb_d_dia_get_diag.f90 b/ext/impl/psb_d_dia_get_diag.f90 new file mode 100644 index 00000000..bbcb4a12 --- /dev/null +++ b/ext/impl/psb_d_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_d_dia_get_diag: extract the main diagonal of a DIA matrix.
+!
+!   a    - input: matrix in DIA storage.
+!   d    - output: d(1:mnm), mnm = min(nrows,ncols), receives the main
+!          diagonal; any remaining elements are set to dzero. Must hold
+!          at least mnm elements, otherwise
+!          psb_err_input_asize_invalid_i_ is pushed and the error
+!          handler is invoked.
+!   info - output: psb_success_ or the error code above.
+!
+! For a unit-diagonal matrix d(1:mnm) is done. Otherwise it is the stored
+! diagonal whose offset is 0, or dzero when no such diagonal is stored.
+!
+subroutine psb_d_dia_get_diag(a,d,info)
+
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_get_diag
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out)             :: d(:)
+  integer(psb_ipk_), intent(out)          :: info
+
+  integer(psb_ipk_) :: err_act, mnm, i, nd
+  character(len=20) :: name='get_diag'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  mnm = min(a%get_nrows(),a%get_ncols())
+  if (size(d) < mnm) then
+    info=psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d(1:mnm) = done
+  else
+    ! d is intent(out): give it a defined value first, so that a matrix
+    ! with no stored main diagonal yields zeros rather than garbage.
+    d(1:mnm) = dzero
+    ! Never look past the diagonals actually present in a%data, even if
+    ! a%offset has more entries.
+    nd = min(size(a%offset),size(a%data,2))
+    do i=1, nd
+      if (a%offset(i) == 0) then
+        d(1:mnm) = a%data(1:mnm,i)
+        exit
+      end if
+    end do
+  end if
+  ! Clear whatever part of d lies beyond the diagonal.
+  do i=mnm+1,size(d)
+    d(i) = dzero
+  end do
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dia_get_diag
diff --git a/ext/impl/psb_d_dia_maxval.f90 b/ext/impl/psb_d_dia_maxval.f90
new file mode 100644
index 00000000..f57be1ff
--- /dev/null
+++ b/ext/impl/psb_d_dia_maxval.f90
@@ -0,0 +1,54 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+!
modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_d_dia_maxval: largest absolute value held in a%data, never
+! smaller than done when the matrix has a unit diagonal and never
+! smaller than dzero otherwise.
+!
+function psb_d_dia_maxval(a) result(res)
+
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_maxval
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(in) :: a
+  real(psb_dpk_)                          :: res
+
+  if (a%is_dev()) call a%sync()
+
+  ! Lower bound of the result: the implicit unit diagonal, if any.
+  res = dzero
+  if (a%is_unit()) res = done
+
+  ! Every stored coefficient takes part in the maximum.
+  res = max(res,maxval(abs(a%data)))
+
+end function psb_d_dia_maxval
diff --git a/ext/impl/psb_d_dia_mold.f90 b/ext/impl/psb_d_dia_mold.f90
new file mode 100644
index 00000000..2b3cef81
--- /dev/null
+++ b/ext/impl/psb_d_dia_mold.f90
@@ -0,0 +1,61 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_d_dia_mold: make B a freshly allocated psb_d_dia_sparse_mat.
+!
+!   a    - input: only selects this implementation; its contents are
+!          not read.
+!   b    - in/out: if already allocated it is freed and deallocated,
+!          then allocated again with dynamic type psb_d_dia_sparse_mat.
+!   info - output: psb_success_, or psb_err_alloc_dealloc_ when the
+!          deallocation or the allocation fails.
+!
+subroutine psb_d_dia_mold(a,b,info)
+
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_mold
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(in)                  :: a
+  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='dia_mold'
+
+  call psb_get_erraction(err_act)
+
+  ! Release whatever B currently holds before giving it the new type.
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) then
+    allocate(psb_d_dia_sparse_mat :: b, stat=info)
+  end if
+
+  ! Normal exit.
+  if (info == psb_success_) return
+
+  ! Either stat= above reported a failure: map it to the library code
+  ! and hand control to the error handler.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dia_mold
diff --git a/ext/impl/psb_d_dia_print.f90 b/ext/impl/psb_d_dia_print.f90
new file mode 100644
index 00000000..e32dc2ed
--- /dev/null
+++ b/ext/impl/psb_d_dia_print.f90
@@ -0,0 +1,148 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_dia_print(iout,a,iv,head,ivr,ivc)
+  !
+  ! Write the matrix on unit IOUT as coordinate text: a few '%' comment
+  ! lines, one "nr nc nz" line, then one record (row, column, value) for
+  ! every position visited on every stored diagonal.
+  !
+  !   iout : output unit
+  !   a    : matrix to print; synced first when a%is_dev() is true
+  !   iv   : optional renumbering applied to both row and column indices;
+  !          when present it is used regardless of ivr/ivc
+  !   head : optional text written on a '% ' comment line
+  !   ivr  : optional renumbering for row indices only
+  !   ivc  : optional renumbering for column indices only
+  !
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_print
+  implicit none
+
+  integer(psb_ipk_), intent(in)           :: iout
+  class(psb_d_dia_sparse_mat), intent(in) :: a
+  integer(psb_lpk_), intent(in), optional :: iv(:)
+  character(len=*), optional              :: head
+  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+
+  character(len=20)  :: name='d_dia_print'
+  logical, parameter :: debug=.false.
+
+  character(len=80)  :: frmt
+  integer(psb_ipk_)  :: nr, nc, nz
+  integer(psb_ipk_)  :: mode, ndiag, idiag, shift, first, last, row
+
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate real general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% COO'
+
+  if (a%is_dev()) call a%sync()
+
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nz = a%get_nzeros()
+  frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
+  write(iout,*) nr, nc, nz
+
+  ! Decide once which index translation applies to every record.
+  !   1: iv on rows and columns      2: ivr on rows only
+  !   3: ivr on rows, ivc on columns 4: ivc on columns only
+  !   5: plain indices
+  if (present(iv)) then
+    mode = 1
+  else if (present(ivr)) then
+    if (present(ivc)) then
+      mode = 3
+    else
+      mode = 2
+    end if
+  else if (present(ivc)) then
+    mode = 4
+  else
+    mode = 5
+  end if
+
+  ndiag = size(a%data,2)
+  do idiag = 1, ndiag
+    ! Row range visited on this diagonal, from its offset.
+    shift = a%offset(idiag)
+    if (shift <= 0) then
+      first = 1 - shift
+      last  = nr
+    else
+      first = 1
+      last  = nr - shift
+    end if
+
+    do row = first, last
+      select case (mode)
+      case (1)
+        write(iout,frmt) iv(row),iv(row+shift),a%data(row,idiag)
+      case (2)
+        write(iout,frmt) ivr(row),(row+shift),a%data(row,idiag)
+      case (3)
+        write(iout,frmt) ivr(row),ivc(row+shift),a%data(row,idiag)
+      case (4)
+        write(iout,frmt) (row),ivc(row+shift),a%data(row,idiag)
+      case default
+        write(iout,frmt) (row),(row+shift),a%data(row,idiag)
+      end select
+    end do
+  end do
+
+end subroutine psb_d_dia_print
diff --git a/ext/impl/psb_d_dia_reallocate_nz.f90 b/ext/impl/psb_d_dia_reallocate_nz.f90
new file mode 100644
index 00000000..83864dd8
--- /dev/null
+++ b/ext/impl/psb_d_dia_reallocate_nz.f90
@@ -0,0 +1,56
@@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_d_dia_reallocate_nz(nz,a)
+  !
+  ! Deliberate no-op: neither NZ nor A is read or modified. The routine
+  ! only saves and restores the error action around an empty body.
+  !
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in)              :: nz
+  class(psb_d_dia_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='d_dia_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  !
+  ! What should this really do???
+  ! Ans: NOTHING.
+  !
+
+  call psb_erractionrestore(err_act)
+
+end subroutine psb_d_dia_reallocate_nz
diff --git a/ext/impl/psb_d_dia_reinit.f90 b/ext/impl/psb_d_dia_reinit.f90
new file mode 100644
index 00000000..f1e91ade
--- /dev/null
+++ b/ext/impl/psb_d_dia_reinit.f90
@@ -0,0 +1,78 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!
POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_dia_reinit(a,clear)
+  !
+  ! Prepare A for a new round of coefficient updates.
+  !
+  !   a     : matrix to reinitialise
+  !   clear : optional, default .true.; when .true. a%data is set to dzero
+  !
+  ! Behaviour by state:
+  !   a%is_bld() or a%is_upd() : nothing is done
+  !   a%is_asb()               : sync from device if needed, optionally
+  !                              zero a%data, then set_upd() and set_host()
+  !   anything else            : psb_err_invalid_mat_state_ is pushed and
+  !                              the error handler is invoked
+  !
+  ! Every non-error exit, including the "nothing to do" one, restores the
+  ! error action saved on entry.
+  !
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_reinit
+  implicit none
+
+  class(psb_d_dia_sparse_mat), intent(inout) :: a
+  logical, intent(in), optional              :: clear
+
+  integer(psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='reinit'
+  logical            :: clear_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(clear)) then
+    clear_ = clear
+  else
+    clear_ = .true.
+  end if
+
+  if (a%is_bld() .or. a%is_upd()) then
+    ! Nothing to do; fall through to the common exit so that the
+    ! erractionsave above is always paired with a restore.
+    continue
+  else if (a%is_asb()) then
+    if (a%is_dev()) call a%sync()
+    if (clear_) a%data(:,:) = dzero
+    call a%set_upd()
+    call a%set_host()
+  else
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dia_reinit
diff --git a/ext/impl/psb_d_dia_rowsum.f90 b/ext/impl/psb_d_dia_rowsum.f90
new file mode 100644
index 00000000..7a5875ba
--- /dev/null
+++ b/ext/impl/psb_d_dia_rowsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_d_dia_rowsum(d,a)
+  !
+  ! Row sums: d(i) = sum over the stored diagonals of a%data(i,:), plus
+  ! one when a%is_unit() is true.
+  !
+  !   d : output, one entry per row; must hold at least a%get_nrows()
+  !       entries, otherwise psb_err_input_asize_small_i_ is pushed
+  !   a : input matrix; synced first when a%is_dev() is true
+  !
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_rowsum
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_)  :: i, j, m, jc, nc, ir1, ir2, nr
+  integer(psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='rowsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  ! D is indexed by row below, so it has to be checked against the number
+  ! of rows (the column count is irrelevant for a row sum).
+  m = a%get_nrows()
+  if (size(d) < m) then
+    info       = psb_err_input_asize_small_i_
+    int_err    = 0
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = m
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = done
+  else
+    d = dzero
+  end if
+
+  ! NOTE(review): the loop bound comes from size(a%data,1); this assumes
+  ! the storage has no more rows than a%get_nrows() - confirm.
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    ! Row range covered by diagonal j, derived from its offset.
+    jc = a%offset(j)
+    if (jc > 0) then
+      ir1 = 1
+      ir2 = nr - jc
+    else
+      ir1 = 1 - jc
+      ir2 = nr
+    end if
+    do i=ir1, ir2
+      d(i) = d(i) + a%data(i,j)
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dia_rowsum
diff --git a/ext/impl/psb_d_dia_scal.f90 b/ext/impl/psb_d_dia_scal.f90
new file mode 100644
index 00000000..d87c0d25
--- /dev/null
+++ b/ext/impl/psb_d_dia_scal.f90
@@ -0,0 +1,108 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_d_dia_scal(d,a,info,side)
+  !
+  ! Scale the matrix in place by the entries of D.
+  !
+  !   d    : scaling factors
+  !   a    : matrix to scale; synced first when a%is_dev() is true and
+  !          turned non-unit when a%is_unit() is true
+  !   info : psb_success_, or psb_err_input_asize_invalid_i_ when D is
+  !          shorter than the dimension being scaled
+  !   side : optional, default 'L' (case-insensitive).
+  !          'L'            : row i is multiplied by d(i); needs
+  !                           size(d) >= a%get_nrows()
+  !          any other char : the entry in row i of the diagonal with
+  !                           offset jc is multiplied by d(i+jc); needs
+  !                           size(d) >= a%get_ncols()
+  !
+  ! Both size failures are reported the same way, with a fully defined
+  ! i_err array.
+  !
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_scal
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(inout) :: a
+  real(psb_dpk_), intent(in)                 :: d(:)
+  integer(psb_ipk_), intent(out)             :: info
+  character, intent(in), optional            :: side
+
+  integer(psb_ipk_)  :: err_act, i, j, m, n, nc, jc, nr, ir1, ir2
+  character(len=20)  :: name='scal'
+  character          :: side_
+  logical            :: left
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (a%is_dev()) call a%sync()
+
+  if (a%is_unit()) then
+    call a%make_nonunit()
+  end if
+
+  side_ = 'L'
+  if (present(side)) then
+    side_ = psb_toupper(side)
+  end if
+
+  left = (side_ == 'L')
+
+  if (left) then
+    m = a%get_nrows()
+    if (size(d) < m) then
+      info=psb_err_input_asize_invalid_i_
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    do i=1, m
+      a%data(i,:) = a%data(i,:) * d(i)
+    enddo
+  else
+    n = a%get_ncols()
+    if (size(d) < n) then
+      info=psb_err_input_asize_invalid_i_
+      ! Same report as the left-scaling branch; no undefined elements.
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    nr=size(a%data,1)
+    nc=size(a%data,2)
+    do j=1,nc
+      ! Row range covered by diagonal j; the column of entry (i,j) is i+jc.
+      jc = a%offset(j)
+      if (jc > 0) then
+        ir1 = 1
+        ir2 = nr - jc
+      else
+        ir1 = 1 - jc
+        ir2 = nr
+      end if
+      do i=ir1, ir2
+        a%data(i,j) = a%data(i,j) * d(i+jc)
+      enddo
+    enddo
+
+  end if
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dia_scal
diff --git a/ext/impl/psb_d_dia_scals.f90 b/ext/impl/psb_d_dia_scals.f90
new file mode 100644
index 00000000..a3958f57
--- /dev/null
+++ b/ext/impl/psb_d_dia_scals.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_dia_scals(d,a,info)
+  !
+  ! Multiply every stored coefficient of A by the scalar D, in place.
+  !
+  !   d    : scalar factor
+  !   a    : matrix to scale; synced first when a%is_dev() is true, made
+  !          non-unit when a%is_unit() is true, flagged set_host() on exit
+  !   info : always psb_success_ on return
+  !
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_scals
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(inout) :: a
+  real(psb_dpk_), intent(in)                 :: d
+  integer(psb_ipk_), intent(out)             :: info
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='scal'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  ! Work on the host copy, with explicit coefficients everywhere.
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) call a%make_nonunit()
+
+  a%data(:,:) = d * a%data(:,:)
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+
+end subroutine psb_d_dia_scals
diff --git a/ext/impl/psb_d_dns_mat_impl.f90 b/ext/impl/psb_d_dns_mat_impl.f90
new file mode 100644
index 00000000..edf5cde4
--- /dev/null
+++ b/ext/impl/psb_d_dns_mat_impl.f90
@@ -0,0 +1,724 @@
+
+!> Function csmv:
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Product by a dense rank 1 array.
+!!
+!! Compute
+!! Y = alpha*op(A)*X + beta*Y
+!!
+!!
\param alpha Scaling factor for Ax
+!! \param A the input sparse matrix
+!! \param x(:) the input dense X
+!! \param beta Scaling factor for y
+!! \param y(:) the input/output dense Y
+!! \param info return code
+!! \param trans [N] Whether to use A (N), its transpose (T)
+!! or its conjugate transpose (C)
+!!
+!
+subroutine psb_d_dns_csmv(alpha,a,x,beta,y,info,trans)
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csmv
+  implicit none
+  class(psb_d_dns_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in)              :: alpha, beta, x(:)
+  real(psb_dpk_), intent(inout)           :: y(:)
+  integer(psb_ipk_), intent(out)          :: info
+  character, optional, intent(in)         :: trans
+  !
+  character          :: trans_
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='d_dns_csmv'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! Upper-cased operation selector, 'N' when TRANS is absent.
+  trans_ = 'N'
+  if (present(trans)) trans_ = psb_toupper(trans)
+
+  ! Only an assembled matrix can be multiplied.
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    call psb_error_handler(err_act)
+    return
+  endif
+
+  if (a%is_dev()) call a%sync()
+
+  ! The whole product is delegated to dgemv, which receives the matrix
+  ! dimensions and the leading dimension of the coefficient array.
+  call dgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,&
+       & a%val,size(a%val,1),x,1,beta,y,1)
+
+  call psb_erractionrestore(err_act)
+  return
+
+end subroutine psb_d_dns_csmv
+
+
+!> Function csmm:
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Product by a dense rank 2 array.
+!!
+!! Compute
+!! Y = alpha*op(A)*X + beta*Y
+!!
+!! \param alpha Scaling factor for Ax
+!! \param A the input sparse matrix
+!! \param x(:,:) the input dense X
+!! \param beta Scaling factor for y
+!! \param y(:,:) the input/output dense Y
+!! \param info return code
+!! \param trans [N] Whether to use A (N), its transpose (T)
+!! or its conjugate transpose (C)
+!!
+!
+subroutine psb_d_dns_csmm(alpha,a,x,beta,y,info,trans)
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csmm
+  implicit none
+  class(psb_d_dns_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in)              :: alpha, beta, x(:,:)
+  real(psb_dpk_), intent(inout)           :: y(:,:)
+  integer(psb_ipk_), intent(out)          :: info
+  character, optional, intent(in)         :: trans
+  !
+  character          :: trans_
+  integer(psb_ipk_)  :: err_act, m, n, k, lda, ldx, ldy
+  character(len=20)  :: name='d_dns_csmm'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! TRANS is forwarded as given (no case folding), 'N' when absent.
+  trans_ = 'N'
+  if (present(trans)) trans_ = trans
+
+  ! Only an assembled matrix can be multiplied.
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    call psb_error_handler(err_act)
+    return
+  endif
+
+  if (a%is_dev()) call a%sync()
+
+  ! dgemm sizes: op(A) is m x k; only the columns common to X and Y
+  ! take part in the product.
+  n = min(size(y,2),size(x,2))
+  if (psb_toupper(trans_) /= 'N') then
+    m = a%get_ncols()
+    k = a%get_nrows()
+  else
+    m = a%get_nrows()
+    k = a%get_ncols()
+  end if
+  lda = size(a%val,1)
+  ldx = size(x,1)
+  ldy = size(y,1)
+
+  call dgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy)
+
+  call psb_erractionrestore(err_act)
+  return
+
+end subroutine psb_d_dns_csmm
+
+
+
+!
+!
+!> Function csnmi:
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Operator infinity norm
+!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2))
+!!
+!
+function psb_d_dns_csnmi(a) result(res)
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csnmi
+  implicit none
+  class(psb_d_dns_sparse_mat), intent(in) :: a
+  real(psb_dpk_)                          :: res
+  !
+  integer(psb_ipk_) :: irow
+
+  ! Start from zero so that a matrix without rows yields dzero.
+  res = dzero
+  if (a%is_dev()) call a%sync()
+
+  ! Largest absolute row sum.
+  do irow = 1, a%get_nrows()
+    res = max(res,sum(abs(a%val(irow,:))))
+  end do
+
+end function psb_d_dns_csnmi
+
+
+!
+!> Function get_diag:
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Extract the diagonal of A.
+!!
+!! D(i) = A(i:i), i=1:min(nrows,ncols)
+!!
+!!
\param d(:) The output diagonal
+!! \param info return code.
+!
+subroutine psb_d_dns_get_diag(a,d,info)
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_get_diag
+  implicit none
+  class(psb_d_dns_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out)             :: d(:)
+  integer(psb_ipk_), intent(out)          :: info
+  !
+  integer(psb_ipk_)  :: err_act, ndiag, idx
+  character(len=20)  :: name='get_diag'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  ! The diagonal has min(nrows,ncols) entries; D must be able to hold them.
+  ndiag = min(a%get_nrows(),a%get_ncols())
+  if (size(d) < ndiag) then
+    info=psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/))
+    call psb_error_handler(err_act)
+    return
+  end if
+
+  do idx = 1, ndiag
+    d(idx) = a%val(idx,idx)
+  end do
+  ! Any surplus entries of D are zeroed.
+  d(ndiag+1:size(d)) = dzero
+
+  call psb_erractionrestore(err_act)
+  return
+
+end subroutine psb_d_dns_get_diag
+
+
+!
+!
+!> Function reallocate_nz
+!! \memberof psb_d_dns_sparse_mat
+!! \brief One--parameters version of (re)allocate
+!!
+!! \param nz number of nonzeros to allocate for
+!! i.e. makes sure that the internal storage
+!! allows for NZ coefficients and their indices.
+!
+subroutine psb_d_dns_reallocate_nz(nz,a)
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in)              :: nz
+  class(psb_d_dns_sparse_mat), intent(inout) :: a
+  !
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='d_dns_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  !
+  ! This is a no-op, allocation is fixed: NZ is ignored and the only
+  ! action is the sync when a%is_dev() is true.
+  !
+  if (a%is_dev()) call a%sync()
+
+  call psb_erractionrestore(err_act)
+
+end subroutine psb_d_dns_reallocate_nz
+
+!
+!> Function mold:
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Allocate a class(psb_d_dns_sparse_mat) with the
+!!
same dynamic type as the input.
+!! This is equivalent to allocate( mold= ) and is provided
+!! for those compilers not yet supporting mold.
+!! An already allocated B is freed and deallocated first.
+!! \param b The output variable
+!! \param info return code
+!
+subroutine psb_d_dns_mold(a,b,info)
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_mold
+  implicit none
+  class(psb_d_dns_sparse_mat), intent(in)                  :: a
+  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+  !
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='dns_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  ! ALLOCATE on an already allocated variable fails, so release B first
+  ! (same sequence as psb_d_dia_mold).
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_d_dns_sparse_mat :: b, stat=info)
+
+  if (info /= 0) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dns_mold
+
+!
+!
+!> Function allocate_mnnz
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Three-parameters version of allocate
+!!
+!! \param m number of rows
+!! \param n number of cols
+!! \param nz [estimated internally] number of nonzeros to allocate for
+!
+subroutine psb_d_dns_allocate_mnnz(m,n,a,nz)
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in)              :: m,n
+  class(psb_d_dns_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional    :: nz
+  !
+  integer(psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2_psb_ipk_/))
+    goto 9999
+  endif
+
+
+  ! Basic stuff common to all formats
+  call a%set_nrows(m)
+  call a%set_ncols(n)
+  call a%set_triangle(.false.)
+  call a%set_unit(.false.)
+  call a%set_dupl(psb_dupl_def_)
+  call a%set_bld()
+  call a%set_host()
+
+  ! We ignore NZ in this case: the storage is always m x n.
+
+  call psb_realloc(m,n,a%val,info)
+  if (info /= psb_success_) then
+    ! Do not leave the caller with a matrix that claims m x n but has
+    ! no usable storage: report the failure.
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  a%val = dzero
+  a%nnz = 0
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_dns_allocate_mnnz
+
+
+!
+!
+!
+!> Function csgetrow:
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Get a (subset of) row(s)
+!!
+!! getrow is the basic method by which the other (getblk, clip) can
+!! be implemented.
+!!
+!! Returns the set
+!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ)
+!! each identifying the position of a nonzero in A
+!! i.e.
+!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ))
+!! with IMIN<=IA(:)<=IMAX
+!! with JMIN<=JA(:)<=JMAX
+!! IA,JA are reallocated as necessary.
+!!
+!! \param imin the minimum row index we are interested in
+!! \param imax the maximum row index we are interested in
+!! \param nz the number of output coefficients
+!! \param ia(:) the output row indices
+!! \param ja(:) the output col indices
+!! \param val(:) the output coefficients
+!! \param info return code
+!! \param jmin [1] minimum col index
+!! \param jmax [a\%get_ncols()] maximum col index
+!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
+!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
+!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
+!! ( iren cannot be specified with rscale/cscale)
+!! \param append [false] append to ia,ja
+!! \param nzin [none] if append, then first new entry should go in entry nzin+1
+!!
+!
+subroutine psb_d_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csgetrow + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_d_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_d_dns_trim(a) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_trim + implicit none + class(psb_d_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Copy and convert from psb_d_coo_sparse_mat +!! 
Invoked from the target object.
+!! \param b The input variable
+!! \param info return code
+!
+
+subroutine psb_d_cp_dns_from_coo(a,b,info)
+  !
+  ! Fill the dense storage of A from the COO matrix B.
+  !
+  ! When B is not ordered by rows, a copy is made and fix() is applied to
+  ! it before scattering; otherwise B is scattered directly. In both
+  ! cases a%val is (re)allocated nr x nc, zeroed, and each COO triple
+  ! (ia,ja,val) is stored at a%val(ia,ja); a%nnz is set to the COO nzeros.
+  !
+  ! info: psb_success_; the code returned by cp_to_coo/fix when the
+  !       temporary copy fails (plain return, no error pushed); or
+  !       psb_err_alloc_dealloc_ when psb_realloc fails (pushed and passed
+  !       to the error handler).
+  !
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_cp_dns_from_coo
+  implicit none
+
+  class(psb_d_dns_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+  !
+  type(psb_d_coo_sparse_mat) :: tmp
+  integer(psb_ipk_)  :: nza, nr, i, err_act, nc
+  character(len=20)  :: name='cp_dns_from_coo'
+
+  info = psb_success_
+  ! err_act must be defined before it can reach psb_error_handler below.
+  call psb_erractionsave(err_act)
+
+  if (.not.b%is_by_rows()) then
+    ! This is to have fix_coo called behind the scenes
+    call b%cp_to_coo(tmp,info)
+    ! Do not let fix() overwrite a failure reported by the copy.
+    if (info == psb_success_) call tmp%fix(info)
+    if (info /= psb_success_) then
+      call psb_erractionrestore(err_act)
+      return
+    end if
+
+    nr  = tmp%get_nrows()
+    nc  = tmp%get_ncols()
+    nza = tmp%get_nzeros()
+    a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat
+
+    call psb_realloc(nr,nc,a%val,info)
+    if (info /= 0) goto 9999
+    a%val = dzero
+    do i=1, nza
+      a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i)
+    end do
+    a%nnz = nza
+    call tmp%free()
+  else
+    if (b%is_dev()) call b%sync()
+    nr  = b%get_nrows()
+    nc  = b%get_ncols()
+    nza = b%get_nzeros()
+    a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+
+    call psb_realloc(nr,nc,a%val,info)
+    if (info /= 0) goto 9999
+    a%val = dzero
+    do i=1, nza
+      a%val(b%ia(i),b%ja(i)) = b%val(i)
+    end do
+    a%nnz = nza
+  end if
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 continue
+  ! Storage could not be obtained: report it before handing over.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info,name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_cp_dns_from_coo
+
+
+
+!
+!> Function cp_to_coo:
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Copy and convert to psb_d_coo_sparse_mat
+!! Invoked from the source object.
+!! \param b The output variable
+!! \param info return code
+!
+
+subroutine psb_d_cp_dns_to_coo(a,b,info)
+  !
+  ! Copy the entries of A that differ from dzero into the COO matrix B,
+  ! scanning row by row so that B comes out in row-major order.
+  !
+  ! The number of entries written is counted from a%val itself, so the
+  ! COO arrays are always large enough and B's nzeros always matches what
+  ! was actually stored, whatever a%get_nzeros() reports.
+  !
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_cp_dns_to_coo
+  implicit none
+
+  class(psb_d_dns_sparse_mat), intent(in)    :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_) :: nza, nzalloc, nr, nc, i, j, k
+
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+
+  ! Entries that will really be emitted.
+  nza = count(a%val(1:nr,1:nc) /= dzero)
+
+  ! Never ask for less room than the recorded nzeros would have.
+  nzalloc = a%get_nzeros()
+  if (nza > nzalloc) nzalloc = nza
+
+  call b%allocate(nr,nc,nzalloc)
+  b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+
+  k = 0
+  do i=1,nr
+    do j=1,nc
+      if (a%val(i,j) /= dzero) then
+        k = k + 1
+        b%ia(k)  = i
+        b%ja(k)  = j
+        b%val(k) = a%val(i,j)
+      end if
+    end do
+  end do
+
+  ! k == nza by construction: only defined entries are advertised.
+  call b%set_nzeros(k)
+  call b%set_sort_status(psb_row_major_)
+  call b%set_asb()
+  call b%set_host()
+
+end subroutine psb_d_cp_dns_to_coo
+
+
+
+!
+!> Function mv_to_coo:
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Convert to psb_d_coo_sparse_mat, freeing the source.
+!! Invoked from the source object.
+!! \param b The output variable
+!! \param info return code
+!
+subroutine psb_d_mv_dns_to_coo(a,b,info)
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_mv_dns_to_coo
+  implicit none
+
+  class(psb_d_dns_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  ! Copy, then release the source; A is freed whatever INFO the copy
+  ! returned.
+  call a%cp_to_coo(b,info)
+  call a%free()
+  return
+
+end subroutine psb_d_mv_dns_to_coo
+
+
+!
+!> Function mv_from_coo:
+!! \memberof psb_d_dns_sparse_mat
+!! \brief Convert from psb_d_coo_sparse_mat, freeing the source.
+!! Invoked from the target object.
+!! \param b The input variable
+!! \param info return code
+!
+!
+subroutine psb_d_mv_dns_from_coo(a,b,info)
+  use psb_base_mod
+  use psb_d_dns_mat_mod, psb_protect_name => psb_d_mv_dns_from_coo
+  implicit none
+
+  class(psb_d_dns_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  ! Convert by copying, then drop the source.
+  call a%cp_from_coo(b,info)
+  ! Free b only after a successful copy: on failure the caller still
+  ! needs the input data, and info already carries the error.
+  if (info == psb_success_) call b%free()
+
+  return
+
+end subroutine psb_d_mv_dns_from_coo
+
diff --git a/ext/impl/psb_d_ell_aclsum.f90 b/ext/impl/psb_d_ell_aclsum.f90
new file mode 100644
index 00000000..e0bfc18d
--- /dev/null
+++ b/ext/impl/psb_d_ell_aclsum.f90
@@ -0,0 +1,82 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Column sums of absolute values: on return d(k) holds the sum of
+! abs(a%val(i,j)) over the stored entries whose column index is k,
+! plus one per column when a%is_unit() is true.
+!   d  output vector; must have at least a%get_ncols() entries
+!   a  input matrix
+!
+subroutine psb_d_ell_aclsum(d,a)
+
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_aclsum
+  implicit none
+  class(psb_d_ell_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i, j, k, m, n
+  integer(psb_ipk_) :: err_act, info
+  character(len=20) :: name='aclsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info = psb_err_input_asize_small_i_
+    ! Push exactly the three defined diagnostics (argument position,
+    ! actual size, required size); no undefined array tail is passed.
+    call psb_errpush(info,name,i_err=(/ione,size(d,kind=psb_ipk_),n/))
+    goto 9999
+  end if
+
+  ! A unit-diagonal matrix contributes one to every column.
+  if (a%is_unit()) then
+    d = done
+  else
+    d = dzero
+  end if
+
+  ! Only the first a%irn(i) slots of row i are accumulated.
+  do i=1, m
+    do j=1,a%irn(i)
+      k = a%ja(i,j)
+      d(k) = d(k) + abs(a%val(i,j))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_ell_aclsum
diff --git a/ext/impl/psb_d_ell_allocate_mnnz.f90 b/ext/impl/psb_d_ell_allocate_mnnz.f90
new file mode 100644
index 00000000..95e4558c
--- /dev/null
+++ b/ext/impl/psb_d_ell_allocate_mnnz.f90
@@ -0,0 +1,91 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_allocate_mnnz diff --git a/ext/impl/psb_d_ell_arwsum.f90 b/ext/impl/psb_d_ell_arwsum.f90 new file mode 100644 index 00000000..6bf3b888 --- /dev/null +++ b/ext/impl/psb_d_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. 
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Row sums of absolute values: on return d(i) holds the sum of
+! abs(a%val(i,j)) over the first a%irn(i) slots of row i, plus one
+! when a%is_unit() is true.
+!   d  output vector; must have at least a%get_nrows() entries
+!   a  input matrix
+!
+subroutine psb_d_ell_arwsum(d,a)
+
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_arwsum
+  implicit none
+  class(psb_d_ell_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i, j, m
+  logical           :: is_unit
+  integer(psb_ipk_) :: err_act, info
+  ! Name reported on the error stack; matches the routine it labels.
+  character(len=20) :: name='arwsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  if (size(d) < m) then
+    info = psb_err_input_asize_small_i_
+    ! Push exactly the three defined diagnostics (argument position,
+    ! actual size, required size); no undefined array tail is passed.
+    call psb_errpush(info,name,i_err=(/ione,size(d,kind=psb_ipk_),m/))
+    goto 9999
+  end if
+  is_unit = a%is_unit()
+
+  do i = 1, m
+    ! A unit-diagonal matrix starts every row sum at one.
+    if (is_unit) then
+      d(i) = done
+    else
+      d(i) = dzero
+    end if
+    do j=1,a%irn(i)
+      d(i) = d(i) + abs(a%val(i,j))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_ell_arwsum
diff --git a/ext/impl/psb_d_ell_colsum.f90 b/ext/impl/psb_d_ell_colsum.f90
new file mode 100644
index 00000000..9eb30ca0
--- /dev/null
+++ b/ext/impl/psb_d_ell_colsum.f90
@@ -0,0 +1,80 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Column sums (signed): on return d(k) holds the sum of a%val(i,j)
+! over the stored entries whose column index is k, plus one per
+! column when a%is_unit() is true.
+!   d  output vector; must have at least a%get_ncols() entries
+!   a  input matrix
+!
+subroutine psb_d_ell_colsum(d,a)
+
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_colsum
+  implicit none
+  class(psb_d_ell_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i, j, k, m, n
+  integer(psb_ipk_) :: err_act, info
+  character(len=20) :: name='colsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info = psb_err_input_asize_small_i_
+    ! Push exactly the three defined diagnostics (argument position,
+    ! actual size, required size); no undefined array tail is passed.
+    call psb_errpush(info,name,i_err=(/ione,size(d,kind=psb_ipk_),n/))
+    goto 9999
+  end if
+
+  ! A unit-diagonal matrix contributes one to every column.
+  if (a%is_unit()) then
+    d = done
+  else
+    d = dzero
+  end if
+
+  ! Only the first a%irn(i) slots of row i are accumulated.
+  do i=1, m
+    do j=1,a%irn(i)
+      k = a%ja(i,j)
+      d(k) = d(k) + (a%val(i,j))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_ell_colsum
diff --git a/ext/impl/psb_d_ell_csgetblk.f90 b/ext/impl/psb_d_ell_csgetblk.f90
new file mode 100644
index 00000000..9725518f
--- /dev/null
+++ b/ext/impl/psb_d_ell_csgetblk.f90
@@ -0,0 +1,83 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+!
2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Extract rows imin:imax of a into the coordinate matrix b by calling
+! a%csget on b's ia/ja/val arrays, then fix b.
+!   imin, imax    row range requested
+!   a             source matrix
+!   b             destination matrix
+!   info          return code
+!   jmin, jmax, iren, rscale, cscale
+!                 optional, forwarded unchanged to a%csget
+!   append        optional; when true the extracted entries are placed
+!                 after the entries b already holds (default .false.)
+!
+subroutine psb_d_ell_csgetblk(imin,imax,a,b,info,&
+     & jmin,jmax,iren,append,rscale,cscale)
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csgetblk
+  implicit none
+
+  class(psb_d_ell_sparse_mat), intent(in)    :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(in)              :: imin,imax
+  integer(psb_ipk_),intent(out)              :: info
+  logical, intent(in), optional              :: append
+  integer(psb_ipk_), intent(in), optional    :: iren(:)
+  integer(psb_ipk_), intent(in), optional    :: jmin,jmax
+  logical, intent(in), optional              :: rscale,cscale
+  integer(psb_ipk_) :: err_act, nzin, nzout
+  character(len=20) :: name='ell_getblk'
+  logical :: append_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(append)) then
+    append_ = append
+  else
+    append_ = .false.
+  endif
+  if (append_) then
+    ! The offset for appended data is the number of entries already
+    ! stored in the destination b, not the size of the source a.
+    nzin = b%get_nzeros()
+  else
+    nzin = 0
+  endif
+
+  call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,&
+       & jmin=jmin, jmax=jmax, iren=iren, append=append_, &
+       & nzin=nzin, rscale=rscale, cscale=cscale)
+
+  if (info /= psb_success_) goto 9999
+
+  ! b now holds the previous nzin entries followed by nzout new ones.
+  call b%set_nzeros(nzin+nzout)
+  call b%set_host()
+  call b%fix(info)
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_d_ell_csgetblk
diff --git a/ext/impl/psb_d_ell_csgetptn.f90 b/ext/impl/psb_d_ell_csgetptn.f90
new file mode 100644
index 00000000..a050fe54
--- /dev/null
+++ b/ext/impl/psb_d_ell_csgetptn.f90
@@ -0,0 +1,189 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csgetptn + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_ell_csgetrow + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_ell_csmm + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ell_csmv + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ell_csnm1 + + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csnm1' + logical, parameter :: debug=.false. 
+ + + if (a%is_dev()) call a%sync() + res = dzero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = done + else + vt(:) = dzero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_d_ell_csnm1 diff --git a/ext/impl/psb_d_ell_csnmi.f90 b/ext/impl/psb_d_ell_csnmi.f90 new file mode 100644 index 00000000..b4e3d03e --- /dev/null +++ b/ext/impl/psb_d_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Largest row sum of absolute values of a. Every slot of a%val(i,:) is
+! summed for each row, and one is added to each row sum when
+! a%is_unit() is true. The result is dzero when there are no rows.
+!
+function psb_d_ell_csnmi(a) result(res)
+
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csnmi
+  implicit none
+  class(psb_d_ell_sparse_mat), intent(in) :: a
+  real(psb_dpk_) :: res
+
+  integer(psb_ipk_) :: row
+  real(psb_dpk_)    :: diag_shift
+
+  ! Synchronise first whenever the matrix reports is_dev().
+  if (a%is_dev()) call a%sync()
+  res = dzero
+
+  ! Amount added to every row sum: one for a unit matrix, zero otherwise.
+  if (a%is_unit()) then
+    diag_shift = done
+  else
+    diag_shift = dzero
+  end if
+
+  do row = 1, a%get_nrows()
+    res = max(res, sum(abs(a%val(row,:))) + diag_shift)
+  end do
+
+end function psb_d_ell_csnmi
diff --git a/ext/impl/psb_d_ell_csput.f90 b/ext/impl/psb_d_ell_csput.f90
new file mode 100644
index 00000000..d38d9d51
--- /dev/null
+++ b/ext/impl/psb_d_ell_csput.f90
@@ -0,0 +1,208 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! Update the coefficients of a with nz entries (ia(k),ja(k),val(k)).
+! Only the update state is served here; the build state and any other
+! state are reported as psb_err_invalid_mat_state_.
+!   nz           number of entries; negative is an error, zero is a no-op
+!   ia, ja, val  row indices, column indices and values (size >= nz)
+!   a            matrix to update
+!   imin, imax, jmin, jmax
+!                forwarded to the internal search/update routine
+!   info         return code
+!
+subroutine psb_d_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csput_a
+  implicit none
+
+  class(psb_d_ell_sparse_mat), intent(inout) :: a
+  real(psb_dpk_), intent(in)      :: val(:)
+  integer(psb_ipk_), intent(in)   :: nz, ia(:), ja(:), imin,imax,jmin,jmax
+  integer(psb_ipk_), intent(out)  :: info
+
+
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='d_ell_csput_a'
+  logical, parameter :: debug=.false.
+  integer(psb_ipk_) :: debug_level, debug_unit
+
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  ! Only a negative count is an argument error; zero is handled below.
+  if (nz < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione/))
+    goto 9999
+  end if
+  ! For the size checks push the argument position and the actual
+  ! size, so that no undefined values reach the error stack.
+  if (size(ia) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/2*ione,size(ia,kind=psb_ipk_)/))
+    goto 9999
+  end if
+
+  if (size(ja) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/3*ione,size(ja,kind=psb_ipk_)/))
+    goto 9999
+  end if
+  if (size(val) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/4*ione,size(val,kind=psb_ipk_)/))
+    goto 9999
+  end if
+
+  if (nz == 0) then
+    ! Nothing to insert: restore the saved error action and leave.
+    call psb_erractionrestore(err_act)
+    return
+  end if
+
+  if (a%is_bld()) then
+    ! Build phase should only ever be in COO
+    info = psb_err_invalid_mat_state_
+
+  else if (a%is_upd()) then
+    if (a%is_dev()) call a%sync()
+    call psb_d_ell_srch_upd(nz,ia,ja,val,a,&
+         & imin,imax,jmin,jmax,info)
+
+    ! Negative codes from the search are hard failures; positive codes
+    ! only flag entries that were skipped, which is not an error here.
+    if (info < 0) then
+      info = psb_err_internal_error_
+    else if (info > 0) then
+      if (debug_level >= psb_debug_serial_) &
+           & write(debug_unit,*) trim(name),&
+           & ': Discarded entries not belonging to us.'
+      info = psb_success_
+    end if
+    call a%set_host()
+  else
+    ! State is wrong.
+    info = psb_err_invalid_mat_state_
+  end if
+  if (info /= psb_success_) then
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+
+contains
+
+  !
+  ! Locate each (ia(i),ja(i)) among the stored column indices of its
+  ! row and overwrite or accumulate val(i) according to a%get_dupl().
+  ! info on return: 0 all entries placed; 2 some row index outside
+  ! 1..nrows; 3 some column not found in its row; -3 unsupported
+  ! duplicate policy; -4 matrix not sorted.
+  !
+  subroutine psb_d_ell_srch_upd(nz,ia,ja,val,a,&
+       & imin,imax,jmin,jmax,info)
+
+    implicit none
+
+    class(psb_d_ell_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in)  :: nz, imin,imax,jmin,jmax
+    integer(psb_ipk_), intent(in)  :: ia(:),ja(:)
+    real(psb_dpk_), intent(in)     :: val(:)
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: i, ir, ic, ip, nr, nzr, dupl
+    integer(psb_ipk_) :: debug_level, debug_unit
+    character(len=20) :: name='d_ell_srch_upd'
+
+    info = psb_success_
+    debug_unit  = psb_get_debug_unit()
+    debug_level = psb_get_debug_level()
+
+    dupl = a%get_dupl()
+
+    ! The search below is a binary search (psb_bsrch) on each row.
+    if (.not.a%is_sorted()) then
+      info = -4
+      return
+    end if
+
+    nr = a%get_nrows()
+
+    select case(dupl)
+    case(psb_dupl_ovwrt_,psb_dupl_err_)
+      ! Overwrite.
+      ! Cannot test for error, should have been caught earlier.
+      do i=1, nz
+        ir = ia(i)
+        ic = ja(i)
+
+        if ((ir > 0).and.(ir <= nr)) then
+          ! nzr: number of slots searched in row ir.
+          nzr = a%irn(ir)
+          ip  = psb_bsrch(ic,nzr,a%ja(ir,1:nzr))
+          if (ip>0) then
+            a%val(ir,ip) = val(i)
+          else
+            info = max(info,3)
+          end if
+        else
+          info = max(info,2)
+        end if
+
+      end do
+
+    case(psb_dupl_add_)
+      ! Add
+      do i=1, nz
+        ir = ia(i)
+        ic = ja(i)
+        if ((ir > 0).and.(ir <= nr)) then
+          nzr = a%irn(ir)
+          ip  = psb_bsrch(ic,nzr,a%ja(ir,1:nzr))
+          if (ip>0) then
+            a%val(ir,ip) = a%val(ir,ip) + val(i)
+          else
+            info = max(info,3)
+          end if
+        else
+          info = max(info,2)
+        end if
+      end do
+
+    case default
+      info = -3
+      if (debug_level >= psb_debug_serial_) &
+           & write(debug_unit,*) trim(name),&
+           & ': Duplicate handling: ',dupl
+    end select
+
+  end subroutine psb_d_ell_srch_upd
+end subroutine psb_d_ell_csput_a
diff --git a/ext/impl/psb_d_ell_cssm.f90 b/ext/impl/psb_d_ell_cssm.f90
new file mode 100644
index 00000000..3c8b5f21
--- /dev/null
+++ b/ext/impl/psb_d_ell_cssm.f90
@@ -0,0 +1,375 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_cssm + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. 
(a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_d_ell_cssv + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_d_ell_get_diag + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = done + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = dzero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_get_diag diff --git a/ext/impl/psb_d_ell_maxval.f90 b/ext/impl/psb_d_ell_maxval.f90 new file mode 100644 index 00000000..d0cb24d3 --- /dev/null +++ b/ext/impl/psb_d_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
!
! psb_d_ell_maxval
!   Returns the largest absolute value found in the rows
!   1..a%get_nrows() of the coefficient array a%val.
!   When a%is_unit() is true the result is never smaller than one.
!
!   a    - the ELL matrix to scan (input)
!   res  - the maximum absolute value (result)
!
function psb_d_ell_maxval(a) result(res)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_maxval
  implicit none
  class(psb_d_ell_sparse_mat), intent(in) :: a
  real(psb_dpk_)                          :: res

  integer(psb_ipk_) :: irow, nrows
  real(psb_dpk_)    :: rowmax

  ! NOTE(review): is_dev()/sync() presumably bring the host copy up to
  ! date before a%val is read - confirm against psb_d_ell_mat_mod.
  if (a%is_dev()) call a%sync()

  ! Starting value: zero, raised to one when the matrix is flagged unit.
  res = dzero
  if (a%is_unit()) res = done

  nrows = a%get_nrows()
  do irow = 1, nrows
    ! Every column of the row is scanned, not only the first a%irn(irow).
    rowmax = maxval(abs(a%val(irow,:)))
    res    = max(res,rowmax)
  end do

end function psb_d_ell_maxval
!
! psb_d_ell_mold
!   Makes b an (empty) object of dynamic type psb_d_ell_sparse_mat.
!   Any object already held by b is freed and deallocated first.
!
!   a     - used only to select this implementation (input)
!   b     - on exit, allocated as psb_d_ell_sparse_mat (inout)
!   info  - psb_success_ on success, psb_err_alloc_dealloc_ when the
!           deallocate or allocate statement reports a failure
!
subroutine psb_d_ell_mold(a,b,info)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_mold
  implicit none
  class(psb_d_ell_sparse_mat), intent(in)                  :: a
  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info

  integer(psb_ipk_) :: err_act
  character(len=20) :: name='ell_mold'

  call psb_get_erraction(err_act)

  ! Drop whatever b currently holds.
  info = 0
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if

  ! Only attempt the allocation when the release above went through.
  if (info == 0) then
    allocate(psb_d_ell_sparse_mat :: b, stat=info)
  end if

  if (info == psb_success_) return

  ! Either the deallocate or the allocate failed.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, name)
  call psb_error_handler(err_act)
  return

end subroutine psb_d_ell_mold
!
! psb_d_ell_print
!   Writes the matrix to unit iout as MatrixMarket-style coordinate
!   text: a few '%' header lines, one line with rows, columns and
!   nonzeros, then one "row column value" line for each of the first
!   a%irn(i) entries of every row i.
!
!   iout - output unit (input)
!   a    - matrix to print (input)
!   iv   - optional index map applied to both the row and the column
!          index; when present, ivr and ivc are not used in the loops
!   head - optional text written as an extra '% ' header line
!   ivr  - optional index map applied to row indices only
!   ivc  - optional index map applied to column indices only
!
subroutine psb_d_ell_print(iout,a,iv,head,ivr,ivc)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_print
  implicit none

  integer(psb_ipk_), intent(in)           :: iout
  class(psb_d_ell_sparse_mat), intent(in) :: a
  integer(psb_lpk_), intent(in), optional :: iv(:)
  character(len=*), optional              :: head
  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)

  character(len=80) :: frmt
  integer(psb_ipk_) :: row, slot, nr, nc, nz

  ! Header lines.
  write(iout,'(a)') '%%MatrixMarket matrix coordinate real general'
  if (present(head)) write(iout,'(a,a)') '% ',head
  write(iout,'(a)') '%'
  write(iout,'(a,a)') '% ELL'

  if (a%is_dev()) call a%sync()

  nr   = a%get_nrows()
  nc   = a%get_ncols()
  nz   = a%get_nzeros()
  ! The optional maps are forwarded as they are to the format helper.
  frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc)

  write(iout,*) nr, nc, nz

  if (present(iv)) then
    ! One map for both indices.
    do row = 1, nr
      do slot = 1, a%irn(row)
        write(iout,frmt) iv(row),iv(a%ja(row,slot)),a%val(row,slot)
      end do
    end do

  else if (present(ivr)) then

    if (present(ivc)) then
      ! Separate row and column maps.
      do row = 1, nr
        do slot = 1, a%irn(row)
          write(iout,frmt) ivr(row),ivc(a%ja(row,slot)),a%val(row,slot)
        end do
      end do
    else
      ! Row map only.
      do row = 1, nr
        do slot = 1, a%irn(row)
          write(iout,frmt) ivr(row),a%ja(row,slot),a%val(row,slot)
        end do
      end do
    end if

  else if (present(ivc)) then
    ! Column map only.
    do row = 1, nr
      do slot = 1, a%irn(row)
        write(iout,frmt) row,ivc(a%ja(row,slot)),a%val(row,slot)
      end do
    end do

  else
    ! No maps: indices are written as stored.
    do row = 1, nr
      do slot = 1, a%irn(row)
        write(iout,frmt) row,a%ja(row,slot),a%val(row,slot)
      end do
    end do
  end if

end subroutine psb_d_ell_print
!
! psb_d_ell_reallocate_nz
!   Resizes a%ja and a%val so that their second dimension is
!   ceiling(max(nz,1) / number_of_rows); the first dimension is kept
!   at the current size(a%ja,1).
!
!   nz - requested total number of entries (input)
!   a  - the ELL matrix whose storage is resized (inout)
!
!   On a reallocation failure psb_err_alloc_dealloc_ is pushed on the
!   error stack and psb_error_handler is invoked.
!
subroutine psb_d_ell_reallocate_nz(nz,a)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_reallocate_nz
  implicit none
  integer(psb_ipk_), intent(in)              :: nz
  class(psb_d_ell_sparse_mat), intent(inout) :: a
  integer(psb_ipk_) :: m, nzrm, ld
  integer(psb_ipk_) :: err_act, info
  character(len=20) :: name='d_ell_reallocate_nz'

  call psb_erractionsave(err_act)

  !
  ! What should this really do???
  !
  ! Entries per row = total entries spread over the rows, rounded up.
  ! The row count is clamped to one so that a matrix with zero rows
  ! does not cause an integer division by zero.
  m    = max(a%get_nrows(),ione)
  nzrm = (max(nz,ione)+m-1)/m
  ld   = size(a%ja,1)

  call psb_realloc(ld,nzrm,a%ja,info)
  if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_alloc_dealloc_,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_d_ell_reallocate_nz
!
! psb_d_ell_reinit
!   Puts an assembled matrix into the update state, optionally
!   zeroing its coefficients.
!
!   a     - the ELL matrix (inout)
!   clear - optional; when .true. (the default) a%val is set to zero
!
!   A matrix already in the build or update state is left untouched.
!   Any other state pushes psb_err_invalid_mat_state_ and invokes
!   psb_error_handler.
!
subroutine psb_d_ell_reinit(a,clear)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_reinit
  implicit none

  class(psb_d_ell_sparse_mat), intent(inout) :: a
  logical, intent(in), optional              :: clear

  integer(psb_ipk_) :: err_act, info
  character(len=20) :: name='reinit'
  logical           :: clear_

  call psb_erractionsave(err_act)
  info = psb_success_

  clear_ = .true.
  if (present(clear)) clear_ = clear

  if (a%is_bld() .or. a%is_upd()) then
    ! Nothing to do.  Do not return from here: control must reach
    ! psb_erractionrestore below so that the error action saved by
    ! psb_erractionsave is put back.
    continue
  else if (a%is_asb()) then
    if (a%is_dev()) call a%sync()
    if (clear_) a%val(:,:) = dzero
    call a%set_upd()
    call a%set_host()
  else
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_d_ell_reinit
!
! psb_d_ell_rowsum
!   Stores in d(i) the sum of the first a%irn(i) coefficients of
!   row i, for i = 1..a%get_nrows().  When a%is_unit() is true each
!   sum starts from one instead of zero.
!
!   d - output vector; must hold at least a%get_nrows() entries,
!       otherwise psb_err_input_asize_small_i_ is pushed and
!       psb_error_handler is invoked
!   a - the ELL matrix (input)
!
subroutine psb_d_ell_rowsum(d,a)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_rowsum
  implicit none
  class(psb_d_ell_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(out)             :: d(:)

  integer(psb_ipk_) :: irow, slot, nrows
  integer(psb_ipk_) :: err_act, info, int_err(5)
  real(psb_dpk_)    :: total
  character(len=20) :: name='rowsum'
  logical           :: unit_diag

  call psb_erractionsave(err_act)
  if (a%is_dev()) call a%sync()

  nrows = a%get_nrows()
  if (size(d) < nrows) then
    ! Report: argument position, actual size, required size.
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = nrows
    call psb_errpush(info,name,i_err=int_err)
    call psb_error_handler(err_act)
    return
  end if

  unit_diag = a%is_unit()
  do irow = 1, nrows
    ! Accumulate left to right, starting from the implicit unit entry
    ! when there is one.
    if (unit_diag) then
      total = done
    else
      total = dzero
    end if
    do slot = 1, a%irn(irow)
      total = total + a%val(irow,slot)
    end do
    d(irow) = total
  end do

  call psb_erractionrestore(err_act)
  return

end subroutine psb_d_ell_rowsum
!
! psb_d_ell_scal
!   Scales the matrix by the entries of vector d.
!
!   d    - scaling vector (input)
!   a    - the ELL matrix, modified in place (inout)
!   info - psb_success_, or psb_err_input_asize_invalid_i_ when d is
!          shorter than the number of rows (left) / columns (right)
!   side - optional; 'L' (default, case-insensitive) multiplies row i
!          by d(i); any other value multiplies each stored entry
!          (i,j) by d(a%ja(i,j))
!
subroutine psb_d_ell_scal(d,a,info,side)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_scal
  implicit none
  class(psb_d_ell_sparse_mat), intent(inout) :: a
  real(psb_dpk_), intent(in)                 :: d(:)
  integer(psb_ipk_), intent(out)             :: info
  character, intent(in), optional            :: side

  integer(psb_ipk_) :: err_act, i, j, m, n
  character(len=20) :: name='scal'
  character         :: side_
  logical           :: left

  info = psb_success_
  call psb_erractionsave(err_act)

  if (a%is_dev()) call a%sync()
  if (a%is_unit()) then
    call a%make_nonunit()
  end if

  side_ = 'L'
  if (present(side)) then
    side_ = psb_toupper(side)
  end if

  left = (side_ == 'L')

  ! The row count drives the loops of BOTH branches, so it is read
  ! here once; previously the right-scaling loop used it unset.
  m = a%get_nrows()

  if (left) then
    if (size(d) < m) then
      info=psb_err_input_asize_invalid_i_
      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
      goto 9999
    end if

    ! The whole stored row (all columns of a%val) is scaled.
    do i=1, m
      a%val(i,:) = a%val(i,:) * d(i)
    enddo
  else
    n = a%get_ncols()
    if (size(d) < n) then
      info=psb_err_input_asize_invalid_i_
      ! Same error payload as the left branch, fully defined.
      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
      goto 9999
    end if

    ! Only the first a%irn(i) entries of a row carry a column index
    ! that is used to pick the scaling factor.
    do i=1, m
      do j=1, a%irn(i)
        a%val(i,j) = a%val(i,j) * d(a%ja(i,j))
      end do
    enddo

  end if

  call a%set_host()
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_d_ell_scal
!
! psb_d_ell_scals
!   Multiplies every element of a%val by the scalar d.
!
!   d    - scaling factor (input)
!   a    - the ELL matrix, modified in place (inout)
!   info - always psb_success_ on return
!
subroutine psb_d_ell_scals(d,a,info)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_scals
  implicit none
  class(psb_d_ell_sparse_mat), intent(inout) :: a
  real(psb_dpk_), intent(in)                 :: d
  integer(psb_ipk_), intent(out)             :: info

  integer(psb_ipk_) :: err_act

  info = psb_success_
  call psb_erractionsave(err_act)

  if (a%is_dev()) call a%sync()

  ! make_nonunit() is invoked before scaling a matrix flagged as unit.
  if (a%is_unit()) call a%make_nonunit()

  a%val(:,:) = d * a%val(:,:)

  ! NOTE(review): set_host() presumably marks the host copy as the
  ! current one after the in-place change - confirm.
  call a%set_host()

  call psb_erractionrestore(err_act)
  return

end subroutine psb_d_ell_scals
!
! psb_d_ell_trim
!   Shrinks the storage of the matrix to max(1,nrows) rows and
!   max(1, longest row) columns: a%irn and a%idiag are resized to the
!   row count, a%ja and a%val to (rows, columns).
!
!   a - the ELL matrix (inout)
!
!   On a reallocation failure psb_err_alloc_dealloc_ is pushed on the
!   error stack and psb_error_handler is invoked.
!
subroutine psb_d_ell_trim(a)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_trim
  implicit none
  class(psb_d_ell_sparse_mat), intent(inout) :: a
  integer(psb_ipk_) :: err_act, info, nr, m, nzm
  character(len=20) :: name='trim'

  call psb_erractionsave(err_act)
  info = psb_success_

  nr  = a%get_nrows()
  ! Allocation sizes are kept at least 1 ...
  m   = max(1_psb_ipk_,nr)
  ! ... but the row lengths are scanned over the real row count only:
  ! with nr == 0 the section a%irn(1:0) is empty, maxval returns the
  ! most negative integer and the max() below yields 1, instead of
  ! reading a%irn(1) which may not exist.
  nzm = max(1_psb_ipk_,maxval(a%irn(1:nr)))

  call psb_realloc(m,a%irn,info)
  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
  if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info)
  if (info == psb_success_) call psb_realloc(m,nzm,a%val,info)

  if (info /= psb_success_) then
    ! Record the failure so the handler has something to report.
    call psb_errpush(psb_err_alloc_dealloc_,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_d_ell_trim
!
! psb_d_hdia_allocate_mnnz
!   Validates the requested dimensions for an HDIA matrix and derives
!   a per-row entry count from nz.
!
!   m, n - requested rows / columns; a negative value pushes
!          psb_err_iarg_neg_ (argument 1 or 2) and invokes the handler
!   a    - the HDIA matrix (inout)
!   nz   - optional total entry count; when absent max(7*m,7*n,1) is
!          used
!
! NOTE(review): as written the routine only performs the checks; no
! component of a is allocated or modified here - confirm that this is
! intended.
!
subroutine psb_d_hdia_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)               :: m,n
  class(psb_d_hdia_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional     :: nz
  integer(psb_ipk_) :: err_act, info, nz_, mrows
  character(len=20) :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/ione/))
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/2*ione/))
    goto 9999
  endif

  ! m == 0 is accepted by the check above, so the divisor is clamped
  ! to one to avoid an integer division by zero.  For m >= 1 the
  ! result is unchanged.
  mrows = max(m,ione)
  if (present(nz)) then
    nz_ = (max(nz,ione) + mrows -1 )/mrows
  else
    nz_ = (max(7*m,7*n,ione)+mrows-1)/mrows
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/3*ione/))
    goto 9999
  endif

  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_d_hdia_allocate_mnnz
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_csmv + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! 
max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_d_inner_dia_csmv + +end subroutine psb_d_hdia_csmv diff --git a/ext/impl/psb_d_hdia_mold.f90 b/ext/impl/psb_d_hdia_mold.f90 new file mode 100644 index 00000000..cebedd44 --- /dev/null +++ b/ext/impl/psb_d_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! 
!
! psb_d_hdia_mold: make B an empty matrix of dynamic type
! psb_d_hdia_sparse_mat.
!
! Arguments:
!   a    (in)    only selects the routine through type-bound dispatch; none
!                of its components is read here.
!   b    (inout) if already allocated it is freed (b%free()) and deallocated,
!                then allocated again as psb_d_hdia_sparse_mat.
!   info (out)   0 on success; psb_err_alloc_dealloc_ when the deallocation
!                or the allocation reports a non-zero status.
!
subroutine  psb_d_hdia_mold(a,b,info)

  use psb_base_mod
  use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_mold
  implicit none
  class(psb_d_hdia_sparse_mat), intent(in)                 :: a
  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info

  integer(psb_ipk_)  :: saved_action
  character(len=20)  :: routine='hdia_mold'
  logical, parameter :: debug=.false.

  ! Only the current error action is fetched; nothing has to be restored
  ! on the successful path.
  call psb_get_erraction(saved_action)

  info = 0
  if (allocated(b)) then
    call b%free()
    deallocate(b, stat=info)
  end if
  ! Do not attempt the allocation when the deallocation already failed.
  if (info == 0) then
    allocate(psb_d_hdia_sparse_mat :: b, stat=info)
  end if

  if (info == psb_success_) return

  ! Failure path: report a generic allocation/deallocation error.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, routine)
  call psb_error_handler(saved_action)
  return

end subroutine psb_d_hdia_mold
!
! psb_d_hdia_print: write an HDIA matrix to unit IOUT in MatrixMarket
! coordinate form.
!
! The output is: the MatrixMarket banner, the optional HEAD comment, a
! "% HDIA" comment, one line with rows/columns/nonzeros, and then one line
! per entry.  Entries are produced hack by hack with
! psi_d_xtr_coo_from_dia.
!
! Arguments:
!   iout (in) unit to write to.
!   a    (in) matrix to print; a%sync() is called first if a%is_dev().
!   iv   (in, optional) when present BOTH row and column indices are
!        printed as iv(index); ivr and ivc are then ignored for the entries.
!   head (optional) text written after '% ' in the header.
!   ivr, ivc (in, optional) separate maps for row / column indices, used
!        only when iv is absent.
!
! If the work arrays cannot be allocated the routine returns silently after
! the header comments have been written (no size line, no entries).
!
subroutine psb_d_hdia_print(iout,a,iv,head,ivr,ivc)

  use psb_base_mod
  use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_print
  use psi_ext_util_mod
  implicit none

  integer(psb_ipk_), intent(in)             :: iout
  class(psb_d_hdia_sparse_mat), intent(in)  :: a
  integer(psb_lpk_), intent(in), optional   :: iv(:)
  character(len=*), optional                :: head
  integer(psb_lpk_), intent(in), optional   :: ivr(:), ivc(:)

  character(len=80)  :: frmt
  integer(psb_ipk_)  :: nr, nc, nz, info
  integer(psb_ipk_)  :: nhacks, hacksize, hack, widest
  integer(psb_ipk_)  :: lo, hi, ndiag, row_shift, nfound, p
  integer(psb_ipk_), allocatable :: ia(:), ja(:)
  real(psb_dpk_), allocatable    :: val(:)

  ! Header comments
  write(iout,'(a)') '%%MatrixMarket matrix coordinate real general'
  if (present(head)) write(iout,'(a,a)') '% ',head
  write(iout,'(a)') '%'
  write(iout,'(a,a)') '% HDIA'

  if (a%is_dev()) call a%sync()
  nr   = a%get_nrows()
  nc   = a%get_ncols()
  nz   = a%get_nzeros()
  frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc)

  nhacks   = a%nhacks
  hacksize = a%hacksize

  ! Size the work arrays for the hack holding the most diagonals:
  ! hacksize entries per diagonal.
  widest = 0
  do hack = 1, nhacks
    widest = max(widest, (a%hackoffsets(hack+1) - a%hackoffsets(hack)))
  end do
  widest = hacksize*widest
  allocate(ia(widest), ja(widest), val(widest), stat=info)
  if (info /= 0) return

  write(iout,*) nr, nc, nz

  do hack = 1, nhacks
    row_shift = (hack-1)*hacksize      ! rows preceding this hack
    lo        = a%hackoffsets(hack)
    hi        = a%hackoffsets(hack+1)
    ndiag     = hi - lo

    ! Extract the entries of this hack as coordinate triples
    call psi_d_xtr_coo_from_dia(nr,nc,&
         & ia, ja, val, nfound,&
         & hacksize,ndiag,&
         & a%val((hacksize*lo)+1:hacksize*hi),&
         & a%diaOffsets(lo+1:hi),info,rdisp=row_shift)

    if (present(iv)) then
      do p = 1, nfound
        write(iout,frmt) iv(ia(p)),iv(ja(p)),val(p)
      end do
    else if (present(ivr) .and. present(ivc)) then
      do p = 1, nfound
        write(iout,frmt) ivr(ia(p)),ivc(ja(p)),val(p)
      end do
    else if (present(ivr)) then
      do p = 1, nfound
        write(iout,frmt) ivr(ia(p)),ja(p),val(p)
      end do
    else if (present(ivc)) then
      do p = 1, nfound
        write(iout,frmt) ia(p),ivc(ja(p)),val(p)
      end do
    else
      do p = 1, nfound
        write(iout,frmt) ia(p),ja(p),val(p)
      end do
    end if

  end do

end subroutine psb_d_hdia_print
!
! psb_d_hll_aclsum: column sums of the absolute values of an HLL matrix.
!
! On return d(c) is the sum of |val| over the stored entries whose column
! index is c, accumulated on top of a starting value that is done when
! a%is_unit() is true and dzero otherwise.
!
! Arguments:
!   d (out) receives the sums.  It must have at least a%get_ncols()
!           elements; otherwise psb_err_input_asize_small_i_ is pushed on
!           the error stack and the error handler is invoked.
!   a (in)  the matrix; a%sync() is called first if a%is_dev() is true.
!
subroutine psb_d_hll_aclsum(d,a)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_aclsum
  implicit none
  class(psb_d_hll_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(out)             :: d(:)

  integer(psb_ipk_) :: nrows, ncols, hksz
  integer(psb_ipk_) :: first_row, rows_here, hack, start
  integer(psb_ipk_) :: err_act, info, int_err(5)
  character(len=20) :: name='aclsum'

  call psb_erractionsave(err_act)
  info = 0
  if (a%is_dev()) call a%sync()

  nrows = a%get_nrows()
  ncols = a%get_ncols()
  if (size(d) < ncols) then
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = ncols
    call psb_errpush(info,name,i_err=int_err)
    call psb_error_handler(err_act)
    return
  end if

  ! Starting value of every sum
  d = merge(done, dzero, a%is_unit())

  ! Walk the matrix one hack (group of up to hksz rows) at a time.
  hksz = a%get_hksz()
  hack = 0
  do first_row = 1, nrows, hksz
    hack      = hack + 1
    rows_here = min(hksz, nrows-first_row+1)
    start     = a%hkoffs(hack) + 1
    call add_abs_by_column(rows_here, a%irn(first_row), &
         & a%ja(start), a%val(start), hksz, d)
  end do

  call psb_erractionrestore(err_act)
  return

contains

  ! For each of the nr rows of one hack add |val(r,c)| to d(ja(r,c)),
  ! c = 1..irn(r).  ja and val are the hack's slices viewed as (ld,*).
  subroutine add_abs_by_column(nr,irn,ja,val,ld,d)
    integer(psb_ipk_), intent(in) :: nr, ld
    integer(psb_ipk_), intent(in) :: irn(*), ja(ld,*)
    real(psb_dpk_), intent(in)    :: val(ld,*)
    real(psb_dpk_), intent(inout) :: d(*)

    integer(psb_ipk_) :: r, c, col

    do r = 1, nr
      do c = 1, irn(r)
        col    = ja(r,c)
        d(col) = d(col) + abs(val(r,c))
      end do
    end do

  end subroutine add_abs_by_column

end subroutine psb_d_hll_aclsum
!
! psb_d_hll_allocate_mnnz: size the storage of an HLL matrix for M rows,
! N columns and (optionally) NZ nonzeros, and reset its state.
!
! Arguments:
!   m, n (in)    number of rows / columns; a negative value pushes
!                psb_err_iarg_neg_ (argument 1 or 2) and invokes the error
!                handler.
!   a    (inout) the matrix.  a%irn, a%idiag (m entries), a%hkoffs (m+1
!                entries), a%ja and a%val (m*nz_ entries) are reallocated;
!                irn and idiag are zeroed; the matrix is then marked as
!                being built, non-triangular, non-unit, with the default
!                duplicate policy and hack size, resident on the host.
!   nz   (in, optional) total number of nonzeros to provide room for;
!                when absent max(7*m,7*n,1) is used.
!
! nz_ is the per-row width, i.e. the requested total divided by the number
! of rows, rounded up.  M == 0 is a legal size, so the divisor is clamped to
! one to avoid an integer division by zero; for M > 0 the result is the
! same as dividing by M.
!
subroutine  psb_d_hll_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)              :: m,n
  class(psb_d_hll_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional    :: nz

  integer(psb_ipk_)  :: err_act, info, nz_, mdiv
  character(len=20)  :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/ione/))
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/2*ione/))
    goto 9999
  endif

  ! Guard against m == 0 (accepted by the checks above).
  mdiv = max(m,ione)
  if (present(nz)) then
    nz_ = (max(nz,ione) + mdiv - 1)/mdiv
  else
    nz_ = (max(7*m,7*n,ione) + mdiv - 1)/mdiv
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/3*ione/))
    goto 9999
  endif

  ! Each reallocation is attempted only if the previous one succeeded.
  if (info == psb_success_) call psb_realloc(m,a%irn,info)
  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
  if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info)
  if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info)
  if (info == psb_success_) call psb_realloc(m*nz_,a%val,info)
  if (info == psb_success_) then
    a%irn   = 0
    a%idiag = 0
    call a%set_nrows(m)
    call a%set_ncols(n)
    call a%set_bld()
    call a%set_triangle(.false.)
    call a%set_unit(.false.)
    call a%set_dupl(psb_dupl_def_)
    call a%set_hksz(psb_hksz_def_)
    call a%set_host()
  end if
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_d_hll_allocate_mnnz
!
! psb_d_hll_arwsum: row sums of the absolute values of an HLL matrix.
!
! On return d(r) is the sum of |val| over the stored entries of row r,
! accumulated on top of a starting value that is done when a%is_unit() is
! true and dzero otherwise.
!
! Arguments:
!   d (out) receives the sums.  It must have at least a%get_nrows()
!           elements; otherwise psb_err_input_asize_small_i_ is pushed on
!           the error stack and the error handler is invoked.
!   a (in)  the matrix; a%sync() is called first if a%is_dev() is true.
!
subroutine psb_d_hll_arwsum(d,a)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_arwsum
  implicit none
  class(psb_d_hll_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(out)             :: d(:)

  integer(psb_ipk_) :: nrows, hksz
  integer(psb_ipk_) :: first_row, rows_here, hack, start
  integer(psb_ipk_) :: err_act, info, int_err(5)
  character(len=20) :: name='arwsum'

  call psb_erractionsave(err_act)
  info = 0
  if (a%is_dev()) call a%sync()

  nrows = a%get_nrows()
  if (size(d) < nrows) then
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = nrows
    call psb_errpush(info,name,i_err=int_err)
    call psb_error_handler(err_act)
    return
  end if

  ! Starting value of every sum
  d = merge(done, dzero, a%is_unit())

  ! Walk the matrix one hack (group of up to hksz rows) at a time.
  hksz = a%get_hksz()
  hack = 0
  do first_row = 1, nrows, hksz
    hack      = hack + 1
    rows_here = min(hksz, nrows-first_row+1)
    start     = a%hkoffs(hack) + 1
    call add_abs_by_row(first_row, rows_here, a%irn(first_row), &
         & a%val(start), hksz, d)
  end do

  call psb_erractionrestore(err_act)
  return

contains

  ! For each of the nr rows of one hack add |val(r,c)|, c = 1..irn(r), to
  ! the entry of d for that row; row r of the hack is global row row0+r-1.
  ! val is the hack's slice viewed as (ld,*).
  subroutine add_abs_by_row(row0,nr,irn,val,ld,d)
    integer(psb_ipk_), intent(in) :: row0, nr, ld
    integer(psb_ipk_), intent(in) :: irn(*)
    real(psb_dpk_), intent(in)    :: val(ld,*)
    real(psb_dpk_), intent(inout) :: d(*)

    integer(psb_ipk_) :: r, c, tgt

    do r = 1, nr
      tgt = row0 + r - 1
      do c = 1, irn(r)
        d(tgt) = d(tgt) + abs(val(r,c))
      end do
    end do

  end subroutine add_abs_by_row

end subroutine psb_d_hll_arwsum
!
! psb_d_hll_colsum: column sums of an HLL matrix.
!
! On return d(c) is the sum of the stored entries (with their sign) whose
! column index is c, accumulated on top of a starting value that is done
! when a%is_unit() is true and dzero otherwise.
!
! The kernel adds val(i,j) itself, not abs(val(i,j)): summing absolute
! values is the job of psb_d_hll_aclsum, and using abs() here would make
! the two routines return the same result.
!
! Arguments:
!   d (out) receives the sums.  It must have at least a%get_ncols()
!           elements; otherwise psb_err_input_asize_small_i_ is pushed on
!           the error stack and the error handler is invoked.
!   a (in)  the matrix; a%sync() is called first if a%is_dev() is true.
!
subroutine psb_d_hll_colsum(d,a)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_colsum
  implicit none
  class(psb_d_hll_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(out)             :: d(:)

  integer(psb_ipk_) :: i,j,k,m,n, ir, hksz, mxrwl
  integer(psb_ipk_) :: err_act, info, int_err(5)
  character(len=20) :: name='colsum'

  call psb_erractionsave(err_act)
  info = 0
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < n) then
    info=psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = n
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  ! Starting value of every sum
  if (a%is_unit()) then
    d = done
  else
    d = dzero
  end if

  ! Walk the matrix one hack (group of up to hksz rows) at a time;
  ! j counts hacks, k is the first position of hack j in a%ja / a%val.
  hksz = a%get_hksz()
  j    = 1
  do i=1,m,hksz
    ir    = min(hksz,m-i+1)
    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
    k     = a%hkoffs(j) + 1
    call d_hll_colsum(i,ir,mxrwl,a%irn(i),&
         & a%ja(k),hksz,a%val(k),hksz, &
         & d,info)
    if (info /= psb_success_) goto 9999
    j = j + 1
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Kernel for one hack: for each of the m rows add val(i,j) to d(ja(i,j)),
  ! j = 1..irn(i).  ja and val are the hack's slices viewed as (ldj,*) and
  ! (ldv,*).  ir and n are accepted for symmetry with the other HLL kernels
  ! and are not used.  info is always psb_success_.
  subroutine d_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,&
       & d,info)
    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    real(psb_dpk_), intent(in)     :: val(ldv,*)
    real(psb_dpk_), intent(inout)  :: d(*)
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: i,j, jc

    info = psb_success_
    do i=1,m
      do j=1, irn(i)
        jc    = ja(i,j)
        d(jc) = d(jc) + val(i,j)
      end do
    end do

  end subroutine d_hll_colsum

end subroutine psb_d_hll_colsum
!
! psb_d_hll_csgetblk: extract a block of rows of an HLL matrix into a COO
! matrix.
!
! The extraction itself is done by a%csget, which fills b%ia, b%ja and
! b%val; this routine then updates the entry count of B, marks it as
! resident on the host and calls b%fix.
!
! Arguments:
!   imin, imax (in)  row range, forwarded to a%csget.
!   a          (in)  source matrix.
!   b          (inout) destination COO matrix.
!   info       (out) psb_success_ or the error code reported by a%csget /
!                    b%fix.
!   jmin, jmax, iren, rscale, cscale (in, optional) forwarded unchanged to
!                    a%csget.
!   append     (in, optional) when .true. the extracted entries are placed
!                    after the entries already stored in B; default .false.
!
! In append mode nzin is the number of entries already held by the
! DESTINATION, so it is read from b, not from a: taking it from the source
! would place the new entries at the wrong offset and leave B with a wrong
! entry count after set_nzeros(nzin+nzout).
!
subroutine psb_d_hll_csgetblk(imin,imax,a,b,info,&
     & jmin,jmax,iren,append,rscale,cscale)
  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csgetblk
  implicit none

  class(psb_d_hll_sparse_mat), intent(in)    :: a
  class(psb_d_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(in)              :: imin,imax
  integer(psb_ipk_),intent(out)              :: info
  logical, intent(in), optional              :: append
  integer(psb_ipk_), intent(in), optional    :: iren(:)
  integer(psb_ipk_), intent(in), optional    :: jmin,jmax
  logical, intent(in), optional              :: rscale,cscale

  integer(psb_ipk_)  :: err_act, nzin, nzout
  character(len=20)  :: name='hll_getblk'
  logical            :: append_

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(append)) then
    append_ = append
  else
    append_ = .false.
  endif
  if (append_) then
    ! Entries already present in the output matrix
    nzin = b%get_nzeros()
  else
    nzin = 0
  endif

  call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,&
       & jmin=jmin, jmax=jmax, iren=iren, append=append_, &
       & nzin=nzin, rscale=rscale, cscale=cscale)

  if (info /= psb_success_) goto 9999

  call b%set_nzeros(nzin+nzout)
  call b%set_host()
  call b%fix(info)
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_d_hll_csgetblk
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csgetptn + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_hll_csgetrow + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_hll_csmm + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + real(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_d_hll_csmv + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = 
a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_d_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & 
a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == done) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == done) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == dzero) 
then + if (beta == dzero) then + do i=1,m + y(ir+i-1) = dzero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == dzero) then + do i=1,m + tmp = dzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = dzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_d_hll_csmv_inner + + subroutine psb_d_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_8 + + subroutine psb_d_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + 
integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_24 + + subroutine psb_d_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_16 + + subroutine psb_d_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: 
is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_32 + + subroutine psb_d_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_4 + +end subroutine psb_d_hll_csmv diff --git a/ext/impl/psb_d_hll_csnm1.f90 b/ext/impl/psb_d_hll_csnm1.f90 new file mode 100644 index 00000000..4627a4d2 --- /dev/null +++ b/ext/impl/psb_d_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! 
!
! psb_d_hll_csnm1
!
!   1-norm of an HLL matrix: the largest column sum of absolute values.
!
!   The matrix is walked one row block at a time (blocks of hksz rows,
!   located through a%hkoffs); every stored entry adds abs(value) to the
!   accumulator of its column, and the result is the maximum accumulator.
!   When the matrix is flagged as unit-diagonal the accumulators start
!   from one instead of zero.
!
! Arguments:
!   a    - the HLL matrix; synchronised from the device first if needed.
!
! Result:
!   res  - the 1-norm; dzero when the matrix has no columns.
!
function psb_d_hll_csnm1(a) result(res)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csnm1

  implicit none
  class(psb_d_hll_sparse_mat), intent(in) :: a
  real(psb_dpk_)         :: res

  integer(psb_ipk_) :: i, j, k, m, n, ir, info, hksz, mxrwl
  real(psb_dpk_), allocatable  :: vt(:)
  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='d_hll_csnm1'

  call psb_erractionsave(err_act)
  info = psb_success_

  res = dzero
  if (a%is_dev()) call a%sync()
  n = a%get_ncols()
  m = a%get_nrows()

  ! One accumulator per column.
  allocate(vt(n),stat=info)
  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info,name)
    goto 9999
  end if
  if (a%is_unit()) then
    vt = done
  else
    vt = dzero
  end if

  hksz = a%get_hksz()
  j    = 1
  do i = 1, m, hksz
    ! The last block may hold fewer than hksz rows.
    ir    = min(hksz,m-i+1)
    ! Number of stored columns in block j.
    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
    k     = a%hkoffs(j) + 1
    call psb_d_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),&
         & a%ja(k),hksz,a%val(k),hksz,&
         & vt,info)
    if (info /= psb_success_) goto 9999
    j = j + 1
  end do

  ! MAXVAL of a zero-sized array is -huge(); keep the norm at zero when
  ! there are no columns at all.
  if (n > 0) res = maxval(vt)

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  !
  ! Accumulate abs(val) by column for one row block.
  !   ir      - global index of the first row of the block (not used here)
  !   m       - number of rows in the block
  !   n       - number of stored columns in the block (not used here)
  !   irn     - per-row entry counts, starting at the first row of the block
  !   ja, val - block storage viewed as (ldj,*) / (ldv,*)
  !   vt      - per-column accumulators, updated in place
  !
  subroutine psb_d_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,&
       & vt,info)
    integer(psb_ipk_), intent(in)   :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    real(psb_dpk_), intent(in)      :: val(ldv,*)
    real(psb_dpk_), intent(inout)   :: vt(*)
    integer(psb_ipk_), intent(out)  :: info

    integer(psb_ipk_) :: i, j, jc

    info = psb_success_
    do i = 1, m
      do j = 1, irn(i)
        jc     = ja(i,j)
        vt(jc) = vt(jc) + abs(val(i,j))
      end do
    end do
  end subroutine psb_d_hll_csnm1_inner

end function psb_d_hll_csnm1
!
! psb_d_hll_csnmi
!
!   Infinity norm of an HLL matrix: the largest row sum of absolute values.
!
!   The matrix is walked one row block at a time (blocks of hksz rows,
!   located through a%hkoffs). For a matrix flagged as unit-diagonal each
!   row sum starts from one instead of zero.
!
! Arguments:
!   a    - the HLL matrix; synchronised from the device first if needed.
!
! Result:
!   res  - the infinity norm; dzero when the matrix has no rows.
!
function psb_d_hll_csnmi(a) result(res)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csnmi
  implicit none
  class(psb_d_hll_sparse_mat), intent(in) :: a
  real(psb_dpk_)         :: res

  integer(psb_ipk_) :: i, j, k, m, ir, hksz, mxrwl, info
  integer(psb_ipk_) :: err_act
  logical           :: is_unit
  ! Routine name, aligned with the other HLL routines (d_hll_*).
  character(len=20) :: name='d_hll_csnmi'

  call psb_erractionsave(err_act)

  info = 0
  res  = dzero
  if (a%is_dev()) call a%sync()

  m       = a%get_nrows()
  is_unit = a%is_unit()
  hksz    = a%get_hksz()
  j       = 1
  do i = 1, m, hksz
    ! The last block may hold fewer than hksz rows.
    ir    = min(hksz,m-i+1)
    ! Number of stored columns in block j.
    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
    k     = a%hkoffs(j) + 1
    call psb_d_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),&
         & a%ja(k),hksz,a%val(k),hksz,&
         & res,is_unit,info)
    if (info /= psb_success_) goto 9999
    j = j + 1
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  !
  ! Fold the row sums of one row block into the running maximum.
  !   ir      - global index of the first row of the block (not used here)
  !   m       - number of rows in the block
  !   n       - number of stored columns in the block (not used here)
  !   irn     - per-row entry counts, starting at the first row of the block
  !   ja, val - block storage viewed as (ldj,*) / (ldv,*)
  !   res     - running maximum row sum, updated in place
  !   is_unit - when true every row sum starts from one
  !
  subroutine psb_d_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,&
       & res,is_unit,info)
    integer(psb_ipk_), intent(in)   :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    real(psb_dpk_), intent(in)      :: val(ldv,*)
    real(psb_dpk_), intent(inout)   :: res
    logical, intent(in)             :: is_unit
    integer(psb_ipk_), intent(out)  :: info

    integer(psb_ipk_) :: i, j
    real(psb_dpk_)    :: tmp, acc

    info = psb_success_
    if (is_unit) then
      tmp = done
    else
      tmp = dzero
    end if
    do i = 1, m
      acc = tmp
      do j = 1, irn(i)
        acc = acc + abs(val(i,j))
      end do
      res = max(acc,res)
    end do
  end subroutine psb_d_hll_csnmi_inner

end function psb_d_hll_csnmi
!
! psb_d_hll_csput_a
!
!   Update the coefficients of an HLL matrix that is in the update state.
!   Each triple (ia(i), ja(i), val(i)), i=1..nz, must address an entry
!   already present in the sparsity pattern; depending on the duplicate
!   policy of the matrix the stored coefficient is either replaced or
!   incremented.
!
! Arguments:
!   nz                  - number of entries to process; must be positive.
!   ia, ja, val         - row indices, column indices and coefficients,
!                         each of size at least nz.
!   a                   - the matrix; marked as modified on the host side
!                         after the update.
!   imin,imax,jmin,jmax - accepted for interface compatibility; not used
!                         by this implementation.
!   info                - psb_success_ on success; otherwise an error code
!                         that has also been pushed on the error stack.
!
! Notes:
!   * nz <= 0 is reported as psb_err_iarg_neg_.
!   * A matrix in the build state is rejected: insertion is expected to
!     happen in COO format only.
!
subroutine psb_d_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csput_a
  implicit none

  class(psb_d_hll_sparse_mat), intent(inout) :: a
  real(psb_dpk_), intent(in)      :: val(:)
  integer(psb_ipk_), intent(in)   :: nz, ia(:), ja(:), imin,imax,jmin,jmax
  integer(psb_ipk_), intent(out)  :: info

  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='d_hll_csput_a'
  integer(psb_ipk_)  :: int_err(5)

  call psb_erractionsave(err_act)
  info = psb_success_
  ! Only the first slot is meaningful below; do not hand uninitialised
  ! values to the error stack.
  int_err(:) = 0

  if (nz <= 0) then
    info = psb_err_iarg_neg_
    int_err(1)=1
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (size(ia) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=2
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (size(ja) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=3
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (size(val) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=4
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (a%is_bld()) then
    ! Build phase should only ever be in COO
    info = psb_err_invalid_mat_state_

  else if (a%is_upd()) then
    if (a%is_dev()) call a%sync()

    call psb_d_hll_srch_upd(nz,ia,ja,val,a,&
         & imin,imax,jmin,jmax,info)

    ! Any failure of the search/update is reported as an invalid state.
    if (info /= psb_success_) info = psb_err_invalid_mat_state_
    call a%set_host()

  else
    ! State is wrong.
    info = psb_err_invalid_mat_state_
  end if
  if (info /= psb_success_) then
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  !
  ! Locate each (row, column) pair in the stored pattern and update the
  ! matching coefficient.
  !
  ! Within a row block starting at offset hkoffs(hk), the entries of the
  ! ihkr-th row of the block sit at positions
  !     hkoffs(hk) + ihkr + (p-1)*hksz,   p = 1..irn(row)
  ! so a row is a strided section of a%ja / a%val, searched with psb_bsrch.
  !
  ! info on exit:
  !   psb_success_ - all entries processed
  !   -4           - matrix is not sorted
  !   -3           - unsupported duplicate policy
  !   i > 0        - the i-th input entry is not in the pattern
  ! Rows outside 1..nrows are silently discarded.
  !
  subroutine psb_d_hll_srch_upd(nz,ia,ja,val,a,&
       & imin,imax,jmin,jmax,info)

    implicit none

    class(psb_d_hll_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in)  :: nz, imin,imax,jmin,jmax
    integer(psb_ipk_), intent(in)  :: ia(:),ja(:)
    real(psb_dpk_), intent(in)     :: val(:)
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: i, ir, ic, ip, nr, dupl, hksz, hk, hkzpnt, ihkr
    integer(psb_ipk_) :: rowlen, first, last, pos, lastrow
    integer(psb_ipk_) :: debug_level, debug_unit
    logical           :: accumulate
    character(len=20) :: name='d_hll_srch_upd'

    info = psb_success_
    debug_unit  = psb_get_debug_unit()
    debug_level = psb_get_debug_level()

    dupl = a%get_dupl()

    if (.not.a%is_sorted()) then
      info = -4
      return
    end if

    select case(dupl)
    case(psb_dupl_ovwrt_,psb_dupl_err_)
      ! Overwrite.
      ! Cannot test for error, should have been caught earlier.
      accumulate = .false.
    case(psb_dupl_add_)
      ! Add: the new value is summed onto the stored one.
      accumulate = .true.
    case default
      info = -3
      if (debug_level >= psb_debug_serial_) &
           & write(debug_unit,*) trim(name),&
           & ': Duplicate handling: ',dupl
      return
    end select

    lastrow = -1
    nr      = a%get_nrows()
    hksz    = a%get_hksz()
    ! Initialised only to keep the variables defined; they are always
    ! recomputed on the first in-range row because lastrow is -1.
    hkzpnt  = 0
    ihkr    = 0
    rowlen  = 0

    do i = 1, nz
      ir = ia(i)
      ic = ja(i)

      if ((ir < 1).or.(ir > nr)) then
        if (debug_level >= psb_debug_serial_) &
             & write(debug_unit,*) trim(name),&
             & ': Discarding row that does not belong to us.'
        cycle
      end if

      ! Recompute the block coordinates only when the row changes.
      if (ir /= lastrow) then
        hk      = ((ir-1)/hksz)
        lastrow = ir
        ihkr    = ir - hk*hksz
        hk      = hk + 1
        hkzpnt  = a%hkoffs(hk)
        rowlen  = a%irn(ir)
      end if

      first = hkzpnt + ihkr
      last  = first + (rowlen-1)*hksz

      ip = psb_bsrch(ic,rowlen,a%ja(first:last:hksz))
      if (ip <= 0) then
        if (debug_level >= psb_debug_serial_) &
             & write(debug_unit,*) trim(name),&
             & ': Was searching ',ic,' in: ',rowlen,&
             & ' : ',a%ja(first:last:hksz)
        info = i
        return
      end if

      pos = first + (ip-1)*hksz
      if (accumulate) then
        a%val(pos) = a%val(pos) + val(i)
      else
        a%val(pos) = val(i)
      end if
    end do

  end subroutine psb_d_hll_srch_upd

end subroutine psb_d_hll_csput_a
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_cssm + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + real(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. 
(a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_d_hll_cssv + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_d_hll_get_diag + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = done + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_d_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = dzero + end if + end do + + end subroutine psb_d_hll_get_diag_inner + +end subroutine psb_d_hll_get_diag diff --git a/ext/impl/psb_d_hll_maxval.f90 b/ext/impl/psb_d_hll_maxval.f90 new file mode 100644 index 00000000..8408cc96 --- /dev/null +++ b/ext/impl/psb_d_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. 
!
! psb_d_hll_maxval
!
!   Largest absolute value among the stored coefficients of an HLL matrix.
!   For a matrix flagged as unit-diagonal the result is at least one.
!
! Arguments:
!   a    - the HLL matrix; synchronised from the device first if needed.
!
! Result:
!   res  - max |a%val(:)|, or dzero when no coefficient is stored
!          (raised to done if the matrix is unit-diagonal).
!
function psb_d_hll_maxval(a) result(res)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_maxval
  implicit none
  class(psb_d_hll_sparse_mat), intent(in) :: a
  real(psb_dpk_)         :: res

  if (a%is_dev()) call a%sync()

  ! MAXVAL of a zero-sized array is -huge(); report zero for a matrix
  ! with no stored coefficients instead.
  res = dzero
  if (size(a%val) > 0) res = maxval(abs(a%val(:)))
  if (a%is_unit()) res = max(res,done)

end function psb_d_hll_maxval
!
! psb_d_hll_mold
!
!   Give b the dynamic type psb_d_hll_sparse_mat. If b is already
!   allocated it is freed and deallocated first.
!
! Arguments:
!   a    - only selects this implementation through dispatch; not read.
!   b    - on exit, allocated as psb_d_hll_sparse_mat.
!   info - zero on success; psb_err_alloc_dealloc_ if the deallocation or
!          the allocation failed (also pushed on the error stack).
!
subroutine psb_d_hll_mold(a,b,info)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_mold
  implicit none
  class(psb_d_hll_sparse_mat), intent(in)                  :: a
  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info

  integer(psb_ipk_) :: err_act
  character(len=20) :: name='hll_mold'

  call psb_get_erraction(err_act)

  info = 0

  ! Release whatever b currently holds.
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if

  ! Allocate with the HLL dynamic type only if the release went fine.
  if (info == 0) then
    allocate(psb_d_hll_sparse_mat :: b, stat=info)
  end if

  if (info == psb_success_) return

  ! Either step failed: report a single allocation/deallocation error.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, name)
  call psb_error_handler(err_act)
  return

end subroutine psb_d_hll_mold
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!


!
! psb_d_hll_print
!   Write the matrix to unit iout as a MatrixMarket "coordinate real
!   general" listing: the banner lines, then "nr nc nz", then one
!   "row col value" record per stored entry.
!
!   iout - (in) output unit.
!   a    - (in) matrix to print; synchronised first when a%is_dev() is true.
!   iv   - (in, optional) when present, both the row index and the column
!          index of every entry are replaced by iv(index).
!   head - (optional) extra text written on a '% ' comment line.
!   ivr  - (in, optional) row relabelling, consulted only when iv is absent.
!   ivc  - (in, optional) column relabelling, consulted only when iv is absent.
!
!   Storage walk: rows are grouped in blocks of hksz = a%get_hksz()
!   consecutive rows; a%hkoffs(b) is used as the zero-based offset of
!   block b inside a%ja / a%val, and consecutive entries of one row are
!   hksz positions apart. a%irn(i) is the number of entries printed for
!   row i.
!
subroutine psb_d_hll_print(iout,a,iv,head,ivr,ivc)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_print
  implicit none

  integer(psb_ipk_), intent(in)           :: iout
  class(psb_d_hll_sparse_mat), intent(in) :: a
  integer(psb_lpk_), intent(in), optional :: iv(:)
  character(len=*), optional              :: head
  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)

  character(len=80)  :: frmt
  integer(psb_ipk_)  :: nr, nc, nz, hksz, row, cnt, pos

  ! MatrixMarket banner and comment lines.
  write(iout,'(a)') '%%MatrixMarket matrix coordinate real general'
  if (present(head)) write(iout,'(a,a)') '% ',head
  write(iout,'(a)') '%'
  write(iout,'(a,a)') '% COO'

  if (a%is_dev()) call a%sync()

  nr   = a%get_nrows()
  nc   = a%get_ncols()
  nz   = a%get_nzeros()
  frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
  hksz = a%get_hksz()

  write(iout,*) nr, nc, nz

  if (present(iv)) then
    ! A single relabelling vector applies to rows and columns alike.
    do row = 1, nr
      pos = first_entry(row)
      do cnt = 1, a%irn(row)
        write(iout,frmt) iv(row),iv(a%ja(pos)),a%val(pos)
        pos = pos + hksz
      end do
    end do

  else if (present(ivr)) then

    if (present(ivc)) then
      ! Separate row and column relabelling.
      do row = 1, nr
        pos = first_entry(row)
        do cnt = 1, a%irn(row)
          write(iout,frmt) ivr(row),ivc(a%ja(pos)),a%val(pos)
          pos = pos + hksz
        end do
      end do
    else
      ! Rows relabelled, columns printed as stored.
      do row = 1, nr
        pos = first_entry(row)
        do cnt = 1, a%irn(row)
          write(iout,frmt) ivr(row),(a%ja(pos)),a%val(pos)
          pos = pos + hksz
        end do
      end do
    end if

  else if (present(ivc)) then
    ! Columns relabelled, rows printed as stored.
    do row = 1, nr
      pos = first_entry(row)
      do cnt = 1, a%irn(row)
        write(iout,frmt) (row),ivc(a%ja(pos)),a%val(pos)
        pos = pos + hksz
      end do
    end do

  else
    ! No relabelling at all.
    do row = 1, nr
      pos = first_entry(row)
      do cnt = 1, a%irn(row)
        write(iout,frmt) (row),(a%ja(pos)),a%val(pos)
        pos = pos + hksz
      end do
    end do
  end if

contains

  ! Position inside a%ja / a%val of the first stored entry of row irow:
  ! the offset of the block holding the row plus the row's 1-based
  ! position within that block.
  function first_entry(irow) result(ipos)
    integer(psb_ipk_), intent(in) :: irow
    integer(psb_ipk_)             :: ipos
    integer(psb_ipk_)             :: blk0

    blk0 = (irow-1)/hksz          ! zero-based block number
    ipos = a%hkoffs(blk0+1)
    ipos = ipos + (irow-(blk0*hksz))
  end function first_entry

end subroutine psb_d_hll_print
diff --git a/ext/impl/psb_d_hll_reallocate_nz.f90 b/ext/impl/psb_d_hll_reallocate_nz.f90
new file mode 100644
index 00000000..7abdd58f
--- /dev/null
+++ b/ext/impl/psb_d_hll_reallocate_nz.f90
@@ -0,0 +1,64 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!


!
! psb_d_hll_reallocate_nz
!   Resize the a%ja and a%val components of a to max(nz, ione) entries
!   through psb_realloc.
!
!   nz - (in)    requested number of entries.
!   a  - (inout) matrix whose ja / val arrays are resized.
!
!   There is no info argument: a failure of either psb_realloc call is
!   reported by pushing psb_err_alloc_dealloc_ and invoking the error
!   handler.
!
subroutine psb_d_hll_reallocate_nz(nz,a)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_reallocate_nz
  implicit none
  integer(psb_ipk_), intent(in)              :: nz
  class(psb_d_hll_sparse_mat), intent(inout) :: a

  integer(psb_ipk_)  :: saved_act, info, nalloc
  character(len=20)  :: rname = 'd_hll_reallocate_nz'

  call psb_erractionsave(saved_act)

  !
  ! Open question left by the original author:
  ! what should this really do???
  !
  nalloc = max(nz,ione)

  ! val is resized only if resizing ja succeeded.
  call psb_realloc(nalloc,a%ja,info)
  if (info == psb_success_) call psb_realloc(nalloc,a%val,info)

  if (info == psb_success_) then
    call psb_erractionrestore(saved_act)
  else
    call psb_errpush(psb_err_alloc_dealloc_,rname)
    call psb_error_handler(saved_act)
  end if
  return

end subroutine psb_d_hll_reallocate_nz
diff --git a/ext/impl/psb_d_hll_reinit.f90 b/ext/impl/psb_d_hll_reinit.f90
new file mode 100644
index 00000000..6a0f34fa
--- /dev/null
+++ b/ext/impl/psb_d_hll_reinit.f90
@@ -0,0 +1,77 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!


!
! psb_d_hll_reinit
!   Put an assembled matrix back into the update state.
!
!   a     - (inout) matrix to re-initialise.
!   clear - (in, optional) when .true. (the default) every element of
!           a%val is set to dzero before the state change.
!
!   Behaviour by state:
!     a%is_bld() or a%is_upd() : nothing is done.
!     a%is_asb()               : sync from device if needed, optionally
!                                clear a%val, then set_upd() and set_host().
!     anything else            : psb_err_invalid_mat_state_ is pushed and
!                                the error handler is invoked.
!
!   Every non-error path goes through psb_erractionrestore so the
!   save/restore pair stays balanced. The original returned early from the
!   bld/upd branch and skipped the restore.
!
subroutine psb_d_hll_reinit(a,clear)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_reinit
  implicit none

  class(psb_d_hll_sparse_mat), intent(inout) :: a
  logical, intent(in), optional              :: clear

  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='reinit'
  logical            :: clear_

  call psb_erractionsave(err_act)
  info = psb_success_

  ! Default is to clear the coefficients.
  clear_ = .true.
  if (present(clear)) clear_ = clear

  if (a%is_bld() .or. a%is_upd()) then
    ! Nothing to do; fall through to the common exit below so that the
    ! saved error action is restored like on every other normal return.
    continue
  else if (a%is_asb()) then
    if (a%is_dev()) call a%sync()
    if (clear_) a%val(:) = dzero
    call a%set_upd()
    call a%set_host()
  else
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_d_hll_reinit
diff --git a/ext/impl/psb_d_hll_rowsum.f90 b/ext/impl/psb_d_hll_rowsum.f90
new file mode 100644
index 00000000..bfa2d2e1
--- /dev/null
+++ b/ext/impl/psb_d_hll_rowsum.f90
@@ -0,0 +1,110 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!


!
! psb_d_hll_rowsum
!   For every row i of a, accumulate into d(i) the (signed) sum of the
!   entries stored for that row.
!
!   d - (out) result vector; must hold at least a%get_nrows() elements,
!             otherwise psb_err_input_asize_small_i_ is pushed and the
!             error handler is invoked. The whole of d is initialised to
!             done when a%is_unit() is true and to dzero otherwise.
!   a - (in)  matrix; synchronised first when a%is_dev() is true.
!
!   The rows are processed in blocks of hksz = a%get_hksz() rows; block j
!   starts at position a%hkoffs(j)+1 of a%ja / a%val and is handed to the
!   internal kernel as a 2-D array with leading dimension hksz.
!
subroutine psb_d_hll_rowsum(d,a)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_rowsum
  implicit none
  class(psb_d_hll_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(out)             :: d(:)

  integer(psb_ipk_)  :: i, j, k, m, ir, hksz, mxrwl
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  character(len=20)  :: name='rowsum'

  call psb_erractionsave(err_act)
  info = 0
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  if (size(d) < m) then
    info = psb_err_input_asize_small_i_
    ! Zero the whole array first: only the first three slots carry data
    ! and the full array is handed to psb_errpush.
    int_err    = 0
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = m
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  ! Starting value of every row sum depends on a%is_unit().
  if (a%is_unit()) then
    d = done
  else
    d = dzero
  end if

  hksz = a%get_hksz()
  j    = 1
  do i=1,m,hksz
    ir    = min(hksz,m-i+1)                       ! rows in this block
    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz    ! block width
    k     = a%hkoffs(j) + 1                       ! first position of block
    call d_hll_rowsum(i,ir,mxrwl,a%irn(i),&
         & a%ja(k),hksz,a%val(k),hksz, &
         & d,info)
    if (info /= psb_success_) goto 9999
    j = j + 1
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Kernel for one block: rows ir .. ir+m-1 of the matrix are rows 1..m of
  ! val(ldv,*); irn(i) entries are added for local row i. The arguments n,
  ! ja and ldj are accepted but not used by the summation.
  subroutine d_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,&
       & d,info)
    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    real(psb_dpk_), intent(in)     :: val(ldv,*)
    real(psb_dpk_), intent(inout)  :: d(*)
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: i,j

    info = psb_success_
    do i=1,m
      do j=1, irn(i)
        d(ir+i-1) = d(ir+i-1) + (val(i,j))
      end do
    end do

  end subroutine d_hll_rowsum

end subroutine psb_d_hll_rowsum
diff --git a/ext/impl/psb_d_hll_scal.f90 b/ext/impl/psb_d_hll_scal.f90
new file mode 100644
index 00000000..ed9dd9ce
--- /dev/null +++ b/ext/impl/psb_d_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 


!
! psb_d_hll_scal
!   Scale a by the diagonal matrix whose entries are in d, from the left
!   (rows) or from the right (columns).
!
!   d    - (in)    scaling factors.
!   a    - (inout) matrix to be scaled.
!   info - (out)   return code.
!   side - (in, optional) 'L' (default) or 'R', case-insensitive.
!
!   NOTE(review): as delivered this routine ALWAYS fails with
!   psb_err_missing_override_method_. The unconditional push/goto right
!   after the sync makes everything below it unreachable. This looks
!   deliberate (the implementation below was incomplete), so the guard is
!   kept; confirm with the maintainers before removing it.
!
!   Fixed with respect to the original:
!     * ierr is zeroed before being handed to psb_errpush (it was passed
!       completely uninitialised on the one path that actually runs);
!     * in the disabled code: m is now always set (it was undefined for
!       right scaling although the block loop runs over it), the left
!       size check now sets info before pushing the error, and the matrix
!       is marked with set_host() after its values are modified, as
!       psb_d_hll_scals does.
!
subroutine psb_d_hll_scal(d,a,info,side)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_scal
  implicit none
  class(psb_d_hll_sparse_mat), intent(inout) :: a
  real(psb_dpk_), intent(in)                 :: d(:)
  integer(psb_ipk_), intent(out)             :: info
  character, intent(in), optional            :: side

  integer(psb_ipk_)  :: err_act, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir
  character(len=20)  :: name='scal'
  character          :: side_
  logical            :: left

  info = psb_success_
  call psb_erractionsave(err_act)
  ierr = 0

  if (a%is_dev()) call a%sync()

  ! ---- Guard: method reported as not available for this format. --------
  info = psb_err_missing_override_method_
  call psb_errpush(info,name,i_err=ierr)
  goto 9999

  ! ---- Everything from here to the restore is currently unreachable. ---
  side_ = 'L'
  if (present(side)) then
    side_ = psb_toupper(side)
  end if

  left = (side_ == 'L')

  ld = size(d)
  ! The block loop below always runs over the rows, whichever side is used.
  m  = a%get_nrows()
  if (left) then
    if (ld < m) then
      info = psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = ld;
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if
  else
    n = a%get_ncols()
    if (ld < n) then
      info = psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = ld;
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if
  end if

  hksz = a%get_hksz()
  j    = 1
  do i=1,m,hksz
    ir    = min(hksz,m-i+1)                       ! rows in this block
    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz    ! block width
    k     = a%hkoffs(j) + 1                       ! first position of block
    call psb_d_hll_scal_inner(i,ir,mxrwl,a%irn(i),&
         & a%ja(k),hksz,a%val(k),hksz,&
         & left,d,info)
    if (info /= psb_success_) goto 9999
    j = j + 1
  end do

  ! Values were modified on the host side.
  call a%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Kernel for one block: rows ir .. ir+m-1 of the matrix are rows 1..m of
  ! val(ldv,*) / ja(ldj,*); irn(i) entries are scaled for local row i.
  ! left = .true.  : val(i,j) is multiplied by d(global row index)
  ! left = .false. : val(i,j) is multiplied by d(ja(i,j))
  subroutine psb_d_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info)
    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    real(psb_dpk_), intent(in)     :: d(*)
    real(psb_dpk_), intent(inout)  :: val(ldv,*)
    logical, intent(in)            :: left
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: i,j, jc

    info = psb_success_

    if (left) then
      do i=1,m
        do j=1, irn(i)
          val(i,j) = val(i,j)*d(ir+i-1)
        end do
      end do
    else
      do i=1,m
        do j=1, irn(i)
          jc = ja(i,j)
          val(i,j) = val(i,j)*d(jc)
        end do
      end do
    end if

  end subroutine psb_d_hll_scal_inner

end subroutine psb_d_hll_scal
diff --git a/ext/impl/psb_d_hll_scals.f90 b/ext/impl/psb_d_hll_scals.f90
new file mode 100644
index 00000000..8e05cddd
--- /dev/null
+++ b/ext/impl/psb_d_hll_scals.f90
@@ -0,0 +1,63 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!


!
! psb_d_hll_scals
!   Multiply every element of a%val by the scalar d.
!
!   d    - (in)    scaling factor.
!   a    - (inout) matrix; synchronised first when a%is_dev() is true,
!                  switched with make_nonunit() when a%is_unit() is true,
!                  and marked with set_host() after the update.
!   info - (out)   always psb_success_: no step in this routine changes it.
!
subroutine psb_d_hll_scals(d,a,info)

  use psb_base_mod
  use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_scals
  implicit none
  class(psb_d_hll_sparse_mat), intent(inout) :: a
  real(psb_dpk_), intent(in)                 :: d
  integer(psb_ipk_), intent(out)             :: info

  integer(psb_ipk_)  :: saved_act
  character(len=20)  :: rname = 'scal'

  info = psb_success_
  call psb_erractionsave(saved_act)

  ! Bring the host copy up to date before touching it.
  if (a%is_dev()) call a%sync()

  if (a%is_unit()) call a%make_nonunit()

  a%val(:) = d * a%val(:)
  call a%set_host()

  call psb_erractionrestore(saved_act)
  return

  ! Error exit kept from the original layout; no statement branches here.
9999 call psb_error_handler(saved_act)
  return

end subroutine psb_d_hll_scals
diff --git a/ext/impl/psb_d_mv_dia_from_coo.f90 b/ext/impl/psb_d_mv_dia_from_coo.f90
new file mode 100644
index 00000000..e38e975a
--- /dev/null
+++ b/ext/impl/psb_d_mv_dia_from_coo.f90
@@ -0,0 +1,62 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED.
! IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!


!
! psb_d_mv_dia_from_coo
!   Build a from the COO matrix b, then release b.
!
!   a    - (inout) destination matrix.
!   b    - (inout) source; b%fix is called first unless b%is_by_rows() is
!                  already true. b is freed only after a successful copy.
!   info - (out)   psb_success_ on success;
!                  the code returned by b%fix when that step fails;
!                  psb_err_alloc_dealloc_ when a%cp_from_coo reports any
!                  non-zero code.
!
subroutine psb_d_mv_dia_from_coo(a,b,info)

  use psb_base_mod
  use psb_d_dia_mat_mod, psb_protect_name => psb_d_mv_dia_from_coo
  implicit none

  class(psb_d_dia_sparse_mat), intent(inout) :: a
  class(psb_d_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  ! Fix up the source first if it is not yet ordered by rows.
  if (.not.b%is_by_rows()) then
    call b%fix(info)
    if (info /= psb_success_) return
  end if

  call a%cp_from_coo(b,info)

  if (info == 0) then
    ! "Move" semantics: the source is released once the copy is done.
    call b%free()
  else
    ! Any copy failure is reported as an allocation problem.
    info = psb_err_alloc_dealloc_
  end if
  return

end subroutine psb_d_mv_dia_from_coo
diff --git a/ext/impl/psb_d_mv_dia_to_coo.f90 b/ext/impl/psb_d_mv_dia_to_coo.f90
new file mode 100644
index 00000000..d8ac7a69
--- /dev/null
+++ b/ext/impl/psb_d_mv_dia_to_coo.f90
@@ -0,0 +1,55 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3.
! The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! psb_d_mv_dia_to_coo
!   Copy a into the COO matrix b, then release a.
!
!   a    - (inout) source matrix; freed only after a successful copy.
!   b    - (inout) destination COO matrix.
!   info - (out)   psb_success_ on success; psb_err_alloc_dealloc_ when
!                  a%cp_to_coo reports any non-zero code.
!
subroutine psb_d_mv_dia_to_coo(a,b,info)

  use psb_base_mod
  use psb_d_dia_mat_mod, psb_protect_name => psb_d_mv_dia_to_coo
  implicit none

  class(psb_d_dia_sparse_mat), intent(inout) :: a
  class(psb_d_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  call a%cp_to_coo(b,info)

  if (info == 0) then
    ! "Move" semantics: the source is released once the copy is done.
    call a%free()
  else
    ! Any copy failure is reported as an allocation problem.
    info = psb_err_alloc_dealloc_
  end if
  return

end subroutine psb_d_mv_dia_to_coo
diff --git a/ext/impl/psb_d_mv_ell_from_coo.f90 b/ext/impl/psb_d_mv_ell_from_coo.f90
new file mode 100644
index 00000000..8f98daab
--- /dev/null
+++ b/ext/impl/psb_d_mv_ell_from_coo.f90
@@ -0,0 +1,56 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 


!
! psb_d_mv_ell_from_coo
!   Build a from the COO matrix b, then release b.
!
!   a    - (inout) destination matrix.
!   b    - (inout) source; b%fix is called first unless b%is_by_rows() is
!                  already true.
!   info - (out)   psb_success_ on success, otherwise the code returned by
!                  the step that failed (b%fix or a%cp_from_coo).
!
!   b is released only when the copy succeeded. The original freed it
!   unconditionally, so a failed copy also destroyed the source data;
!   psb_d_mv_dia_from_coo already checks before freeing.
!
subroutine psb_d_mv_ell_from_coo(a,b,info)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_from_coo
  implicit none

  class(psb_d_ell_sparse_mat), intent(inout) :: a
  class(psb_d_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  ! Fix up the source first if it is not yet ordered by rows.
  if (.not.b%is_by_rows()) call b%fix(info)
  if (info /= psb_success_) return

  call a%cp_from_coo(b,info)
  ! Keep b intact on failure so the caller does not lose the matrix.
  if (info /= psb_success_) return

  call b%free()

  return

end subroutine psb_d_mv_ell_from_coo
diff --git a/ext/impl/psb_d_mv_ell_from_fmt.f90 b/ext/impl/psb_d_mv_ell_from_fmt.f90
new file mode 100644
index 00000000..6589fd0a
--- /dev/null
+++ b/ext/impl/psb_d_mv_ell_from_fmt.f90
@@ -0,0 +1,67 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!


!
! psb_d_mv_ell_from_fmt
!   Move the contents of b, whatever its storage format, into a.
!
!   a    - (inout) destination matrix.
!   b    - (inout) source matrix; released or emptied by the move.
!   info - (out)   psb_success_, or the code returned by the conversion
!                  routine that failed.
!
!   Dispatch on the dynamic type of b:
!     psb_d_coo_sparse_mat : delegate to a%mv_from_coo.
!     psb_d_ell_sparse_mat : same format, so the base component is copied
!                            and the irn / idiag / ja / val arrays are
!                            transferred with move_alloc (no data copy).
!     anything else        : go through a temporary COO matrix.
!
subroutine psb_d_mv_ell_from_fmt(a,b,info)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_from_fmt
  implicit none

  class(psb_d_ell_sparse_mat), intent(inout)  :: a
  class(psb_d_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)              :: info

  ! Staging area for the generic path.
  type(psb_d_coo_sparse_mat) :: coo_buf

  info = psb_success_

  select type (b)
  type is (psb_d_coo_sparse_mat)
    call a%mv_from_coo(b,info)

  type is (psb_d_ell_sparse_mat)
    ! Make sure the host copy of b is current before stealing its arrays.
    if (b%is_dev()) call b%sync()
    a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
    call move_alloc(b%val,   a%val)
    call move_alloc(b%ja,    a%ja)
    call move_alloc(b%idiag, a%idiag)
    call move_alloc(b%irn,   a%irn)
    call b%free()
    call a%set_host()

  class default
    call b%mv_to_coo(coo_buf,info)
    if (info /= psb_success_) return
    call a%mv_from_coo(coo_buf,info)
  end select

end subroutine psb_d_mv_ell_from_fmt
diff --git a/ext/impl/psb_d_mv_ell_to_coo.f90 b/ext/impl/psb_d_mv_ell_to_coo.f90
new file mode 100644
index 00000000..a1220a6e
--- /dev/null
+++ b/ext/impl/psb_d_mv_ell_to_coo.f90
@@ -0,0 +1,89 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!


!
! psb_d_mv_ell_to_coo
!   Move the contents of a into the COO matrix b, releasing the storage of
!   a piece by piece to keep the peak memory footprint low.
!
!   a    - (inout) source matrix; synchronised first when a%is_dev() is
!                  true, freed at the end.
!   b    - (inout) destination COO matrix; receives the base component of
!                  a, then ia/ja/val with a%get_nzeros() entries, and is
!                  finally passed through b%fix.
!   info - (out)   the code returned by b%fix on the normal path;
!                  psb_err_alloc_dealloc_ when a (de)allocation step fails.
!
!   The row/column indices are transferred first, then a%ja is released,
!   and only afterwards is b%val allocated and filled.
!
!   Fixed with respect to the original: a%idiag is deallocated only when it
!   is actually allocated, and with stat=. The bare deallocate(a%idiag)
!   aborts the program when the component is not allocated.
!
subroutine psb_d_mv_ell_to_coo(a,b,info)

  use psb_base_mod
  use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_to_coo
  implicit none

  class(psb_d_ell_sparse_mat), intent(inout) :: a
  class(psb_d_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  !locals
  integer(psb_ipk_) :: nza, nr, i, j, k

  info = psb_success_
  if (a%is_dev()) call a%sync()

  nr  = a%get_nrows()
  nza = a%get_nzeros()

  ! Taking a path slightly slower but with less memory footprint
  if (allocated(a%idiag)) deallocate(a%idiag, stat=info)
  if (info /= 0) goto 9999
  b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat

  call psb_realloc(nza,b%ia,info)
  if (info == 0) call psb_realloc(nza,b%ja,info)
  if (info /= 0) goto 9999

  ! First pass: row and column indices, a%irn(i) entries for row i.
  k = 0
  do i=1, nr
    do j=1,a%irn(i)
      k = k + 1
      b%ia(k) = i
      b%ja(k) = a%ja(i,j)
    end do
  end do
  ! Column indices are no longer needed: release them before allocating
  ! the coefficient array of b.
  deallocate(a%ja, stat=info)

  if (info == 0) call psb_realloc(nza,b%val,info)
  if (info /= 0) goto 9999

  ! Second pass: coefficients, visited in the same order as the indices.
  k = 0
  do i=1, nr
    do j=1,a%irn(i)
      k = k + 1
      b%val(k) = a%val(i,j)
    end do
  end do

  call a%free()
  call b%set_nzeros(nza)
  call b%set_host()
  call b%fix(info)
  return

9999 continue
  info = psb_err_alloc_dealloc_
  return
end subroutine psb_d_mv_ell_to_coo
diff --git a/ext/impl/psb_d_mv_ell_to_fmt.f90 b/ext/impl/psb_d_mv_ell_to_fmt.f90
new file mode 100644
index 00000000..a5975360
--- /dev/null
+++ b/ext/impl/psb_d_mv_ell_to_fmt.f90
@@ -0,0 +1,67 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! psb_d_mv_ell_to_fmt: move the ELL matrix a into b, choosing the path from the dynamic type of b. +! info stays psb_success_ unless one of the delegated conversions reports a failure. +subroutine psb_d_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_to_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) ! COO target: delegate to mv_to_coo + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_d_ell_sparse_mat) ! same format: hand the arrays over with move_alloc + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default ! 
any other format: go through the temporary COO matrix tmp + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_ell_to_fmt diff --git a/ext/impl/psb_d_mv_hdia_from_coo.f90 b/ext/impl/psb_d_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..68caea34 --- /dev/null +++ b/ext/impl/psb_d_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! 
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+subroutine psb_d_mv_hdia_from_coo(a,b,info) + ! Move COO matrix b into HDIA matrix a: fix b if it is not by rows, a%cp_from_coo(b), then free b. + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_mv_hdia_from_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals (err_act is declared but not referenced below) + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return ! a failure of b%fix is returned unchanged + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue ! any failure of cp_from_coo is reported as psb_err_alloc_dealloc_ + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hdia_from_coo diff --git a/ext/impl/psb_d_mv_hdia_to_coo.f90 b/ext/impl/psb_d_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..595e20a2 --- /dev/null +++ b/ext/impl/psb_d_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! 
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_hdia_to_coo(a,b,info) + ! Move HDIA matrix a into COO matrix b: a%cp_to_coo(b), then free a if the copy succeeded. + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_mv_hdia_to_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals (declared but not referenced below) + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue ! any failure of cp_to_coo is reported as psb_err_alloc_dealloc_; a is left untouched + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_hdia_to_coo diff --git a/ext/impl/psb_d_mv_hll_from_coo.f90 b/ext/impl/psb_d_mv_hll_from_coo.f90 new file mode 100644 index 00000000..78faed4b --- /dev/null +++ b/ext/impl/psb_d_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! 
software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! psb_d_mv_hll_from_coo: move COO matrix b into HLL matrix a (hack size from psi_get_hksz), then free b. +! info: psb_success_ on success, psb_err_alloc_dealloc_ if b%fix or the conversion fails. +subroutine psb_d_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_from_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + if (info == psb_success_) call psi_convert_hll_from_coo(a,hksz,b,info) ! skip if b%fix failed + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hll_from_coo diff --git a/ext/impl/psb_d_mv_hll_from_fmt.f90 b/ext/impl/psb_d_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..76a2f2fb --- /dev/null +++ b/ext/impl/psb_d_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! 
(C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. 
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +subroutine psb_d_mv_hll_from_fmt(a,b,info) + ! Move matrix b into HLL matrix a, choosing the path from the dynamic type of b. + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_from_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) ! COO source: delegate to mv_from_coo + call a%mv_from_coo(b,info) + + type is (psb_d_hll_sparse_mat) ! same format: hand the arrays over with move_alloc + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz ! scalar components are copied before b is freed + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default ! any other format: go through the temporary COO matrix tmp + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hll_from_fmt diff --git a/ext/impl/psb_d_mv_hll_to_coo.f90 b/ext/impl/psb_d_mv_hll_to_coo.f90 new file mode 100644 index 00000000..fbc9111b --- /dev/null +++ b/ext/impl/psb_d_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! 
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! psb_d_mv_hll_to_coo: move HLL matrix a into COO matrix b: a%cp_to_coo(b), then free a. +! info: psb_success_ on success, psb_err_alloc_dealloc_ if the copy fails (a is then left untouched). +subroutine psb_d_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_to_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_hll_to_coo diff --git a/ext/impl/psb_d_mv_hll_to_fmt.f90 b/ext/impl/psb_d_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..8022b2e5 --- /dev/null +++ b/ext/impl/psb_d_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. 
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! 
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! psb_d_mv_hll_to_fmt: move the HLL matrix a into b, choosing the path from the dynamic type of b. +! info stays psb_success_ unless one of the delegated conversions reports a failure. +subroutine psb_d_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_to_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! 
+ type is (psb_d_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + b%nzt = a%nzt ! carry nzt across as psb_d_mv_hll_from_fmt does; must precede a%free() + call a%free() + call b%set_host() + class default ! any other format: go through the temporary COO matrix tmp + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hll_to_fmt diff --git a/ext/impl/psb_s_cp_dia_from_coo.f90 b/ext/impl/psb_s_cp_dia_from_coo.f90 new file mode 100644 index 00000000..6d9a0749 --- /dev/null +++ b/ext/impl/psb_s_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! psb_s_cp_dia_from_coo: copy COO matrix b into DIA matrix a; b itself is left unchanged. +subroutine psb_s_cp_dia_from_coo(a,b,info) + ! If b is not by rows the conversion works on a fixed temporary copy tmp instead. + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_cp_dia_from_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call tmp%fix(info) ! do not let fix hide a failed copy + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_dia_from_coo diff --git a/ext/impl/psb_s_cp_dia_to_coo.f90 b/ext/impl/psb_s_cp_dia_to_coo.f90 new file mode 100644 index 00000000..c0cd5d32 --- /dev/null +++ b/ext/impl/psb_s_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. 
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! 
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +subroutine psb_s_cp_dia_to_coo(a,b,info) + ! Copy DIA matrix a into COO matrix b: allocate b, extract the entries, then b%fix. + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_cp_dia_to_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + ! NOTE(review): nzd is returned by the extraction but never used; nza is what gets stored -- confirm + call psi_s_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + if (info == psb_success_) call b%fix(info) ! keep an extraction error instead of overwriting it + +end subroutine psb_s_cp_dia_to_coo diff --git a/ext/impl/psb_s_cp_ell_from_coo.f90 b/ext/impl/psb_s_cp_ell_from_coo.f90 new file mode 100644 index 00000000..f178a05c --- /dev/null +++ b/ext/impl/psb_s_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! 
not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! psb_s_cp_ell_from_coo: copy COO matrix b into ELL matrix a; any failure is reported as psb_err_alloc_dealloc_. +! NOTE(review): tmp is never fixed explicitly here; presumably b%cp_to_coo yields a by-rows copy -- confirm. +subroutine psb_s_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals (only tmp is referenced below) + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_s_convert_ell_from_coo(a,b,info) + else ! 
b is not by rows: convert from the copy tmp instead + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_s_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_s_cp_ell_from_coo diff --git a/ext/impl/psb_s_cp_ell_from_fmt.f90 b/ext/impl/psb_s_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..bffe3d85 --- /dev/null +++ b/ext/impl/psb_s_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! 
+! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +subroutine psb_s_cp_ell_from_fmt(a,b,info) + ! Copy matrix b into ELL matrix a, choosing the path from the dynamic type of b; b is intent(in). + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_from_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) ! COO source: delegate to cp_from_coo + call a%cp_from_coo(b,info) + + type is (psb_s_ell_sparse_mat) ! same format: copy component by component + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default ! any other format: copy to tmp (COO), then move tmp into a + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_s_cp_ell_from_fmt diff --git a/ext/impl/psb_s_cp_ell_to_coo.f90 b/ext/impl/psb_s_cp_ell_to_coo.f90 new file mode 100644 index 00000000..b8acddfc --- /dev/null +++ b/ext/impl/psb_s_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! 
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! psb_s_cp_ell_to_coo: copy ELL matrix a into COO matrix b; a is intent(in) and left unchanged. +! info is the status returned by the final b%fix. +subroutine psb_s_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_to_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + ! Row i contributes its first a%irn(i) slots of a%ja / a%val; k is the running COO position. 
+ k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_s_cp_ell_to_coo diff --git a/ext/impl/psb_s_cp_ell_to_fmt.f90 b/ext/impl/psb_s_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..58fe3756 --- /dev/null +++ b/ext/impl/psb_s_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! 
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+ + +subroutine psb_s_cp_ell_to_fmt(a,b,info) + ! Copy ELL matrix a into b, choosing the path from the dynamic type of b; a is intent(in). + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_to_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) ! COO target: delegate to cp_to_coo + call a%cp_to_coo(b,info) + + type is (psb_s_ell_sparse_mat) ! same format: copy component by component + if (a%is_dev()) call a%sync() + + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default ! any other format: copy to tmp (COO), then move tmp into b + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cp_ell_to_fmt diff --git a/ext/impl/psb_s_cp_hdia_from_coo.f90 b/ext/impl/psb_s_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..b3d427d9 --- /dev/null +++ b/ext/impl/psb_s_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! 
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cp_hdia_from_coo
+!   Build the HDIA matrix A from the COO matrix B.
+!
+!   a    (inout) destination matrix. a%hacksize (rows per hack) is read
+!                here but not set, so it must be valid on entry.
+!   b    (in)    source COO matrix. If b%is_by_rows() is false, a copy is
+!                made and tmp%fix() is applied before the conversion.
+!   info (out)   psb_success_ on success; every failure, whatever its
+!                origin, is reported as psb_err_alloc_dealloc_.
+!
+subroutine psb_s_cp_hdia_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_s_hdia_mat_mod, psb_protect_name => psb_s_cp_hdia_from_coo
+  implicit none
+
+  class(psb_s_hdia_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in)     :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  !locals
+  type(psb_s_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+  if (b%is_by_rows()) then
+    call inner_cp_hdia_from_coo(a,b,info)
+    if (info /= psb_success_) goto 9999
+  else
+    ! The conversion walks the entries hack by hack, which needs them
+    ! ordered by rows: work on a fixed copy.
+    call b%cp_to_coo(tmp,info)
+    if (info /= psb_success_) goto 9999
+    if (.not.tmp%is_by_rows()) call tmp%fix(info)
+    if (info /= psb_success_) goto 9999
+    call inner_cp_hdia_from_coo(a,tmp,info)
+    if (info /= psb_success_) goto 9999
+    call tmp%free()
+  end if
+  call a%set_host()
+
+  return
+
+9999 continue
+
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+
+  !
+  ! Two-pass conversion of the row-ordered COO matrix TMP into A.
+  !   Pass 1 asks psi_dia_offset_from_coo for the number of diagonals of
+  !          each hack and accumulates them into a%hackOffsets.
+  !   Pass 2 fills a%diaOffsets / a%val hack by hack and accumulates
+  !          a%nzeros through countnz.
+  ! Returns on the first non-success INFO from a reallocation or from
+  ! one of the psi_* helpers.
+  !
+  subroutine inner_cp_hdia_from_coo(a,tmp,info)
+    use psb_base_mod
+    use psi_ext_util_mod
+
+    implicit none
+    class(psb_s_hdia_sparse_mat), intent(inout) :: a
+    class(psb_s_coo_sparse_mat), intent(in)     :: tmp
+    integer(psb_ipk_), intent(out)              :: info
+
+    !locals
+    integer(psb_ipk_) :: ndiag
+    integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:)
+    integer(psb_ipk_) :: k,i,j,nc,nr,nza,nd,hacksize,nhacks,iszd,&
+         & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout
+    logical, parameter :: debug=.false.
+
+    ! INFO is intent(out), hence undefined on entry: it must be set
+    ! before the "if (info == psb_success_)" chain below reads it.
+    info = psb_success_
+
+    nr  = tmp%get_nrows()
+    nc  = tmp%get_ncols()
+    nza = tmp%get_nzeros()
+    ! If it is sorted then we can lessen memory impact
+    a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat
+
+    hacksize = a%hacksize
+    a%nhacks = (nr+hacksize-1)/hacksize
+    nhacks   = a%nhacks
+
+    ! One slot per possible diagonal of an nr x nc matrix
+    ndiag = nr+nc-1
+    if (info == psb_success_) call psb_realloc(nr,irsz,info)
+    if (info == psb_success_) call psb_realloc(ndiag,d,info)
+    if (info == psb_success_) call psb_realloc(ndiag,offset,info)
+    if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info)
+    if (info /= psb_success_) return
+
+    ! irsz(r) = number of entries stored in row r
+    irsz = 0
+    do k=1,nza
+      ir = tmp%ia(k)
+      irsz(ir) = irsz(ir)+1
+    end do
+
+    a%nzeros = 0
+    d    = 0
+    iszd = 0
+    a%hackOffsets(1)=0
+    klast1 = 1
+    !
+    ! First run: count the diagonals of each hack
+    !
+    do k=1, nhacks
+      i  = (k-1)*hacksize + 1
+      ib = min(hacksize,nr-i+1)
+      kfirst = klast1
+      klast1 = kfirst + sum(irsz(i:i+ib-1))
+      ! klast1 points to last element of chunk plus 1
+      if (debug) then
+        write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr
+        write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1)
+        write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1)
+      end if
+      call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & nd, d, offset, info, initd=.false., cleard=.true.)
+      ! Do not let the next iteration overwrite a failure code
+      if (info /= psb_success_) return
+      iszd = iszd + nd
+      a%hackOffsets(k+1)=iszd
+      if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd
+      if (debug) write(*,*) 'offset ', offset(1:nd)
+    end do
+    if (debug) then
+      write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd
+      write(*,*) 'Hackoffsets ',a%hackOffsets(:)
+    end if
+    if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info)
+    if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info)
+    if (info /= psb_success_) return
+    klast1 = 1
+    !
+    ! Second run: copy elements
+    !
+    do k=1, nhacks
+      i  = (k-1)*hacksize + 1
+      ib = min(hacksize,nr-i+1)
+      kfirst = klast1
+      klast1 = kfirst + sum(irsz(i:i+ib-1))
+      ! klast1 points to last element of chunk plus 1
+      hackfirst = a%hackoffsets(k)
+      hacknext  = a%hackoffsets(k+1)
+      call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, &
+           & initd=.false., cleard=.false.)
+      if (info /= psb_success_) return
+      if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext)
+      call psi_s_xtr_dia_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & tmp%val(kfirst:klast1-1), &
+           & d,hacksize,(hacknext-hackfirst),&
+           & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,&
+           & initdata=.true.,rdisp=(i-1))
+      if (info /= psb_success_) return
+
+      call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),&
+           & a%diaOffsets(hackfirst+1:hacknext),nzout)
+      a%nzeros = a%nzeros + nzout
+      ! D was left dirty on purpose (cleard=.false.): reset only the
+      ! slots used by this hack so the next one starts from zeros.
+      call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext))
+
+    end do
+    if (debug) then
+      write(*,*) 'NZEROS: ',a%nzeros, nza
+      write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd)
+      write(*,*) 'values: '
+      j=0
+      do k=1,nhacks
+        write(*,*) 'Hack No. ',k
+        do i=1,hacksize*(iszd/nhacks)
+          j = j + 1
+          write(*,*) j, a%val(j)
+        end do
+      end do
+    end if
+  end subroutine inner_cp_hdia_from_coo
+
+  !
+  ! Sum, over the NCD diagonals listed in OFFSETS, the length of the
+  ! local row range ir1:ir2 computed below from the hack height NRD, the
+  ! row displacement RDISP and the matrix sizes NR x NC.
+  !
+  subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz)
+    implicit none
+    integer(psb_ipk_), intent(in)  :: nr,nc,nrd,ncd,rdisp,offsets(:)
+    integer(psb_ipk_), intent(out) :: nz
+    !
+    integer(psb_ipk_) :: j, ir1, ir2, nrcmdisp, rdisp1
+    nz = 0
+    nrcmdisp = min(nr-rdisp,nc-rdisp)
+    rdisp1   = 1-rdisp
+    do j=1, ncd
+      if (offsets(j)>=0) then
+        ir1 = 1
+        ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_)
+        ir2 = min(nrd, nrcmdisp - offsets(j))
+      else
+        ! ir1 = max(1,1-offsets(j)-rdisp_)
+        ir1 = max(1, rdisp1 - offsets(j))
+        ir2 = min(nrd, nrcmdisp)
+      end if
+      nz = nz + (ir2-ir1+1)
+    end do
+  end subroutine countnz
+
+  !
+  ! Zero the entries of D addressed by the first ND offsets; an offset
+  ! OFF is mapped to position OFF+NR of D.
+  !
+  subroutine cleand(nr,nd,d,offset)
+    implicit none
+    integer(psb_ipk_), intent(in)    :: nr,nd,offset(:)
+    integer(psb_ipk_), intent(inout) :: d(:)
+    integer(psb_ipk_) :: i,id
+
+    do i=1,nd
+      id = offset(i) + nr
+      d(id) = 0
+    end do
+  end subroutine cleand
+
+end subroutine psb_s_cp_hdia_from_coo
diff --git a/ext/impl/psb_s_cp_hdia_to_coo.f90 b/ext/impl/psb_s_cp_hdia_to_coo.f90
new file mode 100644
index 00000000..8e90e236
--- /dev/null
+++ b/ext/impl/psb_s_cp_hdia_to_coo.f90
@@ -0,0 +1,84 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cp_hdia_to_coo
+!   Copy the HDIA matrix A into the COO matrix B.
+!
+!   a    (in)    source matrix; a%sync() is called first when
+!                a%is_dev() is true.
+!   b    (inout) destination, reallocated to nrows x ncols with room for
+!                a%get_nzeros() entries and fixed before returning.
+!   info (out)   psb_success_ on success; the code returned by
+!                psi_s_xtr_coo_from_dia or b%fix on failure; -8 when the
+!                number of extracted entries differs from a%get_nzeros().
+!
+subroutine psb_s_cp_hdia_to_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_s_hdia_mat_mod, psb_protect_name => psb_s_cp_hdia_to_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_s_hdia_sparse_mat), intent(in)   :: a
+  class(psb_s_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_) :: k,i,j,nc,nr,nza,nzd,hacksize,nhacks,&
+       & hackfirst, hacknext
+
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
+  call b%set_nzeros(nza)
+  call b%set_sort_status(psb_unsorted_)
+  nhacks   = a%nhacks
+  hacksize = a%hacksize
+  ! j = number of COO entries written so far
+  j = 0
+  do k=1, nhacks
+    i = (k-1)*hacksize + 1
+    hackfirst = a%hackoffsets(k)
+    hacknext  = a%hackoffsets(k+1)
+    call psi_s_xtr_coo_from_dia(nr,nc,&
+         & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, &
+         & hacksize,(hacknext-hackfirst),&
+         & a%val((hacksize*hackfirst)+1:hacksize*hacknext),&
+         & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1))
+    ! Stop at the first failing hack: the next call would overwrite INFO
+    ! and NZD may not be meaningful.
+    if (info /= psb_success_) return
+    j = j + nzd
+  end do
+  if (nza /= j) then
+    write(*,*) 'Wrong counts in hdia_to_coo',j,nza
+    info = -8
+    return
+  end if
+  call b%set_host()
+  call b%fix(info)
+
+end subroutine psb_s_cp_hdia_to_coo
diff --git a/ext/impl/psb_s_cp_hll_from_coo.f90 b/ext/impl/psb_s_cp_hll_from_coo.f90
new file mode 100644
index 00000000..9d75f994
--- /dev/null
+++ b/ext/impl/psb_s_cp_hll_from_coo.f90
@@ -0,0 +1,74 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cp_hll_from_coo
+!   Build the HLL matrix A from the COO matrix B, using the hack size
+!   returned by psi_get_hksz().
+!
+!   a    (inout) destination matrix.
+!   b    (in)    source COO matrix. If b%is_by_rows() is false the
+!                conversion runs on a copy processed by tmp%fix().
+!   info (out)   psb_success_ on success. A failure of the temporary
+!                copy or of its fix is returned unchanged; a failure of
+!                psi_convert_hll_from_coo is reported as
+!                psb_err_alloc_dealloc_.
+!
+subroutine psb_s_cp_hll_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_from_coo
+  implicit none
+
+  class(psb_s_hll_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  type(psb_s_coo_sparse_mat) :: tmp
+  integer(psb_ipk_) :: debug_level, debug_unit, hksz
+  character(len=20) :: name='hll_from_coo'
+
+  info = psb_success_
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  if (b%is_dev()) call b%sync()
+  hksz = psi_get_hksz()
+  if (b%is_by_rows()) then
+    call psi_convert_hll_from_coo(a,hksz,b,info)
+  else
+    ! This is to guarantee tmp%is_by_rows()
+    call b%cp_to_coo(tmp,info)
+    ! Fix the copy only if the copy itself succeeded; otherwise the
+    ! failure code would be overwritten by tmp%fix.
+    if (info == psb_success_) call tmp%fix(info)
+
+    if (info /= psb_success_) return
+    call psi_convert_hll_from_coo(a,hksz,tmp,info)
+
+    call tmp%free()
+  end if
+  if (info /= 0) goto 9999
+  call a%set_host()
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_s_cp_hll_from_coo
diff --git a/ext/impl/psb_s_cp_hll_from_fmt.f90 b/ext/impl/psb_s_cp_hll_from_fmt.f90
new file mode 100644
index 00000000..8f010902
--- /dev/null
+++ b/ext/impl/psb_s_cp_hll_from_fmt.f90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cp_hll_from_fmt
+!   Fill the HLL matrix A with a copy of B, dispatching on the dynamic
+!   type of B.
+!
+!   a    (inout) destination matrix in HLL storage.
+!   b    (in)    source matrix; in the HLL branch b%sync() is called
+!                first when b%is_dev() is true.
+!   info (out)   psb_success_ on success, otherwise the code returned by
+!                the first conversion/copy step that failed.
+!
+subroutine psb_s_cp_hll_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_from_fmt
+  implicit none
+
+  class(psb_s_hll_sparse_mat), intent(inout) :: a
+  class(psb_s_base_sparse_mat), intent(in)   :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! Intermediate COO copy, needed only by the generic fallback branch
+  type(psb_s_coo_sparse_mat) :: coo_stage
+
+  info = psb_success_
+
+  select type (b)
+  class is (psb_s_coo_sparse_mat)
+    ! COO (or an extension of it): use the dedicated converter
+    call a%cp_from_coo(b,info)
+
+  class is (psb_s_hll_sparse_mat)
+    ! HLL on both sides: duplicate the components one by one
+    if (b%is_dev()) then
+      call b%sync()
+    end if
+
+    a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
+    ! Each copy is attempted only while all previous ones succeeded
+    if (info == 0) call psb_safe_cpy(b%irn,    a%irn,    info)
+    if (info == 0) call psb_safe_cpy(b%hkoffs, a%hkoffs, info)
+    if (info == 0) call psb_safe_cpy(b%idiag,  a%idiag,  info)
+    if (info == 0) call psb_safe_cpy(b%ja,     a%ja,     info)
+    if (info == 0) call psb_safe_cpy(b%val,    a%val,    info)
+    ! The scalar components follow only if all arrays were copied
+    if (info == 0) then
+      a%hksz = b%hksz
+      a%nzt  = b%nzt
+    end if
+    call a%set_host()
+
+  class default
+    ! Any other format: go through a temporary COO matrix
+    call b%cp_to_coo(coo_stage,info)
+    if (info /= psb_success_) return
+    call a%mv_from_coo(coo_stage,info)
+  end select
+end subroutine psb_s_cp_hll_from_fmt
diff --git a/ext/impl/psb_s_cp_hll_to_coo.f90 b/ext/impl/psb_s_cp_hll_to_coo.f90
new file mode 100644
index 00000000..74502ba2
--- /dev/null
+++ b/ext/impl/psb_s_cp_hll_to_coo.f90
@@ -0,0 +1,104 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cp_hll_to_coo
+!   Copy the HLL matrix A into the COO matrix B, one hack of a%hksz rows
+!   at a time.
+!
+!   a    (in)    source matrix; a%sync() is called first when
+!                a%is_dev() is true.
+!   b    (inout) destination, reallocated to nrows x ncols with room for
+!                a%get_nzeros() entries and fixed before returning.
+!   info (out)   psb_success_, or the code returned by b%fix.
+!
+subroutine psb_s_cp_hll_to_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_to_coo
+  implicit none
+
+  class(psb_s_hll_sparse_mat), intent(in)    :: a
+  class(psb_s_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! nrows/ncols/nnz : sizes of A
+  ! row1, nrblk     : first row and number of rows of the current hack
+  ! hk              : index of the current hack
+  ! src, srclen     : start and length of the hack inside a%ja / a%val
+  ! dst, nzblk      : start and length of the hack inside the COO arrays
+  integer(psb_ipk_) :: nnz, nrows, ncols, row1, nrblk, hk, hksz
+  integer(psb_ipk_) :: src, srclen, dst, nzblk
+
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  nrows = a%get_nrows()
+  ncols = a%get_ncols()
+  nnz   = a%get_nzeros()
+
+  call b%allocate(nrows,ncols,nnz)
+  b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
+
+  hksz = a%hksz
+  src  = 1
+  dst  = 1
+  do row1 = 1, nrows, hksz
+    nrblk  = min(hksz, nrows-row1+1)
+    hk     = (row1-1)/hksz + 1
+    srclen = a%hkoffs(hk+1) - a%hkoffs(hk)
+    nzblk  = sum(a%irn(row1:row1+nrblk-1))
+    call inner_copy(row1, nrblk,                      &
+         & b%ia(dst:dst+nzblk-1),                     &
+         & b%ja(dst:dst+nzblk-1),                     &
+         & b%val(dst:dst+nzblk-1),                    &
+         & a%ja(src:src+srclen-1),                    &
+         & a%val(src:src+srclen-1),                   &
+         & a%irn(row1:row1+nrblk-1),                  &
+         & hksz)
+    src = src + srclen
+    dst = dst + nzblk
+  end do
+
+  call b%set_nzeros(nnz)
+  call b%set_host()
+  call b%fix(info)
+
+contains
+
+  !
+  ! Expand one hack into COO triples.
+  !   i, ir         first global row of the hack and its number of rows
+  !   iac,jac,valc  output row indices, column indices and values
+  !   ja, val       the hack's storage, addressed as (ld,*) arrays
+  !   irn           number of entries held by each row of the hack
+  !   ld            leading dimension used to address ja / val
+  !
+  subroutine inner_copy(i,ir,iac,&
+       & jac,valc,ja,val,irn,ld)
+    integer(psb_ipk_) :: i,ir,ld
+    integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*)
+    real(psb_spk_)    :: valc(*), val(ld,*)
+
+    integer(psb_ipk_) :: lrow, lcol, pos
+
+    pos = 0
+    do lrow = 1, ir
+      do lcol = 1, irn(lrow)
+        pos       = pos + 1
+        iac(pos)  = i + lrow - 1
+        jac(pos)  = ja(lrow,lcol)
+        valc(pos) = val(lrow,lcol)
+      end do
+    end do
+
+  end subroutine inner_copy
+
+end subroutine psb_s_cp_hll_to_coo
diff --git a/ext/impl/psb_s_cp_hll_to_fmt.f90 b/ext/impl/psb_s_cp_hll_to_fmt.f90
new file mode 100644
index 00000000..f7adaa54
--- /dev/null
+++ b/ext/impl/psb_s_cp_hll_to_fmt.f90
@@ -0,0 +1,68 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_cp_hll_to_fmt
+!   Copy the HLL matrix A into B, dispatching on the dynamic type of B.
+!
+!   a    (in)    source matrix in HLL storage; in the HLL-to-HLL branch
+!                a%sync() is called first when a%is_dev() is true.
+!   b    (inout) destination matrix, overwritten with a copy of A.
+!   info (out)   psb_success_ on success, otherwise the code returned by
+!                the first conversion/copy step that failed.
+!
+subroutine psb_s_cp_hll_to_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_to_fmt
+  implicit none
+
+  class(psb_s_hll_sparse_mat), intent(in)     :: a
+  class(psb_s_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  !locals
+  type(psb_s_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_s_coo_sparse_mat)
+    call a%cp_to_coo(b,info)
+
+  type is (psb_s_hll_sparse_mat)
+    if (a%is_dev()) call a%sync()
+    b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
+    ! Each copy is attempted only while all previous ones succeeded
+    if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info)
+    if (info == 0) call psb_safe_cpy( a%idiag,  b%idiag  , info)
+    if (info == 0) call psb_safe_cpy( a%irn,    b%irn    , info)
+    if (info == 0) call psb_safe_cpy( a%ja ,    b%ja     , info)
+    if (info == 0) call psb_safe_cpy( a%val,    b%val    , info)
+    if (info == 0) b%hksz = a%hksz
+    ! Carry over nzt as well, as psb_s_cp_hll_from_fmt does for the same
+    ! HLL-to-HLL copy; otherwise B keeps whatever nzt it had before.
+    if (info == 0) b%nzt  = a%nzt
+    call b%set_host()
+
+  class default
+    ! Any other format: go through a temporary COO matrix
+    call a%cp_to_coo(tmp,info)
+    if (info == psb_success_) call b%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_s_cp_hll_to_fmt
diff --git a/ext/impl/psb_s_dia_aclsum.f90 b/ext/impl/psb_s_dia_aclsum.f90
new file mode 100644
index 00000000..718a2424
--- /dev/null
+++ b/ext/impl/psb_s_dia_aclsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_s_dia_aclsum
+!   Column sums of the absolute values of the DIA matrix A:
+!   d(c) = sum over r of |A(r,c)|.
+!
+!   d (out) result; must hold at least a%get_ncols() entries, otherwise
+!           psb_err_input_asize_small_i_ is pushed and the error handler
+!           is invoked. Starts from sone when a%is_unit(), else szero.
+!   a (in)  matrix in DIA storage: a%data(i,j) is the entry in row i and
+!           column i+a%offset(j).
+!
+subroutine psb_s_dia_aclsum(d,a)
+
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_aclsum
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i,j,n, jc, nc, ir1,ir2, nr
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='aclsum'
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = sone
+  else
+    d = szero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    jc = a%offset(j)
+    ! Rows i of diagonal j whose column i+jc lies in 1..n. Bounding by
+    ! n-jc (not nr-jc) keeps d(i+jc) in range when rows > columns and
+    ! does not drop valid entries when rows < columns; for a square
+    ! matrix the range is the same as before.
+    ir1 = max(ione, ione-jc)
+    ir2 = min(nr, n-jc)
+    do i=ir1, ir2
+      d(i+jc) = d(i+jc) + abs(a%data(i,j))
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_aclsum
diff --git a/ext/impl/psb_s_dia_allocate_mnnz.f90 b/ext/impl/psb_s_dia_allocate_mnnz.f90
new file mode 100644
index 00000000..df56c4a6
--- /dev/null
+++ b/ext/impl/psb_s_dia_allocate_mnnz.f90
@@ -0,0 +1,88 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_dia_allocate_mnnz
+!   (Re)allocate the DIA matrix A for an m x n problem and put it in the
+!   build state.
+!
+!   m, n (in)    number of rows / columns; a negative value pushes
+!                psb_err_iarg_neg_ and invokes the error handler.
+!   a    (inout) matrix to set up: a%data becomes m x nz_ and a%offset
+!                gets m+n entries, both zeroed.
+!   nz   (in, optional) expected number of nonzeros. The number of
+!                columns of a%data is nz_ = ceiling(max(nz,1)/m); without
+!                nz, max(7*m,7*n,1) is used in place of max(nz,1).
+!
+subroutine psb_s_dia_allocate_mnnz(m,n,a,nz)
+
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in)              :: m,n
+  class(psb_s_dia_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional    :: nz
+  Integer(Psb_ipk_) :: err_act, info, nz_, mdiv
+  character(len=20) :: name='allocate_mnz'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione/))
+    goto 9999
+  endif
+  ! m == 0 passes the checks above, so it must not be used as a divisor
+  ! as is; for m >= 1 mdiv == m and the result is unchanged.
+  mdiv = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + mdiv - ione)/mdiv
+  else
+    nz_ = ((max(7*m,7*n,ione) + mdiv - ione)/mdiv)
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione/))
+    goto 9999
+  endif
+
+  if (info == psb_success_) call psb_realloc(m,nz_,a%data,info)
+  if (info == psb_success_) call psb_realloc(m+n,a%offset,info)
+  if (info == psb_success_) then
+    a%data   = 0
+    a%offset = 0
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+    call a%set_bld()
+    call a%set_triangle(.false.)
+    call a%set_unit(.false.)
+    call a%set_dupl(psb_dupl_def_)
+  end if
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_allocate_mnnz
diff --git a/ext/impl/psb_s_dia_arwsum.f90 b/ext/impl/psb_s_dia_arwsum.f90
new file mode 100644
index 00000000..5a974bbf
--- /dev/null
+++ b/ext/impl/psb_s_dia_arwsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_s_dia_arwsum
+!   Row sums of the absolute values of the DIA matrix A:
+!   d(r) = sum over c of |A(r,c)|.
+!
+!   d (out) result, indexed by row; must hold at least a%get_nrows()
+!           entries, otherwise psb_err_input_asize_small_i_ is pushed
+!           and the error handler is invoked. Starts from sone when
+!           a%is_unit(), else szero.
+!   a (in)  matrix in DIA storage: a%data(i,j) is the entry in row i and
+!           column i+a%offset(j).
+!
+subroutine psb_s_dia_arwsum(d,a)
+
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_arwsum
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i,j,m,n, jc, nc, ir1,ir2, nr
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='arwsum'
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  ! D is indexed by row below, so its size is checked against the row
+  ! count, not the column count.
+  if (size(d) < m) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = m
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = sone
+  else
+    d = szero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    jc = a%offset(j)
+    ! Rows i of diagonal j whose column i+jc lies in 1..n. Bounding by
+    ! n-jc (not nr-jc) does not drop valid entries when rows < columns;
+    ! for a square matrix the range is the same as before.
+    ir1 = max(ione, ione-jc)
+    ir2 = min(nr, n-jc)
+    do i=ir1, ir2
+      d(i) = d(i) + abs(a%data(i,j))
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_arwsum
diff --git a/ext/impl/psb_s_dia_colsum.f90 b/ext/impl/psb_s_dia_colsum.f90
new file mode 100644
index 00000000..e60eb88f
--- /dev/null
+++ b/ext/impl/psb_s_dia_colsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_s_dia_colsum
+!   Column sums of the DIA matrix A: d(c) = sum over r of A(r,c).
+!
+!   d (out) result; must hold at least a%get_ncols() entries, otherwise
+!           psb_err_input_asize_small_i_ is pushed and the error handler
+!           is invoked. Starts from sone when a%is_unit(), else szero.
+!   a (in)  matrix in DIA storage: a%data(i,j) is the entry in row i and
+!           column i+a%offset(j).
+!
+subroutine psb_s_dia_colsum(d,a)
+
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_colsum
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i,j,n, jc, nc, ir1,ir2, nr
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='colsum'
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = sone
+  else
+    d = szero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    jc = a%offset(j)
+    ! Rows i of diagonal j whose column i+jc lies in 1..n. Bounding by
+    ! n-jc (not nr-jc) keeps d(i+jc) in range when rows > columns and
+    ! does not drop valid entries when rows < columns; for a square
+    ! matrix the range is the same as before.
+    ir1 = max(ione, ione-jc)
+    ir2 = min(nr, n-jc)
+    do i=ir1, ir2
+      d(i+jc) = d(i+jc) + a%data(i,j)
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_colsum
diff --git a/ext/impl/psb_s_dia_csgetptn.f90 b/ext/impl/psb_s_dia_csgetptn.f90
new file mode 100644
index 00000000..f946eb73
--- /dev/null
+++ b/ext/impl/psb_s_dia_csgetptn.f90
@@ -0,0 +1,188 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csgetptn + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_s_dia_csgetptn diff --git a/ext/impl/psb_s_dia_csgetrow.f90 b/ext/impl/psb_s_dia_csgetrow.f90 new file mode 100644 index 00000000..b79e2470 --- /dev/null +++ b/ext/impl/psb_s_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csgetrow + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_s_dia_csgetrow diff --git a/ext/impl/psb_s_dia_csmm.f90 b/ext/impl/psb_s_dia_csmm.f90 new file mode 100644 index 00000000..9f586dbb --- /dev/null +++ b/ext/impl/psb_s_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csmm + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dia_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_s_dia_csmm_inner + +end subroutine psb_s_dia_csmm diff --git a/ext/impl/psb_s_dia_csmv.f90 b/ext/impl/psb_s_dia_csmv.f90 new file mode 100644 index 00000000..1a23932e --- /dev/null +++ b/ext/impl/psb_s_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csmv + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dia_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_s_dia_csmv_inner + +end subroutine psb_s_dia_csmv diff --git a/ext/impl/psb_s_dia_get_diag.f90 b/ext/impl/psb_s_dia_get_diag.f90 new file mode 100644 index 00000000..5909c72a --- /dev/null +++ b/ext/impl/psb_s_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_s_dia_get_diag(a,d,info)
+  !
+  ! Copy the main diagonal of a DIA matrix into d.
+  !
+  !   a    (in)  the DIA matrix; synchronised first when a%is_dev() is true.
+  !   d    (out) receives the diagonal in d(1:mnm), where
+  !              mnm = min(a%get_nrows(),a%get_ncols()); any remaining
+  !              entries of d are set to szero.
+  !   info (out) psb_success_, or psb_err_input_asize_invalid_i_ when d
+  !              has fewer than mnm entries.
+  !
+  ! For a unit-diagonal matrix (a%is_unit()) the result is sone.
+  ! Otherwise the stored diagonal whose offset is 0 is copied.
+  !
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_get_diag
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+  integer(psb_ipk_), intent(out)          :: info
+
+  integer(psb_ipk_) :: err_act, mnm, i
+  character(len=20) :: name='get_diag'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  mnm = min(a%get_nrows(),a%get_ncols())
+  if (size(d) < mnm) then
+    info=psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+    goto 9999
+  end if
+
+
+  if (a%is_unit()) then
+    d(1:mnm) = sone
+  else
+    ! d is intent(out): if no stored diagonal has offset 0 the loop below
+    ! assigns nothing, so start from an all-zero diagonal instead of
+    ! returning undefined values.
+    d(1:mnm) = szero
+    do i=1, size(a%offset)
+      if (a%offset(i) == 0) then
+        d(1:mnm) = a%data(1:mnm,i)
+        exit
+      end if
+    end do
+  end if
+  ! Entries of d beyond the diagonal length are cleared.
+  do i=mnm+1,size(d)
+    d(i) = szero
+  end do
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_get_diag
diff --git a/ext/impl/psb_s_dia_maxval.f90 b/ext/impl/psb_s_dia_maxval.f90
new file mode 100644
index 00000000..5f672644
--- /dev/null
+++ b/ext/impl/psb_s_dia_maxval.f90
@@ -0,0 +1,54 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! 
modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+function psb_s_dia_maxval(a) result(res)
+  !
+  ! Largest absolute value held in a DIA matrix.
+  !
+  !   a   (in) the DIA matrix; synchronised first when a%is_dev() is true.
+  !   res      max over abs(a%data), bounded below by sone when
+  !            a%is_unit() is true and by szero otherwise.
+  !
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_maxval
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(in) :: a
+  real(psb_spk_)                          :: res
+
+  if (a%is_dev()) call a%sync()
+
+  ! Lower bound: the implicit unit diagonal counts as an entry of size one.
+  res = szero
+  if (a%is_unit()) res = sone
+
+  res = max(res, maxval(abs(a%data)))
+
+end function psb_s_dia_maxval
diff --git a/ext/impl/psb_s_dia_mold.f90 b/ext/impl/psb_s_dia_mold.f90
new file mode 100644
index 00000000..a65379a4
--- /dev/null
+++ b/ext/impl/psb_s_dia_mold.f90
@@ -0,0 +1,61 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+subroutine psb_s_dia_mold(a,b,info)
+  !
+  ! Give b the dynamic type psb_s_dia_sparse_mat.
+  !
+  !   a    (in)    used only for dispatch; its contents are not read here.
+  !   b    (inout) freed and deallocated if currently allocated, then
+  !                allocated afresh as a psb_s_dia_sparse_mat.
+  !   info (out)   0 on success, psb_err_alloc_dealloc_ when the
+  !                deallocation or the allocation reports a failure.
+  !
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_mold
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(in)                  :: a
+  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: routine='dia_mold'
+
+  call psb_get_erraction(err_act)
+  info = 0
+
+  ! Release whatever b currently holds before re-typing it.
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_s_dia_sparse_mat :: b, stat=info)
+
+  ! Normal exit: nothing went wrong.
+  if (info == psb_success_) return
+
+  ! Any failure above is reported as a generic alloc/dealloc error.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, routine)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_mold
diff --git a/ext/impl/psb_s_dia_print.f90 b/ext/impl/psb_s_dia_print.f90
new file mode 100644
index 00000000..a0de1ba8
--- /dev/null
+++ b/ext/impl/psb_s_dia_print.f90
@@ -0,0 +1,148 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_dia_print(iout,a,iv,head,ivr,ivc)
+  !
+  ! Write a DIA matrix to unit iout as coordinate triples preceded by a
+  ! MatrixMarket-style header.
+  !
+  !   iout (in)  output unit.
+  !   a    (in)  the DIA matrix; synchronised first when a%is_dev() is true.
+  !   iv   (in, optional)  index map applied to both coordinates; when
+  !                        present it takes precedence over ivr/ivc.
+  !   head (optional)      extra comment line written after the banner.
+  !   ivr  (in, optional)  index map applied to the first coordinate.
+  !   ivc  (in, optional)  index map applied to the second coordinate.
+  !
+  ! Entries are emitted diagonal by diagonal: for stored diagonal j with
+  ! offset a%offset(j), entry a%data(i,j) is written with coordinates
+  ! (i, i+a%offset(j)), each passed through the applicable map.
+  !
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_print
+  implicit none
+
+  integer(psb_ipk_), intent(in)               :: iout
+  class(psb_s_dia_sparse_mat), intent(in)     :: a
+  integer(psb_lpk_), intent(in), optional     :: iv(:)
+  character(len=*), optional                  :: head
+  integer(psb_lpk_), intent(in), optional     :: ivr(:), ivc(:)
+
+  character(len=80) :: frmt
+  integer(psb_ipk_) :: i, idiag, nrow, ncol, nnzr, off, first, last
+
+  ! Banner lines.
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate real general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% COO'
+
+  if (a%is_dev()) call a%sync()
+
+  ! Size line, plus the record format matching these sizes and maps.
+  nrow = a%get_nrows()
+  ncol = a%get_ncols()
+  nnzr = a%get_nzeros()
+  frmt = psb_s_get_print_frmt(nrow,ncol,nnzr,iv,ivr,ivc)
+  write(iout,*) nrow, ncol, nnzr
+
+  do idiag = 1, size(a%data,2)
+    off   = a%offset(idiag)
+    ! Keep both i and i+off inside 1..nrow.
+    first = max(1_psb_ipk_, 1_psb_ipk_ - off)
+    last  = min(nrow, nrow - off)
+    do i = first, last
+      if (present(iv)) then
+        write(iout,frmt) iv(i),iv(i+off),a%data(i,idiag)
+      else if (present(ivr)) then
+        if (present(ivc)) then
+          write(iout,frmt) ivr(i),ivc(i+off),a%data(i,idiag)
+        else
+          write(iout,frmt) ivr(i),(i+off),a%data(i,idiag)
+        end if
+      else if (present(ivc)) then
+        write(iout,frmt) (i),ivc(i+off),a%data(i,idiag)
+      else
+        write(iout,frmt) (i),(i+off),a%data(i,idiag)
+      end if
+    end do
+  end do
+
+end subroutine psb_s_dia_print
diff --git a/ext/impl/psb_s_dia_reallocate_nz.f90 b/ext/impl/psb_s_dia_reallocate_nz.f90
new file mode 100644
index 00000000..d37d9e5f
--- /dev/null
+++ b/ext/impl/psb_s_dia_reallocate_nz.f90
@@ -0,0 +1,56 
@@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_s_dia_reallocate_nz: request storage for NZ coefficients.
+!
+! For the DIA format this routine performs no reallocation: it only
+! saves and restores the error action. Neither nz nor a is touched.
+!
+!   nz   requested number of nonzeros (unused)
+!   a    the matrix (left unchanged)
+!
+subroutine psb_s_dia_reallocate_nz(nz,a)
+
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_s_dia_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: m, nzrm, ld
+  Integer(Psb_ipk_) :: err_act, info
+  character(len=20) :: name='s_dia_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  !
+  ! What should this really do???
+  ! Ans: NOTHING.
+  !
+
+  call psb_erractionrestore(err_act)
+  return
+
+  ! Error exit; no statement above branches here, kept for uniformity
+  ! with the other implementation files.
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_reallocate_nz
diff --git a/ext/impl/psb_s_dia_reinit.f90 b/ext/impl/psb_s_dia_reinit.f90
new file mode 100644
index 00000000..dd109783
--- /dev/null
+++ b/ext/impl/psb_s_dia_reinit.f90
@@ -0,0 +1,78 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!
POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_dia_reinit: put an assembled DIA matrix back into update state.
+!
+!   a      the matrix
+!   clear  optional, default .true.: zero the stored coefficients
+!
+! If the matrix is already in build or update state nothing is changed.
+! If it is assembled, the coefficients are optionally zeroed and the
+! matrix is marked as being in update state with valid host data.
+! Any other state is reported as psb_err_invalid_mat_state_.
+!
+subroutine psb_s_dia_reinit(a,clear)
+
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_reinit
+  implicit none
+
+  class(psb_s_dia_sparse_mat), intent(inout) :: a
+  logical, intent(in), optional :: clear
+
+  Integer(Psb_ipk_) :: err_act, info
+  character(len=20) :: name='reinit'
+  logical :: clear_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(clear)) then
+    clear_ = clear
+  else
+    clear_ = .true.
+  end if
+
+  if (a%is_bld() .or. a%is_upd()) then
+    ! Nothing to do. Fall through to the common exit instead of
+    ! returning here, so that every psb_erractionsave is paired with
+    ! a psb_erractionrestore.
+    continue
+  else if (a%is_asb()) then
+    if (a%is_dev()) call a%sync()
+    if (clear_) a%data(:,:) = szero
+    call a%set_upd()
+    call a%set_host()
+
+  else
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_reinit
diff --git a/ext/impl/psb_s_dia_rowsum.f90 b/ext/impl/psb_s_dia_rowsum.f90
new file mode 100644
index 00000000..3f32a2b2
--- /dev/null
+++ b/ext/impl/psb_s_dia_rowsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_s_dia_rowsum: d(i) = sum of the stored coefficients of row i.
+!
+!   d(:)  output, one entry per matrix row; must hold at least
+!         a%get_nrows() entries
+!   a     the matrix
+!
+! For a unit-diagonal matrix every sum starts from sone, otherwise
+! from szero.
+!
+subroutine psb_s_dia_rowsum(d,a)
+
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_rowsum
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: i, j, m, jc, nc, ir1, ir2, nr
+  integer(psb_ipk_) :: err_act, info
+  character(len=20) :: name='rowsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  ! d is indexed by row below, so it must be checked against the
+  ! number of rows (the previous check used the number of columns).
+  m = a%get_nrows()
+  if (size(d) < m) then
+    info = psb_err_input_asize_small_i_
+    call psb_errpush(info,name,i_err=(/ione,size(d,kind=psb_ipk_),m/))
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = sone
+  else
+    d = szero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    ! Row range of stored diagonal j, derived from its offset.
+    jc = a%offset(j)
+    if (jc > 0) then
+      ir1 = 1
+      ir2 = nr - jc
+    else
+      ir1 = 1 - jc
+      ir2 = nr
+    end if
+    do i=ir1, ir2
+      d(i) = d(i) + a%data(i,j)
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_rowsum
diff --git a/ext/impl/psb_s_dia_scal.f90 b/ext/impl/psb_s_dia_scal.f90
new file mode 100644
index 00000000..7ccf881a
--- /dev/null
+++ b/ext/impl/psb_s_dia_scal.f90
@@ -0,0 +1,108 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_s_dia_scal: scale a DIA matrix by a diagonal matrix given as d(:).
+!
+!   d(:)  scaling factors
+!   a     the matrix, modified in place
+!   info  return code
+!   side  optional, default 'L': 'L' multiplies row i by d(i);
+!         any other value multiplies column k by d(k)
+!
+! A unit-diagonal matrix is first converted with make_nonunit().
+!
+subroutine psb_s_dia_scal(d,a,info,side)
+
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_scal
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in)      :: d(:)
+  integer(psb_ipk_), intent(out)  :: info
+  character, intent(in), optional :: side
+
+  Integer(Psb_ipk_) :: err_act, i, j, m, n, nc, jc, nr, ir1, ir2
+  character(len=20) :: name='scal'
+  character :: side_
+  logical   :: left
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (a%is_dev()) call a%sync()
+
+  if (a%is_unit()) then
+    call a%make_nonunit()
+  end if
+
+  side_ = 'L'
+  if (present(side)) then
+    side_ = psb_toupper(side)
+  end if
+
+  left = (side_ == 'L')
+
+  if (left) then
+    m = a%get_nrows()
+    if (size(d) < m) then
+      info=psb_err_input_asize_invalid_i_
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    ! Row scaling: every stored diagonal contributes to row i.
+    do i=1, m
+      a%data(i,:) = a%data(i,:) * d(i)
+    enddo
+  else
+    n = a%get_ncols()
+    if (size(d) < n) then
+      info=psb_err_input_asize_invalid_i_
+      ! Same fully defined argument list as the row-scaling branch;
+      ! the former 5-element scratch array was only partly assigned.
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    ! Column scaling: entry (i,j) of the storage sits in column i+offset(j).
+    nr=size(a%data,1)
+    nc=size(a%data,2)
+    do j=1,nc
+      jc = a%offset(j)
+      if (jc > 0) then
+        ir1 = 1
+        ir2 = nr - jc
+      else
+        ir1 = 1 - jc
+        ir2 = nr
+      end if
+      do i=ir1, ir2
+        a%data(i,j) = a%data(i,j) * d(i+jc)
+      enddo
+    enddo
+
+  end if
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dia_scal
diff --git a/ext/impl/psb_s_dia_scals.f90 b/ext/impl/psb_s_dia_scals.f90
new file mode 100644
index 00000000..da1bc94e
--- /dev/null
+++ b/ext/impl/psb_s_dia_scals.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_dia_scals: multiply every stored coefficient of a DIA matrix
+! by the scalar d.
+!
+!   d     scaling factor
+!   a     the matrix, modified in place
+!   info  return code, always psb_success_
+!
+subroutine psb_s_dia_scals(d,a,info)
+
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_scals
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in)     :: d
+  integer(psb_ipk_), intent(out) :: info
+
+  Integer(Psb_ipk_) :: err_act
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  ! Work on the host copy, with an explicit (non-unit) diagonal.
+  if (a%is_dev())  call a%sync()
+  if (a%is_unit()) call a%make_nonunit()
+
+  a%data(:,:) = a%data(:,:) * d
+
+  ! The host copy is now the valid one.
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+
+end subroutine psb_s_dia_scals
diff --git a/ext/impl/psb_s_dns_mat_impl.f90 b/ext/impl/psb_s_dns_mat_impl.f90
new file mode 100644
index 00000000..f6f458c8
--- /dev/null
+++ b/ext/impl/psb_s_dns_mat_impl.f90
@@ -0,0 +1,724 @@
+
+!> Function csmv:
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Product by a dense rank 1 array.
+!!
+!! Compute
+!! Y = alpha*op(A)*X + beta*Y
+!!
+!!
\param alpha Scaling factor for Ax
+!! \param A the input sparse matrix
+!! \param x(:) the input dense X
+!! \param beta Scaling factor for y
+!! \param y(:) the input/output dense Y
+!! \param info return code
+!! \param trans [N] Whether to use A (N), its transpose (T)
+!! or its conjugate transpose (C)
+!!
+!
+subroutine psb_s_dns_csmv(alpha,a,x,beta,y,info,trans)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csmv
+  implicit none
+  class(psb_s_dns_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in)          :: alpha, beta, x(:)
+  real(psb_spk_), intent(inout)       :: y(:)
+  integer(psb_ipk_), intent(out)      :: info
+  character, optional, intent(in)     :: trans
+  !
+  character :: op
+  integer(psb_ipk_) :: err_act, lda
+  character(len=20) :: name='s_dns_csmv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! Requested operation, upper-cased; defaults to no transpose.
+  op = 'N'
+  if (present(trans)) op = psb_toupper(trans)
+
+  ! The product is only defined on an assembled matrix.
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  if (a%is_dev()) call a%sync()
+
+  ! The coefficients are a plain dense array: delegate to BLAS SGEMV,
+  ! passing the dimensions of A itself together with the operation flag.
+  lda = size(a%val,1)
+  call sgemv(op,a%get_nrows(),a%get_ncols(),alpha,&
+       & a%val,lda,x,1,beta,y,1)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dns_csmv
+
+
+!> Function csmm:
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Product by a dense rank 2 array.
+!!
+!! Compute
+!! Y = alpha*op(A)*X + beta*Y
+!!
+!! \param alpha Scaling factor for Ax
+!! \param A the input sparse matrix
+!! \param x(:,:) the input dense X
+!! \param beta Scaling factor for y
+!! \param y(:,:) the input/output dense Y
+!! \param info return code
+!! \param trans [N] Whether to use A (N), its transpose (T)
+!! or its conjugate transpose (C)
+!!
+!
+subroutine psb_s_dns_csmm(alpha,a,x,beta,y,info,trans)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csmm
+  implicit none
+  class(psb_s_dns_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(in)          :: alpha, beta, x(:,:)
+  real(psb_spk_), intent(inout)       :: y(:,:)
+  integer(psb_ipk_), intent(out)      :: info
+  character, optional, intent(in)     :: trans
+  !
+  character :: trans_
+  integer(psb_ipk_) :: err_act, m, n, k, lda, ldx, ldy
+  character(len=20) :: name='s_dns_csmm'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! The flag is handed to BLAS exactly as received; only the local
+  ! test below is case-insensitive.
+  trans_ = 'N'
+  if (present(trans)) trans_ = trans
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  if (a%is_dev()) call a%sync()
+
+  ! m x k is the shape of op(A); n is the number of right-hand sides
+  ! common to x and y.
+  if (psb_toupper(trans_)=='N') then
+    m = a%get_nrows()
+    k = a%get_ncols()
+  else
+    k = a%get_nrows()
+    m = a%get_ncols()
+  end if
+  n   = min(size(y,2),size(x,2))
+  lda = size(a%val,1)
+  ldx = size(x,1)
+  ldy = size(y,1)
+  call sgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dns_csmm
+
+
+
+!
+!
+!> Function csnmi:
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Operator infinity norm
+!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2))
+!!
+!
+function psb_s_dns_csnmi(a) result(res)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csnmi
+  implicit none
+  class(psb_s_dns_sparse_mat), intent(in) :: a
+  real(psb_spk_) :: res
+  !
+  integer(psb_ipk_) :: irow
+
+  res = szero
+  if (a%is_dev()) call a%sync()
+
+  ! Largest absolute row sum; stays at szero for a matrix with no rows.
+  do irow = 1, a%get_nrows()
+    res = max(res,sum(abs(a%val(irow,:))))
+  end do
+
+end function psb_s_dns_csnmi
+
+
+!
+!> Function get_diag:
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Extract the diagonal of A.
+!!
+!! D(i) = A(i,i), i=1:min(nrows,ncols)
+!!
+!!
\param d(:) The output diagonal
+!! \param info return code.
+!
+subroutine psb_s_dns_get_diag(a,d,info)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_get_diag
+  implicit none
+  class(psb_s_dns_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)     :: d(:)
+  integer(psb_ipk_), intent(out)  :: info
+  !
+  integer(psb_ipk_) :: err_act, ndiag, i
+  character(len=20) :: name='get_diag'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  ! The diagonal has min(nrows,ncols) entries; d must hold them all.
+  ndiag = min(a%get_nrows(),a%get_ncols())
+  if (size(d) < ndiag) then
+    info=psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/))
+    goto 9999
+  end if
+
+  ! Copy the diagonal, then zero whatever is left of d.
+  d(1:ndiag)  = (/ (a%val(i,i), i=1,ndiag) /)
+  d(ndiag+1:) = szero
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dns_get_diag
+
+
+!
+!
+!> Function reallocate_nz
+!! \memberof psb_s_dns_sparse_mat
+!! \brief One-parameter version of (re)allocate
+!!
+!! \param nz number of nonzeros to allocate for
+!! i.e. makes sure that the internal storage
+!! allows for NZ coefficients and their indices.
+!
+subroutine psb_s_dns_reallocate_nz(nz,a)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_s_dns_sparse_mat), intent(inout) :: a
+  !
+  integer(psb_ipk_) :: err_act
+
+  call psb_erractionsave(err_act)
+
+  !
+  ! Storage is not resized here and nz is not used; the only action
+  ! is to synchronise the host copy when the data lives on a device.
+  !
+  if (a%is_dev()) call a%sync()
+
+  call psb_erractionrestore(err_act)
+
+end subroutine psb_s_dns_reallocate_nz
+
+!
+!> Function mold:
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Allocate a class(psb_s_dns_sparse_mat) with the
+!!
same dynamic type as the input.
+!! This is equivalent to allocate( mold= ) and is provided
+!! for those compilers not yet supporting mold.
+!! \param b The output variable
+!! \param info return code
+!
+subroutine psb_s_dns_mold(a,b,info)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_mold
+  implicit none
+  class(psb_s_dns_sparse_mat), intent(in) :: a
+  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  !
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='dns_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  ! Any failure of the allocation is reported as psb_err_alloc_dealloc_.
+  allocate(psb_s_dns_sparse_mat :: b, stat=info)
+
+  if (info /= 0) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dns_mold
+
+!
+!
+!> Function allocate_mnnz
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Three-parameters version of allocate
+!!
+!! Puts the matrix in build state with an m x n coefficient array
+!! set to zero. A negative m or n, or a failure to allocate the
+!! coefficient array, is reported through the error stack.
+!!
+!! \param m number of rows
+!! \param n number of cols
+!! \param nz [ignored] the dense storage always holds m*n entries
+!
+subroutine psb_s_dns_allocate_mnnz(m,n,a,nz)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_s_dns_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  !
+  integer(psb_ipk_) :: err_act, info
+  character(len=20) :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2_psb_ipk_/))
+    goto 9999
+  endif
+
+
+  ! Basic stuff common to all formats
+  call a%set_nrows(m)
+  call a%set_ncols(n)
+  call a%set_triangle(.false.)
+  call a%set_unit(.false.)
+  call a%set_dupl(psb_dupl_def_)
+  call a%set_bld()
+  call a%set_host()
+
+  ! We ignore NZ in this case.
+
+  call psb_realloc(m,n,a%val,info)
+  if (info /= psb_success_) then
+    ! info is local, so an allocation failure must go through the
+    ! error stack or the caller never learns about it.
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  a%val = szero
+  a%nnz = 0
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_dns_allocate_mnnz
+
+
+!
+!
+!
+!> Function csgetrow:
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Get a (subset of) row(s)
+!!
+!! getrow is the basic method by which the other (getblk, clip) can
+!! be implemented.
+!!
+!! Returns the set
+!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ)
+!! each identifying the position of a nonzero in A
+!! i.e.
+!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ))
+!! with IMIN<=IA(:)<=IMAX
+!! with JMIN<=JA(:)<=JMAX
+!! IA,JA are reallocated as necessary.
+!!
+!! \param imin the minimum row index we are interested in
+!! \param imax the maximum row index we are interested in
+!! \param nz the number of output coefficients
+!! \param ia(:) the output row indices
+!! \param ja(:) the output col indices
+!! \param val(:) the output coefficients
+!! \param info return code
+!! \param jmin [1] minimum col index
+!! \param jmax [a\%get_ncols()] maximum col index
+!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
+!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
+!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
+!! ( iren cannot be specified with rscale/cscale)
+!! \param append [false] append to ia,ja
+!! \param nzin [none] if append, then first new entry should go in entry nzin+1
+!!
+!
+subroutine psb_s_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csgetrow + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_s_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_s_dns_trim(a) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_trim + implicit none + class(psb_s_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Copy and convert from psb_s_coo_sparse_mat +!! 
Invoked from the target object.
+!! \param b The input variable
+!! \param info return code
+!
+
+subroutine psb_s_cp_dns_from_coo(a,b,info)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_cp_dns_from_coo
+  implicit none
+
+  class(psb_s_dns_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+  !
+  type(psb_s_coo_sparse_mat) :: tmp
+  integer(psb_ipk_) :: nza, nr, i, err_act, nc
+  character(len=20) :: name='cp_dns_from_coo'
+
+  info = psb_success_
+  ! err_act is handed to psb_error_handler on the error exit, so it
+  ! has to be saved on entry.
+  call psb_erractionsave(err_act)
+
+  if (.not.b%is_by_rows()) then
+    ! This is to have fix_coo called behind the scenes
+    call b%cp_to_coo(tmp,info)
+    if (info == psb_success_) call tmp%fix(info)
+    if (info /= psb_success_) then
+      ! As before, hand the callee's code straight back to the caller.
+      call psb_erractionrestore(err_act)
+      return
+    end if
+
+    nr  = tmp%get_nrows()
+    nc  = tmp%get_ncols()
+    nza = tmp%get_nzeros()
+    ! Copy the state held by the base component, then build the
+    ! dense coefficient array.
+    a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat
+
+    call psb_realloc(nr,nc,a%val,info)
+    if (info /= psb_success_) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    a%val = szero
+    do i=1, nza
+      a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i)
+    end do
+    a%nnz = nza
+    call tmp%free()
+  else
+    if (b%is_dev()) call b%sync()
+    nr  = b%get_nrows()
+    nc  = b%get_ncols()
+    nza = b%get_nzeros()
+    ! Input already ordered by rows: read it directly, no temporary copy.
+    a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
+
+    call psb_realloc(nr,nc,a%val,info)
+    if (info /= psb_success_) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    a%val = szero
+    do i=1, nza
+      a%val(b%ia(i),b%ja(i)) = b%val(i)
+    end do
+    a%nnz = nza
+  end if
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_cp_dns_from_coo
+
+
+
+!
+!> Function cp_to_coo:
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Copy and convert to psb_s_coo_sparse_mat
+!! Invoked from the source object.
+!! \param b The output variable
+!! \param info return code
+!
+
+subroutine psb_s_cp_dns_to_coo(a,b,info)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_cp_dns_to_coo
+  implicit none
+
+  class(psb_s_dns_sparse_mat), intent(in)    :: a
+  class(psb_s_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  Integer(Psb_Ipk_) :: nza, nr, nc, i, j, k
+
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  ! Entries are selected below by value (val /= szero), so size the
+  ! output from the same criterion rather than from the cached count;
+  ! the two can differ, e.g. when explicit zeros were stored.
+  nza = count(a%val(1:nr,1:nc) /= szero)
+
+  call b%allocate(nr,nc,nza)
+  b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
+
+  ! Row-major sweep: the output comes out sorted by rows.
+  k = 0
+  do i=1,nr
+    do j=1,nc
+      if (a%val(i,j) /= szero) then
+        k = k + 1
+        b%ia(k)  = i
+        b%ja(k)  = j
+        b%val(k) = a%val(i,j)
+      end if
+    end do
+  end do
+
+  ! Record the number of entries actually written.
+  call b%set_nzeros(k)
+  call b%set_sort_status(psb_row_major_)
+  call b%set_asb()
+  call b%set_host()
+
+end subroutine psb_s_cp_dns_to_coo
+
+
+
+!
+!> Function mv_to_coo:
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Convert to psb_s_coo_sparse_mat, freeing the source.
+!! Invoked from the source object.
+!! \param b The output variable
+!! \param info return code
+!
+subroutine psb_s_mv_dns_to_coo(a,b,info)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_mv_dns_to_coo
+  implicit none
+
+  class(psb_s_dns_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  info = psb_success_
+
+  ! A move is a copy followed by releasing the source.
+  call a%cp_to_coo(b,info)
+  call a%free()
+  return
+
+end subroutine psb_s_mv_dns_to_coo
+
+
+!
+!> Function mv_from_coo:
+!! \memberof psb_s_dns_sparse_mat
+!! \brief Convert from psb_s_coo_sparse_mat, freeing the source.
+!! Invoked from the target object.
+!! \param b The input variable
+!! \param info return code
+!
+!
+subroutine psb_s_mv_dns_from_coo(a,b,info)
+  use psb_base_mod
+  use psb_s_dns_mat_mod, psb_protect_name => psb_s_mv_dns_from_coo
+  implicit none
+
+  class(psb_s_dns_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  ! A move is a copy into a followed by releasing b; b is freed
+  ! whatever code the copy returned.
+  call a%cp_from_coo(b,info)
+  call b%free()
+
+end subroutine psb_s_mv_dns_from_coo
+
diff --git a/ext/impl/psb_s_ell_aclsum.f90 b/ext/impl/psb_s_ell_aclsum.f90
new file mode 100644
index 00000000..2eea0cc9
--- /dev/null
+++ b/ext/impl/psb_s_ell_aclsum.f90
@@ -0,0 +1,82 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_s_ell_aclsum: d(k) = sum of the absolute values of the stored
+! coefficients of column k.
+!
+!   d(:)  output, one entry per matrix column; must hold at least
+!         a%get_ncols() entries
+!   a     the matrix
+!
+! For a unit-diagonal matrix every sum starts from sone, otherwise
+! from szero.
+!
+subroutine psb_s_ell_aclsum(d,a)
+
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_aclsum
+  implicit none
+  class(psb_s_ell_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_) :: irow, ipos, icol, nrows, ncols
+  Integer(Psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='aclsum'
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  nrows = a%get_nrows()
+  ncols = a%get_ncols()
+  if (size(d) < ncols) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = ncols
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = sone
+  else
+    d = szero
+  end if
+
+  ! Row irow holds irn(irow) entries; each one is accumulated into
+  ! the slot of its column index.
+  do irow = 1, nrows
+    do ipos = 1, a%irn(irow)
+      icol    = a%ja(irow,ipos)
+      d(icol) = d(icol) + abs(a%val(irow,ipos))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_ell_aclsum
diff --git a/ext/impl/psb_s_ell_allocate_mnnz.f90 b/ext/impl/psb_s_ell_allocate_mnnz.f90
new file mode 100644
index 00000000..fd9f1b49
--- /dev/null
+++ b/ext/impl/psb_s_ell_allocate_mnnz.f90
@@ -0,0 +1,91 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
!
! psb_s_ell_allocate_mnnz: size the ELL storage of A for an M x N matrix.
!
!   m, n  (in)    row and column counts. A negative value pushes
!                 psb_err_iarg_neg_ (argument index 1 or 2 respectively).
!   a     (inout) matrix whose irn, idiag, ja and val arrays are
!                 reallocated. On success irn and idiag are zeroed, nzt is
!                 set to -1, the dimensions are recorded and the matrix is
!                 put in the build state, flagged non-triangular and
!                 non-unit, with psb_dupl_def_ duplicate handling.
!   nz    (in, optional) expected total number of nonzeros. The row width
!                 of ja/val is ceiling(max(nz,1)/max(m,1)); when nz is
!                 absent, max(7*m,7*n,1) is used in its place.
!
! Any failure ends in psb_error_handler.
!
subroutine psb_s_ell_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in)              :: m,n
  class(psb_s_ell_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional    :: nz

  integer(psb_ipk_)  :: err_act, info, nz_, mrows
  character(len=20)  :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/ione/))
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/2*ione/))
    goto 9999
  endif

  ! Ceiling division by the row count. M == 0 passes the checks above, so
  ! the divisor is clamped to 1 to avoid an integer division by zero; for
  ! M >= 1 the result is exactly what it was before.
  mrows = max(m,ione)
  if (present(nz)) then
    nz_ = (max(nz,ione) + mrows - 1)/mrows
  else
    nz_ = (max(7*m,7*n,ione) + mrows - 1)/mrows
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/3*ione/))
    goto 9999
  endif

  ! Each reallocation is attempted only if the previous one succeeded.
  if (info == psb_success_) call psb_realloc(m,a%irn,info)
  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
  if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info)
  if (info == psb_success_) call psb_realloc(m,nz_,a%val,info)
  if (info == psb_success_) then
    a%irn   = 0
    a%idiag = 0
    a%nzt   = -1
    call a%set_nrows(m)
    call a%set_ncols(n)
    call a%set_bld()
    call a%set_triangle(.false.)
    call a%set_unit(.false.)
    call a%set_dupl(psb_dupl_def_)
  end if
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_s_ell_allocate_mnnz
!
! psb_s_ell_arwsum: row sums of absolute values for an ELL matrix.
!
!   d  (out) for each row i in 1..a%get_nrows(), d(i) is the sum of
!            |a%val(i,j)| over j = 1..a%irn(i), started from sone when
!            a%is_unit() is true and from szero otherwise. Entries of d
!            past the row count are not assigned. If size(d) is smaller
!            than the row count, psb_err_input_asize_small_i_ is pushed
!            and the error handler is invoked.
!   a  (in)  the matrix; a%sync() is called first when a%is_dev() is true.
!
subroutine psb_s_ell_arwsum(d,a)

  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_arwsum
  implicit none
  class(psb_s_ell_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)             :: d(:)

  integer(psb_ipk_)  :: row, slot, nrows
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  real(psb_spk_)     :: start, acc
  character(len=20)  :: name='rowsum'

  call psb_erractionsave(err_act)
  if (a%is_dev()) call a%sync()

  nrows = a%get_nrows()

  ! Guard clause: the output must have room for one entry per row.
  if (size(d) < nrows) then
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = nrows
    call psb_errpush(info,name,i_err=int_err)
    call psb_error_handler(err_act)
    return
  end if

  ! Starting value shared by every row.
  start = merge(sone, szero, a%is_unit())

  do row = 1, nrows
    ! Accumulate left to right, in the stored slot order.
    acc = start
    do slot = 1, a%irn(row)
      acc = acc + abs(a%val(row,slot))
    end do
    d(row) = acc
  end do

  call psb_erractionrestore(err_act)
  return

end subroutine psb_s_ell_arwsum
!
! psb_s_ell_colsum: signed column sums for an ELL matrix.
!
!   d  (out) d(k) accumulates a%val(i,j) (no absolute value) for every
!            stored slot (i,j) with a%ja(i,j) == k. All of d starts from
!            sone when a%is_unit() is true and from szero otherwise. If
!            size(d) is smaller than a%get_ncols(),
!            psb_err_input_asize_small_i_ is pushed and the error handler
!            is invoked.
!   a  (in)  the matrix; a%sync() is called first when a%is_dev() is true.
!
subroutine psb_s_ell_colsum(d,a)

  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_colsum
  implicit none
  class(psb_s_ell_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)             :: d(:)

  integer(psb_ipk_)  :: row, slot, col, nrows, ncols
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  character(len=20)  :: name='colsum'

  call psb_erractionsave(err_act)
  if (a%is_dev()) call a%sync()

  nrows = a%get_nrows()
  ncols = a%get_ncols()

  ! Guard clause: the output must have room for one entry per column.
  if (size(d) < ncols) then
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = ncols
    call psb_errpush(info,name,i_err=int_err)
    call psb_error_handler(err_act)
    return
  end if

  ! Starting value of every column sum.
  d = merge(sone, szero, a%is_unit())

  ! Scatter each stored coefficient into the sum of its column.
  do row = 1, nrows
    do slot = 1, a%irn(row)
      col    = a%ja(row,slot)
      d(col) = d(col) + (a%val(row,slot))
    end do
  end do

  call psb_erractionrestore(err_act)
  return

end subroutine psb_s_ell_colsum
!
! psb_s_ell_csgetblk: extract rows imin..imax of an ELL matrix into a COO
! matrix.
!
!   imin, imax (in)    row range handed to a%csget.
!   a          (in)    source ELL matrix.
!   b          (inout) destination COO matrix. Its ia/ja/val arrays are
!                      filled by a%csget, its nonzero count is set to
!                      nzin+nzout, it is marked as host-resident and then
!                      b%fix() is applied.
!   info       (out)   psb_success_ or the failure reported by a%csget /
!                      b%fix.
!   jmin, jmax, iren, rscale, cscale (in, optional)
!                      forwarded unchanged to a%csget.
!   append     (in, optional) when .true. the extracted entries are placed
!                      after those already held by B; defaults to .false.
!
subroutine psb_s_ell_csgetblk(imin,imax,a,b,info,&
     & jmin,jmax,iren,append,rscale,cscale)
  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csgetblk
  implicit none

  class(psb_s_ell_sparse_mat), intent(in)    :: a
  class(psb_s_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(in)              :: imin,imax
  integer(psb_ipk_),intent(out)              :: info
  logical, intent(in), optional              :: append
  integer(psb_ipk_), intent(in), optional    :: iren(:)
  integer(psb_ipk_), intent(in), optional    :: jmin,jmax
  logical, intent(in), optional              :: rscale,cscale

  integer(psb_ipk_)  :: err_act, nzin, nzout
  character(len=20)  :: name='ell_getblk'
  logical            :: append_

  call psb_erractionsave(err_act)
  info = psb_success_

  append_ = .false.
  if (present(append)) append_ = append

  ! NZIN is the number of entries already sitting in the OUTPUT arrays, so
  ! when appending it must come from B. Taking it from A would either
  ! overwrite entries of B or leave a gap of undefined entries that the
  ! set_nzeros call below would then count as valid.
  if (append_) then
    nzin = b%get_nzeros()
  else
    nzin = 0
  endif

  call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,&
       & jmin=jmin, jmax=jmax, iren=iren, append=append_, &
       & nzin=nzin, rscale=rscale, cscale=cscale)

  if (info /= psb_success_) goto 9999

  call b%set_nzeros(nzin+nzout)
  call b%set_host()
  call b%fix(info)
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_s_ell_csgetblk
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csgetptn + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_ell_csgetrow + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_ell_csmm + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ell_csmv + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ell_csnm1 + + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_spk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csnm1' + logical, parameter :: debug=.false. 
+ + + if (a%is_dev()) call a%sync() + res = szero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = sone + else + vt(:) = szero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_s_ell_csnm1 diff --git a/ext/impl/psb_s_ell_csnmi.f90 b/ext/impl/psb_s_ell_csnmi.f90 new file mode 100644 index 00000000..1df9bafa --- /dev/null +++ b/ext/impl/psb_s_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
!
! psb_s_ell_csnmi: largest row sum of absolute values of an ELL matrix.
!
!   a    (in)  the matrix; a%sync() is called first when a%is_dev() is
!              true.
!   res        maximum over rows 1..a%get_nrows() of sum(|a%val(i,:)|),
!              with sone added to each row sum when a%is_unit() is true;
!              szero if there are no rows.
!
! Each row sum spans the full second extent of a%val, not only the first
! a%irn(i) slots.
! NOTE(review): presumably the padding slots hold zeros - confirm.
!
function psb_s_ell_csnmi(a) result(res)

  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csnmi
  implicit none
  class(psb_s_ell_sparse_mat), intent(in) :: a
  real(psb_spk_)                          :: res

  integer(psb_ipk_)  :: row, nrows
  real(psb_spk_)     :: rowsum
  logical            :: unit_diag

  if (a%is_dev()) call a%sync()

  unit_diag = a%is_unit()
  nrows     = a%get_nrows()

  res = szero
  do row = 1, nrows
    rowsum = sum(abs(a%val(row,:)))
    if (unit_diag) rowsum = rowsum + sone
    res = max(res,rowsum)
  end do

end function psb_s_ell_csnmi
!
! psb_s_ell_csput_a: update the coefficients of an ELL matrix from a list
! of (row, column, value) triples.
!
!   nz    (in)  number of triples to process. A negative value pushes
!               psb_err_iarg_neg_; zero is a valid no-op.
!   ia,ja (in)  row and column indices; each must hold at least nz entries.
!   val   (in)  coefficients; must hold at least nz entries.
!   a     (inout) the matrix. It must be in the update state: the build
!               state, and any other state, is rejected with
!               psb_err_invalid_mat_state_. After an update it is marked
!               as host-resident.
!   imin,imax,jmin,jmax (in) passed on to the search routine, which does
!               not use them.
!   info  (out) psb_success_, or the error code that was pushed.
!
! Triples whose row is outside 1..nrows, or whose column is not found in
! the row, are dropped; when the debug level is at least psb_debug_serial_
! a message is written to the debug unit.
!
subroutine psb_s_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)

  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csput_a
  implicit none

  class(psb_s_ell_sparse_mat), intent(inout) :: a
  real(psb_spk_), intent(in)      :: val(:)
  integer(psb_ipk_), intent(in)   :: nz, ia(:), ja(:), imin,imax,jmin,jmax
  integer(psb_ipk_), intent(out)  :: info


  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='s_ell_csput_a'
  integer(psb_ipk_)  :: int_err(5), debug_level, debug_unit


  call psb_erractionsave(err_act)
  info = psb_success_
  debug_unit  = psb_get_debug_unit()
  debug_level = psb_get_debug_level()
  ! Only int_err(1) is meaningful below; define the rest so that no
  ! undefined values are handed to psb_errpush.
  int_err = 0

  ! Only a negative count is an error: NZ == 0 is a legitimate empty
  ! update, handled by the early return further down.
  if (nz < 0) then
    info = psb_err_iarg_neg_
    int_err(1)=1
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (size(ia) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=2
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (size(ja) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=3
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  if (size(val) < nz) then
    info = psb_err_input_asize_invalid_i_
    int_err(1)=4
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (nz == 0) then
    ! Nothing to insert; undo psb_erractionsave before leaving.
    call psb_erractionrestore(err_act)
    return
  end if

  if (a%is_bld()) then
    ! Build phase should only ever be in COO
    info = psb_err_invalid_mat_state_

  else if (a%is_upd()) then
    if (a%is_dev()) call a%sync()
    call psb_s_ell_srch_upd(nz,ia,ja,val,a,&
         & imin,imax,jmin,jmax,info)

    ! Negative codes are hard failures; positive ones only flag entries
    ! that were skipped, which is not an error for the caller.
    if (info < 0) then
      info = psb_err_internal_error_
    else if (info > 0) then
      if (debug_level >= psb_debug_serial_) &
           & write(debug_unit,*) trim(name),&
           & ': Discarded entries not belonging to us.'
      info = psb_success_
    end if
    call a%set_host()
  else
    ! State is wrong.
    info = psb_err_invalid_mat_state_
  end if
  if (info /= psb_success_) then
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return


contains

  !
  ! psb_s_ell_srch_upd: locate each (ia(i),ja(i)) in the stored pattern
  ! with a binary search on the row, and overwrite or accumulate val(i)
  ! according to a%get_dupl().
  !
  ! info on return:
  !    0  every entry was found and applied
  !    2  at least one row index was outside 1..nrows
  !    3  at least one column index was not found in its row
  !   -3  unsupported duplicate-handling mode
  !   -4  the matrix is not sorted, so the binary search cannot be used
  !
  subroutine psb_s_ell_srch_upd(nz,ia,ja,val,a,&
       & imin,imax,jmin,jmax,info)

    implicit none

    class(psb_s_ell_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax
    integer(psb_ipk_), intent(in) :: ia(:),ja(:)
    real(psb_spk_), intent(in) :: val(:)
    integer(psb_ipk_), intent(out) :: info
    integer(psb_ipk_) :: i,ir,ic,ip,nr,nc,dupl
    integer(psb_ipk_) :: debug_level, debug_unit
    character(len=20)    :: name='s_ell_srch_upd'

    info = psb_success_
    debug_unit  = psb_get_debug_unit()
    debug_level = psb_get_debug_level()

    dupl = a%get_dupl()

    if (.not.a%is_sorted()) then
      info = -4
      return
    end if

    nr = a%get_nrows()

    select case(dupl)
    case(psb_dupl_ovwrt_,psb_dupl_err_)
      ! Overwrite.
      ! Cannot test for error, should have been caught earlier.

      do i=1, nz
        ir = ia(i)
        ic = ja(i)

        if ((ir > 0).and.(ir <= nr)) then

          ! nc: number of slots actually in use on row ir.
          nc = a%irn(ir)
          ip = psb_bsrch(ic,nc,a%ja(ir,1:nc))
          if (ip>0) then
            a%val(ir,ip) = val(i)
          else
            info = max(info,3)
          end if
        else
          info = max(info,2)
        end if

      end do

    case(psb_dupl_add_)
      ! Add
      do i=1, nz
        ir = ia(i)
        ic = ja(i)
        if ((ir > 0).and.(ir <= nr)) then
          ! nc: number of slots actually in use on row ir.
          nc = a%irn(ir)
          ip = psb_bsrch(ic,nc,a%ja(ir,1:nc))
          if (ip>0) then
            a%val(ir,ip) = a%val(ir,ip) + val(i)
          else
            info = max(info,3)
          end if
        else
          info = max(info,2)
        end if
      end do

    case default
      info = -3
      if (debug_level >= psb_debug_serial_) &
           & write(debug_unit,*) trim(name),&
           & ': Duplicate handling: ',dupl
    end select

  end subroutine psb_s_ell_srch_upd
end subroutine psb_s_ell_csput_a
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_cssm + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. 
(a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_s_ell_cssv + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + real(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_s_ell_get_diag + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = sone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = szero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_get_diag diff --git a/ext/impl/psb_s_ell_maxval.f90 b/ext/impl/psb_s_ell_maxval.f90 new file mode 100644 index 00000000..6e2635b8 --- /dev/null +++ b/ext/impl/psb_s_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+function psb_s_ell_maxval(a) result(res)
+  !
+  ! Largest absolute value among the coefficients of an ELL matrix.
+  !
+  ! Arguments:
+  !   a    (in)  matrix to scan; a%sync() is invoked first when
+  !              a%is_dev() is true.
+  !
+  ! Result:
+  !   res        maximum of abs(a%val(i,:)) over rows 1..a%get_nrows().
+  !              The scan starts from sone when a%is_unit() is true and
+  !              from szero otherwise, so the result is never below that
+  !              starting value.
+  !
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_maxval
+  implicit none
+  class(psb_s_ell_sparse_mat), intent(in) :: a
+  real(psb_spk_)                          :: res
+
+  integer(psb_ipk_) :: i
+
+  if (a%is_dev()) call a%sync()
+
+  if (a%is_unit()) then
+    res = sone
+  else
+    res = szero
+  end if
+
+  ! Every column of the row is inspected, i.e. all of a%val(i,:), not
+  ! only the first a%irn(i) slots.
+  do i = 1, a%get_nrows()
+    res = max(res, maxval(abs(a%val(i,:))))
+  end do
+
+end function psb_s_ell_maxval
diff --git a/ext/impl/psb_s_ell_mold.f90 b/ext/impl/psb_s_ell_mold.f90
new file mode 100644
index 00000000..4d137112
--- /dev/null
+++ b/ext/impl/psb_s_ell_mold.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_ell_mold(a,b,info)
+  !
+  ! Return in b a newly allocated object whose dynamic type is
+  ! psb_s_ell_sparse_mat.
+  !
+  ! Arguments:
+  !   a     (in)     not referenced in the body.
+  !   b     (inout)  if already allocated it is freed with b%free() and
+  !                  deallocated before the new allocation.
+  !   info  (out)    0 on success; psb_err_alloc_dealloc_ when either
+  !                  the deallocate or the allocate reports failure.
+  !
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_mold
+  implicit none
+  class(psb_s_ell_sparse_mat), intent(in)                  :: a
+  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='ell_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  ! Dispose of the previous content of b, if any.
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+
+  ! Allocate the new object only when the cleanup succeeded.
+  if (info == 0) allocate(psb_s_ell_sparse_mat :: b, stat=info)
+
+  if (info == psb_success_) return
+
+  ! Any failure is reported as a generic allocation error.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_ell_mold
diff --git a/ext/impl/psb_s_ell_print.f90 b/ext/impl/psb_s_ell_print.f90
new file mode 100644
index 00000000..aec15a6d
--- /dev/null
+++ b/ext/impl/psb_s_ell_print.f90
@@ -0,0 +1,99 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_ell_print(iout,a,iv,head,ivr,ivc)
+  !
+  ! Write an ELL matrix to unit iout as a MatrixMarket coordinate file.
+  !
+  ! Arguments:
+  !   iout  (in)            output unit.
+  !   a     (in)            matrix to print; a%sync() is invoked first
+  !                         when a%is_dev() is true.
+  !   iv    (in, optional)  when present, both the row index and the
+  !                         column index are written as iv(index) and
+  !                         ivr/ivc are not used for the entries.
+  !   head  (optional)      extra text emitted on a '% ' comment line.
+  !   ivr   (in, optional)  row index map, used only when iv is absent.
+  !   ivc   (in, optional)  column index map, used only when iv is absent.
+  !
+  ! For every row i the entries j = 1..a%irn(i) are written, one per
+  ! line, with the format returned by psb_s_get_print_frmt.
+  !
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_print
+  implicit none
+
+  integer(psb_ipk_), intent(in)           :: iout
+  class(psb_s_ell_sparse_mat), intent(in) :: a
+  integer(psb_lpk_), intent(in), optional :: iv(:)
+  character(len=*), optional              :: head
+  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+
+  character(len=20)  :: name='s_ell_print'
+  logical, parameter :: debug=.false.
+
+  character(len=80) :: frmt
+  integer(psb_ipk_) :: i, j, nr, nc, nz
+
+  ! Banner
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate real general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% ELL'
+
+  if (a%is_dev()) call a%sync()
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nz = a%get_nzeros()
+  frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
+
+  ! Size line
+  write(iout,*) nr, nc, nz
+
+  ! Entries: iv takes precedence over the ivr/ivc pair.
+  if (present(iv)) then
+    do i=1, nr
+      do j=1,a%irn(i)
+        write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j)
+      end do
+    end do
+  else if (present(ivr)) then
+    if (present(ivc)) then
+      do i=1, nr
+        do j=1,a%irn(i)
+          write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j)
+        end do
+      end do
+    else
+      do i=1, nr
+        do j=1,a%irn(i)
+          write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j)
+        end do
+      end do
+    end if
+  else
+    if (present(ivc)) then
+      do i=1, nr
+        do j=1,a%irn(i)
+          write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j)
+        end do
+      end do
+    else
+      do i=1, nr
+        do j=1,a%irn(i)
+          write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j)
+        end do
+      end do
+    end if
+  end if
+
+end subroutine psb_s_ell_print
diff --git a/ext/impl/psb_s_ell_reallocate_nz.f90 b/ext/impl/psb_s_ell_reallocate_nz.f90
new file mode 100644
index 00000000..ff7dabda
--- /dev/null
+++ b/ext/impl/psb_s_ell_reallocate_nz.f90
@@ -0,0 +1,66 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+!
documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_ell_reallocate_nz(nz,a)
+  !
+  ! Resize the a%ja and a%val arrays of an ELL matrix so that they can
+  ! hold roughly nz entries overall.
+  !
+  ! Arguments:
+  !   nz  (in)     requested total number of entries; values below one
+  !                are treated as one.
+  !   a   (inout)  matrix whose ja/val arrays are reallocated.
+  !
+  ! The new number of columns is ceiling(max(nz,1)/rows); the leading
+  ! dimension stays at size(a%ja,1).  On failure of psb_realloc the
+  ! error psb_err_alloc_dealloc_ is pushed and the handler is invoked.
+  !
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in) :: nz
+  class(psb_s_ell_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_) :: m, nzrm, ld
+  integer(psb_ipk_) :: err_act, info
+  character(len=20)  :: name='s_ell_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  !
+  ! What should this really do???
+  !
+  ! Use at least one row in the divisor: with an empty matrix
+  ! (get_nrows() == 0) the plain division would be a division by zero.
+  ! For any positive row count the result is unchanged.
+  m    = max(a%get_nrows(),ione)
+  nzrm = (max(nz,ione)+m-1)/m
+  ld   = size(a%ja,1)
+  call psb_realloc(ld,nzrm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_ell_reallocate_nz
diff --git a/ext/impl/psb_s_ell_reinit.f90 b/ext/impl/psb_s_ell_reinit.f90
new file mode 100644
index 00000000..088e8398
--- /dev/null
+++ b/ext/impl/psb_s_ell_reinit.f90
@@ -0,0 +1,77 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_ell_reinit(a,clear)
+  !
+  ! Put an assembled ELL matrix back into update state, optionally
+  ! zeroing its coefficients.
+  !
+  ! Arguments:
+  !   a      (inout)         matrix to reinitialize.
+  !   clear  (in, optional)  when .true. (the default) a%val is set to
+  !                          szero before switching state.
+  !
+  ! Behaviour by state:
+  !   build or update : nothing is changed.
+  !   assembled       : sync from device if needed, optionally clear
+  !                     a%val, then a%set_upd() and a%set_host().
+  !   anything else   : psb_err_invalid_mat_state_ is pushed and the
+  !                     error handler is invoked.
+  !
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_reinit
+  implicit none
+
+  class(psb_s_ell_sparse_mat), intent(inout) :: a
+  logical, intent(in), optional :: clear
+
+  integer(psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='reinit'
+  logical            :: clear_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  clear_ = .true.
+  if (present(clear)) clear_ = clear
+
+  if (a%is_bld() .or. a%is_upd()) then
+    ! Nothing to do.  Control deliberately falls through to the common
+    ! exit instead of returning here, so that the psb_erractionsave
+    ! above is always paired with psb_erractionrestore.
+    continue
+  else if (a%is_asb()) then
+    if (a%is_dev()) call a%sync()
+    if (clear_) a%val(:,:) = szero
+    call a%set_upd()
+    call a%set_host()
+  else
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_ell_reinit
diff --git a/ext/impl/psb_s_ell_rowsum.f90 b/ext/impl/psb_s_ell_rowsum.f90
new file mode 100644
index 00000000..092329c9
--- /dev/null
+++ b/ext/impl/psb_s_ell_rowsum.f90
@@ -0,0 +1,77 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_ell_rowsum(d,a)
+  !
+  ! Row sums of an ELL matrix: d(i) = sum of a%val(i,1:a%irn(i)),
+  ! plus sone when a%is_unit() is true.
+  !
+  ! Arguments:
+  !   d  (out)  receives one sum per row in d(1:a%get_nrows()); entries
+  !             beyond that are not assigned by this routine.
+  !   a  (in)   matrix; a%sync() is invoked first when a%is_dev() is true.
+  !
+  ! Errors:
+  !   psb_err_input_asize_small_i_ is pushed (and the handler invoked)
+  !   when size(d) is smaller than the number of rows.
+  !
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_rowsum
+  implicit none
+  class(psb_s_ell_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_)  :: i, j, m
+  integer(psb_ipk_)  :: err_act, info, int_err(5)
+  real(psb_spk_)     :: base
+  character(len=20)  :: name='rowsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  if (size(d) < m) then
+    info = psb_err_input_asize_small_i_
+    ! Zero the whole descriptor first: only three slots carry data and
+    ! the remaining ones must not be handed over uninitialized.
+    int_err(:) = 0
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = m
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! Starting value of every row sum; it does not depend on the row.
+  if (a%is_unit()) then
+    base = sone
+  else
+    base = szero
+  end if
+
+  do i = 1, m
+    d(i) = base
+    do j = 1, a%irn(i)
+      d(i) = d(i) + (a%val(i,j))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_ell_rowsum
diff --git a/ext/impl/psb_s_ell_scal.f90 b/ext/impl/psb_s_ell_scal.f90
new file mode 100644
index 00000000..7f39f63d
--- /dev/null
+++ b/ext/impl/psb_s_ell_scal.f90
@@ -0,0 +1,99 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_ell_scal(d,a,info,side)
+  !
+  ! Scale an ELL matrix by a diagonal matrix stored in the vector d.
+  !
+  ! Arguments:
+  !   d     (in)            scaling factors.
+  !   a     (inout)         matrix to scale; synchronized from device
+  !                         first, made non-unit if it was unit, and
+  !                         flagged with a%set_host() on success.
+  !   info  (out)           psb_success_, or
+  !                         psb_err_input_asize_invalid_i_ when d is
+  !                         shorter than the relevant dimension.
+  !   side  (in, optional)  'L' (default, case-insensitive): row i is
+  !                         multiplied by d(i), needs size(d) >= rows.
+  !                         Any other value: entry (i,j) is multiplied
+  !                         by d(a%ja(i,j)), needs size(d) >= columns.
+  !
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_scal
+  implicit none
+  class(psb_s_ell_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in)                 :: d(:)
+  integer(psb_ipk_), intent(out)             :: info
+  character, intent(in), optional            :: side
+
+  integer(psb_ipk_)  :: err_act, i, j, m, n, ierr(5)
+  character(len=20)  :: name='scal'
+  character          :: side_
+  logical            :: left
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) then
+    call a%make_nonunit()
+  end if
+
+  side_ = 'L'
+  if (present(side)) then
+    side_ = psb_toupper(side)
+  end if
+
+  left = (side_ == 'L')
+
+  ! The row count drives the loops of BOTH branches, so it is fetched
+  ! here rather than inside the left-scaling branch only; otherwise the
+  ! right-scaling loop would run on an undefined upper bound.
+  m = a%get_nrows()
+
+  if (left) then
+    if (size(d) < m) then
+      info=psb_err_input_asize_invalid_i_
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    do i=1, m
+      a%val(i,:) = a%val(i,:) * d(i)
+    enddo
+  else
+    n = a%get_ncols()
+    if (size(d) < n) then
+      info=psb_err_input_asize_invalid_i_
+      ! Clear the descriptor before filling the two meaningful slots.
+      ierr(:) = 0
+      ierr(1) = 2; ierr(2) = size(d);
+      call psb_errpush(info,name,i_err=ierr)
+      goto 9999
+    end if
+
+    do i=1, m
+      do j=1, a%irn(i)
+        a%val(i,j) = a%val(i,j) * d(a%ja(i,j))
+      end do
+    enddo
+
+  end if
+
+  call a%set_host()
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_ell_scal
diff --git a/ext/impl/psb_s_ell_scals.f90 b/ext/impl/psb_s_ell_scals.f90
new file mode 100644
index 00000000..4bc77626
--- /dev/null
+++ b/ext/impl/psb_s_ell_scals.f90
@@
-0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_ell_scals(d,a,info)
+  !
+  ! Multiply all the coefficients of an ELL matrix by the scalar d.
+  !
+  ! Arguments:
+  !   d     (in)     scaling factor.
+  !   a     (inout)  matrix to scale; synchronized from device first,
+  !                  made non-unit if it was unit, and flagged with
+  !                  a%set_host() afterwards.
+  !   info  (out)    always psb_success_: no failure path exists in
+  !                  this routine.
+  !
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_scals
+  implicit none
+  class(psb_s_ell_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in)                 :: d
+  integer(psb_ipk_), intent(out)             :: info
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='scal'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) call a%make_nonunit()
+
+  ! The whole value array is scaled in place.
+  a%val(:,:) = a%val(:,:) * d
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+end subroutine psb_s_ell_scals
diff --git a/ext/impl/psb_s_ell_trim.f90 b/ext/impl/psb_s_ell_trim.f90
new file mode 100644
index 00000000..758a8bb5
--- /dev/null
+++ b/ext/impl/psb_s_ell_trim.f90
@@ -0,0 +1,60 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_ell_trim(a)
+  !
+  ! Shrink the storage of an ELL matrix to what its current content
+  ! requires.
+  !
+  ! Arguments:
+  !   a  (inout)  matrix whose irn, idiag, ja and val arrays are
+  !               reallocated to max(1,rows) rows and, for ja/val,
+  !               max(1,largest a%irn(i)) columns.
+  !
+  ! Errors:
+  !   when any psb_realloc fails, psb_err_alloc_dealloc_ is pushed and
+  !   the error handler is invoked.
+  !
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_trim
+  implicit none
+  class(psb_s_ell_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_)  :: err_act, info, nr, m, nzm
+  character(len=20)  :: name='trim'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  nr = a%get_nrows()
+  m  = max(1_psb_ipk_,nr)
+  ! Scan only the rows that exist.  For nr == 0 the section is empty,
+  ! maxval yields the most negative integer and the outer max clamps
+  ! the width to one; a%irn(1) is never touched in that case.
+  nzm = max(1_psb_ipk_,maxval(a%irn(1:nr)))
+
+  call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(m,nzm,a%val,info)
+
+  if (info /= psb_success_) then
+    ! Record the failure before handing over to the error handler.
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_ell_trim
diff --git a/ext/impl/psb_s_hdia_allocate_mnnz.f90 b/ext/impl/psb_s_hdia_allocate_mnnz.f90
new file mode 100644
index 00000000..2c4e16fc
--- /dev/null
+++ b/ext/impl/psb_s_hdia_allocate_mnnz.f90
@@ -0,0 +1,75 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_s_hdia_allocate_mnnz(m,n,a,nz)
+  !
+  ! Argument validation for the allocation of an m x n HDIA matrix.
+  !
+  ! Arguments:
+  !   m, n  (in)            requested dimensions; a negative value
+  !                         raises psb_err_iarg_neg_ with the argument
+  !                         position (1 or 2).
+  !   a     (inout)         target matrix; it is not modified by the
+  !                         code in this routine.
+  !   nz    (in, optional)  requested number of entries.
+  !
+  ! A per-row entry count is derived from nz (or from
+  ! max(7*m,7*n,1) when nz is absent) and checked for being
+  ! non-negative (argument position 3).  As written, the routine only
+  ! validates: no storage is allocated here.
+  !
+  use psb_base_mod
+  use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_s_hdia_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  integer(psb_ipk_)  :: err_act, info, nz_, mdiv
+  character(len=20)  :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione/))
+    goto 9999
+  endif
+
+  ! m == 0 passes the checks above, so the divisor is clamped to one to
+  ! avoid an integer division by zero; for m > 0 nothing changes.
+  mdiv = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + mdiv - 1)/mdiv
+  else
+    nz_ = (max(7*m,7*n,ione) + mdiv - 1)/mdiv
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione/))
+    goto 9999
+  endif
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_hdia_allocate_mnnz
diff --git a/ext/impl/psb_s_hdia_csmv.f90 b/ext/impl/psb_s_hdia_csmv.f90
new file mode 100644
index 00000000..d945f964
--- /dev/null
+++ b/ext/impl/psb_s_hdia_csmv.f90
@@ -0,0 +1,162 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_csmv + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! 
max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_s_inner_dia_csmv + +end subroutine psb_s_hdia_csmv diff --git a/ext/impl/psb_s_hdia_mold.f90 b/ext/impl/psb_s_hdia_mold.f90 new file mode 100644 index 00000000..a62630c0 --- /dev/null +++ b/ext/impl/psb_s_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_hdia_mold(a,b,info)
+  !
+  ! Return in b a newly allocated object whose dynamic type is
+  ! psb_s_hdia_sparse_mat.
+  !
+  ! Arguments:
+  !   a     (in)     not referenced in the body.
+  !   b     (inout)  if already allocated it is freed with b%free() and
+  !                  deallocated before the new allocation.
+  !   info  (out)    0 on success; psb_err_alloc_dealloc_ when either
+  !                  the deallocate or the allocate reports failure.
+  !
+  use psb_base_mod
+  use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_mold
+  implicit none
+  class(psb_s_hdia_sparse_mat), intent(in)                 :: a
+  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='hdia_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  ! Dispose of the previous content of b, if any.
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+
+  ! Allocate the new object only when the cleanup succeeded.
+  if (info == 0) allocate(psb_s_hdia_sparse_mat :: b, stat=info)
+
+  if (info == psb_success_) return
+
+  ! Any failure is reported as a generic allocation error.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_hdia_mold
diff --git a/ext/impl/psb_s_hdia_print.f90 b/ext/impl/psb_s_hdia_print.f90
new file mode 100644
index 00000000..f4b927bc
--- /dev/null
+++ b/ext/impl/psb_s_hdia_print.f90
@@ -0,0 +1,121 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_s_hdia_print(iout,a,iv,head,ivr,ivc)
+  !
+  ! Write an HDIA matrix on unit IOUT as MatrixMarket coordinate text.
+  !
+  ! Output layout: the MatrixMarket banner, the optional HEAD comment, a
+  ! '% HDIA' tag, one line with nrows/ncols/nzeros, then one (row, column,
+  ! value) line per extracted entry, hack by hack.
+  !
+  ! Arguments:
+  !   iout - in: Fortran unit to write to.
+  !   a    - in: matrix to print; a%sync() is called first if a%is_dev().
+  !   iv   - in, optional: when present, both the row and the column index
+  !          of every entry are printed as iv(index); ivr/ivc are ignored.
+  !   head - in, optional: text written after '% ' below the banner.
+  !   ivr, ivc - in, optional: separate maps applied to the row index and
+  !          to the column index respectively (only looked at when iv is
+  !          absent).
+  !
+  ! There is no status argument: if the work arrays cannot be allocated
+  ! the routine returns after the header lines, and the info value coming
+  ! back from psi_s_xtr_coo_from_dia is not inspected.
+  !
+  use psb_base_mod
+  use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_print
+  use psi_ext_util_mod
+  implicit none
+
+  integer(psb_ipk_), intent(in)            :: iout
+  class(psb_s_hdia_sparse_mat), intent(in) :: a
+  integer(psb_lpk_), intent(in), optional  :: iv(:)
+  character(len=*), optional               :: head
+  integer(psb_lpk_), intent(in), optional  :: ivr(:), ivc(:)
+
+  character(len=80) :: frmt
+  integer(psb_ipk_) :: nr, nc, nz, info
+  integer(psb_ipk_) :: nhacks, hacksize, ihack, rowdisp
+  integer(psb_ipk_) :: hackfirst, hacknext, ncd, widest, nzhack, l
+  integer(psb_ipk_), allocatable :: ia(:), ja(:)
+  real(psb_spk_), allocatable    :: val(:)
+
+  ! Banner and comment lines.
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate real general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% HDIA'
+
+  if (a%is_dev()) call a%sync()
+  nr   = a%get_nrows()
+  nc   = a%get_ncols()
+  nz   = a%get_nzeros()
+  frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
+
+  ! Size the work arrays for the hack with the most diagonals:
+  ! hacksize rows times that number of diagonals.
+  nhacks   = a%nhacks
+  hacksize = a%hacksize
+  widest   = 0
+  do ihack = 1, nhacks
+    widest = max(widest, a%hackoffsets(ihack+1) - a%hackoffsets(ihack))
+  end do
+  widest = hacksize*widest
+  allocate(ia(widest), ja(widest), val(widest), stat=info)
+  if (info /= 0) return
+
+  write(iout,*) nr, nc, nz
+
+  do ihack = 1, nhacks
+    rowdisp   = (ihack-1)*hacksize          ! rows preceding this hack
+    hackfirst = a%hackoffsets(ihack)
+    hacknext  = a%hackoffsets(ihack+1)
+    ncd       = hacknext - hackfirst        ! diagonals stored in this hack
+
+    ! Presumably converts this hack's diagonals into nzhack coordinate
+    ! triplets in ia/ja/val; the helper is not defined in this file.
+    call psi_s_xtr_coo_from_dia(nr,nc,&
+         & ia, ja, val, nzhack,&
+         & hacksize,ncd,&
+         & a%val((hacksize*hackfirst)+1:hacksize*hacknext),&
+         & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=rowdisp)
+
+    ! Pick the index maps once per hack; iv takes precedence.
+    if (present(iv)) then
+      do l = 1, nzhack
+        write(iout,frmt) iv(ia(l)),iv(ja(l)),val(l)
+      end do
+    else if (present(ivr) .and. present(ivc)) then
+      do l = 1, nzhack
+        write(iout,frmt) ivr(ia(l)),ivc(ja(l)),val(l)
+      end do
+    else if (present(ivr)) then
+      do l = 1, nzhack
+        write(iout,frmt) ivr(ia(l)),ja(l),val(l)
+      end do
+    else if (present(ivc)) then
+      do l = 1, nzhack
+        write(iout,frmt) ia(l),ivc(ja(l)),val(l)
+      end do
+    else
+      do l = 1, nzhack
+        write(iout,frmt) ia(l),ja(l),val(l)
+      end do
+    end if
+
+  end do
+
+end subroutine psb_s_hdia_print
diff --git a/ext/impl/psb_s_hll_aclsum.f90 b/ext/impl/psb_s_hll_aclsum.f90
new file mode 100644
index 00000000..cf75dfb2
--- /dev/null
+++ b/ext/impl/psb_s_hll_aclsum.f90
@@ -0,0 +1,109 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_hll_aclsum(d,a)
+  !
+  ! Absolute column sums of an HLL matrix.
+  !
+  ! On return d(jc) holds the sum of abs(value) over the stored entries
+  ! whose column index is jc, started from sone when a%is_unit() and from
+  ! szero otherwise.
+  !
+  ! Arguments:
+  !   d - out: result; must have at least a%get_ncols() elements, else
+  !            psb_err_input_asize_small_i_ is pushed and the error
+  !            handler is invoked.
+  !   a - in:  the matrix; a%sync() is called first if a%is_dev().
+  !
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_aclsum
+  implicit none
+  class(psb_s_hll_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_)  :: i, j, k, m, n, ir, hksz, mxrwl
+  integer(psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='aclsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = 0
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info       = psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! Starting value of every sum: sone for a unit matrix, szero otherwise.
+  d = merge(sone, szero, a%is_unit())
+
+  ! Walk the hacks: hack j starts at row i, holds ir rows and is stored
+  ! from a%hkoffs(j)+1 with leading dimension hksz and mxrwl columns.
+  hksz = a%get_hksz()
+  do i = 1, m, hksz
+    j     = (i-1)/hksz + 1
+    ir    = min(hksz, m-i+1)
+    k     = a%hkoffs(j) + 1
+    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
+    call s_hll_aclsum(i,ir,mxrwl,a%irn(i),&
+         & a%ja(k),hksz,a%val(k),hksz, &
+         & d,info)
+    if (info /= psb_success_) exit
+  end do
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  ! Accumulate into d the absolute values of one hack.
+  !   ir      - global index of the hack's first row (not used here)
+  !   m       - number of rows in the hack
+  !   n       - number of stored columns of the hack (not used here)
+  !   irn(i)  - number of entries to visit in local row i
+  !   ja, val - column indices / coefficients, viewed as (ldj,*) / (ldv,*)
+  !   info    - always psb_success_ on return
+  subroutine s_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,&
+       & d,info)
+    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    real(psb_spk_), intent(in)     :: val(ldv,*)
+    real(psb_spk_), intent(inout)  :: d(*)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: irow, ipos, icol
+
+    info = psb_success_
+    ! Row-major visit order is kept so the additions into each d(icol)
+    ! happen in the same sequence as before.
+    do irow = 1, m
+      do ipos = 1, irn(irow)
+        icol    = ja(irow,ipos)
+        d(icol) = d(icol) + abs(val(irow,ipos))
+      end do
+    end do
+
+  end subroutine s_hll_aclsum
+
+end subroutine psb_s_hll_aclsum
diff --git a/ext/impl/psb_s_hll_allocate_mnnz.f90 b/ext/impl/psb_s_hll_allocate_mnnz.f90
new file mode 100644
index 00000000..549eccb4
--- /dev/null
+++ b/ext/impl/psb_s_hll_allocate_mnnz.f90
@@ -0,0 +1,93 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! 
Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_hll_allocate_mnnz(m,n,a,nz)
+  !
+  ! Allocate the storage of an HLL matrix with M rows and N columns and
+  ! put it in the build state.
+  !
+  ! Arguments:
+  !   m, n - in:    number of rows / columns; a negative value pushes
+  !                 psb_err_iarg_neg_ (argument position 1 or 2).
+  !   a    - inout: matrix whose irn, idiag, hkoffs, ja and val components
+  !                 are (re)allocated through psb_realloc.
+  !   nz   - in, optional: total number of entries to make room for.  It
+  !                 is turned into a per-row width
+  !                    nz_ = ceiling(max(nz,1) / m)
+  !                 and when absent max(7*m,7*n,1) is used in place of nz.
+  !
+  ! On success irn and idiag are zeroed, the sizes are recorded, and the
+  ! matrix is flagged: build state, not triangular, not unit-diagonal,
+  ! default duplicate handling (psb_dupl_def_), default hack size
+  ! (psb_hksz_def_), data resident on the host.
+  !
+  ! Errors are pushed on the PSBLAS error stack and psb_error_handler is
+  ! invoked; there is no status argument.
+  !
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in)              :: m,n
+  class(psb_s_hll_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional    :: nz
+  integer(psb_ipk_)  :: err_act, info, nz_, mdiv
+  character(len=20)  :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione/))
+    goto 9999
+  endif
+
+  ! m == 0 passes the checks above (an empty matrix is legal), so the
+  ! divisor is clamped to 1: dividing by m directly was a division by
+  ! zero.  For m >= 1 the result is unchanged; for m == 0 every array
+  ! below is then sized m*nz_ = 0.
+  mdiv = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + m - 1)/mdiv
+  else
+    nz_ = (max(7*m,7*n,ione) + m - 1)/mdiv
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione/))
+    goto 9999
+  endif
+
+  ! Each reallocation is attempted only if the previous ones succeeded.
+  if (info == psb_success_) call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info)
+  if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info)
+  if (info == psb_success_) call psb_realloc(m*nz_,a%val,info)
+  if (info == psb_success_) then
+    a%irn   = 0
+    a%idiag = 0
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+    call a%set_bld()
+    call a%set_triangle(.false.)
+    call a%set_unit(.false.)
+    call a%set_dupl(psb_dupl_def_)
+    call a%set_hksz(psb_hksz_def_)
+    call a%set_host()
+  end if
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_hll_allocate_mnnz
diff --git a/ext/impl/psb_s_hll_arwsum.f90 b/ext/impl/psb_s_hll_arwsum.f90
new file mode 100644
index 00000000..b93efb12
--- /dev/null
+++ b/ext/impl/psb_s_hll_arwsum.f90
@@ -0,0 +1,108 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! 
documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_hll_arwsum(d,a)
+  !
+  ! Absolute row sums of an HLL matrix.
+  !
+  ! On return d(i) holds the sum of abs(value) over the stored entries of
+  ! row i, started from sone when a%is_unit() and from szero otherwise.
+  !
+  ! Arguments:
+  !   d - out: result; must have at least a%get_nrows() elements, else
+  !            psb_err_input_asize_small_i_ is pushed and the error
+  !            handler is invoked.
+  !   a - in:  the matrix; a%sync() is called first if a%is_dev().
+  !
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_arwsum
+  implicit none
+  class(psb_s_hll_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_)  :: i, j, k, m, n, ir, hksz, mxrwl
+  integer(psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='arwsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = 0
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  if (size(d) < m) then
+    info       = psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = m
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! Starting value of every sum: sone for a unit matrix, szero otherwise.
+  d = merge(sone, szero, a%is_unit())
+
+  ! Walk the hacks: hack j starts at row i, holds ir rows and is stored
+  ! from a%hkoffs(j)+1 with leading dimension hksz and mxrwl columns.
+  hksz = a%get_hksz()
+  do i = 1, m, hksz
+    j     = (i-1)/hksz + 1
+    ir    = min(hksz, m-i+1)
+    k     = a%hkoffs(j) + 1
+    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
+    call s_hll_arwsum(i,ir,mxrwl,a%irn(i),&
+         & a%ja(k),hksz,a%val(k),hksz, &
+         & d,info)
+    if (info /= psb_success_) exit
+  end do
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  ! Accumulate into d the absolute values of one hack, row by row.
+  !   ir      - global index of the hack's first row; local row i is
+  !             accumulated into d(ir+i-1)
+  !   m       - number of rows in the hack
+  !   n       - number of stored columns of the hack (not used here)
+  !   irn(i)  - number of entries to visit in local row i
+  !   ja      - column indices, viewed as (ldj,*) (not used here)
+  !   val     - coefficients, viewed as (ldv,*)
+  !   info    - always psb_success_ on return
+  subroutine s_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,&
+       & d,info)
+    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    real(psb_spk_), intent(in)     :: val(ldv,*)
+    real(psb_spk_), intent(inout)  :: d(*)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: irow, ipos, iglob
+
+    info = psb_success_
+    do irow = 1, m
+      iglob = ir + irow - 1
+      ! Added one term at a time, directly into d, to keep the original
+      ! order of the floating-point additions.
+      do ipos = 1, irn(irow)
+        d(iglob) = d(iglob) + abs(val(irow,ipos))
+      end do
+    end do
+
+  end subroutine s_hll_arwsum
+
+end subroutine psb_s_hll_arwsum
diff --git a/ext/impl/psb_s_hll_colsum.f90 b/ext/impl/psb_s_hll_colsum.f90
new file mode 100644
index 00000000..02cceac2
--- /dev/null
+++ b/ext/impl/psb_s_hll_colsum.f90
@@ -0,0 +1,109 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_hll_colsum(d,a)
+  !
+  ! Column sums of an HLL matrix.
+  !
+  ! On return d(jc) holds the signed sum of the stored entries whose
+  ! column index is jc, started from sone when a%is_unit() and from szero
+  ! otherwise.  The absolute-value variant is psb_s_hll_aclsum.
+  !
+  ! Arguments:
+  !   d - out: result; must have at least a%get_ncols() elements, else
+  !            psb_err_input_asize_small_i_ is pushed and the error
+  !            handler is invoked.
+  !   a - in:  the matrix; a%sync() is called first if a%is_dev().
+  !
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_colsum
+  implicit none
+  class(psb_s_hll_sparse_mat), intent(in) :: a
+  real(psb_spk_), intent(out)             :: d(:)
+
+  integer(psb_ipk_)  :: i, j, k, m, n, ir, hksz, mxrwl
+  integer(psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='colsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = 0
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! A unit matrix contributes 1 to every column sum.
+  if (a%is_unit()) then
+    d = sone
+  else
+    d = szero
+  end if
+
+  ! Walk the hacks: hack j starts at row i, holds ir rows and is stored
+  ! from a%hkoffs(j)+1 with leading dimension hksz and mxrwl columns.
+  hksz = a%get_hksz()
+  j = 1
+  do i=1,m,hksz
+    ir    = min(hksz,m-i+1)
+    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
+    k     = a%hkoffs(j) + 1
+    call s_hll_colsum(i,ir,mxrwl,a%irn(i),&
+         & a%ja(k),hksz,a%val(k),hksz, &
+         & d,info)
+    if (info /= psb_success_) goto 9999
+    j = j + 1
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  ! Accumulate into d the (signed) entries of one hack.
+  !   ir      - global index of the hack's first row (not used here)
+  !   m       - number of rows in the hack
+  !   n       - number of stored columns of the hack (not used here)
+  !   irn(i)  - number of entries to visit in local row i
+  !   ja, val - column indices / coefficients, viewed as (ldj,*) / (ldv,*)
+  !   info    - always psb_success_ on return
+  subroutine s_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,&
+       & d,info)
+    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    real(psb_spk_), intent(in)     :: val(ldv,*)
+    real(psb_spk_), intent(inout)  :: d(*)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: i,j, jc
+
+    info = psb_success_
+    do i=1,m
+      do j=1, irn(i)
+        jc = ja(i,j)
+        ! Signed accumulation: this routine used to add abs(val(i,j)),
+        ! which made it a duplicate of psb_s_hll_aclsum.
+        d(jc) = d(jc) + val(i,j)
+      end do
+    end do
+
+  end subroutine s_hll_colsum
+
+end subroutine psb_s_hll_colsum
diff --git a/ext/impl/psb_s_hll_csgetblk.f90 b/ext/impl/psb_s_hll_csgetblk.f90
new file mode 100644
index 00000000..c925e3a2
--- /dev/null
+++ b/ext/impl/psb_s_hll_csgetblk.f90
@@ -0,0 +1,83 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_hll_csgetblk(imin,imax,a,b,info,&
+     & jmin,jmax,iren,append,rscale,cscale)
+  !
+  ! Extract rows IMIN:IMAX of the HLL matrix A into the COO matrix B.
+  !
+  ! The extraction itself is done by a%csget, which fills b%ia, b%ja and
+  ! b%val; afterwards the entry count of B is updated, B is marked as
+  ! resident on the host and b%fix is called.
+  !
+  ! Arguments:
+  !   imin, imax - in:    first and last row to extract.
+  !   a          - in:    source matrix.
+  !   b          - inout: destination COO matrix.
+  !   info       - out:   psb_success_ or the status returned by
+  !                       a%csget / b%fix (the error handler is invoked
+  !                       in that case).
+  !   jmin, jmax, iren, rscale, cscale - in, optional: forwarded to
+  !                       a%csget unchanged.
+  !   append     - in, optional: when .true. the extracted entries are
+  !                       placed after the ones already in B; when
+  !                       .false. or absent they start at position 1.
+  !
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csgetblk
+  implicit none
+
+  class(psb_s_hll_sparse_mat), intent(in)    :: a
+  class(psb_s_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(in)              :: imin,imax
+  integer(psb_ipk_),intent(out)              :: info
+  logical, intent(in), optional              :: append
+  integer(psb_ipk_), intent(in), optional    :: iren(:)
+  integer(psb_ipk_), intent(in), optional    :: jmin,jmax
+  logical, intent(in), optional              :: rscale,cscale
+  integer(psb_ipk_)  :: err_act, nzin, nzout
+  character(len=20)  :: name='hll_getblk'
+  logical            :: append_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(append)) then
+    append_ = append
+  else
+    append_ = .false.
+  endif
+
+  ! nzin is the number of entries already sitting in the output arrays,
+  ! i.e. in B.  It used to be read from A, so an append either overwrote
+  ! part of B or left a gap of undefined entries whenever the two counts
+  ! differed.
+  if (append_) then
+    nzin = b%get_nzeros()
+  else
+    nzin = 0
+  endif
+
+  call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,&
+       & jmin=jmin, jmax=jmax, iren=iren, append=append_, &
+       & nzin=nzin, rscale=rscale, cscale=cscale)
+
+  if (info /= psb_success_) goto 9999
+
+  call b%set_nzeros(nzin+nzout)
+  call b%set_host()
+  call b%fix(info)
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_hll_csgetblk
diff --git a/ext/impl/psb_s_hll_csgetptn.f90 b/ext/impl/psb_s_hll_csgetptn.f90
new file mode 100644
index 00000000..ccb1b6a1
--- /dev/null
+++ b/ext/impl/psb_s_hll_csgetptn.f90
@@ -0,0 +1,209 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csgetptn + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_hll_csgetrow + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_hll_csmm + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + real(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_s_hll_csmv + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = 
a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_s_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & 
a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == sone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == sone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == szero) 
then + if (beta == szero) then + do i=1,m + y(ir+i-1) = szero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == szero) then + do i=1,m + tmp = szero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = szero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_s_hll_csmv_inner + + subroutine psb_s_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == szero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_8 + + subroutine psb_s_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + 
integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == szero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_24 + + subroutine psb_s_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == szero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_16 + + subroutine psb_s_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: 
is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == szero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_32 + + subroutine psb_s_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == szero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_4 + +end subroutine psb_s_hll_csmv diff --git a/ext/impl/psb_s_hll_csnm1.f90 b/ext/impl/psb_s_hll_csnm1.f90 new file mode 100644 index 00000000..6e745081 --- /dev/null +++ b/ext/impl/psb_s_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! 
Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+function psb_s_hll_csnm1(a) result(res)
+  ! 1-norm of an HLL matrix: the largest column sum of absolute values of the stored coefficients.
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csnm1
+
+  implicit none
+  class(psb_s_hll_sparse_mat), intent(in) :: a
+  real(psb_spk_)         :: res
+  ! vt(jc) accumulates the absolute column sum of column jc.
+  integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl
+  real(psb_spk_), allocatable  :: vt(:)
+  logical            :: is_unit
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='s_hll_csnm1'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+
+  res = szero
+  if (a%is_dev()) call a%sync()
+  n = a%get_ncols()
+  m = a%get_nrows()
+  allocate(vt(n),stat=info)
+  if (Info /= 0) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  if (a%is_unit()) then
+    vt = sone
+  else
+    vt = szero
+  end if
+  hksz = a%get_hksz()
+  j=1
+  do i=1,m,hksz
+    ir    = min(hksz,m-i+1)
+    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
+    k     = a%hkoffs(j) + 1
+    call psb_s_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),&
+         & a%ja(k),hksz,a%val(k),hksz,&
+         & vt,info)
+    if (info /= psb_success_) goto 9999
+    j = j + 1
+  end do
+  ! MAXVAL of a zero-size array is -huge(); clamp so that a matrix with no columns has norm zero.
+  res = max(szero, maxval(vt))
+
+  call psb_erractionrestore(err_act)
+  return
+
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+  ! Add |val(i,j)| of the m rows of one chunk to the column accumulators vt(ja(i,j)).
+  subroutine psb_s_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,&
+       & vt,info)
+    integer(psb_ipk_), intent(in)   :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    real(psb_spk_), intent(in)      :: val(ldv,*)
+    real(psb_spk_), intent(inout)   :: vt(*)
+    integer(psb_ipk_), intent(out)  :: info
+    ! ir and n are not referenced; irn(i) is the number of stored entries in local row i.
+    integer(psb_ipk_) :: i,j,k, m4, jc
+    real(psb_spk_)    :: acc(4), tmp
+
+    info = psb_success_
+    do i=1,m
+      do j=1, irn(i)
+        jc = ja(i,j)
+        vt(jc) = vt(jc) + abs(val(i,j))
+      end do
+    end do
+  end subroutine psb_s_hll_csnm1_inner
+
+end function psb_s_hll_csnm1
diff --git a/ext/impl/psb_s_hll_csnmi.f90 b/ext/impl/psb_s_hll_csnmi.f90
new file mode 100644
index 00000000..3be15f9b
--- /dev/null
+++ b/ext/impl/psb_s_hll_csnmi.f90
@@ -0,0 +1,104 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+function psb_s_hll_csnmi(a) result(res)
+  ! Infinity norm of an HLL matrix: the largest row sum of absolute values of the stored coefficients.
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csnmi
+  implicit none
+  class(psb_s_hll_sparse_mat), intent(in) :: a
+  real(psb_spk_)         :: res
+  ! The rows are visited one chunk of (at most) hksz rows at a time.
+  integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info
+  Integer(Psb_ipk_) :: err_act
+  logical            :: is_unit
+  character(len=20)  :: name='s_csnmi'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  info = 0
+  res  = szero
+  if (a%is_dev()) call a%sync()
+
+  n       = a%get_ncols()
+  m       = a%get_nrows()
+  is_unit = a%is_unit()
+  hksz    = a%get_hksz()
+  ! Chunk number j is derived from the first row i of the chunk.
+  do i=1,m,hksz
+    j     = ((i-1)/hksz) + 1
+    ir    = min(hksz,m-i+1)
+    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
+    k     = a%hkoffs(j) + 1
+    call psb_s_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),&
+         & a%ja(k),hksz,a%val(k),hksz,&
+         & res,is_unit,info)
+    if (info /= psb_success_) goto 9999
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+  ! Fold the absolute row sums of the m rows of one chunk into the running maximum res.
+  subroutine psb_s_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,&
+       & res,is_unit,info)
+    integer(psb_ipk_), intent(in)   :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    real(psb_spk_), intent(in)      :: val(ldv,*)
+    real(psb_spk_), intent(inout)   :: res
+    logical                         :: is_unit
+    integer(psb_ipk_), intent(out)  :: info
+
+    integer(psb_ipk_) :: i,j,k, m4, jc
+    real(psb_spk_)    :: diag0, rowsum
+
+    info = psb_success_
+    ! An implicit unit diagonal contributes one to every row sum.
+    diag0 = merge(sone, szero, is_unit)
+    ! irn(i) is the number of stored entries in local row i;
+    ! ir, n and ja are part of the interface but are not
+    ! referenced when summing the coefficients.
+    do i=1,m
+      rowsum = diag0
+      do j=1, irn(i)
+        rowsum = rowsum + abs(val(i,j))
+      end do
+      res = max(rowsum,res)
+    end do
+  end subroutine psb_s_hll_csnmi_inner
+
+end function psb_s_hll_csnmi
diff --git a/ext/impl/psb_s_hll_csput.f90 b/ext/impl/psb_s_hll_csput.f90
new file mode 100644
index 00000000..b12678d4
--- /dev/null
+++ b/ext/impl/psb_s_hll_csput.f90
@@ -0,0 +1,233 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! 
documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+  ! Update nz coefficients, entry i being (ia(i),ja(i),val(i)), of an HLL matrix in the update state.
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csput_a
+  implicit none
+  ! imin,imax,jmin,jmax are forwarded to the search routine, which does not reference them.
+  class(psb_s_hll_sparse_mat), intent(inout) :: a
+  real(psb_spk_), intent(in)      :: val(:)
+  integer(psb_ipk_), intent(in)   :: nz, ia(:), ja(:), imin,imax,jmin,jmax
+  integer(psb_ipk_), intent(out)  :: info
+
+
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='s_hll_csput_a'
+  logical, parameter :: debug=.false.
+  integer(psb_ipk_)  :: nza, i,j,k, nzl, isza, int_err(5)
+
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  ! Argument checks: int_err(1) carries the position of the offending argument.
+  if (nz <= 0) then
+    info = psb_err_iarg_neg_
+    int_err(1)=1
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (size(ia) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=2
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (size(ja) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=3
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (size(val) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=4
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (nz == 0) return
+
+  nza  = a%get_nzeros()
+
+  if (a%is_bld()) then
+    ! Build phase should only ever be in COO
+    info = psb_err_invalid_mat_state_
+
+  else if (a%is_upd()) then
+    if (a%is_dev()) call a%sync()
+
+    call psb_s_hll_srch_upd(nz,ia,ja,val,a,&
+         & imin,imax,jmin,jmax,info)
+
+    if (info /= psb_success_) then
+
+      info = psb_err_invalid_mat_state_
+    end if
+    call a%set_host()
+
+  else
+    ! State is wrong.
+    info = psb_err_invalid_mat_state_
+  end if
+  if (info /= psb_success_) then
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  subroutine psb_s_hll_srch_upd(nz,ia,ja,val,a,&
+       & imin,imax,jmin,jmax,info)
+    ! Locate every (ia(i),ja(i)) among the stored entries of its row and update the coefficient.
+    implicit none
+    ! info: 0 on success, -4 if a is not sorted, -3 for an unknown duplicate mode, i if entry i is not found.
+    class(psb_s_hll_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax
+    integer(psb_ipk_), intent(in) :: ia(:),ja(:)
+    real(psb_spk_), intent(in) :: val(:)
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,&
+         & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow
+    integer(psb_ipk_) :: debug_level, debug_unit
+    character(len=20) :: name='s_hll_srch_upd'
+
+    info = psb_success_
+    debug_unit  = psb_get_debug_unit()
+    debug_level = psb_get_debug_level()
+
+    dupl = a%get_dupl()
+    ! The binary search below needs the column indices of each row to be sorted.
+    if (.not.a%is_sorted()) then
+      info = -4
+      return
+    end if
+
+    lastrow = -1
+    nnz     = a%get_nzeros()
+    nr      = a%get_nrows()
+    nc      = a%get_ncols()
+    hksz    = a%get_hksz()
+
+    select case(dupl)
+    case(psb_dupl_ovwrt_,psb_dupl_err_)
+      ! Overwrite.
+      ! Cannot test for error, should have been caught earlier.
+
+      do i=1, nz
+        ir = ia(i)
+        ic = ja(i)
+
+        if ((ir > 0).and.(ir <= nr)) then
+          if (ir /= lastrow) then
+            hk      = ((ir-1)/hksz)
+            lastrow = ir
+            ihkr    = ir - hk*hksz
+            hk      = hk + 1
+            hkzpnt  = a%hkoffs(hk)
+            mxrwl   = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz
+            nc      = a%irn(ir)
+          end if
+          ! Row ir is local row ihkr of chunk hk; its entries sit hksz apart starting at hkzpnt+ihkr.
+          ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz))
+          if (ip>0) then
+            a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i)
+          else
+            if (debug_level >= psb_debug_serial_) &
+                 & write(debug_unit,*) trim(name),&
+                 & ': Was searching ',ic,' in: ',nc,&
+                 & ' : ',a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)
+            info = i
+            return
+          end if
+
+        else
+          if (debug_level >= psb_debug_serial_) &
+               & write(debug_unit,*) trim(name),&
+               & ': Discarding row that does not belong to us.'
+        end if
+
+      end do
+
+    case(psb_dupl_add_)
+      ! Add: the new value is accumulated onto the stored coefficient.
+      do i=1, nz
+        ir = ia(i)
+        ic = ja(i)
+        if ((ir > 0).and.(ir <= nr)) then
+          if (ir /= lastrow) then
+            hk      = ((ir-1)/hksz)
+            lastrow = ir
+            ihkr    = ir - hk*hksz
+            hk      = hk + 1
+            hkzpnt  = a%hkoffs(hk)
+            mxrwl   = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz
+            nc      = a%irn(ir)
+          end if
+
+          ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz))
+          if (ip>0) then
+            a%val(hkzpnt+ihkr+(ip-1)*hksz) = a%val(hkzpnt+ihkr+(ip-1)*hksz) + val(i)
+          else
+            if (debug_level >= psb_debug_serial_) &
+                 & write(debug_unit,*) trim(name),&
+                 & ': Was searching ',ic,' in: ',nc,&
+                 & ' : ',a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)
+            info = i
+            return
+          end if
+
+        else
+          if (debug_level >= psb_debug_serial_) &
+               & write(debug_unit,*) trim(name),&
+               & ': Discarding row that does not belong to us.'
+        end if
+      end do
+
+    case default
+      info = -3
+      if (debug_level >= psb_debug_serial_) &
+           & write(debug_unit,*) trim(name),&
+           & ': Duplicate handling: ',dupl
+    end select
+
+  end subroutine psb_s_hll_srch_upd
+
+end subroutine psb_s_hll_csput_a
diff --git a/ext/impl/psb_s_hll_cssm.f90 b/ext/impl/psb_s_hll_cssm.f90
new file mode 100644
index 00000000..30c77c8f
--- /dev/null
+++ b/ext/impl/psb_s_hll_cssm.f90
@@ -0,0 +1,506 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_cssm + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + real(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. 
(a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_s_hll_cssv + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + real(psb_spk_) :: acc + real(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_s_hll_get_diag + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = sone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_s_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = szero + end if + end do + + end subroutine psb_s_hll_get_diag_inner + +end subroutine psb_s_hll_get_diag diff --git a/ext/impl/psb_s_hll_maxval.f90 b/ext/impl/psb_s_hll_maxval.f90 new file mode 100644 index 00000000..84625328 --- /dev/null +++ b/ext/impl/psb_s_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. 
Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+function psb_s_hll_maxval(a) result(res)
+  ! Largest absolute value among the stored coefficients (at least one for an implicit unit diagonal).
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_maxval
+  implicit none
+  class(psb_s_hll_sparse_mat), intent(in) :: a
+  real(psb_spk_)         :: res
+  ! MAXVAL of a zero-size array is -huge(); clamp at zero so an empty matrix yields zero.
+  if (a%is_dev()) call a%sync()
+  res = max(szero, maxval(abs(a%val(:))))
+  if (a%is_unit()) res = max(res,sone)
+
+end function psb_s_hll_maxval
diff --git a/ext/impl/psb_s_hll_mold.f90 b/ext/impl/psb_s_hll_mold.f90
new file mode 100644
index 00000000..eb04ccd9
--- /dev/null
+++ b/ext/impl/psb_s_hll_mold.f90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. 
Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_hll_mold(a,b,info)
+  ! Make b a freshly allocated psb_s_hll_sparse_mat; a only selects this type-bound implementation.
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_mold
+  implicit none
+  class(psb_s_hll_sparse_mat), intent(in)                  :: a
+  class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+  Integer(Psb_ipk_)  :: err_act
+  character(len=20)  :: name='hll_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+  ! Release whatever b currently holds before giving it the new dynamic type.
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_s_hll_sparse_mat :: b, stat=info)
+
+  if (info == psb_success_) return
+  !
+  ! Either the deallocation or the allocation failed:
+  ! report it as an allocation error and hand control
+  ! to the error handler with the saved error action.
+  !
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_s_hll_mold
diff --git a/ext/impl/psb_s_hll_print.f90 b/ext/impl/psb_s_hll_print.f90
new file mode 100644
index 00000000..fb6bb38d
--- /dev/null
+++ b/ext/impl/psb_s_hll_print.f90
@@ -0,0 +1,134 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_hll_print(iout,a,iv,head,ivr,ivc)
+  ! Write the matrix to unit iout as MatrixMarket-style coordinate text, one stored entry per line.
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_print
+  implicit none
+  ! iv, when present, relabels both row and column indices; otherwise ivr relabels rows and ivc columns.
+  integer(psb_ipk_), intent(in)           :: iout
+  class(psb_s_hll_sparse_mat), intent(in) :: a
+  integer(psb_lpk_), intent(in), optional :: iv(:)
+  character(len=*), optional              :: head
+  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='s_hll_print'
+  logical, parameter :: debug=.false.
+
+  character(len=80)  :: frmt
+  integer(psb_ipk_)  :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix
+
+  ! Header: banner line, optional caller-supplied comment line, then two fixed comment lines.
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate real general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% COO'
+
+  if (a%is_dev()) call a%sync()
+
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nz = a%get_nzeros()
+  frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
+
+  hksz = a%get_hksz()
+  ! In every loop below row i is local row i-irs*hksz of chunk hk; its entries are hksz apart in ja/val.
+  write(iout,*) nr, nc, nz
+  if(present(iv)) then
+    do i=1, nr
+      irs   = (i-1)/hksz
+      hk    = irs + 1
+      mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz
+      k     = a%hkoffs(hk)
+      k     = k + (i-(irs*hksz))
+      do j=1,a%irn(i)
+        write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k)
+        k = k + hksz
+      end do
+    enddo
+  else
+    if (present(ivr).and..not.present(ivc)) then
+      do i=1, nr
+        irs   = (i-1)/hksz
+        hk    = irs + 1
+        mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz
+        k     = a%hkoffs(hk)
+        k     = k + (i-(irs*hksz))
+        do j=1,a%irn(i)
+          write(iout,frmt) ivr(i),(a%ja(k)),a%val(k)
+          k = k + hksz
+        end do
+      enddo
+    else if (present(ivr).and.present(ivc)) then
+      do i=1, nr
+        irs   = (i-1)/hksz
+        hk    = irs + 1
+        mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz
+        k     = a%hkoffs(hk)
+        k     = k + (i-(irs*hksz))
+        do j=1,a%irn(i)
+          write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k)
+          k = k + hksz
+        end do
+      enddo
+    else if (.not.present(ivr).and.present(ivc)) then
+      do i=1, nr
+        irs   = (i-1)/hksz
+        hk    = irs + 1
+        mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz
+        k     = a%hkoffs(hk)
+        k     = k + (i-(irs*hksz))
+        do j=1,a%irn(i)
+          write(iout,frmt) (i),ivc(a%ja(k)),a%val(k)
+          k = k + hksz
+        end do
+      enddo
+
+    else if (.not.present(ivr).and..not.present(ivc)) then
+      ! No relabelling requested: print the native row and column indices.
+      do i=1, nr
+        irs   = (i-1)/hksz
+        hk    = irs + 1
+        mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz
+        k     = a%hkoffs(hk)
+        k     = k + (i-(irs*hksz))
+        do j=1,a%irn(i)
+          write(iout,frmt) (i),(a%ja(k)),a%val(k)
+          k = k + hksz
+        end do
+      enddo
+    endif
+  endif
+
+end subroutine psb_s_hll_print
diff --git a/ext/impl/psb_s_hll_reallocate_nz.f90 b/ext/impl/psb_s_hll_reallocate_nz.f90
new file mode 100644
index 00000000..f7a3076f
--- /dev/null
+++ b/ext/impl/psb_s_hll_reallocate_nz.f90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!

subroutine psb_s_hll_reallocate_nz(nz,a)
  !
  ! Resize the coefficient storage of A to hold max(nz,1) entries.
  !
  !   nz : requested number of entries
  !   a  : matrix whose a%ja and a%val are passed to psb_realloc
  !
  ! Only a%ja and a%val are touched here. On a psb_realloc failure
  ! psb_err_alloc_dealloc_ is pushed on the error stack and the error
  ! handler is invoked.
  ! NOTE(review): the original author flagged the intended semantics of this
  ! routine as an open question ("What should this really do???").
  !
  use psb_base_mod
  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_reallocate_nz
  implicit none
  integer(psb_ipk_), intent(in)              :: nz
  class(psb_s_hll_sparse_mat), intent(inout) :: a

  integer(psb_ipk_)  :: newsz
  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='s_hll_reallocate_nz'

  call psb_erractionsave(err_act)

  ! Never ask for a zero-sized buffer.
  newsz = max(nz,ione)

  call psb_realloc(newsz,a%ja,info)
  if (info == psb_success_) then
    call psb_realloc(newsz,a%val,info)
  end if

  if (info == psb_success_) then
    call psb_erractionrestore(err_act)
  else
    call psb_errpush(psb_err_alloc_dealloc_,name)
    call psb_error_handler(err_act)
  end if
  return

end subroutine psb_s_hll_reallocate_nz
diff --git a/ext/impl/psb_s_hll_reinit.f90 b/ext/impl/psb_s_hll_reinit.f90 new file mode 100644 index 00000000..170abe08 --- /dev/null +++ b/ext/impl/psb_s_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +!
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!

subroutine psb_s_hll_reinit(a,clear)
  !
  ! Put an assembled matrix back into the update state.
  !
  !   a     : matrix to reinitialise
  !   clear : optional; when .true. (the default) a%val is set to szero
  !
  ! Behaviour by state:
  !   is_bld() or is_upd() : nothing is changed
  !   is_asb()             : sync from device if needed, optionally zero
  !                          a%val, then set_upd() and set_host()
  !   anything else        : psb_err_invalid_mat_state_ is pushed and the
  !                          error handler is invoked
  !
  use psb_base_mod
  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_reinit
  implicit none

  class(psb_s_hll_sparse_mat), intent(inout) :: a
  logical, intent(in), optional              :: clear

  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='reinit'
  logical            :: clear_

  call psb_erractionsave(err_act)
  info = psb_success_

  clear_ = .true.
  if (present(clear)) clear_ = clear

  if (a%is_bld() .or. a%is_upd()) then
    ! Nothing to do. Control deliberately falls through to the common exit
    ! so that the psb_erractionsave above is always paired with a
    ! psb_erractionrestore (the previous code returned from here directly).
  else if (a%is_asb()) then
    if (a%is_dev()) call a%sync()
    if (clear_) a%val(:) = szero
    call a%set_upd()
    call a%set_host()
  else
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_s_hll_reinit
diff --git a/ext/impl/psb_s_hll_rowsum.f90 b/ext/impl/psb_s_hll_rowsum.f90 new file mode 100644 index 00000000..c7484698 --- /dev/null +++ b/ext/impl/psb_s_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!

subroutine psb_s_hll_rowsum(d,a)
  !
  ! Accumulate into d(i) the sum of the stored coefficients of row i of A.
  !
  !   d : output vector; must have at least a%get_nrows() entries, otherwise
  !       psb_err_input_asize_small_i_ is pushed and the error handler runs
  !   a : input matrix; a%sync() is called first when a%is_dev() is true
  !
  ! d starts from sone when a%is_unit() is true and from szero otherwise.
  !
  use psb_base_mod
  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_rowsum
  implicit none
  class(psb_s_hll_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)             :: d(:)

  integer(psb_ipk_)  :: first, hack, base, nrows, width, m, hksz
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  character(len=20)  :: name='rowsum'

  call psb_erractionsave(err_act)
  info = 0
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  if (size(d) < m) then
    info       = psb_err_input_asize_small_i_
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = m
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  d = merge(sone, szero, a%is_unit())

  hksz = a%get_hksz()
  hack = 0
  ! Walk the rows in groups of hksz; 'hack' is the 1-based group counter
  ! used to index a%hkoffs.
  do first = 1, m, hksz
    hack  = hack + 1
    nrows = min(hksz, m-first+1)
    width = (a%hkoffs(hack+1) - a%hkoffs(hack))/hksz
    base  = a%hkoffs(hack) + 1
    call s_hll_rowsum(first,nrows,width,a%irn(first),&
         & a%ja(base),hksz,a%val(base),hksz,&
         & d,info)
    if (info /= psb_success_) goto 9999
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Add to d(ir:ir+m-1) the first irn(i) entries of each row i of the
  ! val(ldv,*) panel. ja, ldj and n are accepted but not used here.
  subroutine s_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,&
       & d,info)
    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    real(psb_spk_), intent(in)     :: val(ldv,*)
    real(psb_spk_), intent(inout)  :: d(*)
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: lrow, grow, jcol

    info = psb_success_
    do lrow = 1, m
      grow = ir + lrow - 1
      do jcol = 1, irn(lrow)
        d(grow) = d(grow) + (val(lrow,jcol))
      end do
    end do

  end subroutine s_hll_rowsum

end subroutine psb_s_hll_rowsum
diff --git a/ext/impl/psb_s_hll_scal.f90 b/ext/impl/psb_s_hll_scal.f90 new file mode 100644 index 00000000..c8f3ddd5
--- /dev/null +++ b/ext/impl/psb_s_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
subroutine psb_s_hll_scal(d,a,info,side)
  !
  ! Scale the coefficients of A by the entries of vector D.
  !
  !   d    : scaling vector
  !   a    : matrix to scale; a%sync() is called first when a%is_dev()
  !   info : return code
  !   side : optional, 'L' (default) scales row i by d(i); any other value
  !          scales each entry by d(column index)
  !
  ! CURRENT BEHAVIOUR: the routine always pushes
  ! psb_err_missing_override_method_ and exits through the error handler;
  ! everything after that point is unreachable.
  ! NOTE(review): this looks like an intentional "not available" stub --
  ! confirm with the maintainers before removing the early exit.
  !
  ! Changes with respect to the previous version:
  !   * ierr is zeroed before being handed to psb_errpush (it used to be
  !     passed uninitialised on the only live path);
  !   * in the unreachable part, m is now defined for both sides (it drives
  !     the scaling loop but was only set for side='L') and the left-side
  !     size check now sets info before pushing the error.
  !
  use psb_base_mod
  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_scal
  implicit none
  class(psb_s_hll_sparse_mat), intent(inout) :: a
  real(psb_spk_), intent(in)                 :: d(:)
  integer(psb_ipk_), intent(out)             :: info
  character, intent(in), optional            :: side

  integer(psb_ipk_)  :: err_act, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir
  character(len=20)  :: name='scal'
  character          :: side_
  logical            :: left

  info = psb_success_
  call psb_erractionsave(err_act)
  ierr(:) = 0

  if (a%is_dev()) call a%sync()

  info = psb_err_missing_override_method_
  call psb_errpush(info,name,i_err=ierr)
  goto 9999

  ! ------------------------------------------------------------------
  ! Unreachable while the early exit above is in place.
  ! ------------------------------------------------------------------
  side_ = 'L'
  if (present(side)) then
    side_ = psb_toupper(side)
  end if

  left = (side_ == 'L')

  ! The row count drives the loop below regardless of the side.
  m  = a%get_nrows()
  ld = size(d)
  if (left) then
    if (ld < m) then
      info = psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = ld;
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if
  else
    n = a%get_ncols()
    if (ld < n) then
      info = psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = ld;
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if
  end if

  hksz = a%get_hksz()
  j    = 1
  do i=1,m,hksz
    ir    = min(hksz,m-i+1)
    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
    k     = a%hkoffs(j) + 1
    call psb_s_hll_scal_inner(i,ir,mxrwl,a%irn(i),&
         & a%ja(k),hksz,a%val(k),hksz,&
         & left,d,info)
    if (info /= psb_success_) goto 9999
    j = j + 1
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Scale the first irn(i) entries of each row i of the val(ldv,*) panel:
  ! by d(ir+i-1) when left is true, by d(ja(i,j)) otherwise.
  subroutine psb_s_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info)
    integer(psb_ipk_), intent(in)  :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    real(psb_spk_), intent(in)     :: d(*)
    real(psb_spk_), intent(inout)  :: val(ldv,*)
    logical, intent(in)            :: left
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: i,j,jc

    info = psb_success_

    if (left) then
      do i=1,m
        do j=1, irn(i)
          val(i,j) = val(i,j)*d(ir+i-1)
        end do
      end do
    else
      do i=1,m
        do j=1, irn(i)
          jc = ja(i,j)
          val(i,j) = val(i,j)*d(jc)
        end do
      end do
    end if

  end subroutine psb_s_hll_scal_inner

end subroutine psb_s_hll_scal
diff --git a/ext/impl/psb_s_hll_scals.f90 b/ext/impl/psb_s_hll_scals.f90 new file mode 100644 index 00000000..8f823a20 --- /dev/null +++ b/ext/impl/psb_s_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!
subroutine psb_s_hll_scals(d,a,info)
  !
  ! Multiply every stored coefficient of A by the scalar D.
  !
  !   d    : scalar factor
  !   a    : matrix to scale; a%sync() is called first when a%is_dev()
  !   info : set to psb_success_ (no failure path exists in this routine)
  !
  ! When a%is_unit() is true, a%make_nonunit() is called before scaling.
  ! The matrix is marked with set_host() afterwards.
  !
  use psb_base_mod
  use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_scals
  implicit none
  class(psb_s_hll_sparse_mat), intent(inout) :: a
  real(psb_spk_), intent(in)                 :: d
  integer(psb_ipk_), intent(out)             :: info

  integer(psb_ipk_) :: err_act

  info = psb_success_
  call psb_erractionsave(err_act)

  if (a%is_dev())  call a%sync()
  if (a%is_unit()) call a%make_nonunit()

  a%val(:) = d * a%val(:)
  call a%set_host()

  call psb_erractionrestore(err_act)
  return

end subroutine psb_s_hll_scals
diff --git a/ext/impl/psb_s_mv_dia_from_coo.f90 b/ext/impl/psb_s_mv_dia_from_coo.f90 new file mode 100644 index 00000000..d7dcfc1b --- /dev/null +++ b/ext/impl/psb_s_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!

subroutine psb_s_mv_dia_from_coo(a,b,info)
  !
  ! Move the contents of COO matrix B into DIA matrix A.
  !
  !   a    : destination, filled through a%cp_from_coo
  !   b    : source; b%fix is called first unless b%is_by_rows() is true,
  !          and b is freed once the copy has succeeded
  !   info : psb_success_ on success; the code returned by b%fix if that
  !          step fails; psb_err_alloc_dealloc_ if the copy fails
  !
  use psb_base_mod
  use psb_s_dia_mat_mod, psb_protect_name => psb_s_mv_dia_from_coo
  implicit none

  class(psb_s_dia_sparse_mat), intent(inout) :: a
  class(psb_s_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  if (.not. b%is_by_rows()) then
    call b%fix(info)
    if (info /= psb_success_) return
  end if

  call a%cp_from_coo(b,info)
  if (info == 0) then
    call b%free()
  else
    ! Any copy failure is reported with this single code.
    info = psb_err_alloc_dealloc_
  end if
  return

end subroutine psb_s_mv_dia_from_coo
diff --git a/ext/impl/psb_s_mv_dia_to_coo.f90 b/ext/impl/psb_s_mv_dia_to_coo.f90 new file mode 100644 index 00000000..c0944b21 --- /dev/null +++ b/ext/impl/psb_s_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3.
The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!
subroutine psb_s_mv_dia_to_coo(a,b,info)
  !
  ! Move the contents of DIA matrix A into COO matrix B.
  !
  !   a    : source; freed once the copy has succeeded
  !   b    : destination, filled through a%cp_to_coo
  !   info : psb_success_ on success, psb_err_alloc_dealloc_ if the copy
  !          reports any non-zero code (a is left untouched in that case)
  !
  use psb_base_mod
  use psb_s_dia_mat_mod, psb_protect_name => psb_s_mv_dia_to_coo
  implicit none

  class(psb_s_dia_sparse_mat), intent(inout) :: a
  class(psb_s_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  call a%cp_to_coo(b,info)
  if (info == 0) then
    call a%free()
  else
    info = psb_err_alloc_dealloc_
  end if
  return

end subroutine psb_s_mv_dia_to_coo
diff --git a/ext/impl/psb_s_mv_ell_from_coo.f90 b/ext/impl/psb_s_mv_ell_from_coo.f90 new file mode 100644 index 00000000..90965e41 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +!
are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
subroutine psb_s_mv_ell_from_coo(a,b,info)
  !
  ! Move the contents of COO matrix B into ELL matrix A.
  !
  !   a    : destination, filled through a%cp_from_coo
  !   b    : source; b%fix is called first unless b%is_by_rows() is true
  !   info : psb_success_ on success, otherwise the code returned by
  !          b%fix or a%cp_from_coo
  !
  ! B is released only when the copy succeeded. The previous version freed
  ! it unconditionally, so a failed copy destroyed the caller's only copy of
  ! the data; the DIA/HDIA counterparts of this routine already keep the
  ! source on failure.
  !
  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_from_coo
  implicit none

  class(psb_s_ell_sparse_mat), intent(inout) :: a
  class(psb_s_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  if (.not.b%is_by_rows()) call b%fix(info)
  if (info /= psb_success_) return

  call a%cp_from_coo(b,info)
  if (info == psb_success_) call b%free()

  return

end subroutine psb_s_mv_ell_from_coo
diff --git a/ext/impl/psb_s_mv_ell_from_fmt.f90 b/ext/impl/psb_s_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..03ebf8e4 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!

subroutine psb_s_mv_ell_from_fmt(a,b,info)
  !
  ! Move the contents of B, stored in any format, into ELL matrix A.
  !
  !   a    : destination
  !   b    : source; emptied by the move
  !   info : psb_success_ or the code reported by the conversion used
  !
  ! Dispatch on the dynamic type of B:
  !   ELL        : components are transferred with move_alloc (B is synced
  !                from the device first when b%is_dev() is true)
  !   COO        : delegated to a%mv_from_coo
  !   any other  : B is moved to a temporary COO, which is then moved into A
  !
  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_from_fmt
  implicit none

  class(psb_s_ell_sparse_mat), intent(inout)  :: a
  class(psb_s_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)              :: info

  type(psb_s_coo_sparse_mat) :: coo_tmp

  info = psb_success_

  select type (b)
  type is (psb_s_ell_sparse_mat)
    if (b%is_dev()) call b%sync()
    a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
    call move_alloc(b%val,   a%val)
    call move_alloc(b%ja,    a%ja)
    call move_alloc(b%idiag, a%idiag)
    call move_alloc(b%irn,   a%irn)
    call b%free()
    call a%set_host()

  type is (psb_s_coo_sparse_mat)
    call a%mv_from_coo(b,info)

  class default
    call b%mv_to_coo(coo_tmp,info)
    if (info == psb_success_) then
      call a%mv_from_coo(coo_tmp,info)
    end if
  end select

end subroutine psb_s_mv_ell_from_fmt
diff --git a/ext/impl/psb_s_mv_ell_to_coo.f90 b/ext/impl/psb_s_mv_ell_to_coo.f90 new file mode 100644 index 00000000..151cbeff --- /dev/null +++ b/ext/impl/psb_s_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +!
notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!

subroutine psb_s_mv_ell_to_coo(a,b,info)
  !
  ! Move the contents of ELL matrix A into COO matrix B.
  !
  !   a    : source; synced from the device first when a%is_dev() is true,
  !          and freed once its data has been transferred
  !   b    : destination; b%fix is called on it at the end
  !   info : psb_success_ on success, psb_err_alloc_dealloc_ when a
  !          (de)allocation step fails, otherwise the code from b%fix
  !
  ! Pieces of A are released as soon as they are no longer needed
  ! (idiag first, then ja once the indices are copied), taking a path
  ! slightly slower but with less memory footprint.
  !
  ! The release of a%idiag is now guarded with allocated() and uses stat=,
  ! so an unallocated component no longer aborts the program and a failed
  ! deallocation is reported through info.
  !
  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_to_coo
  implicit none

  class(psb_s_ell_sparse_mat), intent(inout) :: a
  class(psb_s_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  integer(psb_ipk_) :: nza, nr, i, j, k

  info = psb_success_
  if (a%is_dev()) call a%sync()

  nr  = a%get_nrows()
  nza = a%get_nzeros()

  ! idiag is not used by the conversion.
  if (allocated(a%idiag)) deallocate(a%idiag, stat=info)
  if (info /= 0) goto 9999
  b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat

  call psb_realloc(nza,b%ia,info)
  if (info == 0) call psb_realloc(nza,b%ja,info)
  if (info /= 0) goto 9999

  ! Row and column indices, row by row.
  k = 0
  do i=1, nr
    do j=1,a%irn(i)
      k = k + 1
      b%ia(k) = i
      b%ja(k) = a%ja(i,j)
    end do
  end do
  deallocate(a%ja, stat=info)

  if (info == 0) call psb_realloc(nza,b%val,info)
  if (info /= 0) goto 9999

  ! Coefficients, in the same order as the indices above.
  k = 0
  do i=1, nr
    do j=1,a%irn(i)
      k = k + 1
      b%val(k) = a%val(i,j)
    end do
  end do

  call a%free()
  call b%set_nzeros(nza)
  call b%set_host()
  call b%fix(info)
  return

9999 continue
  info = psb_err_alloc_dealloc_
  return
end subroutine psb_s_mv_ell_to_coo
diff --git a/ext/impl/psb_s_mv_ell_to_fmt.f90 b/ext/impl/psb_s_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..66f33508 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!

subroutine psb_s_mv_ell_to_fmt(a,b,info)
  !
  ! Move the contents of ELL matrix A into B, whatever the format of B.
  !
  !   a    : source; emptied by the move
  !   b    : destination
  !   info : psb_success_ or the code reported by the conversion used
  !
  ! Dispatch on the dynamic type of B:
  !   ELL        : components are transferred with move_alloc (A is synced
  !                from the device first when a%is_dev() is true)
  !   COO        : delegated to a%mv_to_coo
  !   any other  : A is moved to a temporary COO, which B then absorbs
  !                through b%mv_from_coo
  ! NOTE(review): the original carried the reminder "Need to fix trivial
  ! copies!" next to the COO branch; its intent is not visible from here.
  !
  use psb_base_mod
  use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_to_fmt
  implicit none

  class(psb_s_ell_sparse_mat), intent(inout)  :: a
  class(psb_s_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)              :: info

  type(psb_s_coo_sparse_mat) :: coo_tmp

  info = psb_success_

  select type (b)
  type is (psb_s_ell_sparse_mat)
    if (a%is_dev()) call a%sync()
    b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
    call move_alloc(a%val,   b%val)
    call move_alloc(a%ja,    b%ja)
    call move_alloc(a%idiag, b%idiag)
    call move_alloc(a%irn,   b%irn)
    call a%free()
    call b%set_host()

  type is (psb_s_coo_sparse_mat)
    call a%mv_to_coo(b,info)

  class default
    call a%mv_to_coo(coo_tmp,info)
    if (info == psb_success_) then
      call b%mv_from_coo(coo_tmp,info)
    end if
  end select

end subroutine psb_s_mv_ell_to_fmt
diff --git a/ext/impl/psb_s_mv_hdia_from_coo.f90 b/ext/impl/psb_s_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..88765079 --- /dev/null +++ b/ext/impl/psb_s_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +!
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
subroutine psb_s_mv_hdia_from_coo(a,b,info)
  !
  ! Move the contents of COO matrix B into HDIA matrix A.
  !
  !   a    : destination, filled through a%cp_from_coo
  !   b    : source; b%fix is called first unless b%is_by_rows() is true,
  !          and b is freed once the copy has succeeded
  !   info : psb_success_ on success; the code returned by b%fix if that
  !          step fails; psb_err_alloc_dealloc_ if the copy fails
  !
  use psb_base_mod
  use psb_s_hdia_mat_mod, psb_protect_name => psb_s_mv_hdia_from_coo
  implicit none

  class(psb_s_hdia_sparse_mat), intent(inout) :: a
  class(psb_s_coo_sparse_mat), intent(inout)  :: b
  integer(psb_ipk_), intent(out)              :: info

  info = psb_success_

  if (.not. b%is_by_rows()) then
    call b%fix(info)
    if (info /= psb_success_) return
  end if

  call a%cp_from_coo(b,info)
  if (info == 0) then
    call b%free()
  else
    ! Any copy failure is reported with this single code.
    info = psb_err_alloc_dealloc_
  end if
  return

end subroutine psb_s_mv_hdia_from_coo
diff --git a/ext/impl/psb_s_mv_hdia_to_coo.f90 b/ext/impl/psb_s_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..56399b2a --- /dev/null +++ b/ext/impl/psb_s_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_s_mv_hdia_to_coo(a,b,info)
+  !
+  ! Move an HDIA matrix into COO storage.
+  !
+  ! Arguments:
+  !   a    - HDIA source; freed once the copy has succeeded.
+  !   b    - COO matrix filled through a%cp_to_coo.
+  !   info - psb_success_ on success, psb_err_alloc_dealloc_ when the
+  !          copy reports a non-zero code.
+  !
+  use psb_base_mod
+  use psb_s_hdia_mat_mod, psb_protect_name => psb_s_mv_hdia_to_coo
+  implicit none
+
+  class(psb_s_hdia_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(inout)  :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  info = psb_success_
+
+  call a%cp_to_coo(b,info)
+  if (info == 0) then
+    ! The data has been copied into b: release the source.
+    call a%free()
+  else
+    info = psb_err_alloc_dealloc_
+  end if
+
+end subroutine psb_s_mv_hdia_to_coo
diff --git a/ext/impl/psb_s_mv_hll_from_coo.f90 b/ext/impl/psb_s_mv_hll_from_coo.f90
new file mode 100644
index 00000000..c8e46086
--- /dev/null
+++ b/ext/impl/psb_s_mv_hll_from_coo.f90
@@ -0,0 +1,58 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! 
software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_mv_hll_from_coo(a,b,info)
+  !
+  ! Move a COO matrix into HLL storage.
+  !
+  ! Arguments:
+  !   a    - HLL matrix filled by psi_convert_hll_from_coo, using the
+  !          hack size returned by psi_get_hksz().
+  !   b    - COO source; b%fix is called first when b%is_by_rows() is
+  !          false, and b is freed once the conversion has succeeded.
+  !   info - psb_success_ on success; the code returned by b%fix when
+  !          that call fails; psb_err_alloc_dealloc_ when the conversion
+  !          fails.
+  !
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_from_coo
+  implicit none
+
+  class(psb_s_hll_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  integer(psb_ipk_) :: hksz
+
+  info = psb_success_
+  if (.not.b%is_by_rows()) call b%fix(info)
+  ! Stop here on a failed fix: the conversion below would otherwise
+  ! overwrite info and hide the error.
+  if (info /= psb_success_) return
+
+  hksz = psi_get_hksz()
+  call psi_convert_hll_from_coo(a,hksz,b,info)
+  if (info /= 0) goto 9999
+  call b%free()
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_s_mv_hll_from_coo
diff --git a/ext/impl/psb_s_mv_hll_from_fmt.f90 b/ext/impl/psb_s_mv_hll_from_fmt.f90
new file mode 100644
index 00000000..19bda0a6
--- /dev/null
+++ b/ext/impl/psb_s_mv_hll_from_fmt.f90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. 
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+subroutine psb_s_mv_hll_from_fmt(a,b,info)
+  !
+  ! Move a matrix of any storage format into HLL storage.
+  !
+  ! Dispatch on the dynamic type of b:
+  !   COO   - delegate to a%mv_from_coo.
+  !   HLL   - take over b's components with move_alloc, then free b.
+  !   other - go through a temporary COO matrix.
+  !
+  ! info is psb_success_ unless one of the delegated calls reports
+  ! an error.
+  !
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_from_fmt
+  implicit none
+
+  class(psb_s_hll_sparse_mat), intent(inout)  :: a
+  class(psb_s_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)              :: info
+
+  ! Staging matrix for the generic (class default) path.
+  type(psb_s_coo_sparse_mat) :: coo_tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_s_coo_sparse_mat)
+    call a%mv_from_coo(b,info)
+
+  type is (psb_s_hll_sparse_mat)
+    if (b%is_dev()) call b%sync()
+    ! Scalar state first, then the allocatable components.
+    a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
+    a%hksz = b%hksz
+    a%nzt  = b%nzt
+    call move_alloc(b%irn,    a%irn)
+    call move_alloc(b%idiag,  a%idiag)
+    call move_alloc(b%hkoffs, a%hkoffs)
+    call move_alloc(b%ja,     a%ja)
+    call move_alloc(b%val,    a%val)
+    call b%free()
+    call a%set_host()
+
+  class default
+    call b%mv_to_coo(coo_tmp,info)
+    if (info /= psb_success_) return
+    call a%mv_from_coo(coo_tmp,info)
+  end select
+
+end subroutine psb_s_mv_hll_from_fmt
diff --git a/ext/impl/psb_s_mv_hll_to_coo.f90 b/ext/impl/psb_s_mv_hll_to_coo.f90
new file mode 100644
index 00000000..d36286a5
--- /dev/null
+++ b/ext/impl/psb_s_mv_hll_to_coo.f90
@@ -0,0 +1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_mv_hll_to_coo(a,b,info)
+  !
+  ! Move an HLL matrix into COO storage.
+  !
+  ! Arguments:
+  !   a    - HLL source; freed once the copy has succeeded.
+  !   b    - COO matrix filled through a%cp_to_coo.
+  !   info - psb_success_ on success, psb_err_alloc_dealloc_ when the
+  !          copy fails.
+  !
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_to_coo
+  implicit none
+
+  class(psb_s_hll_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  info = psb_success_
+
+  call a%cp_to_coo(b,info)
+  if (info == psb_success_) then
+    ! The data has been copied into b: release the source.
+    call a%free()
+  else
+    info = psb_err_alloc_dealloc_
+  end if
+
+end subroutine psb_s_mv_hll_to_coo
diff --git a/ext/impl/psb_s_mv_hll_to_fmt.f90 b/ext/impl/psb_s_mv_hll_to_fmt.f90
new file mode 100644
index 00000000..17618f69
--- /dev/null
+++ b/ext/impl/psb_s_mv_hll_to_fmt.f90
@@ -0,0 +1,69 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_s_mv_hll_to_fmt(a,b,info)
+  !
+  ! Move an HLL matrix into a matrix of any storage format.
+  !
+  ! Dispatch on the dynamic type of b:
+  !   COO   - delegate to a%mv_to_coo.
+  !   HLL   - hand a's components over to b with move_alloc, then free a.
+  !   other - go through a temporary COO matrix.
+  !
+  ! info is psb_success_ unless one of the delegated calls reports
+  ! an error.
+  !
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_to_fmt
+  implicit none
+
+  class(psb_s_hll_sparse_mat), intent(inout) :: a
+  class(psb_s_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  type(psb_s_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_s_coo_sparse_mat)
+    call a%mv_to_coo(b,info)
+    ! Need to fix trivial copies!
+  type is (psb_s_hll_sparse_mat)
+    if (a%is_dev()) call a%sync()
+    b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
+    call move_alloc(a%irn, b%irn)
+    call move_alloc(a%hkoffs, b%hkoffs)
+    call move_alloc(a%idiag, b%idiag)
+    call move_alloc(a%ja, b%ja)
+    call move_alloc(a%val, b%val)
+    b%hksz = a%hksz
+    ! nzt must travel with the arrays, exactly as the mirror routine
+    ! psb_s_mv_hll_from_fmt does; it was previously left unset in b.
+    b%nzt = a%nzt
+    call a%free()
+    call b%set_host()
+
+  class default
+    call a%mv_to_coo(tmp,info)
+    if (info == psb_success_) call b%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_s_mv_hll_to_fmt
diff --git a/ext/impl/psb_z_cp_dia_from_coo.f90 b/ext/impl/psb_z_cp_dia_from_coo.f90
new file mode 100644
index 00000000..e87bfb34
--- /dev/null
+++ b/ext/impl/psb_z_cp_dia_from_coo.f90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+subroutine psb_z_cp_dia_from_coo(a,b,info)
+  !
+  ! Copy a COO matrix into DIA storage; b is left untouched.
+  !
+  ! When b%is_by_rows() is true the conversion runs directly on b;
+  ! otherwise it runs on a temporary copy of b on which fix() has
+  ! been called.
+  !
+  ! Arguments:
+  !   a    - DIA matrix filled by psi_convert_dia_from_coo and marked
+  !          with set_host() on success.
+  !   b    - COO source.
+  !   info - psb_success_ on success; the code returned by
+  !          b%cp_to_coo or tmp%fix when preparing the temporary fails;
+  !          psb_err_alloc_dealloc_ when the conversion fails.
+  !
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_cp_dia_from_coo
+  implicit none
+
+  class(psb_z_dia_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  type(psb_z_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+  if (b%is_by_rows()) then
+    call psi_convert_dia_from_coo(a,b,info)
+  else
+    ! This is to guarantee tmp%is_by_rows()
+    call b%cp_to_coo(tmp,info)
+    ! Only fix a copy that was actually made: calling fix()
+    ! unconditionally would overwrite a cp_to_coo failure code.
+    if (info == psb_success_) call tmp%fix(info)
+
+    if (info /= psb_success_) return
+    call psi_convert_dia_from_coo(a,tmp,info)
+
+    call tmp%free()
+  end if
+  if (info /= 0) goto 9999
+  call a%set_host()
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_z_cp_dia_from_coo
diff --git a/ext/impl/psb_z_cp_dia_to_coo.f90 b/ext/impl/psb_z_cp_dia_to_coo.f90
new file mode 100644
index 00000000..26fac30b
--- /dev/null
+++ b/ext/impl/psb_z_cp_dia_to_coo.f90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! 
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+subroutine psb_z_cp_dia_to_coo(a,b,info)
+  !
+  ! Copy a DIA matrix into COO storage; a is left untouched.
+  !
+  ! Arguments:
+  !   a    - DIA source.
+  !   b    - COO matrix: allocated for a%get_nzeros() entries, filled
+  !          by psi_z_xtr_coo_from_dia, then passed through b%fix.
+  !   info - psb_success_ on success; otherwise the code returned by
+  !          the extraction or by b%fix.
+  !
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_cp_dia_to_coo
+  implicit none
+
+  class(psb_z_dia_sparse_mat), intent(in) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  integer(psb_ipk_) :: nr,nza,nc, nzd
+
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
+
+  call psi_z_xtr_coo_from_dia(nr,nc,&
+       & b%ia, b%ja, b%val, nzd, &
+       & size(a%data,1),size(a%data,2),&
+       & a%data,a%offset,info)
+  ! b%fix below assigns info again, so report an extraction failure
+  ! now instead of letting it be silently overwritten.
+  if (info /= psb_success_) return
+
+  ! NOTE(review): the count nzd returned by the extraction is not
+  ! used; the entry count is taken from nza - confirm the two agree.
+  call b%set_nzeros(nza)
+  call b%set_host()
+  call b%fix(info)
+
+end subroutine psb_z_cp_dia_to_coo
diff --git a/ext/impl/psb_z_cp_ell_from_coo.f90 b/ext/impl/psb_z_cp_ell_from_coo.f90
new file mode 100644
index 00000000..7559621d
--- /dev/null
+++ b/ext/impl/psb_z_cp_ell_from_coo.f90
@@ -0,0 +1,71 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_cp_ell_from_coo(a,b,info)
+  !
+  ! Copy a COO matrix into ELL storage; b is left untouched.
+  !
+  ! When b%is_by_rows() is true the conversion runs directly on b;
+  ! otherwise it runs on a temporary copy obtained with b%cp_to_coo.
+  !
+  ! Arguments:
+  !   a    - ELL matrix filled by psi_z_convert_ell_from_coo and marked
+  !          with set_host() on success.
+  !   b    - COO source.
+  !   info - psb_success_ on success, psb_err_alloc_dealloc_ on any
+  !          failure.
+  !
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_from_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_z_ell_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! Staging copy used when b does not report is_by_rows().
+  type(psb_z_coo_sparse_mat) :: coo_tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+
+  if (.not.b%is_by_rows()) then
+    ! This is to have fix_coo called behind the scenes
+    call b%cp_to_coo(coo_tmp,info)
+    if (info == psb_success_) then
+      call psi_z_convert_ell_from_coo(a,coo_tmp,info)
+      if (info == psb_success_) call coo_tmp%free()
+    end if
+  else
+    call psi_z_convert_ell_from_coo(a,b,info)
+  end if
+
+  if (info == psb_success_) then
+    call a%set_host()
+  else
+    info = psb_err_alloc_dealloc_
+  end if
+
+end subroutine psb_z_cp_ell_from_coo
diff --git a/ext/impl/psb_z_cp_ell_from_fmt.f90 b/ext/impl/psb_z_cp_ell_from_fmt.f90
new file mode 100644
index 00000000..6d63b64e
--- /dev/null
+++ b/ext/impl/psb_z_cp_ell_from_fmt.f90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+subroutine psb_z_cp_ell_from_fmt(a,b,info)
+  !
+  ! Copy a matrix of any storage format into ELL storage; b is left
+  ! untouched.
+  !
+  ! Dispatch on the dynamic type of b:
+  !   COO   - delegate to a%cp_from_coo.
+  !   ELL   - duplicate the base part and the irn/idiag/ja/val arrays
+  !           with psb_safe_cpy, stopping at the first failed copy.
+  !   other - copy b into a temporary COO matrix and move that into a.
+  !
+  ! info is psb_success_ unless one of the delegated calls reports
+  ! an error.
+  !
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_from_fmt
+  implicit none
+
+  class(psb_z_ell_sparse_mat), intent(inout) :: a
+  class(psb_z_base_sparse_mat), intent(in)   :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  ! Staging matrix for the generic (class default) path.
+  type(psb_z_coo_sparse_mat) :: coo_tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_z_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+  type is (psb_z_ell_sparse_mat)
+    if (b%is_dev()) call b%sync()
+    a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
+    ! Each copy runs only while every previous one has succeeded.
+    if (info == 0) call psb_safe_cpy(b%irn,   a%irn,   info)
+    if (info == 0) call psb_safe_cpy(b%idiag, a%idiag, info)
+    if (info == 0) call psb_safe_cpy(b%ja,    a%ja,    info)
+    if (info == 0) call psb_safe_cpy(b%val,   a%val,   info)
+    call a%set_host()
+
+  class default
+    call b%cp_to_coo(coo_tmp,info)
+    if (info /= psb_success_) return
+    call a%mv_from_coo(coo_tmp,info)
+  end select
+end subroutine psb_z_cp_ell_from_fmt
diff --git a/ext/impl/psb_z_cp_ell_to_coo.f90 b/ext/impl/psb_z_cp_ell_to_coo.f90
new file mode 100644
index 00000000..38a1696b
--- /dev/null
+++ b/ext/impl/psb_z_cp_ell_to_coo.f90
@@ -0,0 +1,69 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_cp_ell_to_coo(a,b,info)
+  !
+  ! Copy an ELL matrix into COO storage; a is left untouched.
+  !
+  ! For every row i the first a%irn(i) entries of a%ja(i,:) and
+  ! a%val(i,:) are appended to b as (i, column, value) triplets.
+  !
+  ! Arguments:
+  !   a    - ELL source.
+  !   b    - COO matrix: allocated for a%get_nzeros() entries, filled,
+  !          then passed through b%fix and marked with set_host().
+  !   info - psb_success_ at entry, then whatever b%fix returns.
+  !
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_to_coo
+  implicit none
+
+  class(psb_z_ell_sparse_mat), intent(in)    :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_) :: irow, jpos, nout, nr, nc, nza
+
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
+
+  ! nout is the running position in the COO arrays.
+  nout = 0
+  do irow = 1, nr
+    do jpos = 1, a%irn(irow)
+      nout = nout + 1
+      b%ia(nout)  = irow
+      b%ja(nout)  = a%ja(irow,jpos)
+      b%val(nout) = a%val(irow,jpos)
+    end do
+  end do
+
+  call b%set_nzeros(nza)
+  call b%fix(info)
+  call b%set_host()
+
+end subroutine psb_z_cp_ell_to_coo
diff --git a/ext/impl/psb_z_cp_ell_to_fmt.f90 b/ext/impl/psb_z_cp_ell_to_fmt.f90
new file mode 100644
index 00000000..7fb64a90
--- /dev/null
+++ b/ext/impl/psb_z_cp_ell_to_fmt.f90
@@ -0,0 +1,67 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! 
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+subroutine psb_z_cp_ell_to_fmt(a,b,info)
+  !
+  ! Copy an ELL matrix into a matrix of any storage format; a is left
+  ! untouched.
+  !
+  ! Dispatch on the dynamic type of b:
+  !   COO   - delegate to a%cp_to_coo.
+  !   ELL   - duplicate the base part and the idiag/irn/ja/val arrays
+  !           with psb_safe_cpy, stopping at the first failed copy.
+  !   other - copy a into a temporary COO matrix and move that into b.
+  !
+  ! info is psb_success_ unless one of the delegated calls reports
+  ! an error.
+  !
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_to_fmt
+  implicit none
+
+  class(psb_z_ell_sparse_mat), intent(in)       :: a
+  class(psb_z_base_sparse_mat), intent(inout)   :: b
+  integer(psb_ipk_), intent(out)                :: info
+
+  ! Staging matrix for the generic (class default) path.
+  type(psb_z_coo_sparse_mat) :: coo_tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_z_coo_sparse_mat)
+    call a%cp_to_coo(b,info)
+
+  type is (psb_z_ell_sparse_mat)
+    if (a%is_dev()) call a%sync()
+
+    b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
+    ! Each copy runs only while every previous one has succeeded.
+    if (info == 0) call psb_safe_cpy(a%idiag, b%idiag, info)
+    if (info == 0) call psb_safe_cpy(a%irn,   b%irn,   info)
+    if (info == 0) call psb_safe_cpy(a%ja,    b%ja,    info)
+    if (info == 0) call psb_safe_cpy(a%val,   b%val,   info)
+    call b%set_host()
+
+  class default
+    call a%cp_to_coo(coo_tmp,info)
+    if (info /= psb_success_) return
+    call b%mv_from_coo(coo_tmp,info)
+  end select
+
+end subroutine psb_z_cp_ell_to_fmt
diff --git a/ext/impl/psb_z_cp_hdia_from_coo.f90 b/ext/impl/psb_z_cp_hdia_from_coo.f90
new file mode 100644
index 00000000..ed77914e
--- /dev/null
+++ b/ext/impl/psb_z_cp_hdia_from_coo.f90
@@ -0,0 +1,222 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_cp_hdia_from_coo(a,b,info)
+  !
+  ! Copy a COO matrix into HDIA storage; b is left untouched.
+  !
+  ! When b%is_by_rows() is true the conversion runs directly on b;
+  ! otherwise it runs on a temporary copy of b, fixed if needed.
+  !
+  ! Arguments:
+  !   a    - HDIA matrix that receives the data; a%hacksize is read as
+  !          the number of rows per hack. Marked with set_host() on
+  !          success.
+  !   b    - COO source.
+  !   info - psb_success_ on success, psb_err_alloc_dealloc_ on any
+  !          failure.
+  !
+  use psb_base_mod
+  use psb_z_hdia_mat_mod, psb_protect_name => psb_z_cp_hdia_from_coo
+  implicit none
+
+  class(psb_z_hdia_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  type(psb_z_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+  if (b%is_dev()) call b%sync()
+  if (b%is_by_rows()) then
+    call inner_cp_hdia_from_coo(a,b,info)
+    if (info /= psb_success_) goto 9999
+  else
+    call b%cp_to_coo(tmp,info)
+    if (info /= psb_success_) goto 9999
+    if (.not.tmp%is_by_rows()) call tmp%fix(info)
+    if (info /= psb_success_) goto 9999
+    call inner_cp_hdia_from_coo(a,tmp,info)
+    if (info /= psb_success_) goto 9999
+    call tmp%free()
+  end if
+  call a%set_host()
+
+  return
+
+9999 continue
+
+  info = psb_err_alloc_dealloc_
+  return
+
+contains
+
+  !
+  ! Build the HDIA structure of a from the COO matrix tmp, which the
+  ! caller passes only when it reports is_by_rows().
+  !
+  ! Two passes are made over the hacks (groups of hacksize rows):
+  ! the first accumulates the number of diagonals of each hack into
+  ! a%hackOffsets, the second fills a%diaOffsets and a%val.
+  !
+  subroutine inner_cp_hdia_from_coo(a,tmp,info)
+    use psb_base_mod
+    use psi_ext_util_mod
+
+    implicit none
+    class(psb_z_hdia_sparse_mat), intent(inout) :: a
+    class(psb_z_coo_sparse_mat), intent(in) :: tmp
+    integer(psb_ipk_), intent(out) :: info
+
+    !locals
+    integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w
+    integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:)
+    integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,&
+         & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout
+    integer(psb_ipk_) :: debug_level, debug_unit
+    character(len=20) :: name
+    logical, parameter :: debug=.false.
+
+    ! info is intent(out): it must be defined before the
+    ! "if (info == psb_success_)" guards below read it.
+    info = psb_success_
+
+    nr = tmp%get_nrows()
+    nc = tmp%get_ncols()
+    nza = tmp%get_nzeros()
+    ! If it is sorted then we can lessen memory impact
+    a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat
+
+    ! Number of hacks: nr divided by hacksize, rounded up.
+    hacksize = a%hacksize
+    a%nhacks = (nr+hacksize-1)/hacksize
+    nhacks = a%nhacks
+
+    ! d and offset hold one slot per possible diagonal (nr+nc-1).
+    ndiag = nr+nc-1
+    if (info == psb_success_) call psb_realloc(nr,irsz,info)
+    if (info == psb_success_) call psb_realloc(ndiag,d,info)
+    if (info == psb_success_) call psb_realloc(ndiag,offset,info)
+    if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info)
+    if (info /= psb_success_) return
+
+    ! irsz(i): number of entries stored in row i.
+    irsz = 0
+    do k=1,nza
+      ir = tmp%ia(k)
+      irsz(ir) = irsz(ir)+1
+    end do
+
+    !
+    ! First run: count the diagonals of every hack
+    !
+    a%nzeros = 0
+    d = 0
+    iszd = 0
+    a%hackOffsets(1)=0
+    klast1 = 1
+    do k=1, nhacks
+      i = (k-1)*hacksize + 1
+      ib = min(hacksize,nr-i+1)
+      kfirst = klast1
+      klast1 = kfirst + sum(irsz(i:i+ib-1))
+      ! klast1 points to last element of chunk plus 1
+      if (debug) then
+        write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr
+        write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1)
+        write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1)
+      end if
+      call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & nd, d, offset, info, initd=.false., cleard=.true.)
+      iszd = iszd + nd
+      a%hackOffsets(k+1)=iszd
+      if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd
+      if (debug) write(*,*) 'offset ', offset(1:nd)
+    end do
+    if (debug) then
+      write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd
+      write(*,*) 'Hackoffsets ',a%hackOffsets(:)
+    end if
+    if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info)
+    if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info)
+    if (info /= psb_success_) return
+    klast1 = 1
+    !
+    ! Second run: copy elements
+    !
+    do k=1, nhacks
+      i = (k-1)*hacksize + 1
+      ib = min(hacksize,nr-i+1)
+      kfirst = klast1
+      klast1 = kfirst + sum(irsz(i:i+ib-1))
+      ! klast1 points to last element of chunk plus 1
+      hackfirst = a%hackoffsets(k)
+      hacknext = a%hackoffsets(k+1)
+      call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, &
+           & initd=.false., cleard=.false.)
+      if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext)
+      call psi_z_xtr_dia_from_coo(nr,nc,(klast1-kfirst),&
+           & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
+           & tmp%val(kfirst:klast1-1), &
+           & d,hacksize,(hacknext-hackfirst),&
+           & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,&
+           & initdata=.true.,rdisp=(i-1))
+
+      call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),&
+           & a%diaOffsets(hackfirst+1:hacknext),nzout)
+      a%nzeros = a%nzeros + nzout
+      ! d was left populated (cleard=.false.): reset the slots used by
+      ! this hack before the next iteration.
+      call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext))
+
+    end do
+    if (debug) then
+      write(*,*) 'NZEROS: ',a%nzeros, nza
+      write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd)
+      write(*,*) 'values: '
+      j=0
+      do k=1,nhacks
+        write(*,*) 'Hack No. ',k
+        do i=1,hacksize*(iszd/nhacks)
+          j = j + 1
+          write(*,*) j, a%val(j)
+        end do
+      end do
+    end if
+  end subroutine inner_cp_hdia_from_coo
+
+  !
+  ! Return in nz the sum, over the ncd entries of offsets, of the
+  ! length (ir2-ir1+1) of the local row range ir1:ir2 computed for
+  ! each offset from nr, nc, the row displacement rdisp and the block
+  ! height nrd.
+  !
+  subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz)
+    implicit none
+    integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:)
+    integer(psb_ipk_), intent(out) :: nz
+    !
+    integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1
+    nz = 0
+    nrcmdisp = min(nr-rdisp,nc-rdisp)
+    rdisp1 = 1-rdisp
+    do j=1, ncd
+      if (offsets(j)>=0) then
+        ir1 = 1
+        ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_)
+        ir2 = min(nrd, nrcmdisp - offsets(j))
+      else
+        ! ir1 = max(1,1-offsets(j)-rdisp_)
+        ir1 = max(1, rdisp1 - offsets(j))
+        ir2 = min(nrd, nrcmdisp)
+      end if
+      nz = nz + (ir2-ir1+1)
+    end do
+  end subroutine countnz
+
+  !
+  ! Zero the entries of d selected by the first nd offsets; an offset
+  ! is turned into an index of d by adding nr.
+  !
+  subroutine cleand(nr,nd,d,offset)
+    implicit none
+    integer(psb_ipk_), intent(in) :: nr,nd,offset(:)
+    integer(psb_ipk_), intent(inout) :: d(:)
+    integer(psb_ipk_) :: i,id
+
+    do i=1,nd
+      id = offset(i) + nr
+      d(id) = 0
+    end do
+  end subroutine cleand
+
+end subroutine psb_z_cp_hdia_from_coo
diff --git a/ext/impl/psb_z_cp_hdia_to_coo.f90 b/ext/impl/psb_z_cp_hdia_to_coo.f90
new file mode 100644
index 00000000..c0544ff0
--- /dev/null
+++ b/ext/impl/psb_z_cp_hdia_to_coo.f90
@@ -0,0 +1,84 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! 
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_cp_hdia_to_coo: copy the HDIA matrix A into the COO matrix B.
+!
+! Arguments:
+!   a    - HDIA source matrix (in); synchronised first if it lives on the device.
+!   b    - COO destination (inout); reallocated to hold a%get_nzeros() entries.
+!   info - psb_success_ on success, the code returned by the extraction
+!          helper or by b%fix on failure, -8 when the number of extracted
+!          entries does not match a%get_nzeros().
+!
+subroutine psb_z_cp_hdia_to_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_z_hdia_mat_mod, psb_protect_name => psb_z_cp_hdia_to_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_z_hdia_sparse_mat), intent(in) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,hacksize,nhacks,&
+       & hackfirst, hacknext
+
+  info = psb_success_
+  if (a%is_dev()) call a%sync()
+
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
+  call b%set_nzeros(nza)
+  call b%set_sort_status(psb_unsorted_)
+  nhacks = a%nhacks
+  hacksize = a%hacksize
+  ! J counts the entries already written into B.
+  j = 0
+  do k=1, nhacks
+    i = (k-1)*hacksize + 1
+    hackfirst = a%hackoffsets(k)
+    hacknext = a%hackoffsets(k+1)
+    call psi_z_xtr_coo_from_dia(nr,nc,&
+         & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, &
+         & hacksize,(hacknext-hackfirst),&
+         & a%val((hacksize*hackfirst)+1:hacksize*hacknext),&
+         & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1))
+    ! Do not let a failure be hidden by the next hack or by b%fix below.
+    if (info /= psb_success_) return
+    j = j + nzd
+  end do
+  if (nza /= j) then
+    write(*,*) 'Wrong counts in hdia_to_coo',j,nza
+    info = -8
+    return
+  end if
+  call b%set_host()
+  call b%fix(info)
+
+end subroutine psb_z_cp_hdia_to_coo
diff --git a/ext/impl/psb_z_cp_hll_from_coo.f90 b/ext/impl/psb_z_cp_hll_from_coo.f90
new file mode 100644
index 00000000..15a8d1c2
--- /dev/null
+++ b/ext/impl/psb_z_cp_hll_from_coo.f90
@@ -0,0 +1,74 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_cp_hll_from_coo: fill the HLL matrix A with a copy of the COO matrix B.
+!
+! Arguments:
+!   a    - HLL matrix to be filled (inout).
+!   b    - COO source matrix (in); synchronised first if it lives on the device.
+!   info - psb_success_ on success; the code of a failed copy/sort of the
+!          temporary, or psb_err_alloc_dealloc_ when the conversion fails.
+!
+! The hack size is the current value returned by psi_get_hksz().
+!
+subroutine psb_z_cp_hll_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_from_coo
+  implicit none
+
+  class(psb_z_hll_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  type(psb_z_coo_sparse_mat) :: tmp
+  integer(psb_ipk_) :: debug_level, debug_unit, hksz
+  character(len=20) :: name='hll_from_coo'
+
+  info = psb_success_
+  debug_unit = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  if (b%is_dev()) call b%sync()
+  hksz = psi_get_hksz()
+  if (b%is_by_rows()) then
+    call psi_convert_hll_from_coo(a,hksz,b,info)
+  else
+    ! This is to guarantee tmp%is_by_rows()
+    call b%cp_to_coo(tmp,info)
+    ! Only sort when the copy succeeded, otherwise the failure code of
+    ! cp_to_coo would be overwritten by the one returned by fix.
+    if (info == psb_success_) call tmp%fix(info)
+
+    if (info /= psb_success_) return
+    call psi_convert_hll_from_coo(a,hksz,tmp,info)
+
+    call tmp%free()
+  end if
+  if (info /= 0) goto 9999
+  call a%set_host()
+
+  return
+
+9999 continue
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_z_cp_hll_from_coo
diff --git a/ext/impl/psb_z_cp_hll_from_fmt.f90 b/ext/impl/psb_z_cp_hll_from_fmt.f90
new file mode 100644
index 00000000..3bdb2271
--- /dev/null
+++ b/ext/impl/psb_z_cp_hll_from_fmt.f90
@@ -0,0 +1,70 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+!
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_cp_hll_from_fmt: fill the HLL matrix A with a copy of B, whatever
+! the storage format of B.
+!   COO source   -> delegated to a%cp_from_coo
+!   HLL source   -> component-wise copy
+!   other format -> via a temporary COO copy handed over to a%mv_from_coo
+! INFO returns psb_success_ or the code of the first failing step.
+!
+subroutine psb_z_cp_hll_from_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_from_fmt
+  implicit none
+
+  class(psb_z_hll_sparse_mat), intent(inout) :: a
+  class(psb_z_base_sparse_mat), intent(in) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  ! staging area used only for the generic-format branch
+  type(psb_z_coo_sparse_mat) :: coo_stage
+
+  info = psb_success_
+
+  select type (b)
+  class is (psb_z_coo_sparse_mat)
+    call a%cp_from_coo(b,info)
+
+  class is (psb_z_hll_sparse_mat)
+    ! make sure the host copy of the source is current
+    if (b%is_dev()) call b%sync()
+
+    a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
+    ! each copy is attempted only if all the previous ones succeeded
+    if (info == 0) call psb_safe_cpy(b%irn, a%irn, info)
+    if (info == 0) call psb_safe_cpy(b%hkoffs, a%hkoffs, info)
+    if (info == 0) call psb_safe_cpy(b%idiag, a%idiag, info)
+    if (info == 0) call psb_safe_cpy(b%ja, a%ja, info)
+    if (info == 0) call psb_safe_cpy(b%val, a%val, info)
+    if (info == 0) then
+      a%hksz = b%hksz
+      a%nzt = b%nzt
+    end if
+    call a%set_host()
+
+  class default
+    call b%cp_to_coo(coo_stage,info)
+    if (info == psb_success_) then
+      call a%mv_from_coo(coo_stage,info)
+    end if
+  end select
+end subroutine psb_z_cp_hll_from_fmt
diff --git a/ext/impl/psb_z_cp_hll_to_coo.f90 b/ext/impl/psb_z_cp_hll_to_coo.f90
new file mode 100644
index 00000000..409fe7b5
--- /dev/null
+++ b/ext/impl/psb_z_cp_hll_to_coo.f90
@@ -0,0 +1,104 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_cp_hll_to_coo: copy the HLL matrix A into the COO matrix B.
+! A is walked one hack (group of a%hksz rows) at a time; the entries of each
+! hack are appended to B, which is finally normalised through b%fix.
+! INFO returns psb_success_ or the code produced by b%fix.
+!
+subroutine psb_z_cp_hll_to_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_to_coo
+  implicit none
+
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  ! matrix sizes
+  integer(psb_ipk_) :: nrows, ncols, nnz, hksz
+  ! current hack: first row, row count, index, storage length, entry count
+  integer(psb_ipk_) :: row0, blk_rows, blk_id, blk_len, blk_nz
+  ! running positions in the HLL arrays (src) and in the COO arrays (dst)
+  integer(psb_ipk_) :: src_pos, dst_pos
+
+  info = psb_success_
+
+  if (a%is_dev()) call a%sync()
+  nrows = a%get_nrows()
+  ncols = a%get_ncols()
+  nnz = a%get_nzeros()
+
+  call b%allocate(nrows,ncols,nnz)
+  b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
+
+  src_pos = 1
+  dst_pos = 1
+  hksz = a%hksz
+  do row0 = 1, nrows, hksz
+    blk_rows = min(hksz,nrows-row0+1)
+    blk_id = (row0-1)/hksz + 1
+    blk_len = a%hkoffs(blk_id+1) - a%hkoffs(blk_id)
+    blk_nz = sum(a%irn(row0:row0+blk_rows-1))
+    call inner_copy(row0,blk_rows,&
+         & b%ia(dst_pos:dst_pos+blk_nz-1),&
+         & b%ja(dst_pos:dst_pos+blk_nz-1),&
+         & b%val(dst_pos:dst_pos+blk_nz-1),&
+         & a%ja(src_pos:src_pos+blk_len-1),&
+         & a%val(src_pos:src_pos+blk_len-1),&
+         & a%irn(row0:row0+blk_rows-1),&
+         & hksz)
+    src_pos = src_pos + blk_len
+    dst_pos = dst_pos + blk_nz
+  end do
+
+  call b%set_nzeros(nnz)
+  call b%set_host()
+  call b%fix(info)
+
+contains
+
+  !
+  ! Unpack one hack: JA/VAL are viewed as LD-by-* tables, and the first
+  ! IRN(r) columns of local row r are written out as COO triples whose
+  ! global row index is I+r-1.
+  !
+  subroutine inner_copy(i,ir,iac,&
+       & jac,valc,ja,val,irn,ld)
+    integer(psb_ipk_) :: i,ir,ld
+    integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*)
+    complex(psb_dpk_) :: valc(*), val(ld,*)
+
+    integer(psb_ipk_) :: lrow, lcol, pos
+    pos = 0
+    do lrow = 1, ir
+      do lcol = 1, irn(lrow)
+        pos = pos + 1
+        iac(pos) = i+lrow-1
+        jac(pos) = ja(lrow,lcol)
+        valc(pos) = val(lrow,lcol)
+      end do
+    end do
+
+  end subroutine inner_copy
+
+end subroutine psb_z_cp_hll_to_coo
diff --git a/ext/impl/psb_z_cp_hll_to_fmt.f90 b/ext/impl/psb_z_cp_hll_to_fmt.f90
new file mode 100644
index 00000000..b0417c92
--- /dev/null
+++ b/ext/impl/psb_z_cp_hll_to_fmt.f90
@@ -0,0 +1,68 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_cp_hll_to_fmt: copy the HLL matrix A into B, whatever the storage
+! format of B.
+!   COO destination -> delegated to a%cp_to_coo
+!   HLL destination -> component-wise copy
+!   other format    -> via a temporary COO copy handed over to b%mv_from_coo
+!
+! Arguments:
+!   a    - HLL source matrix (in).
+!   b    - destination matrix (inout).
+!   info - psb_success_ or the code of the first failing step.
+!
+subroutine psb_z_cp_hll_to_fmt(a,b,info)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_to_fmt
+  implicit none
+
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  class(psb_z_base_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  type(psb_z_coo_sparse_mat) :: tmp
+
+  info = psb_success_
+
+  select type (b)
+  type is (psb_z_coo_sparse_mat)
+    call a%cp_to_coo(b,info)
+
+  type is (psb_z_hll_sparse_mat)
+    if (a%is_dev()) call a%sync()
+    b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
+    if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info)
+    if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info)
+    if (info == 0) call psb_safe_cpy( a%irn, b%irn , info)
+    if (info == 0) call psb_safe_cpy( a%ja , b%ja , info)
+    if (info == 0) call psb_safe_cpy( a%val, b%val , info)
+    if (info == 0) b%hksz = a%hksz
+    ! Keep the entry counter in step with the copied data, as the
+    ! HLL-to-HLL branch of psb_z_cp_hll_from_fmt does.
+    if (info == 0) b%nzt = a%nzt
+    call b%set_host()
+
+  class default
+    call a%cp_to_coo(tmp,info)
+    if (info == psb_success_) call b%mv_from_coo(tmp,info)
+  end select
+
+end subroutine psb_z_cp_hll_to_fmt
diff --git a/ext/impl/psb_z_dia_aclsum.f90 b/ext/impl/psb_z_dia_aclsum.f90
new file mode 100644
index 00000000..5aed7ff0
--- /dev/null
+++ b/ext/impl/psb_z_dia_aclsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_z_dia_aclsum: sums of the absolute values along the columns of the
+! DIA matrix A, accumulated in D (D starts from one when A is flagged as
+! unit, from zero otherwise). An error is pushed when D has fewer entries
+! than A has columns.
+!
+subroutine psb_z_dia_aclsum(d,a)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_aclsum
+  implicit none
+  class(psb_z_dia_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out) :: d(:)
+
+  ! matrix sizes and extents of the diagonal storage
+  integer(psb_ipk_) :: m, n, nrd, ndiag
+  ! current diagonal, its offset, its row range, row and column indices
+  integer(psb_ipk_) :: idiag, off, row_lo, row_hi, irow, icol
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='aclsum'
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  d = dzero
+  if (a%is_unit()) d = done
+
+  nrd = size(a%data,1)
+  ndiag = size(a%data,2)
+  do idiag = 1, ndiag
+    off = a%offset(idiag)
+    ! rows of this diagonal: 1..nrd-off above the main one, 1-off..nrd otherwise
+    row_lo = max(1, 1-off)
+    row_hi = min(nrd, nrd-off)
+    do irow = row_lo, row_hi
+      icol = irow + off
+      d(icol) = d(icol) + abs(a%data(irow,idiag))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_dia_aclsum
diff --git a/ext/impl/psb_z_dia_allocate_mnnz.f90 b/ext/impl/psb_z_dia_allocate_mnnz.f90
new file mode 100644
index 00000000..e9c614f6
--- /dev/null
+++ b/ext/impl/psb_z_dia_allocate_mnnz.f90
@@ -0,0 +1,88 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_dia_allocate_mnnz: allocate the storage of the DIA matrix A for an
+! M x N matrix and put A in the build state.
+!
+! Arguments:
+!   m, n - number of rows and columns; a negative value is an error.
+!   a    - matrix to be allocated (inout).
+!   nz   - optional estimate of the number of entries. a%data gets M rows and
+!          ceiling(max(nz,1)/M) columns; without NZ the estimate is
+!          max(7*M,7*N,1).
+!
+! Errors are pushed on the PSBLAS error stack and handled according to the
+! error action in force.
+!
+subroutine psb_z_dia_allocate_mnnz(m,n,a,nz)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_z_dia_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  Integer(Psb_ipk_) :: err_act, info, nz_, mdiv
+  character(len=20) :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione/))
+    goto 9999
+  endif
+  ! M == 0 is a legal size: never divide by it. For M >= 1 the result is
+  ! the same as dividing by M.
+  mdiv = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + mdiv - ione)/mdiv
+  else
+    nz_ = (max(7*m,7*n,ione) + mdiv - ione)/mdiv
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione/))
+    goto 9999
+  endif
+
+  if (info == psb_success_) call psb_realloc(m,nz_,a%data,info)
+  if (info == psb_success_) call psb_realloc(m+n,a%offset,info)
+  if (info == psb_success_) then
+    a%data = 0
+    a%offset = 0
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+    call a%set_bld()
+    call a%set_triangle(.false.)
+    call a%set_unit(.false.)
+    call a%set_dupl(psb_dupl_def_)
+  end if
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_dia_allocate_mnnz
diff --git a/ext/impl/psb_z_dia_arwsum.f90 b/ext/impl/psb_z_dia_arwsum.f90
new file mode 100644
index 00000000..42805349
--- /dev/null
+++ b/ext/impl/psb_z_dia_arwsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_z_dia_arwsum: sums of the absolute values along the rows of the DIA
+! matrix A.
+!
+! Arguments:
+!   d - output vector (out); entry i accumulates abs(a%data(i,j)) over the
+!       stored diagonals j, starting from one when A is flagged as unit and
+!       from zero otherwise. It must hold at least a%get_nrows() entries,
+!       otherwise psb_err_input_asize_small_i_ is pushed on the error stack.
+!   a - DIA matrix (in); synchronised first if it lives on the device.
+!
+subroutine psb_z_dia_arwsum(d,a)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_arwsum
+  implicit none
+  class(psb_z_dia_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out) :: d(:)
+
+  integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr
+  logical :: tra
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='arwsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  ! The result has one entry per row, so D is checked against the number
+  ! of rows (the number of columns only matters for the column sums).
+  if (size(d) < m) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = m
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = done
+  else
+    d = dzero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    jc = a%offset(j)
+    ! Row range scanned for the diagonal with offset JC.
+    if (jc > 0) then
+      ir1 = 1
+      ir2 = nr - jc
+    else
+      ir1 = 1 - jc
+      ir2 = nr
+    end if
+    do i=ir1, ir2
+      d(i) = d(i) + abs(a%data(i,j))
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_dia_arwsum
diff --git a/ext/impl/psb_z_dia_colsum.f90 b/ext/impl/psb_z_dia_colsum.f90
new file mode 100644
index 00000000..69919736
--- /dev/null
+++ b/ext/impl/psb_z_dia_colsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_z_dia_colsum: sums of the coefficients along the columns of the DIA
+! matrix A, accumulated in D (D starts from one when A is flagged as unit,
+! from zero otherwise). An error is pushed when D has fewer entries than A
+! has columns.
+!
+subroutine psb_z_dia_colsum(d,a)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_colsum
+  implicit none
+  class(psb_z_dia_sparse_mat), intent(in) :: a
+  complex(psb_dpk_), intent(out) :: d(:)
+
+  ! matrix sizes and extents of the diagonal storage
+  integer(psb_ipk_) :: m, n, nrd, ndiag
+  ! current diagonal, its offset, its row range, row and column indices
+  integer(psb_ipk_) :: idiag, off, row_lo, row_hi, irow, icol
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='colsum'
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  d = zzero
+  if (a%is_unit()) d = zone
+
+  nrd = size(a%data,1)
+  ndiag = size(a%data,2)
+  do idiag = 1, ndiag
+    off = a%offset(idiag)
+    ! rows of this diagonal: 1..nrd-off above the main one, 1-off..nrd otherwise
+    row_lo = max(1, 1-off)
+    row_hi = min(nrd, nrd-off)
+    do irow = row_lo, row_hi
+      icol = irow + off
+      d(icol) = d(icol) + a%data(irow,idiag)
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_dia_colsum
diff --git a/ext/impl/psb_z_dia_csgetptn.f90 b/ext/impl/psb_z_dia_csgetptn.f90
new file mode 100644
index 00000000..d63304f8
--- /dev/null
+++ b/ext/impl/psb_z_dia_csgetptn.f90
@@ -0,0 +1,188 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csgetptn + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_z_dia_csgetptn diff --git a/ext/impl/psb_z_dia_csgetrow.f90 b/ext/impl/psb_z_dia_csgetrow.f90 new file mode 100644 index 00000000..6571264e --- /dev/null +++ b/ext/impl/psb_z_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csgetrow + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_z_dia_csgetrow diff --git a/ext/impl/psb_z_dia_csmm.f90 b/ext/impl/psb_z_dia_csmm.f90 new file mode 100644 index 00000000..cbebd10e --- /dev/null +++ b/ext/impl/psb_z_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csmm + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dia_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_z_dia_csmm_inner + +end subroutine psb_z_dia_csmm diff --git a/ext/impl/psb_z_dia_csmv.f90 b/ext/impl/psb_z_dia_csmv.f90 new file mode 100644 index 00000000..9d1f5a2a --- /dev/null +++ b/ext/impl/psb_z_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csmv + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dia_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_z_dia_csmv_inner + +end subroutine psb_z_dia_csmv diff --git a/ext/impl/psb_z_dia_get_diag.f90 b/ext/impl/psb_z_dia_get_diag.f90 new file mode 100644 index 00000000..9b403923 --- /dev/null +++ b/ext/impl/psb_z_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_z_dia_get_diag: copy the main diagonal of a matrix stored by
+! diagonals (a%offset / a%data) into d.
+!
+! Arguments:
+!   a    - input matrix; a%sync() is invoked first when a%is_dev() is true.
+!   d    - output vector, at least min(nrows,ncols) long. Entries beyond
+!          min(nrows,ncols) are set to zero.
+!   info - psb_success_ on normal exit; psb_err_input_asize_invalid_i_
+!          when d is too short.
+!
+subroutine psb_z_dia_get_diag(a,d,info)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_get_diag
+  implicit none
+  class(psb_z_dia_sparse_mat), intent(in) :: a
+  complex(psb_dpk_), intent(out)          :: d(:)
+  integer(psb_ipk_), intent(out)          :: info
+
+  integer(psb_ipk_)  :: err_act, mnm, i
+  character(len=20)  :: name='get_diag'
+  logical, parameter :: debug=.false.
+
+  info  = psb_success_
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  mnm = min(a%get_nrows(),a%get_ncols())
+  if (size(d) < mnm) then
+    info=psb_err_input_asize_invalid_i_
+    call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+    goto 9999
+  end if
+
+  ! d is intent(out): give every entry a defined value up front, so that
+  ! a matrix with no stored diagonal at offset 0 yields a zero diagonal
+  ! rather than undefined contents.
+  d(:) = zzero
+
+  if (a%is_unit()) then
+    d(1:mnm) = zone
+  else
+    ! The main diagonal is the stored diagonal whose offset is 0, if any.
+    do i=1, size(a%offset)
+      if (a%offset(i) == 0) then
+        d(1:mnm) = a%data(1:mnm,i)
+        exit
+      end if
+    end do
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_dia_get_diag
diff --git a/ext/impl/psb_z_dia_maxval.f90 b/ext/impl/psb_z_dia_maxval.f90
new file mode 100644
index 00000000..d3518c17
--- /dev/null
+++ b/ext/impl/psb_z_dia_maxval.f90
@@ -0,0 +1,54 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_z_dia_maxval: largest absolute value among the stored coefficients
+! in a%data. When a%is_unit() is true the result is never smaller than
+! one. a%sync() is invoked first when a%is_dev() is true.
+!
+function psb_z_dia_maxval(a) result(res)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_maxval
+  implicit none
+  class(psb_z_dia_sparse_mat), intent(in) :: a
+  real(psb_dpk_)                          :: res
+
+  real(psb_dpk_)     :: lower_bound
+  character(len=20)  :: name='z_maxval'
+  logical, parameter :: debug=.false.
+
+  if (a%is_dev()) call a%sync()
+
+  ! Starting value: one for a unit matrix, zero otherwise.
+  lower_bound = dzero
+  if (a%is_unit()) lower_bound = done
+
+  res = max(lower_bound,maxval(abs(a%data)))
+
+end function psb_z_dia_maxval
diff --git a/ext/impl/psb_z_dia_mold.f90 b/ext/impl/psb_z_dia_mold.f90
new file mode 100644
index 00000000..421af284
--- /dev/null
+++ b/ext/impl/psb_z_dia_mold.f90
@@ -0,0 +1,61 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_z_dia_mold: make b a freshly allocated object of dynamic type
+! psb_z_dia_sparse_mat. If b is already allocated it is freed and
+! deallocated first. info is psb_err_alloc_dealloc_ when either the
+! deallocation or the allocation reports a non-zero status.
+!
+subroutine psb_z_dia_mold(a,b,info)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_mold
+  implicit none
+  class(psb_z_dia_sparse_mat), intent(in)                  :: a
+  class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='dia_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+  info = 0
+
+  ! Release whatever b currently holds.
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+
+  ! Only attempt the allocation when the release step succeeded.
+  if (info == 0) then
+    allocate(psb_z_dia_sparse_mat :: b, stat=info)
+  end if
+
+  if (info == psb_success_) return
+
+  ! Failure path: record the error and hand over to the error handler.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_dia_mold
diff --git a/ext/impl/psb_z_dia_print.f90 b/ext/impl/psb_z_dia_print.f90
new file mode 100644
index 00000000..1f7853ef
--- /dev/null
+++ b/ext/impl/psb_z_dia_print.f90
@@ -0,0 +1,148 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_dia_print: write the matrix to unit iout as MatrixMarket
+! coordinate text, one "row col value" record per stored coefficient.
+!
+! Arguments:
+!   iout - output unit.
+!   a    - matrix to print; a%sync() is invoked when a%is_dev() is true.
+!   iv   - optional index map applied to both row and column indices;
+!          when present it takes precedence over ivr/ivc.
+!   head - optional text written as a '% ' comment line.
+!   ivr  - optional index map applied to row indices.
+!   ivc  - optional index map applied to column indices.
+!
+subroutine psb_z_dia_print(iout,a,iv,head,ivr,ivc)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_print
+  implicit none
+
+  integer(psb_ipk_), intent(in)           :: iout
+  class(psb_z_dia_sparse_mat), intent(in) :: a
+  integer(psb_lpk_), intent(in), optional :: iv(:)
+  character(len=*), optional              :: head
+  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+
+  character(len=20)  :: name='z_dia_print'
+  logical, parameter :: debug=.false.
+
+  character(len=80) :: frmt
+  integer(psb_ipk_) :: row, idiag, nrows, ncols, nnz, ndiag
+  integer(psb_ipk_) :: shift, first, last
+
+  ! Header section.
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% COO'
+
+  if (a%is_dev()) call a%sync()
+
+  nrows = a%get_nrows()
+  ncols = a%get_ncols()
+  nnz   = a%get_nzeros()
+  frmt  = psb_z_get_print_frmt(nrows,ncols,nnz,iv,ivr,ivc)
+  write(iout,*) nrows, ncols, nnz
+
+  ndiag = size(a%data,2)
+
+  do idiag = 1, ndiag
+    ! Entry (row, idiag) of a%data sits in column row+shift; restrict
+    ! the row range so that this column index stays within 1..nrows.
+    shift = a%offset(idiag)
+    first = max(1,1-shift)
+    last  = min(nrows,nrows-shift)
+
+    do row = first, last
+      if (present(iv)) then
+        write(iout,frmt) iv(row),iv(row+shift),a%data(row,idiag)
+      else if (present(ivr).and.present(ivc)) then
+        write(iout,frmt) ivr(row),ivc(row+shift),a%data(row,idiag)
+      else if (present(ivr)) then
+        write(iout,frmt) ivr(row),(row+shift),a%data(row,idiag)
+      else if (present(ivc)) then
+        write(iout,frmt) (row),ivc(row+shift),a%data(row,idiag)
+      else
+        write(iout,frmt) (row),(row+shift),a%data(row,idiag)
+      end if
+    end do
+  end do
+
+end subroutine psb_z_dia_print
diff --git a/ext/impl/psb_z_dia_reallocate_nz.f90 b/ext/impl/psb_z_dia_reallocate_nz.f90
new file mode 100644
index 00000000..2d204a64
--- /dev/null
+++ b/ext/impl/psb_z_dia_reallocate_nz.f90
@@ -0,0
+1,56 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_z_dia_reallocate_nz: intentionally a no-op. Neither nz nor a is
+! touched; the routine only saves and restores the error action.
+!
+subroutine psb_z_dia_reallocate_nz(nz,a)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in)              :: nz
+  class(psb_z_dia_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='z_dia_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  ! Nothing to do for this storage format.
+
+  call psb_erractionrestore(err_act)
+  return
+
+end subroutine psb_z_dia_reallocate_nz
diff --git a/ext/impl/psb_z_dia_reinit.f90 b/ext/impl/psb_z_dia_reinit.f90
new file mode 100644
index 00000000..0f58a9ed
--- /dev/null
+++ b/ext/impl/psb_z_dia_reinit.f90
@@ -0,0 +1,78 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!
POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_dia_reinit: put an assembled matrix back into update state.
+!
+! Arguments:
+!   a     - matrix. In build or update state nothing is changed. In
+!           assembled state it is synchronised (when a%is_dev() is true),
+!           optionally zeroed, and then marked via set_upd() and
+!           set_host(). Any other state pushes psb_err_invalid_mat_state_
+!           and invokes the error handler.
+!   clear - optional; when absent or .true. the stored coefficients in
+!           a%data are set to zero.
+!
+subroutine psb_z_dia_reinit(a,clear)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_reinit
+  implicit none
+
+  class(psb_z_dia_sparse_mat), intent(inout) :: a
+  logical, intent(in), optional              :: clear
+
+  integer(psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='reinit'
+  logical            :: clear_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+
+  if (present(clear)) then
+    clear_ = clear
+  else
+    clear_ = .true.
+  end if
+
+  if (a%is_bld() .or. a%is_upd()) then
+    ! Nothing to do. Fall through to the common exit instead of
+    ! returning here, so that the psb_erractionsave above is always
+    ! paired with psb_erractionrestore as on every other normal exit.
+    continue
+  else if (a%is_asb()) then
+    if (a%is_dev()) call a%sync()
+    if (clear_) a%data(:,:) = zzero
+    call a%set_upd()
+    call a%set_host()
+
+  else
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_dia_reinit
diff --git a/ext/impl/psb_z_dia_rowsum.f90 b/ext/impl/psb_z_dia_rowsum.f90
new file mode 100644
index 00000000..6918ada1
--- /dev/null
+++ b/ext/impl/psb_z_dia_rowsum.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_z_dia_rowsum: d(i) = sum of the stored coefficients of row i.
+!
+! Arguments:
+!   d - output vector, indexed by row; must hold at least nrows entries.
+!       Entries start from one when a%is_unit() is true, zero otherwise.
+!   a - input matrix; a%sync() is invoked first when a%is_dev() is true.
+!
+! On a too-short d, psb_err_input_asize_small_i_ is pushed and the error
+! handler is invoked.
+!
+subroutine psb_z_dia_rowsum(d,a)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_rowsum
+  implicit none
+  class(psb_z_dia_sparse_mat), intent(in) :: a
+  complex(psb_dpk_), intent(out)          :: d(:)
+
+  integer(psb_ipk_)  :: i, j, m, jc, nc, ir1, ir2, nr
+  integer(psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='rowsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  ! The result is indexed by row below, so its length has to be checked
+  ! against the number of rows, not the number of columns.
+  m = a%get_nrows()
+  if (size(d) < m) then
+    info=psb_err_input_asize_small_i_
+    int_err(:) = 0
+    int_err(1) = 1
+    int_err(2) = size(d,kind=psb_ipk_)
+    int_err(3) = m
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = zone
+  else
+    d = zzero
+  end if
+
+  nr = size(a%data,1)
+  nc = size(a%data,2)
+  do j=1,nc
+    ! Rows ir1..ir2 are those for which column i+jc stays in 1..nr.
+    jc = a%offset(j)
+    if (jc > 0) then
+      ir1 = 1
+      ir2 = nr - jc
+    else
+      ir1 = 1 - jc
+      ir2 = nr
+    end if
+    do i=ir1, ir2
+      d(i) = d(i) + a%data(i,j)
+    enddo
+  enddo
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_dia_rowsum
diff --git a/ext/impl/psb_z_dia_scal.f90 b/ext/impl/psb_z_dia_scal.f90
new file mode 100644
index 00000000..65957e60
--- /dev/null
+++ b/ext/impl/psb_z_dia_scal.f90
@@ -0,0 +1,108 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psb_z_dia_scal: scale the matrix in place by the entries of d.
+!
+! Arguments:
+!   d    - scaling vector; needs at least nrows entries for left scaling
+!          and at least ncols entries for right scaling.
+!   a    - matrix, updated in place. It is synchronised when a%is_dev()
+!          is true, made non-unit when a%is_unit() is true, and marked
+!          with set_host() on success.
+!   info - psb_success_, or psb_err_input_asize_invalid_i_ when d is
+!          too short.
+!   side - optional, case-insensitive; 'L' (the default) multiplies row i
+!          by d(i), any other value multiplies column j by d(j).
+!
+subroutine psb_z_dia_scal(d,a,info,side)
+
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_scal
+  implicit none
+  class(psb_z_dia_sparse_mat), intent(inout) :: a
+  complex(psb_dpk_), intent(in)              :: d(:)
+  integer(psb_ipk_), intent(out)             :: info
+  character, intent(in), optional            :: side
+
+  integer(psb_ipk_)  :: err_act, i, j, m, n, nc, jc, nr, ir1, ir2
+  character(len=20)  :: name='scal'
+  character          :: side_
+  logical            :: left
+  logical, parameter :: debug=.false.
+
+  info  = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (a%is_dev()) call a%sync()
+
+  if (a%is_unit()) then
+    call a%make_nonunit()
+  end if
+
+  side_ = 'L'
+  if (present(side)) then
+    side_ = psb_toupper(side)
+  end if
+
+  left = (side_ == 'L')
+
+  if (left) then
+    m = a%get_nrows()
+    if (size(d) < m) then
+      info=psb_err_input_asize_invalid_i_
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    ! Row i of a%data holds the coefficients of matrix row i.
+    do i=1, m
+      a%data(i,:) = a%data(i,:) * d(i)
+    enddo
+  else
+    n = a%get_ncols()
+    if (size(d) < n) then
+      ! Same error payload as the left-scaling branch; a fully defined
+      ! array is passed rather than a partly initialised work vector.
+      info=psb_err_input_asize_invalid_i_
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    nr=size(a%data,1)
+    nc=size(a%data,2)
+    do j=1,nc
+      ! Entry (i,j) of a%data belongs to matrix column i+jc.
+      jc = a%offset(j)
+      if (jc > 0) then
+        ir1 = 1
+        ir2 = nr - jc
+      else
+        ir1 = 1 - jc
+        ir2 = nr
+      end if
+      do i=ir1, ir2
+        a%data(i,j) = a%data(i,j) * d(i+jc)
+      enddo
+    enddo
+
+  end if
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_dia_scal
diff --git a/ext/impl/psb_z_dia_scals.f90 b/ext/impl/psb_z_dia_scals.f90
new file mode 100644
index 00000000..895763d9
--- /dev/null
+++ b/ext/impl/psb_z_dia_scals.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
!
! psb_z_dia_scals: multiply every stored coefficient of a DIA-format matrix
! by the scalar d.
!
! Arguments:
!   d      in     complex scaling factor
!   a      inout  matrix to be scaled; a unit-diagonal matrix is first
!                 converted with make_nonunit(); set_host() is called on exit
!   info   out    always psb_success_ (no failure path in this routine)
!
subroutine psb_z_dia_scals(d,a,info)

  use psb_base_mod
  use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_scals
  implicit none
  class(psb_z_dia_sparse_mat), intent(inout) :: a
  complex(psb_dpk_), intent(in)      :: d
  integer(psb_ipk_), intent(out)     :: info

  integer(psb_ipk_) :: saved_action, jdiag

  call psb_erractionsave(saved_action)
  info = psb_success_

  ! Work on an up-to-date host copy.
  if (a%is_dev()) call a%sync()
  if (a%is_unit()) call a%make_nonunit()

  ! Scale one stored column of a%data at a time.
  do jdiag = 1, size(a%data,2)
    a%data(:,jdiag) = a%data(:,jdiag) * d
  end do
  call a%set_host()

  call psb_erractionrestore(saved_action)
  return

end subroutine psb_z_dia_scals
!> Function  csmv:
!! \memberof  psb_z_dns_sparse_mat
!! \brief Product by a dense rank 1 array.
!!
!!        Compute
!!           Y = alpha*op(A)*X + beta*Y
!!        by a single call to the BLAS routine zgemv on a%val.
!!
!! \param alpha  Scaling factor for Ax
!! \param A      the input matrix; must be in the assembled state
!! \param x(:)   the input dense X; needs ncols entries for trans='N',
!!               nrows entries otherwise
!! \param beta   Scaling factor for y
!! \param y(:)   the input/output dense Y; needs nrows entries for
!!               trans='N', ncols entries otherwise
!! \param info   return code: psb_success_, psb_err_invalid_mat_state_
!!               or psb_err_input_asize_small_i_
!! \param trans  [N] Whether to use A (N), its transpose (T)
!!               or its conjugate transpose (C)
!!
!
subroutine psb_z_dns_csmv(alpha,a,x,beta,y,info,trans)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csmv
  implicit none
  class(psb_z_dns_sparse_mat), intent(in) :: a
  complex(psb_dpk_), intent(in)       :: alpha, beta, x(:)
  complex(psb_dpk_), intent(inout)    :: y(:)
  integer(psb_ipk_), intent(out)      :: info
  character, optional, intent(in)     :: trans
  !
  character          :: trans_
  integer(psb_ipk_)  :: err_act, m, n, lda
  character(len=20)  :: name='z_dns_csmv'

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(trans)) then
    trans_ = psb_toupper(trans)
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  if (a%is_dev()) call a%sync()

  ! m = required length of y, n = required length of x for the chosen op(A).
  if (trans_ == 'N') then
    m = a%get_nrows()
    n = a%get_ncols()
  else
    n = a%get_nrows()
    m = a%get_ncols()
  end if

  ! zgemv performs no bounds checking of its own: reject short vectors here
  ! instead of letting it read or write past the end of x or y.
  if (size(x) < n) then
    info = psb_err_input_asize_small_i_
    call psb_errpush(info,name,i_err=(/3*ione,size(x,kind=psb_ipk_),n/))
    goto 9999
  end if
  if (size(y) < m) then
    info = psb_err_input_asize_small_i_
    call psb_errpush(info,name,i_err=(/5*ione,size(y,kind=psb_ipk_),m/))
    goto 9999
  end if

  lda = size(a%val,1)

  call zgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,&
       & a%val,lda,x,1,beta,y,1)

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_z_dns_csmv
!> Function  csmm:
!! \memberof  psb_z_dns_sparse_mat
!! \brief Product by a dense rank 2 array, delegated to the BLAS
!!        routine zgemm:
!!           Y = alpha*op(A)*X + beta*Y
!!
!! \param alpha   Scaling factor for Ax
!! \param A       the input matrix; must be in the assembled state
!! \param x(:,:)  the input dense X
!! \param beta    Scaling factor for y
!! \param y(:,:)  the input/output dense Y
!! \param info    return code
!! \param trans   [N] passed unchanged to zgemm as the op(A) selector
!!
!  The number of right-hand sides processed is
!  min(size(y,2),size(x,2)).
!
subroutine psb_z_dns_csmm(alpha,a,x,beta,y,info,trans)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csmm
  implicit none
  class(psb_z_dns_sparse_mat), intent(in) :: a
  complex(psb_dpk_), intent(in)       :: alpha, beta, x(:,:)
  complex(psb_dpk_), intent(inout)    :: y(:,:)
  integer(psb_ipk_), intent(out)      :: info
  character, optional, intent(in)     :: trans
  !
  character          :: op_flag
  integer(psb_ipk_)  :: saved_action, out_rows, n_rhs, inner_dim
  character(len=20)  :: name='z_dns_csmm'

  info = psb_success_
  call psb_erractionsave(saved_action)

  op_flag = 'N'
  if (present(trans)) op_flag = trans

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  if (a%is_dev()) call a%sync()

  ! Same column count whichever op(A) is requested.
  n_rhs = min(size(y,2),size(x,2))
  if (psb_toupper(op_flag) == 'N') then
    out_rows  = a%get_nrows()
    inner_dim = a%get_ncols()
  else
    inner_dim = a%get_nrows()
    out_rows  = a%get_ncols()
  end if

  call zgemm(op_flag,'N',out_rows,n_rhs,inner_dim,alpha,&
       & a%val,size(a%val,1),x,size(x,1),beta,y,size(y,1))

  call psb_erractionrestore(saved_action)
  return

9999 call psb_error_handler(saved_action)
  return

end subroutine psb_z_dns_csmm



!
!
!> Function  csnmi:
!! \memberof  psb_z_dns_sparse_mat
!! \brief Operator infinity norm: the largest, over the first
!!        get_nrows() rows of a%val, of the sum of absolute values
!!        in that row.
!!
!
function psb_z_dns_csnmi(a) result(res)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csnmi
  implicit none
  class(psb_z_dns_sparse_mat), intent(in) :: a
  real(psb_dpk_)         :: res
  !
  integer(psb_ipk_)   :: irow

  res = dzero
  if (a%is_dev()) call a%sync()

  do irow = 1, a%get_nrows()
    res = max(res, sum(abs(a%val(irow,:))))
  end do

end function psb_z_dns_csnmi
!
!> Function  get_diag:
!! \memberof  psb_z_dns_sparse_mat
!! \brief Extract the diagonal of A.
!!
!!   D(i) = A(i,i), i=1:min(nrows,ncols); any further entries of D
!!   are set to zero.
!!
!! \param d(:)  The output diagonal
!! \param info  return code: psb_success_, or
!!              psb_err_input_asize_invalid_i_ when d(:) is shorter
!!              than min(nrows,ncols)
!
subroutine  psb_z_dns_get_diag(a,d,info)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_get_diag
  implicit none
  class(psb_z_dns_sparse_mat), intent(in) :: a
  complex(psb_dpk_), intent(out)          :: d(:)
  integer(psb_ipk_), intent(out)          :: info
  !
  integer(psb_ipk_)  :: saved_action, ndiag, idx
  character(len=20)  :: name='get_diag'

  call psb_erractionsave(saved_action)
  info = psb_success_
  if (a%is_dev()) call a%sync()

  ndiag = min(a%get_nrows(),a%get_ncols())
  if (size(d) < ndiag) then
    info = psb_err_input_asize_invalid_i_
    call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/))
    goto 9999
  end if

  do idx = 1, ndiag
    d(idx) = a%val(idx,idx)
  end do
  ! Pad whatever is left of the output with zeros.
  d(ndiag+1:size(d)) = zzero

  call psb_erractionrestore(saved_action)
  return

9999 call psb_error_handler(saved_action)
  return

end subroutine psb_z_dns_get_diag


!
!
!> Function  reallocate_nz
!! \memberof  psb_z_dns_sparse_mat
!! \brief One--parameter version of (re)allocate.
!!        For this dense format the storage is not resized: the routine
!!        only synchronizes the matrix if is_dev() reports true.
!!
!! \param nz  number of nonzeros requested (not used)
!
subroutine  psb_z_dns_reallocate_nz(nz,a)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_reallocate_nz
  implicit none
  integer(psb_ipk_), intent(in) :: nz
  class(psb_z_dns_sparse_mat), intent(inout) :: a
  !
  integer(psb_ipk_)  :: saved_action

  call psb_erractionsave(saved_action)

  ! Nothing to resize: allocation is fixed for this format.
  if (a%is_dev()) call a%sync()

  call psb_erractionrestore(saved_action)
  return

end subroutine psb_z_dns_reallocate_nz
!
!> Function  mold:
!! \memberof  psb_z_dns_sparse_mat
!! \brief Allocate a class(psb_z_dns_sparse_mat) with the
!!     same dynamic type as the input.
!!     This is equivalent to allocate(  mold=  ) and is provided
!!     for those compilers not yet supporting mold.
!!     If b is already allocated on entry it is freed and deallocated
!!     first, since ALLOCATE on an allocated variable always fails.
!! \param b     The output variable
!! \param info  return code: psb_success_ or psb_err_alloc_dealloc_
!
subroutine  psb_z_dns_mold(a,b,info)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_mold
  implicit none
  class(psb_z_dns_sparse_mat), intent(in)                  :: a
  class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info
  !
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='dns_mold'

  call psb_get_erraction(err_act)

  info = 0
  ! Release any previous content so that the ALLOCATE below can succeed.
  if (allocated(b)) then
    call b%free()
    deallocate(b, stat=info)
  end if
  if (info == 0) allocate(psb_z_dns_sparse_mat :: b, stat=info)

  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return
9999 call psb_error_handler(err_act)
  return

end subroutine psb_z_dns_mold

!
!
!> Function  allocate_mnnz
!! \memberof  psb_z_dns_sparse_mat
!! \brief Three-parameters version of allocate.
!!        Sets the dimensions and default state flags, then allocates
!!        a%val as an m-by-n array filled with zeros and resets a%nnz.
!!
!! \param m   number of rows (must be >= 0)
!! \param n   number of cols (must be >= 0)
!! \param nz  ignored for this format
!
!  Failures are reported through the error stack and psb_error_handler;
!  there is no info argument.
!
subroutine  psb_z_dns_allocate_mnnz(m,n,a,nz)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in) :: m,n
  class(psb_z_dns_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional :: nz
  !
  integer(psb_ipk_)  :: err_act, info
  character(len=20)  :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/2_psb_ipk_/))
    goto 9999
  endif

  ! Basic stuff common to all formats
  call a%set_nrows(m)
  call a%set_ncols(n)
  call a%set_triangle(.false.)
  call a%set_unit(.false.)
  call a%set_dupl(psb_dupl_def_)
  call a%set_bld()
  call a%set_host()

  ! We ignore NZ in this case.

  call psb_realloc(m,n,a%val,info)
  if (info /= psb_success_) then
    ! Previously an allocation failure fell through and was reported to
    ! nobody; raise it like every other error in this routine.
    info = psb_err_alloc_dealloc_
    call psb_errpush(info,name)
    goto 9999
  end if
  a%val = zzero
  a%nnz = 0

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_z_dns_allocate_mnnz
+subroutine psb_z_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csgetrow + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_z_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_z_dns_trim(a) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_trim + implicit none + class(psb_z_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Copy and convert from psb_z_coo_sparse_mat +!! 
!
!> Function  cp_from_coo:
!! \memberof  psb_z_dns_sparse_mat
!! \brief Copy and convert from psb_z_coo_sparse_mat
!!        Invoked from the target object.
!!        When b is not sorted by rows, a temporary copy is made and
!!        passed through fix() before its entries are scattered into the
!!        dense array; otherwise b is read directly.
!!   \param b     The input variable
!!   \param info  return code
!
subroutine psb_z_cp_dns_from_coo(a,b,info)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_cp_dns_from_coo
  implicit none

  class(psb_z_dns_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(in)    :: b
  integer(psb_ipk_), intent(out)             :: info
  !
  type(psb_z_coo_sparse_mat)   :: tmp
  integer(psb_ipk_)            :: err_act
  character(len=20)            :: name='cp_dns_from_coo'

  info = psb_success_
  ! err_act is needed by the error exit below; it used to be read there
  ! without ever having been set.
  call psb_erractionsave(err_act)

  if (.not.b%is_by_rows()) then
    ! This is to have fix_coo called behind the scenes
    call b%cp_to_coo(tmp,info)
    ! Do not let fix() overwrite a failure status from the copy.
    if (info == psb_success_) call tmp%fix(info)
    if (info /= psb_success_) then
      call psb_errpush(info,name)
      goto 9999
    end if

    call fill_dense(tmp)
    if (info /= psb_success_) goto 9999
    call tmp%free()
  else
    if (b%is_dev()) call b%sync()
    call fill_dense(b)
    if (info /= psb_success_) goto 9999
  end if
  call a%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Copy the base-class state of src into a, size a%val to the dimensions
  ! of src and scatter the src entries into it. Sets info in the host.
  subroutine fill_dense(src)
    class(psb_z_coo_sparse_mat), intent(in) :: src
    integer(psb_ipk_) :: nr, nc, nza, i

    nr  = src%get_nrows()
    nc  = src%get_ncols()
    nza = src%get_nzeros()
    a%psb_z_base_sparse_mat = src%psb_z_base_sparse_mat

    call psb_realloc(nr,nc,a%val,info)
    if (info /= 0) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info,name)
      return
    end if
    a%val = zzero
    do i=1, nza
      a%val(src%ia(i),src%ja(i)) = src%val(i)
    end do
    a%nnz = nza
  end subroutine fill_dense

end subroutine psb_z_cp_dns_from_coo
!
!> Function  cp_to_coo:
!! \memberof  psb_z_dns_sparse_mat
!! \brief Copy and convert to psb_z_coo_sparse_mat
!!        Invoked from the source object.
!!        Every entry of a%val(1:nrows,1:ncols) different from zero is
!!        emitted, scanning row by row; the output is marked row-major,
!!        assembled and host-resident.
!!   \param b     The output variable
!!   \param info  return code (always psb_success_ here)
!
subroutine psb_z_cp_dns_to_coo(a,b,info)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_cp_dns_to_coo
  implicit none

  class(psb_z_dns_sparse_mat), intent(in)    :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  !locals
  integer(psb_ipk_) :: nza, nr, nc, i, j, k

  info = psb_success_

  if (a%is_dev()) call a%sync()
  nr  = a%get_nrows()
  nc  = a%get_ncols()

  ! Size the output from the entries that will actually be emitted.
  ! The stored counter a%get_nzeros() is not guaranteed to agree with the
  ! number of nonzero values in a%val, and the loop below selects by value.
  nza = 0
  if ((nr > 0).and.(nc > 0)) nza = count(a%val(1:nr,1:nc) /= zzero)

  call b%allocate(nr,nc,max(nza,ione))
  b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat

  k = 0
  do i=1, nr
    do j=1, nc
      if (a%val(i,j) /= zzero) then
        k = k + 1
        b%ia(k)  = i
        b%ja(k)  = j
        b%val(k) = a%val(i,j)
      end if
    end do
  end do

  ! k is the number of entries written above.
  call b%set_nzeros(k)
  call b%set_sort_status(psb_row_major_)
  call b%set_asb()
  call b%set_host()

end subroutine psb_z_cp_dns_to_coo



!
!> Function  mv_to_coo:
!! \memberof  psb_z_dns_sparse_mat
!! \brief Convert to psb_z_coo_sparse_mat, freeing the source.
!!        Invoked from the source object.
!!        The source is released only if the copy reported success, so a
!!        failed conversion does not destroy the caller's data.
!!   \param b     The output variable
!!   \param info  return code
!
subroutine psb_z_mv_dns_to_coo(a,b,info)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_mv_dns_to_coo
  implicit none

  class(psb_z_dns_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  call a%cp_to_coo(b,info)
  if (info == psb_success_) call a%free()
  return

end subroutine psb_z_mv_dns_to_coo
!
!> Function  mv_from_coo:
!! \memberof  psb_z_dns_sparse_mat
!! \brief Convert from psb_z_coo_sparse_mat, freeing the source.
!!        Invoked from the target object.
!!        The source is released only if the conversion reported success,
!!        so a failed conversion does not destroy the caller's data.
!!   \param b     The input variable
!!   \param info  return code
!
subroutine psb_z_mv_dns_from_coo(a,b,info)
  use psb_base_mod
  use psb_z_dns_mat_mod, psb_protect_name => psb_z_mv_dns_from_coo
  implicit none

  class(psb_z_dns_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  call a%cp_from_coo(b,info)
  if (info == psb_success_) call b%free()

  return

end subroutine psb_z_mv_dns_from_coo
!
! psb_z_ell_aclsum: column sums of the absolute values of an ELL matrix.
!
!   d(k) accumulates abs(a%val(i,j)) for every stored entry whose column
!   index a%ja(i,j) equals k, scanning j = 1..a%irn(i) in each row i.
!   The accumulation starts from one when a%is_unit() is true and from
!   zero otherwise.
!
! Arguments:
!   d(:)   out  result; needs at least get_ncols() entries
!   a      in   the matrix
!
! A too-short d(:) is reported via psb_errpush with code
! psb_err_input_asize_small_i_; there is no info argument.
!
subroutine psb_z_ell_aclsum(d,a)

  use psb_base_mod
  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_aclsum
  implicit none
  class(psb_z_ell_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(out)             :: d(:)

  integer(psb_ipk_)  :: i, j, k, m, n
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  character(len=20)  :: name='aclsum'

  info = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < n) then
    info = psb_err_input_asize_small_i_
    ! Define the whole array: entries 4 and 5 used to be passed unset.
    int_err(:) = 0
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = n
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (a%is_unit()) then
    d = done
  else
    d = dzero
  end if

  do i=1, m
    do j=1, a%irn(i)
      k    = a%ja(i,j)
      d(k) = d(k) + abs(a%val(i,j))
    end do
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_z_ell_aclsum
!
! psb_z_ell_allocate_mnnz: allocate the storage of an ELL matrix.
!
!   The per-row width is the ceiling of (number of entries)/(number of
!   rows), where the number of entries is max(nz,1) if nz is present and
!   max(7*m,7*n,1) otherwise. a%irn and a%idiag get m entries, a%ja and
!   a%val are m-by-width. On success the dimensions are recorded and the
!   matrix is put in the build state with default flags.
!
! Arguments:
!   m, n   in     number of rows and columns (must be >= 0)
!   a      inout  the matrix being allocated
!   nz     in     optional estimate of the total number of entries
!
! Errors are reported through psb_errpush/psb_error_handler; there is no
! info argument.
!
subroutine  psb_z_ell_allocate_mnnz(m,n,a,nz)

  use psb_base_mod
  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in) :: m,n
  class(psb_z_ell_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional :: nz

  integer(psb_ipk_)  :: err_act, info, nz_, mdiv
  character(len=20)  :: name='allocate_mnz'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/ione/))
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/2*ione/))
    goto 9999
  endif

  ! m == 0 passes the checks above; dividing by m would then trap.
  ! For m >= 1 the result is unchanged.
  mdiv = max(m,ione)
  if (present(nz)) then
    nz_ = (max(nz,ione) + mdiv - 1)/mdiv
  else
    nz_ = (max(7*m,7*n,ione) + mdiv - 1)/mdiv
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    call psb_errpush(info,name,i_err=(/3*ione/))
    goto 9999
  endif

  if (info == psb_success_) call psb_realloc(m,a%irn,info)
  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
  if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info)
  if (info == psb_success_) call psb_realloc(m,nz_,a%val,info)
  if (info == psb_success_) then
    a%irn   = 0
    a%idiag = 0
    a%nzt   = -1
    call a%set_nrows(m)
    call a%set_ncols(n)
    call a%set_bld()
    call a%set_triangle(.false.)
    call a%set_unit(.false.)
    call a%set_dupl(psb_dupl_def_)
  end if
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_z_ell_allocate_mnnz
!
! psb_z_ell_arwsum: row sums of the absolute values of an ELL matrix.
!
!   For each row i = 1..get_nrows(), d(i) is the sum of
!   abs(a%val(i,j)) for j = 1..a%irn(i), plus one when a%is_unit()
!   is true.
!
! Arguments:
!   d(:)   out  result; needs at least get_nrows() entries
!   a      in   the matrix
!
! A too-short d(:) is reported via psb_errpush with code
! psb_err_input_asize_small_i_; there is no info argument.
!
subroutine psb_z_ell_arwsum(d,a)

  use psb_base_mod
  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_arwsum
  implicit none
  class(psb_z_ell_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(out)             :: d(:)

  integer(psb_ipk_)  :: i, j, m
  logical            :: is_unit
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  ! Report errors under this routine's own name (it used to say 'rowsum').
  character(len=20)  :: name='arwsum'

  info = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  if (size(d) < m) then
    info = psb_err_input_asize_small_i_
    ! Define the whole array: entries 4 and 5 used to be passed unset.
    int_err(:) = 0
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = m
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if
  is_unit = a%is_unit()

  do i = 1, m
    if (is_unit) then
      d(i) = done
    else
      d(i) = dzero
    end if
    do j=1, a%irn(i)
      d(i) = d(i) + abs(a%val(i,j))
    end do
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_z_ell_arwsum
!
! psb_z_ell_colsum: column sums of an ELL matrix.
!
!   d(k) accumulates a%val(i,j) for every stored entry whose column
!   index a%ja(i,j) equals k, scanning j = 1..a%irn(i) in each row i.
!   The accumulation starts from one when a%is_unit() is true and from
!   zero otherwise.
!
! Arguments:
!   d(:)   out  result; needs at least get_ncols() entries
!   a      in   the matrix
!
! A too-short d(:) is reported via psb_errpush with code
! psb_err_input_asize_small_i_; there is no info argument.
!
subroutine psb_z_ell_colsum(d,a)

  use psb_base_mod
  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_colsum
  implicit none
  class(psb_z_ell_sparse_mat), intent(in) :: a
  complex(psb_dpk_), intent(out)          :: d(:)

  integer(psb_ipk_)  :: i, j, k, m, n
  integer(psb_ipk_)  :: err_act, info, int_err(5)
  character(len=20)  :: name='colsum'

  info = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev()) call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < n) then
    info = psb_err_input_asize_small_i_
    ! Define the whole array: entries 4 and 5 used to be passed unset.
    int_err(:) = 0
    int_err(1) = 1
    int_err(2) = size(d)
    int_err(3) = n
    call psb_errpush(info,name,i_err=int_err)
    goto 9999
  end if

  if (a%is_unit()) then
    d = zone
  else
    d = zzero
  end if

  do i=1, m
    do j=1, a%irn(i)
      k    = a%ja(i,j)
      d(k) = d(k) + (a%val(i,j))
    end do
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_z_ell_colsum
!
! psb_z_ell_csgetblk: extract rows imin..imax of an ELL matrix into the
! COO matrix b by calling a%csget on b%ia, b%ja and b%val, then update
! the entry count of b, mark it host-resident and call b%fix().
!
! Arguments:
!   imin, imax   in     row range to extract
!   a            in     source matrix
!   b            inout  destination COO matrix
!   info         out    return code of csget / fix
!   jmin, jmax, iren, rscale, cscale
!                in     optional, forwarded unchanged to a%csget
!   append       in     optional [false]; when true the extracted entries
!                       are placed after those already counted in b
!
subroutine  psb_z_ell_csgetblk(imin,imax,a,b,info,&
     & jmin,jmax,iren,append,rscale,cscale)
  use psb_base_mod
  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csgetblk
  implicit none

  class(psb_z_ell_sparse_mat), intent(in)    :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(in)              :: imin,imax
  integer(psb_ipk_),intent(out)              :: info
  logical, intent(in), optional              :: append
  integer(psb_ipk_), intent(in), optional    :: iren(:)
  integer(psb_ipk_), intent(in), optional    :: jmin,jmax
  logical, intent(in), optional              :: rscale,cscale

  integer(psb_ipk_)  :: err_act, nzin, nzout
  character(len=20)  :: name='ell_getblk'
  logical            :: append_

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(append)) then
    append_ = append
  else
    append_ = .false.
  endif
  if (append_) then
    ! nzin is the number of entries already held by the destination:
    ! it is passed to csget as the insertion offset and added to nzout
    ! below to form the new count of b. It used to be read from the
    ! source matrix a, which is unrelated to the content of b.
    nzin = b%get_nzeros()
  else
    nzin = 0
  endif

  call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,&
       & jmin=jmin, jmax=jmax, iren=iren, append=append_, &
       & nzin=nzin, rscale=rscale, cscale=cscale)

  if (info /= psb_success_) goto 9999

  call b%set_nzeros(nzin+nzout)
  call b%set_host()
  call b%fix(info)
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_z_ell_csgetblk
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csgetptn + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_ell_csgetrow + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_ell_csmm + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ell_csmv + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ell_csnm1 + + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csnm1' + logical, parameter :: debug=.false. 
+ + + if (a%is_dev()) call a%sync() + res = dzero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = done + else + vt(:) = dzero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_z_ell_csnm1 diff --git a/ext/impl/psb_z_ell_csnmi.f90 b/ext/impl/psb_z_ell_csnmi.f90 new file mode 100644 index 00000000..ecbfb1e1 --- /dev/null +++ b/ext/impl/psb_z_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +! Infinity norm of an ELL matrix: the largest, over all rows, of the sum of +! the absolute values of the entries stored in that row. When a%is_unit() +! is true, done is added to every row sum before the maximum is taken. +! Returns dzero for a matrix with no rows. +function psb_z_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csnmi + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i + real(psb_dpk_) :: acc + logical :: is_unit + + + if (a%is_dev()) call a%sync() + res = dzero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + ! Sum only the a%irn(i) slots actually used by row i; slots past that + ! are padding and their contents must not influence the norm. + acc = sum(abs(a%val(i,1:a%irn(i)))) + if (is_unit) acc = acc + done + res = max(res,acc) + end do + +end function psb_z_ell_csnmi diff --git a/ext/impl/psb_z_ell_csput.f90 b/ext/impl/psb_z_ell_csput.f90 new file mode 100644 index 00000000..cf45070f --- /dev/null +++ b/ext/impl/psb_z_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +! Insert nz coefficients (ia(k),ja(k),val(k)) into an ELL matrix. +! Only the update state is supported: each coefficient is located in its +! row by binary search and either overwrites or is added to the stored +! value, according to a%get_dupl(). A matrix in the build state, or in any +! other state, yields psb_err_invalid_mat_state_. +! nz < 0, or ia/ja/val shorter than nz, is reported as an error; +! nz == 0 is accepted and leaves the matrix untouched. +! Coefficients that do not match a stored position are discarded; this is +! reported only through the debug unit and info is still psb_success_. +! imin, imax, jmin, jmax are passed to the search routine, which does not +! use them. +subroutine psb_z_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csput_a + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csput_a' + logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ! Only a negative count is an argument error; zero is handled below. + if (nz < 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! Nothing to insert: leave through the normal exit path. + if (nz == 0) then + call psb_erractionrestore(err_act) + return + end if + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_z_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + ! Negative codes are failures of the search routine, positive codes + ! only signal discarded coefficients. + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong.
+ info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + ! Locate each coefficient in its row by binary search and update it. + ! info on return: 0 all coefficients applied; 2 at least one row index + ! outside 1..nrows; 3 at least one column not found in its row (the + ! largest such code is kept); -3 unsupported duplicate policy; + ! -4 matrix not sorted. + subroutine psb_z_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='z_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + ! The binary search below requires sorted column indices. + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + ! From here on nc is the number of entries stored in row ir. + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + !
Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_z_ell_srch_upd +end subroutine psb_z_ell_csput_a diff --git a/ext/impl/psb_z_ell_cssm.f90 b/ext/impl/psb_z_ell_cssm.f90 new file mode 100644 index 00000000..2e26c656 --- /dev/null +++ b/ext/impl/psb_z_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_cssm + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. 
(a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_z_ell_cssv + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + complex(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_z_ell_get_diag + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = zone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = zzero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_get_diag diff --git a/ext/impl/psb_z_ell_maxval.f90 b/ext/impl/psb_z_ell_maxval.f90 new file mode 100644 index 00000000..9596f124 --- /dev/null +++ b/ext/impl/psb_z_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +! Largest absolute value among the entries stored in an ELL matrix. +! The result starts from done when a%is_unit() is true and from dzero +! otherwise, so it is never smaller than that starting value. +function psb_z_ell_maxval(a) result(res) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_maxval + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i + real(psb_dpk_) :: acc + character(len=20) :: name='z_ell_maxval' + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + do i = 1, a%get_nrows() + ! Look only at the a%irn(i) slots in use; padding slots are skipped. + ! For a row with no entries maxval yields the most negative real, + ! which max() then ignores. + acc = maxval(abs(a%val(i,1:a%irn(i)))) + res = max(res,acc) + end do + +end function psb_z_ell_maxval diff --git a/ext/impl/psb_z_ell_mold.f90 b/ext/impl/psb_z_ell_mold.f90 new file mode 100644 index 00000000..3e1db6cc --- /dev/null +++ b/ext/impl/psb_z_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +!
not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +! Give b the dynamic type psb_z_ell_sparse_mat. +! Whatever b held on entry is freed and deallocated first; a is not read. +! info is 0 on success; if deallocation or allocation fails it is set to +! psb_err_alloc_dealloc_, pushed on the error stack and handled. +subroutine psb_z_ell_mold(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_mold + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + + call psb_get_erraction(err_act) + + ! Drop the previous contents of b, if any. + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + + ! Allocate with the ELL type only when the clean-up succeeded. + if (info == 0) then + allocate(psb_z_ell_sparse_mat :: b, stat=info) + end if + + if (info == psb_success_) return + + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_mold diff --git a/ext/impl/psb_z_ell_print.f90 b/ext/impl/psb_z_ell_print.f90 new file mode 100644 index 00000000..502abb94 --- /dev/null +++ b/ext/impl/psb_z_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +!
+! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +! Write an ELL matrix to unit iout as a MatrixMarket coordinate file. +! head (optional) is emitted as a comment line in the file header. +! Index translation of the printed coordinates: +! iv present -> rows and columns both mapped through iv +! ivr present -> rows mapped through ivr +! ivc present -> columns mapped through ivc +! iv takes precedence over ivr/ivc; unmapped indices are printed as stored. +subroutine psb_z_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + character(len=20) :: name='z_ell_print'
+ character(len=80) :: frmt + integer(psb_ipk_) :: irow, islot, nrows, ncols, nnzero + + ! File header. + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nrows = a%get_nrows() + ncols = a%get_ncols() + nnzero = a%get_nzeros() + frmt = psb_z_get_print_frmt(nrows,ncols,nnzero,iv,ivr,ivc) + + ! Size line, then one line per stored entry. + write(iout,*) nrows, ncols, nnzero + + if (present(iv)) then + do irow = 1, nrows + do islot = 1, a%irn(irow) + write(iout,frmt) iv(irow),iv(a%ja(irow,islot)),a%val(irow,islot) + end do + end do + else if (present(ivr)) then + if (present(ivc)) then + do irow = 1, nrows + do islot = 1, a%irn(irow) + write(iout,frmt) ivr(irow),ivc(a%ja(irow,islot)),a%val(irow,islot) + end do + end do + else + do irow = 1, nrows + do islot = 1, a%irn(irow) + write(iout,frmt) ivr(irow),(a%ja(irow,islot)),a%val(irow,islot) + end do + end do + end if + else + if (present(ivc)) then + do irow = 1, nrows + do islot = 1, a%irn(irow) + write(iout,frmt) (irow),ivc(a%ja(irow,islot)),a%val(irow,islot) + end do + end do + else + do irow = 1, nrows + do islot = 1, a%irn(irow) + write(iout,frmt) (irow),(a%ja(irow,islot)),a%val(irow,islot) + end do + end do + end if + end if + +end subroutine psb_z_ell_print diff --git a/ext/impl/psb_z_ell_reallocate_nz.f90 b/ext/impl/psb_z_ell_reallocate_nz.f90 new file mode 100644 index 00000000..58237508 --- /dev/null +++ b/ext/impl/psb_z_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +!
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +! Resize the ja and val arrays of an ELL matrix for nz entries overall. +! The row-wise width becomes ceiling(max(nz,1)/nrows); the leading +! dimension of a%ja is kept as it is. a%irn and a%idiag are not touched. +! On allocation failure psb_err_alloc_dealloc_ is pushed and handled. +subroutine psb_z_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + !
+ ! A matrix with no rows would make the division below fail; use a + ! divisor of at least one so the width is still well defined. + m = max(a%get_nrows(),ione) + ! Entries per row, rounded up. + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_reallocate_nz diff --git a/ext/impl/psb_z_ell_reinit.f90 b/ext/impl/psb_z_ell_reinit.f90 new file mode 100644 index 00000000..d73620d8 --- /dev/null +++ b/ext/impl/psb_z_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_ell_reinit(a,clear)
+  !
+  ! Put an assembled ELL matrix back into update state.
+  !
+  !   a      (inout) matrix to reinitialise
+  !   clear  (in, optional) when .true. (the default) all of a%val is
+  !          overwritten with zzero before the state change
+  !
+  ! A matrix in build or update state is left untouched. A matrix that
+  ! is in none of the build/update/assembled states raises
+  ! psb_err_invalid_mat_state_.
+  !
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_reinit
+  implicit none
+
+  class(psb_z_ell_sparse_mat), intent(inout) :: a
+  logical, intent(in), optional :: clear
+
+  Integer(Psb_ipk_) :: err_act, info
+  character(len=20) :: name='reinit'
+  logical :: wipe_values
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  ! Resolve the optional argument.
+  wipe_values = .true.
+  if (present(clear)) wipe_values = clear
+
+  ! Nothing to do while the matrix is being built or updated.
+  if (a%is_bld()) return
+  if (a%is_upd()) return
+
+  ! Any remaining state other than "assembled" is an error.
+  if (.not. a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  ! Bring the host copy up to date before modifying it.
+  if (a%is_dev()) call a%sync()
+  if (wipe_values) a%val(:,:) = zzero
+  call a%set_upd()
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_ell_reinit
diff --git a/ext/impl/psb_z_ell_rowsum.f90 b/ext/impl/psb_z_ell_rowsum.f90
new file mode 100644
index 00000000..60eb70af
--- /dev/null
+++ b/ext/impl/psb_z_ell_rowsum.f90
@@ -0,0 +1,77 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_ell_rowsum(d,a)
+  !
+  ! Row sums of an ELL matrix: d(i) receives the sum of the first
+  ! a%irn(i) stored coefficients of row i, plus zone when the matrix
+  ! reports a unit diagonal.
+  !
+  !   d  (out) result vector, must hold at least a%get_nrows() entries
+  !   a  (in)  ELL matrix
+  !
+  ! Errors: psb_err_input_asize_small_i_ when d is shorter than the
+  ! number of rows.
+  !
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_rowsum
+  implicit none
+  class(psb_z_ell_sparse_mat), intent(in) :: a
+  complex(psb_dpk_), intent(out) :: d(:)
+
+  integer(psb_ipk_) :: irow, jpos, nrows
+  Integer(Psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='rowsum'
+  logical :: unit_diag
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  if (a%is_dev()) call a%sync()
+
+  nrows = a%get_nrows()
+  if (size(d) < nrows) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = nrows
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  unit_diag = a%is_unit()
+  do irow = 1, nrows
+    ! Start from the implicit diagonal contribution, if any.
+    d(irow) = merge(zone, zzero, unit_diag)
+    do jpos = 1, a%irn(irow)
+      d(irow) = d(irow) + (a%val(irow,jpos))
+    end do
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_ell_rowsum
diff --git a/ext/impl/psb_z_ell_scal.f90 b/ext/impl/psb_z_ell_scal.f90
new file mode 100644
index 00000000..7f2f8944
--- /dev/null
+++ b/ext/impl/psb_z_ell_scal.f90
@@ -0,0 +1,99 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_ell_scal(d,a,info,side)
+  !
+  ! Scale an ELL matrix in place by the entries of the vector d.
+  !
+  !   d     (in)    scaling factors
+  !   a     (inout) matrix to be scaled; a matrix flagged as unit is
+  !                 first converted with a%make_nonunit()
+  !   info  (out)   psb_success_ or an error code
+  !   side  (in, optional) passed through psb_toupper and compared
+  !                 with 'L'.  'L' (the default): every stored entry of
+  !                 row i is multiplied by d(i).  Any other value: entry
+  !                 (i,j) is multiplied by d(a%ja(i,j)).
+  !
+  ! Errors: psb_err_input_asize_invalid_i_ when d has fewer entries
+  ! than rows (left scaling) or columns (right scaling).
+  !
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_scal
+  implicit none
+  class(psb_z_ell_sparse_mat), intent(inout) :: a
+  complex(psb_dpk_), intent(in) :: d(:)
+  integer(psb_ipk_), intent(out) :: info
+  character, intent(in), optional :: side
+
+  Integer(Psb_ipk_) :: err_act, i, j, m, n, ierr(5)
+  character(len=20) :: name='scal'
+  character :: side_
+  logical :: left
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) then
+    call a%make_nonunit()
+  end if
+
+  side_ = 'L'
+  if (present(side)) then
+    side_ = psb_toupper(side)
+  end if
+
+  left = (side_ == 'L')
+
+  ! The row count drives the loops of BOTH branches, so it is fetched
+  ! here; previously it was only set for left scaling, leaving the
+  ! right-scaling loop bound undefined.
+  m = a%get_nrows()
+
+  if (left) then
+    if (size(d) < m) then
+      info=psb_err_input_asize_invalid_i_
+      call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/))
+      goto 9999
+    end if
+
+    do i=1, m
+      a%val(i,:) = a%val(i,:) * d(i)
+    enddo
+  else
+    n = a%get_ncols()
+    if (size(d) < n) then
+      info=psb_err_input_asize_invalid_i_
+      ierr(1) = 2; ierr(2) = size(d);
+      call psb_errpush(info,name,i_err=ierr)
+      goto 9999
+    end if
+
+    ! Only the first a%irn(i) entries of a row carry column indices
+    ! that are used to pick the scaling factor.
+    do i=1, m
+      do j=1, a%irn(i)
+        a%val(i,j) = a%val(i,j) * d(a%ja(i,j))
+      end do
+    enddo
+
+  end if
+
+  call a%set_host()
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_ell_scal
diff --git a/ext/impl/psb_z_ell_scals.f90 b/ext/impl/psb_z_ell_scals.f90
new file mode 100644
index 00000000..4086d8cc
--- /dev/null
+++ b/ext/impl/psb_z_ell_scals.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_ell_scals(d,a,info)
+  !
+  ! Multiply every stored coefficient of an ELL matrix by the scalar d.
+  !
+  !   d     (in)    scalar factor
+  !   a     (inout) matrix scaled in place; a matrix flagged as unit is
+  !                 first converted with a%make_nonunit()
+  !   info  (out)   always psb_success_ on return from this routine
+  !
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_scals
+  implicit none
+  class(psb_z_ell_sparse_mat), intent(inout) :: a
+  complex(psb_dpk_), intent(in) :: d
+  integer(psb_ipk_), intent(out) :: info
+
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='scal'
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+
+  ! Work on an up-to-date host copy with an explicit diagonal.
+  if (a%is_dev()) call a%sync()
+  if (a%is_unit()) call a%make_nonunit()
+
+  a%val(:,:) = a%val(:,:) * d
+
+  ! The host copy is now the reference one.
+  call a%set_host()
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_ell_scals
diff --git a/ext/impl/psb_z_ell_trim.f90 b/ext/impl/psb_z_ell_trim.f90
new file mode 100644
index 00000000..7cc2ed65
--- /dev/null
+++ b/ext/impl/psb_z_ell_trim.f90
@@ -0,0 +1,60 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_ell_trim(a)
+  !
+  ! Shrink the storage of an ELL matrix to what is actually in use:
+  ! a%irn and a%idiag are resized to max(1,nrows) entries, a%ja and
+  ! a%val to max(1,nrows) x max(1,longest row) where the longest row is
+  ! the largest value in a%irn(1:nrows).
+  !
+  !   a  (inout) matrix whose arrays are reallocated
+  !
+  ! Errors: if any psb_realloc call fails, psb_err_alloc_dealloc_ is
+  ! pushed and control passes to psb_error_handler.
+  !
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_trim
+  implicit none
+  class(psb_z_ell_sparse_mat), intent(inout) :: a
+  Integer(psb_ipk_) :: err_act, info, nr, m, nzm
+  character(len=20) :: name='trim'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  nr  = a%get_nrows()
+  m   = max(1_psb_ipk_,nr)
+  ! Scan only the rows that exist.  With zero rows the section is
+  ! empty, MAXVAL returns the most negative integer and the outer MAX
+  ! clamps the width to 1; the former a%irn(1:m) slice read element 1
+  ! even when there were no rows.
+  nzm = max(1_psb_ipk_,maxval(a%irn(1:nr)))
+
+  call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(m,nzm,a%val,info)
+
+  if (info /= psb_success_) then
+    ! Record the failure so the handler has something to report.
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    goto 9999
+  end if
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_ell_trim
diff --git a/ext/impl/psb_z_hdia_allocate_mnnz.f90 b/ext/impl/psb_z_hdia_allocate_mnnz.f90
new file mode 100644
index 00000000..abed0c58
--- /dev/null
+++ b/ext/impl/psb_z_hdia_allocate_mnnz.f90
@@ -0,0 +1,75 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+!
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_z_hdia_allocate_mnnz(m,n,a,nz)
+  !
+  ! Argument checking for the (m,n,nz) allocation entry point of the
+  ! HDIA format.  As written this routine only validates its inputs and
+  ! derives a per-row entry count; it does not allocate or modify any
+  ! component of a.
+  !
+  !   m, n  (in)    number of rows / columns, must not be negative
+  !   a     (inout) HDIA matrix (not touched here)
+  !   nz    (in, optional) requested total number of nonzeros
+  !
+  ! Errors: psb_err_iarg_neg_ with argument position 1, 2 or 3 when m,
+  ! n or the derived per-row count is negative.
+  !
+  use psb_base_mod
+  use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_z_hdia_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  Integer(Psb_ipk_) :: err_act, info, nz_, mdiv
+  character(len=20) :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione/))
+    goto 9999
+  endif
+  ! m == 0 is accepted by the checks above, so the divisor is clamped
+  ! to 1 to avoid an integer division by zero for empty matrices.
+  mdiv = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + mdiv - 1)/mdiv
+  else
+    nz_ = (max(7*m,7*n,ione) + mdiv - 1)/mdiv
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione/))
+    goto 9999
+  endif
+
+
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_hdia_allocate_mnnz
diff --git a/ext/impl/psb_z_hdia_csmv.f90 b/ext/impl/psb_z_hdia_csmv.f90
new file mode 100644
index 00000000..73d11da6
--- /dev/null
+++ b/ext/impl/psb_z_hdia_csmv.f90
@@ -0,0 +1,162 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_csmv + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! 
max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_z_inner_dia_csmv + +end subroutine psb_z_hdia_csmv diff --git a/ext/impl/psb_z_hdia_mold.f90 b/ext/impl/psb_z_hdia_mold.f90 new file mode 100644 index 00000000..d91bdb35 --- /dev/null +++ b/ext/impl/psb_z_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_hdia_mold(a,b,info)
+  !
+  ! Make b an empty object of dynamic type psb_z_hdia_sparse_mat.
+  !
+  !   a     (in)    HDIA matrix acting as the mold; none of its data
+  !                 is read here
+  !   b     (inout) if already allocated it is freed with b%free() and
+  !                 deallocated before being allocated anew
+  !   info  (out)   0 on success, psb_err_alloc_dealloc_ when the
+  !                 deallocate or allocate statement reports a failure
+  !
+  use psb_base_mod
+  use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_mold
+  implicit none
+  class(psb_z_hdia_sparse_mat), intent(in) :: a
+  class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='hdia_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+  info = 0
+
+  ! Release whatever b currently holds.
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+
+  ! Only attempt the new allocation when the release went through.
+  if (info == 0) allocate(psb_z_hdia_sparse_mat :: b, stat=info)
+
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info, name)
+    call psb_error_handler(err_act)
+  end if
+
+  return
+
+end subroutine psb_z_hdia_mold
diff --git a/ext/impl/psb_z_hdia_print.f90 b/ext/impl/psb_z_hdia_print.f90
new file mode 100644
index 00000000..46f7769d
--- /dev/null
+++ b/ext/impl/psb_z_hdia_print.f90
@@ -0,0 +1,121 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+subroutine psb_z_hdia_print(iout,a,iv,head,ivr,ivc)
+  !
+  ! Write an HDIA matrix to unit iout as a MatrixMarket coordinate
+  ! file.  The matrix is processed one hack (block of hacksize rows) at
+  ! a time: each hack is expanded into coordinate triples with
+  ! psi_z_xtr_coo_from_dia and the triples are written out.
+  !
+  !   iout  (in) output unit
+  !   a     (in) matrix to print
+  !   iv    (in, optional) index map applied to both row and column
+  !         indices; when present ivr/ivc are ignored
+  !   head  (optional) extra comment line written after the banner
+  !   ivr   (in, optional) index map applied to row indices
+  !   ivc   (in, optional) index map applied to column indices
+  !
+  ! If the work arrays cannot be allocated the routine returns after
+  ! the banner lines without writing any matrix data.
+  !
+  use psb_base_mod
+  use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_print
+  use psi_ext_util_mod
+  implicit none
+
+  integer(psb_ipk_), intent(in) :: iout
+  class(psb_z_hdia_sparse_mat), intent(in) :: a
+  integer(psb_lpk_), intent(in), optional :: iv(:)
+  character(len=*), optional :: head
+  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='hdia_print'
+  logical, parameter :: debug=.false.
+
+  character(len=80) :: frmt
+  integer(psb_ipk_) :: nr, nc, nz, info
+  integer(psb_ipk_) :: nhacks, hacksize, maxnzhack, nzhack
+  integer(psb_ipk_) :: ihack, irow1, ient, ncd, hackfirst, hacknext
+  integer(psb_ipk_), allocatable :: ia(:), ja(:)
+  complex(psb_dpk_), allocatable :: val(:)
+
+  ! Banner.
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% HDIA'
+
+  if (a%is_dev()) call a%sync()
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nz = a%get_nzeros()
+  frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
+
+  nhacks   = a%nhacks
+  hacksize = a%hacksize
+
+  ! Size the work arrays for the hack with the most diagonals.
+  maxnzhack = 0
+  do ihack = 1, nhacks
+    maxnzhack = max(maxnzhack,(a%hackoffsets(ihack+1)-a%hackoffsets(ihack)))
+  end do
+  maxnzhack = hacksize*maxnzhack
+  allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info)
+  if (info /= 0) return
+
+  write(iout,*) nr, nc, nz
+  do ihack = 1, nhacks
+    irow1     = (ihack-1)*hacksize + 1
+    hackfirst = a%hackoffsets(ihack)
+    hacknext  = a%hackoffsets(ihack+1)
+    ncd       = hacknext-hackfirst
+
+    call psi_z_xtr_coo_from_dia(nr,nc,&
+         & ia, ja, val, nzhack,&
+         & hacksize,ncd,&
+         & a%val((hacksize*hackfirst)+1:hacksize*hacknext),&
+         & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(irow1-1))
+
+    ! Pick the index translation according to which maps were given.
+    if (present(iv)) then
+      do ient = 1, nzhack
+        write(iout,frmt) iv(ia(ient)),iv(ja(ient)),val(ient)
+      enddo
+    else if (present(ivr)) then
+      if (present(ivc)) then
+        do ient = 1, nzhack
+          write(iout,frmt) ivr(ia(ient)),ivc(ja(ient)),val(ient)
+        enddo
+      else
+        do ient = 1, nzhack
+          write(iout,frmt) ivr(ia(ient)),ja(ient),val(ient)
+        enddo
+      end if
+    else if (present(ivc)) then
+      do ient = 1, nzhack
+        write(iout,frmt) ia(ient),ivc(ja(ient)),val(ient)
+      enddo
+    else
+      do ient = 1, nzhack
+        write(iout,frmt) ia(ient),ja(ient),val(ient)
+      enddo
+    end if
+
+  end do
+
+end subroutine psb_z_hdia_print
diff --git a/ext/impl/psb_z_hll_aclsum.f90 b/ext/impl/psb_z_hll_aclsum.f90
new file mode 100644
index 00000000..e4add299
--- /dev/null
+++ b/ext/impl/psb_z_hll_aclsum.f90
@@ -0,0 +1,109 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_hll_aclsum(d,a)
+  !
+  ! Column sums of absolute values for an HLL matrix: d(jc) receives
+  ! the sum of abs(val) over all stored entries whose column index is
+  ! jc, plus done when the matrix reports a unit diagonal.
+  !
+  !   d  (out) result vector, must hold at least a%get_ncols() entries
+  !   a  (in)  HLL matrix
+  !
+  ! Errors: psb_err_input_asize_small_i_ when d is shorter than the
+  ! number of columns.
+  !
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_aclsum
+  implicit none
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out) :: d(:)
+
+  integer(psb_ipk_) :: irow, ihack, kfirst, m, n, nrblk, hksz, mxrwl
+  Integer(Psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='aclsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = 0
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! Seed with the implicit diagonal contribution, if any.
+  d = merge(done, dzero, a%is_unit())
+
+  ! Walk the matrix one hack (block of hksz rows) at a time.
+  hksz  = a%get_hksz()
+  ihack = 0
+  do irow = 1, m, hksz
+    ihack  = ihack + 1
+    nrblk  = min(hksz,m-irow+1)
+    mxrwl  = (a%hkoffs(ihack+1) - a%hkoffs(ihack))/hksz
+    kfirst = a%hkoffs(ihack) + 1
+    call z_hll_aclsum(irow,nrblk,mxrwl,a%irn(irow),&
+         & a%ja(kfirst),hksz,a%val(kfirst),hksz, &
+         & d,info)
+    if (info /= psb_success_) goto 9999
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  ! Accumulate abs(val) by column for one hack of m rows whose index
+  ! and coefficient blocks are viewed as ja(ldj,*) and val(ldv,*).
+  subroutine z_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,&
+       & d,info)
+    integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    complex(psb_dpk_), intent(in) :: val(ldv,*)
+    real(psb_dpk_), intent(inout) :: d(*)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: lrow, lpos, jc
+
+    info = psb_success_
+    do lrow = 1, m
+      do lpos = 1, irn(lrow)
+        jc = ja(lrow,lpos)
+        d(jc) = d(jc) + abs(val(lrow,lpos))
+      end do
+    end do
+
+  end subroutine z_hll_aclsum
+
+end subroutine psb_z_hll_aclsum
diff --git a/ext/impl/psb_z_hll_allocate_mnnz.f90 b/ext/impl/psb_z_hll_allocate_mnnz.f90
new file mode 100644
index 00000000..6ba9d7f1
--- /dev/null
+++ b/ext/impl/psb_z_hll_allocate_mnnz.f90
@@ -0,0 +1,93 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+!
Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_hll_allocate_mnnz(m,n,a,nz)
+  !
+  ! Allocate the storage of an m x n HLL matrix and put it in build
+  ! state.  The per-row capacity is
+  !     (max(nz,1) + m - 1) / m          when nz is present
+  !     (max(7*m,7*n,1) + m - 1) / m     otherwise
+  ! (with the divisor m clamped to 1 for empty matrices), and a%ja and
+  ! a%val are sized to m times that capacity.
+  !
+  !   m, n  (in)    number of rows / columns, must not be negative
+  !   a     (inout) matrix to be (re)allocated; on success a%irn and
+  !                 a%idiag are zeroed and the matrix is marked as
+  !                 non-triangular, non-unit, with the default duplicate
+  !                 policy and default hack size, resident on the host
+  !   nz    (in, optional) requested total number of nonzeros
+  !
+  ! Errors: psb_err_iarg_neg_ with argument position 1, 2 or 3 when m,
+  ! n or the derived per-row capacity is negative.  A failing
+  ! psb_realloc sends control to psb_error_handler.
+  !
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_allocate_mnnz
+  implicit none
+  integer(psb_ipk_), intent(in) :: m,n
+  class(psb_z_hll_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(in), optional :: nz
+  Integer(Psb_ipk_) :: err_act, info, nz_, mdiv
+  character(len=20) :: name='allocate_mnz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (m < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/ione/))
+    goto 9999
+  endif
+  if (n < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/2*ione/))
+    goto 9999
+  endif
+  ! m == 0 is accepted by the checks above, so the divisor is clamped
+  ! to 1 to avoid an integer division by zero for empty matrices.
+  mdiv = max(m,ione)
+  if (present(nz)) then
+    nz_ = (max(nz,ione) + mdiv - 1)/mdiv
+  else
+    nz_ = (max(7*m,7*n,ione) + mdiv - 1)/mdiv
+  end if
+  if (nz_ < 0) then
+    info = psb_err_iarg_neg_
+    call psb_errpush(info,name,i_err=(/3*ione/))
+    goto 9999
+  endif
+
+  if (info == psb_success_) call psb_realloc(m,a%irn,info)
+  if (info == psb_success_) call psb_realloc(m,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info)
+  if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info)
+  if (info == psb_success_) call psb_realloc(m*nz_,a%val,info)
+  if (info == psb_success_) then
+    a%irn   = 0
+    a%idiag = 0
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+    call a%set_bld()
+    call a%set_triangle(.false.)
+    call a%set_unit(.false.)
+    call a%set_dupl(psb_dupl_def_)
+    call a%set_hksz(psb_hksz_def_)
+    call a%set_host()
+  end if
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_hll_allocate_mnnz
diff --git a/ext/impl/psb_z_hll_arwsum.f90 b/ext/impl/psb_z_hll_arwsum.f90
new file mode 100644
index 00000000..a6e020fd
--- /dev/null
+++ b/ext/impl/psb_z_hll_arwsum.f90
@@ -0,0 +1,108 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+!
documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_hll_arwsum(d,a)
+  !
+  ! Absolute row sums of an HLL matrix: on return d(i) holds the sum of
+  ! abs() of the stored entries of row i, started from done when
+  ! a%is_unit() is true and from dzero otherwise.
+  ! d must have at least a%get_nrows() elements, otherwise
+  ! psb_err_input_asize_small_i_ is pushed on the error stack.
+  !
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_arwsum
+  implicit none
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(out) :: d(:)
+
+  integer(psb_ipk_) :: nrows, ncols, row1, hack, nr, width, offs, hksz
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='arwsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = 0
+  if (a%is_dev()) call a%sync()
+
+  nrows = a%get_nrows()
+  ncols = a%get_ncols()
+  if (size(d) < nrows) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = nrows
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! Starting value of every row sum.
+  d = dzero
+  if (a%is_unit()) d = done
+
+  ! Walk the matrix one hack (group of up to hksz rows) at a time.
+  hksz = a%get_hksz()
+  hack = 0
+  do row1 = 1, nrows, hksz
+    hack  = hack + 1
+    nr    = min(hksz,nrows-row1+1)
+    width = (a%hkoffs(hack+1) - a%hkoffs(hack))/hksz
+    offs  = a%hkoffs(hack) + 1
+    call z_hll_arwsum(row1,nr,width,a%irn(row1),&
+         & a%ja(offs),hksz,a%val(offs),hksz, &
+         & d,info)
+    if (info /= psb_success_) goto 9999
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  ! Add to d(ir:ir+m-1) the absolute values of the first irn(i) entries
+  ! of each of the m rows of one hack, stored in val(ldv,*).
+  subroutine z_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,&
+       & d,info)
+    integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    complex(psb_dpk_), intent(in) :: val(ldv,*)
+    real(psb_dpk_), intent(inout) :: d(*)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: lrow, grow, col
+    real(psb_dpk_) :: rsum
+
+    info = psb_success_
+    do lrow = 1, m
+      grow = ir + lrow - 1
+      rsum = d(grow)
+      do col = 1, irn(lrow)
+        rsum = rsum + abs(val(lrow,col))
+      end do
+      d(grow) = rsum
+    end do
+
+  end subroutine z_hll_arwsum
+
+end subroutine psb_z_hll_arwsum
diff --git a/ext/impl/psb_z_hll_colsum.f90 b/ext/impl/psb_z_hll_colsum.f90
new file mode 100644
index 00000000..196a694e
--- /dev/null
+++ b/ext/impl/psb_z_hll_colsum.f90
@@ -0,0 +1,109 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_hll_colsum(d,a)
+  !
+  ! Column sums of an HLL matrix: on return d(j) holds the sum of the
+  ! stored (complex) entries of column j, started from zone when
+  ! a%is_unit() is true and from zzero otherwise.
+  ! d must have at least a%get_ncols() elements, otherwise
+  ! psb_err_input_asize_small_i_ is pushed on the error stack.
+  !
+  ! Note: the entries are summed as they are. Summing their absolute
+  ! values is a different operation (the absolute column sum), whose
+  ! natural result is real rather than complex.
+  !
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_colsum
+  implicit none
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  complex(psb_dpk_), intent(out) :: d(:)
+
+  integer(psb_ipk_) :: i,j,k,m,n, ir, hksz, mxrwl
+  integer(psb_ipk_) :: err_act, info, int_err(5)
+  character(len=20) :: name='colsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = 0
+  if (a%is_dev()) call a%sync()
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  if (size(d) < n) then
+    info=psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = n
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (a%is_unit()) then
+    d = zone
+  else
+    d = zzero
+  end if
+
+  ! Walk the matrix one hack (group of up to hksz rows) at a time;
+  ! j is the hack index, k the offset of its first entry in ja/val.
+  hksz = a%get_hksz()
+  j = 1
+  do i=1,m,hksz
+    ir = min(hksz,m-i+1)
+    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
+    k = a%hkoffs(j) + 1
+    call z_hll_colsum(i,ir,mxrwl,a%irn(i),&
+         & a%ja(k),hksz,a%val(k),hksz, &
+         & d,info)
+    if (info /= psb_success_) goto 9999
+    j = j + 1
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  ! Scatter-add the first irn(i) entries of each of the m rows of one
+  ! hack into d, using the column index stored in ja(i,j).
+  subroutine z_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,&
+       & d,info)
+    integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    complex(psb_dpk_), intent(in) :: val(ldv,*)
+    complex(psb_dpk_), intent(inout) :: d(*)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: i,j, jc
+
+    info = psb_success_
+    do i=1,m
+      do j=1, irn(i)
+        jc = ja(i,j)
+        ! Plain sum of the entry, not of its modulus.
+        d(jc) = d(jc) + val(i,j)
+      end do
+    end do
+
+  end subroutine z_hll_colsum
+
+end subroutine psb_z_hll_colsum
diff --git a/ext/impl/psb_z_hll_csgetblk.f90 b/ext/impl/psb_z_hll_csgetblk.f90
new file mode 100644
index 00000000..0cdf1fef
--- /dev/null
+++ b/ext/impl/psb_z_hll_csgetblk.f90
@@ -0,0 +1,83 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_z_hll_csgetblk(imin,imax,a,b,info,&
+     & jmin,jmax,iren,append,rscale,cscale)
+  !
+  ! Copy the entries of rows imin:imax of the HLL matrix a into the COO
+  ! matrix b by calling a%csget on b's ia/ja/val arrays, then update
+  ! b's entry count and call b%fix.
+  !
+  !   imin, imax     : row range requested from a.
+  !   a              : source matrix.
+  !   b              : destination COO matrix.
+  !   info           : psb_success_ on success, otherwise the code
+  !                    returned by a%csget or b%fix.
+  !   jmin, jmax, iren, rscale, cscale
+  !                  : optional, forwarded unchanged to a%csget.
+  !   append         : optional (default .false.); when .true. the new
+  !                    entries are placed after those already in b.
+  !
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csgetblk
+  implicit none
+
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  class(psb_z_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(in) :: imin,imax
+  integer(psb_ipk_),intent(out) :: info
+  logical, intent(in), optional :: append
+  integer(psb_ipk_), intent(in), optional :: iren(:)
+  integer(psb_ipk_), intent(in), optional :: jmin,jmax
+  logical, intent(in), optional :: rscale,cscale
+  integer(psb_ipk_) :: err_act, nzin, nzout
+  character(len=20) :: name='hll_getblk'
+  logical :: append_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+  if (present(append)) then
+    append_ = append
+  else
+    append_ = .false.
+  endif
+  if (append_) then
+    ! The entries being preserved live in b, so the insertion point is
+    ! given by b's current entry count, not by the size of the source a.
+    nzin = b%get_nzeros()
+  else
+    nzin = 0
+  endif
+
+  call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,&
+       & jmin=jmin, jmax=jmax, iren=iren, append=append_, &
+       & nzin=nzin, rscale=rscale, cscale=cscale)
+
+  if (info /= psb_success_) goto 9999
+
+  call b%set_nzeros(nzin+nzout)
+  call b%set_host()
+  call b%fix(info)
+  if (info /= psb_success_) goto 9999
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_hll_csgetblk
diff --git a/ext/impl/psb_z_hll_csgetptn.f90 b/ext/impl/psb_z_hll_csgetptn.f90
new file mode 100644
index 00000000..9d4c6714
--- /dev/null
+++ b/ext/impl/psb_z_hll_csgetptn.f90
@@ -0,0 +1,209 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! 
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csgetptn + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_hll_csgetrow + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_hll_csmm + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + complex(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csmm' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_z_hll_csmv + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + 
hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_z_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & 
a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == zone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == zone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*conjg(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if 
(alpha == zzero) then + if (beta == zzero) then + do i=1,m + y(ir+i-1) = zzero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == zzero) then + do i=1,m + tmp = zzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = zzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_z_hll_csmv_inner + + subroutine psb_z_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_8 + + subroutine psb_z_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: 
is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_24 + + subroutine psb_z_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_16 + + subroutine psb_z_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) 
:: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_32 + + subroutine psb_z_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_4 + +end subroutine psb_z_hll_csmv diff --git a/ext/impl/psb_z_hll_csnm1.f90 b/ext/impl/psb_z_hll_csnm1.f90 new file mode 100644 index 00000000..eb5c5b6b --- /dev/null +++ b/ext/impl/psb_z_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! 
Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+function psb_z_hll_csnm1(a) result(res)
+  !
+  ! Largest absolute column sum of an HLL matrix: a work vector vt of
+  ! length a%get_ncols() accumulates abs() of every stored entry by
+  ! column (starting from done when a%is_unit() is true, from dzero
+  ! otherwise) and the result is the largest element of vt.
+  ! A matrix with no columns yields dzero.
+  !
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csnm1
+
+  implicit none
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  real(psb_dpk_) :: res
+
+  integer(psb_ipk_) :: i,j,k,m,n, ir, info, hksz, mxrwl
+  real(psb_dpk_), allocatable :: vt(:)
+  integer(psb_ipk_) :: err_act
+  character(len=20) :: name='z_hll_csnm1'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+
+  res = dzero
+  if (a%is_dev()) call a%sync()
+  n = a%get_ncols()
+  m = a%get_nrows()
+  allocate(vt(n),stat=info)
+  if (Info /= 0) then
+    info = psb_err_alloc_dealloc_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  if (a%is_unit()) then
+    vt = done
+  else
+    vt = dzero
+  end if
+  ! Walk the matrix one hack (group of up to hksz rows) at a time;
+  ! j is the hack index, k the offset of its first entry in ja/val.
+  hksz = a%get_hksz()
+  j=1
+  do i=1,m,hksz
+    ir = min(hksz,m-i+1)
+    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
+    k = a%hkoffs(j) + 1
+    call psb_z_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),&
+         & a%ja(k),hksz,a%val(k),hksz,&
+         & vt,info)
+    if (info /= psb_success_) goto 9999
+    j = j + 1
+  end do
+
+  ! maxval of a zero-size array is the most negative representable
+  ! real, which is not a meaningful norm: keep res = dzero in that case.
+  if (size(vt) > 0) res = maxval(vt)
+
+  call psb_erractionrestore(err_act)
+  return
+
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  ! Add abs() of the first irn(i) entries of each of the m rows of one
+  ! hack to vt, at the column index stored in ja(i,j).
+  subroutine psb_z_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,&
+       & vt,info)
+    integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    complex(psb_dpk_), intent(in) :: val(ldv,*)
+    real(psb_dpk_), intent(inout) :: vt(*)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: i,j, jc
+
+    info = psb_success_
+    do i=1,m
+      do j=1, irn(i)
+        jc = ja(i,j)
+        vt(jc) = vt(jc) + abs(val(i,j))
+      end do
+    end do
+  end subroutine psb_z_hll_csnm1_inner
+
+end function psb_z_hll_csnm1
diff --git a/ext/impl/psb_z_hll_csnmi.f90 b/ext/impl/psb_z_hll_csnmi.f90
new file mode 100644
index 00000000..6243e5cf
--- /dev/null
+++ b/ext/impl/psb_z_hll_csnmi.f90
@@ -0,0 +1,104 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+function psb_z_hll_csnmi(a) result(res)
+  !
+  ! Largest absolute row sum of an HLL matrix: for every row the abs()
+  ! of its stored entries are added up (starting from done when
+  ! a%is_unit() is true, from dzero otherwise) and res is the largest
+  ! such sum, never below dzero.
+  !
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csnmi
+  implicit none
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  real(psb_dpk_) :: res
+
+  integer(psb_ipk_) :: nrows, ncols, row1, hack, nr, width, offs, hksz, info
+  integer(psb_ipk_) :: err_act
+  logical :: is_unit
+  character(len=20) :: name='z_csnmi'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  info = 0
+  res = dzero
+  if (a%is_dev()) call a%sync()
+
+  ncols = a%get_ncols()
+  nrows = a%get_nrows()
+  is_unit = a%is_unit()
+  hksz = a%get_hksz()
+  ! Walk the matrix one hack (group of up to hksz rows) at a time.
+  hack = 0
+  do row1 = 1, nrows, hksz
+    hack  = hack + 1
+    nr    = min(hksz,nrows-row1+1)
+    width = (a%hkoffs(hack+1) - a%hkoffs(hack))/hksz
+    offs  = a%hkoffs(hack) + 1
+    call psb_z_hll_csnmi_inner(row1,nr,width,a%irn(row1),&
+         & a%ja(offs),hksz,a%val(offs),hksz,&
+         & res,is_unit,info)
+    if (info /= psb_success_) goto 9999
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  ! Raise res to the largest absolute row sum found among the m rows of
+  ! one hack; only the first irn(i) entries of row i are considered.
+  subroutine psb_z_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,&
+       & res,is_unit,info)
+    integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    complex(psb_dpk_), intent(in) :: val(ldv,*)
+    real(psb_dpk_), intent(inout) :: res
+    logical :: is_unit
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: lrow, col
+    real(psb_dpk_) :: base, rsum
+
+    info = psb_success_
+    ! Starting value shared by all the row sums.
+    base = dzero
+    if (is_unit) base = done
+    do lrow = 1, m
+      rsum = base
+      do col = 1, irn(lrow)
+        rsum = rsum + abs(val(lrow,col))
+      end do
+      res = max(rsum,res)
+    end do
+  end subroutine psb_z_hll_csnmi_inner
+
+end function psb_z_hll_csnmi
diff --git a/ext/impl/psb_z_hll_csput.f90 b/ext/impl/psb_z_hll_csput.f90
new file mode 100644
index 00000000..e47664c7
--- /dev/null
+++ b/ext/impl/psb_z_hll_csput.f90
@@ -0,0 +1,233 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! 
documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_hll_csput_a
+!   Updates the values of entries that already exist in an HLL matrix
+!   that is in the update state.
+!
+!   nz               - number of (row,col,value) triples; must be > 0
+!   ia, ja, val      - the triples; each array must hold at least nz items
+!   a                - the matrix to update
+!   imin,imax,jmin,jmax - accepted for interface compatibility; they are
+!                      not referenced by this implementation
+!   info             - psb_success_ on success.  Errors pushed here:
+!                      psb_err_iarg_neg_ (nz <= 0),
+!                      psb_err_input_asize_invalid_i_ (array shorter than
+!                      nz; i_err(1) holds the argument position),
+!                      psb_err_invalid_mat_state_ (matrix is in the build
+!                      state, is in neither build nor update state, or
+!                      the search/update step failed).
+!
+subroutine psb_z_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csput_a
+  implicit none
+
+  class(psb_z_hll_sparse_mat), intent(inout) :: a
+  complex(psb_dpk_), intent(in)      :: val(:)
+  integer(psb_ipk_), intent(in)      :: nz, ia(:), ja(:), imin,imax,jmin,jmax
+  integer(psb_ipk_), intent(out)     :: info
+
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='z_hll_csput_a'
+  logical, parameter :: debug=.false.
+  integer(psb_ipk_)  :: int_err(5)
+
+
+  call psb_erractionsave(err_act)
+  info    = psb_success_
+  ! Only int_err(1) is meaningful below; define the rest so that no
+  ! undefined values are handed to psb_errpush.
+  int_err = 0
+
+  if (nz <= 0) then
+    info = psb_err_iarg_neg_
+    int_err(1)=1
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (size(ia) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=2
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  if (size(ja) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=3
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+  if (size(val) < nz) then
+    info = psb_err_input_asize_invalid_i_
+    int_err(1)=4
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  end if
+
+  ! nz == 0 is rejected by the first check above, so no separate
+  ! "nothing to do" exit is needed here.
+
+  if (a%is_bld()) then
+    ! Build phase should only ever be in COO
+    info = psb_err_invalid_mat_state_
+
+  else if (a%is_upd()) then
+    if (a%is_dev()) call a%sync()
+
+    call psb_z_hll_srch_upd(nz,ia,ja,val,a,&
+         & imin,imax,jmin,jmax,info)
+
+    ! Whatever the search reported, callers see a single error code
+    if (info /= psb_success_) info = psb_err_invalid_mat_state_
+    call a%set_host()
+
+  else
+    ! State is wrong.
+    info = psb_err_invalid_mat_state_
+  end if
+  if (info /= psb_success_) then
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+contains
+
+  !
+  ! Locates each (ia(i),ja(i)) in the matrix and updates its value.
+  !
+  ! A row ir lives in hack hk = (ir-1)/hksz + 1 at in-hack position
+  ! ihkr = ir - (hk-1)*hksz; its entries are the elements
+  ! hkoffs(hk)+ihkr, hkoffs(hk)+ihkr+hksz, ... of ja/val (stride hksz),
+  ! and a%irn(ir) of them are in use.  The column is looked up with
+  ! psb_bsrch on that strided slice.
+  !
+  ! Duplicate policy (a%get_dupl()):
+  !   psb_dupl_ovwrt_, psb_dupl_err_ : the stored value is replaced
+  !   psb_dupl_add_                  : val(i) is added to the stored value
+  !
+  ! info on exit:
+  !   psb_success_  all entries handled
+  !   -4            matrix is not flagged as sorted
+  !   -3            unknown duplicate policy
+  !   i > 0         index of the first triple whose column was not found
+  ! Rows outside 1..nrows are silently skipped.
+  !
+  subroutine psb_z_hll_srch_upd(nz,ia,ja,val,a,&
+       & imin,imax,jmin,jmax,info)
+
+    implicit none
+
+    class(psb_z_hll_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in)  :: nz, imin,imax,jmin,jmax
+    integer(psb_ipk_), intent(in)  :: ia(:),ja(:)
+    complex(psb_dpk_), intent(in)  :: val(:)
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: i, ir, ic, ip, ipos, nr, rowlen, dupl,&
+         & hksz, hk, hkzpnt, ihkr, lastrow
+    logical           :: accumulate
+    integer(psb_ipk_) :: debug_level, debug_unit
+    character(len=20) :: name='z_hll_srch_upd'
+
+    info = psb_success_
+    debug_unit  = psb_get_debug_unit()
+    debug_level = psb_get_debug_level()
+
+    dupl = a%get_dupl()
+
+    if (.not.a%is_sorted()) then
+      info = -4
+      return
+    end if
+
+    select case(dupl)
+    case(psb_dupl_ovwrt_,psb_dupl_err_)
+      ! Overwrite.
+      ! Cannot test for error, should have been caught earlier.
+      accumulate = .false.
+    case(psb_dupl_add_)
+      accumulate = .true.
+    case default
+      info = -3
+      if (debug_level >= psb_debug_serial_) &
+           & write(debug_unit,*) trim(name),&
+           & ': Duplicate handling: ',dupl
+      return
+    end select
+
+    lastrow = -1
+    nr      = a%get_nrows()
+    hksz    = a%get_hksz()
+
+    do i=1, nz
+      ir = ia(i)
+      ic = ja(i)
+
+      if ((ir > 0).and.(ir <= nr)) then
+        ! Recompute the hack coordinates only when the row changes
+        if (ir /= lastrow) then
+          hk      = ((ir-1)/hksz)
+          lastrow = ir
+          ihkr    = ir - hk*hksz
+          hk      = hk + 1
+          hkzpnt  = a%hkoffs(hk)
+          rowlen  = a%irn(ir)
+        end if
+
+        ip = psb_bsrch(ic,rowlen,&
+             & a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(rowlen-1)*hksz:hksz))
+        if (ip>0) then
+          ipos = hkzpnt+ihkr+(ip-1)*hksz
+          if (accumulate) then
+            ! psb_dupl_add_: sum into the existing coefficient
+            a%val(ipos) = a%val(ipos) + val(i)
+          else
+            a%val(ipos) = val(i)
+          end if
+        else
+          ! Print the same slice that was searched (in-hack row ihkr)
+          if (debug_level >= psb_debug_serial_) &
+               & write(debug_unit,*) trim(name),&
+               & ': Was searching ',ic,' in: ',rowlen,&
+               & ' : ',a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(rowlen-1)*hksz:hksz)
+          info = i
+          return
+        end if
+
+      else
+        if (debug_level >= psb_debug_serial_) &
+             & write(debug_unit,*) trim(name),&
+             & ': Discarding row that does not belong to us.'
+      end if
+
+    end do
+
+  end subroutine psb_z_hll_srch_upd
+
+end subroutine psb_z_hll_csput_a
diff --git a/ext/impl/psb_z_hll_cssm.f90 b/ext/impl/psb_z_hll_cssm.f90
new file mode 100644
index 00000000..ba1aa150
--- /dev/null
+++ b/ext/impl/psb_z_hll_cssm.f90
@@ -0,0 +1,506 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_cssm + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + complex(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. 
(a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_z_hll_cssv + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + complex(psb_dpk_) :: acc + complex(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_z_hll_get_diag + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. 
+ + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = zone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_z_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = zzero + end if + end do + + end subroutine psb_z_hll_get_diag_inner + +end subroutine psb_z_hll_get_diag diff --git a/ext/impl/psb_z_hll_maxval.f90 b/ext/impl/psb_z_hll_maxval.f90 new file mode 100644 index 00000000..22258c3a --- /dev/null +++ b/ext/impl/psb_z_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. 
Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_hll_maxval
+!   Returns the largest modulus found among all elements of a%val; if
+!   the matrix reports is_unit() the result is at least one.
+!
+!   a    - the HLL matrix; synchronised first if is_dev() is true.
+!   res  - the maximum modulus, never negative.
+!
+function psb_z_hll_maxval(a) result(res)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_maxval
+  implicit none
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  real(psb_dpk_)         :: res
+
+  if (a%is_dev()) call a%sync()
+  ! MAXVAL of a zero-size array is -HUGE(res); clamp at zero so that a
+  ! matrix with no stored values does not yield a negative "maximum".
+  res = max(dzero, maxval(abs(a%val(:))))
+  if (a%is_unit()) res = max(res,done)
+
+end function psb_z_hll_maxval
diff --git a/ext/impl/psb_z_hll_mold.f90 b/ext/impl/psb_z_hll_mold.f90
new file mode 100644
index 00000000..e108e9ce
--- /dev/null
+++ b/ext/impl/psb_z_hll_mold.f90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. 
Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_hll_mold
+!   Makes b a freshly allocated object of dynamic type
+!   psb_z_hll_sparse_mat.  If b is already allocated it is freed and
+!   deallocated first.  The argument a is used only for dispatch; its
+!   contents are not read.
+!
+!   info - 0 on success; psb_err_alloc_dealloc_ (also pushed on the
+!          error stack) if the deallocate or the allocate fails.
+!
+subroutine psb_z_hll_mold(a,b,info)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_mold
+  implicit none
+  class(psb_z_hll_sparse_mat), intent(in)                  :: a
+  class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out)                           :: info
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='hll_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  ! Only allocate when the optional clean-up above succeeded
+  if (info == 0) then
+    allocate(psb_z_hll_sparse_mat :: b, stat=info)
+  end if
+
+  if (info == psb_success_) return
+
+  ! Any failure is reported as an allocation/deallocation error
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_hll_mold
diff --git a/ext/impl/psb_z_hll_print.f90 b/ext/impl/psb_z_hll_print.f90
new file mode 100644
index 00000000..43882264
--- /dev/null
+++ b/ext/impl/psb_z_hll_print.f90
@@ -0,0 +1,134 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_hll_print
+!   Writes the matrix to unit iout in MatrixMarket coordinate layout:
+!   banner, optional '% head' line, two comment lines, the size line
+!   "nrows ncols nzeros", then one "row col value" line per stored
+!   entry (a%irn(i) entries for row i), formatted with the string
+!   returned by psb_z_get_print_frmt.
+!
+!   iout     - output unit
+!   a        - the HLL matrix; synchronised first if is_dev() is true
+!   iv       - optional index map applied to BOTH row and column
+!              indices; when present, ivr and ivc are ignored
+!   head     - optional text for the header comment line
+!   ivr, ivc - optional index maps for rows / columns, used only when
+!              iv is absent; an absent map leaves the index unchanged
+!
+subroutine psb_z_hll_print(iout,a,iv,head,ivr,ivc)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_print
+  implicit none
+
+  integer(psb_ipk_), intent(in)           :: iout
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  integer(psb_lpk_), intent(in), optional :: iv(:)
+  character(len=*), optional              :: head
+  integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+
+  integer(psb_ipk_)  :: err_act
+  character(len=20)  :: name='z_hll_print'
+  logical, parameter :: debug=.false.
+
+  character(len=80)  :: frmt
+  integer(psb_ipk_)  :: i, j, k, jc, irs, nr, nc, nz, hksz
+  integer(psb_lpk_)  :: irow, icol
+
+
+  ! The values are complex, so the banner must declare the "complex"
+  ! field (each entry line carries a real and an imaginary part).
+  write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general'
+  if (present(head)) write(iout,'(a,a)') '% ',head
+  write(iout,'(a)') '%'
+  write(iout,'(a,a)') '% COO'
+
+  if (a%is_dev()) call a%sync()
+
+  nr = a%get_nrows()
+  nc = a%get_ncols()
+  nz = a%get_nzeros()
+  frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc)
+
+  hksz = a%get_hksz()
+
+  write(iout,*) nr, nc, nz
+
+  do i=1, nr
+    ! irs = zero-based hack holding row i; k = position of the row's
+    ! first entry, later entries follow at stride hksz.
+    irs = (i-1)/hksz
+    k   = a%hkoffs(irs+1) + (i-(irs*hksz))
+    do j=1,a%irn(i)
+      jc = a%ja(k)
+      ! Index maps are consulted only for entries actually printed
+      if (present(iv)) then
+        irow = iv(i)
+        icol = iv(jc)
+      else
+        if (present(ivr)) then
+          irow = ivr(i)
+        else
+          irow = i
+        end if
+        if (present(ivc)) then
+          icol = ivc(jc)
+        else
+          icol = jc
+        end if
+      end if
+      write(iout,frmt) irow,icol,a%val(k)
+      k = k + hksz
+    end do
+  enddo
+
+end subroutine psb_z_hll_print
diff --git a/ext/impl/psb_z_hll_reallocate_nz.f90 b/ext/impl/psb_z_hll_reallocate_nz.f90
new file mode 100644
index 00000000..23432f9f
--- /dev/null
+++ b/ext/impl/psb_z_hll_reallocate_nz.f90
@@ -0,0 +1,64 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_hll_reallocate_nz
+!   Resizes the a%ja and a%val arrays to max(nz,1) elements through
+!   psb_realloc.  No other component of the matrix is touched.
+!
+!   NOTE: the original author left a "What should this really do???"
+!   remark here, i.e. the intended semantics for the HLL layout are an
+!   open question.
+!
+!   On failure psb_err_alloc_dealloc_ is pushed on the error stack and
+!   the error handler is invoked; there is no info argument.
+!
+subroutine psb_z_hll_reallocate_nz(nz,a)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_reallocate_nz
+  implicit none
+  integer(psb_ipk_), intent(in)              :: nz
+  class(psb_z_hll_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_)  :: newsz
+  integer(psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='z_hll_reallocate_nz'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+
+  ! Never request fewer than one element
+  newsz = max(nz,ione)
+
+  call psb_realloc(newsz,a%ja,info)
+  if (info == psb_success_) then
+    call psb_realloc(newsz,a%val,info)
+  end if
+
+  if (info == psb_success_) then
+    call psb_erractionrestore(err_act)
+  else
+    call psb_errpush(psb_err_alloc_dealloc_,name)
+    call psb_error_handler(err_act)
+  end if
+  return
+
+end subroutine psb_z_hll_reallocate_nz
diff --git a/ext/impl/psb_z_hll_reinit.f90 b/ext/impl/psb_z_hll_reinit.f90
new file mode 100644
index 00000000..b6851c61
--- /dev/null
+++ b/ext/impl/psb_z_hll_reinit.f90
@@ -0,0 +1,77 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_hll_reinit
+!   Puts an assembled matrix back into the update state so that its
+!   coefficients can be refilled.
+!
+!   a     - the HLL matrix
+!   clear - optional, default .true.; when true all of a%val is set to
+!           zero before the state change
+!
+!   Behaviour by current state:
+!     build or update : nothing is changed
+!     assembled       : sync from device if needed, optionally clear
+!                       the values, then set_upd() and set_host()
+!     anything else   : psb_err_invalid_mat_state_ is pushed and the
+!                       error handler is invoked
+!
+subroutine psb_z_hll_reinit(a,clear)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_reinit
+  implicit none
+
+  class(psb_z_hll_sparse_mat), intent(inout) :: a
+  logical, intent(in), optional :: clear
+
+  integer(psb_ipk_)  :: err_act, info
+  character(len=20)  :: name='reinit'
+  logical            :: clear_
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+
+
+  if (present(clear)) then
+    clear_ = clear
+  else
+    clear_ = .true.
+  end if
+
+  if (a%is_bld() .or. a%is_upd()) then
+    ! Nothing to do.  Fall through (instead of returning here) so that
+    ! the psb_erractionsave above is paired with psb_erractionrestore
+    ! like on every other successful exit.
+    continue
+  else if (a%is_asb()) then
+    if (a%is_dev()) call a%sync()
+    if (clear_) a%val(:) = zzero
+    call a%set_upd()
+    call a%set_host()
+  else
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+
+end subroutine psb_z_hll_reinit
diff --git a/ext/impl/psb_z_hll_rowsum.f90 b/ext/impl/psb_z_hll_rowsum.f90
new file mode 100644
index 00000000..027c5b22
--- /dev/null
+++ b/ext/impl/psb_z_hll_rowsum.f90
@@ -0,0 +1,110 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+!
+! psb_z_hll_rowsum
+!   For every row i of A stores in d(i) the (complex, signed) sum of
+!   the stored entries of that row.  All of d is first initialised to
+!   one if the matrix reports is_unit(), to zero otherwise, so elements
+!   of d beyond the number of rows keep that initial value.
+!
+!   d - output vector; must have at least get_nrows() elements,
+!       otherwise psb_err_input_asize_small_i_ is pushed with
+!       i_err = (1, size(d), nrows, ...) and the error handler runs
+!   a - the HLL matrix; synchronised first if is_dev() is true
+!
+subroutine psb_z_hll_rowsum(d,a)
+
+  use psb_base_mod
+  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_rowsum
+  implicit none
+  class(psb_z_hll_sparse_mat), intent(in) :: a
+  complex(psb_dpk_), intent(out)          :: d(:)
+
+  integer(psb_ipk_)  :: row0, hack, nrows, nhrows, hksz, mxrwl, first
+  integer(psb_ipk_)  :: err_act, info, int_err(5)
+  character(len=20)  :: name='rowsum'
+  logical, parameter :: debug=.false.
+
+  call psb_erractionsave(err_act)
+  info = 0
+  if (a%is_dev()) call a%sync()
+
+  nrows = a%get_nrows()
+  if (size(d) < nrows) then
+    info       = psb_err_input_asize_small_i_
+    int_err(1) = 1
+    int_err(2) = size(d)
+    int_err(3) = nrows
+    call psb_errpush(info,name,i_err=int_err)
+    call psb_error_handler(err_act)
+    return
+  end if
+
+  ! Starting value of every row sum (implicit unit diagonal or not)
+  d(:) = merge(zone, zzero, a%is_unit())
+
+  hksz = a%get_hksz()
+  hack = 0
+  do row0 = 1, nrows, hksz
+    hack   = hack + 1
+    ! The last hack may hold fewer than hksz rows
+    nhrows = min(hksz, nrows-row0+1)
+    mxrwl  = (a%hkoffs(hack+1) - a%hkoffs(hack))/hksz
+    first  = a%hkoffs(hack) + 1
+    call z_hll_rowsum(row0,nhrows,mxrwl,a%irn(row0),&
+         & a%ja(first),hksz,a%val(first),hksz, &
+         & d,info)
+    if (info /= psb_success_) then
+      call psb_error_handler(err_act)
+      return
+    end if
+  end do
+
+  call psb_erractionrestore(err_act)
+  return
+
+contains
+
+  !
+  ! Adds the entries of one hack to the matching elements of d.
+  !   ir       - global index of the first row of the hack
+  !   m        - number of rows in the hack
+  !   irn(1:m) - number of stored entries per row
+  !   val      - hack values, val(row,entry) with leading dimension ldv
+  !   d        - full result vector; elements ir..ir+m-1 are updated
+  ! n, ja and ldj are part of the calling sequence but are not
+  ! referenced here.
+  !
+  subroutine z_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,&
+       & d,info)
+    integer(psb_ipk_), intent(in)    :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
+    complex(psb_dpk_), intent(in)    :: val(ldv,*)
+    complex(psb_dpk_), intent(inout) :: d(*)
+    integer(psb_ipk_), intent(out)   :: info
+
+    integer(psb_ipk_) :: row, col, gpos
+    complex(psb_dpk_) :: total
+
+    info = psb_success_
+    do row = 1, m
+      gpos  = ir + row - 1
+      total = d(gpos)
+      do col = 1, irn(row)
+        total = total + (val(row,col))
+      end do
+      d(gpos) = total
+    end do
+
+  end subroutine z_hll_rowsum
+
+end subroutine psb_z_hll_rowsum
diff --git a/ext/impl/psb_z_hll_scal.f90 b/ext/impl/psb_z_hll_scal.f90
new file mode 100644
index 
00000000..a11d0da8 --- /dev/null +++ b/ext/impl/psb_z_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
subroutine psb_z_hll_scal(d,a,info,side)
  !
  ! Scale the HLL matrix A by the entries of the vector D.
  !
  !   d     (in)    scaling factors
  !   a     (inout) matrix to be scaled
  !   info  (out)   return code
  !   side  (in, optional) 'L' (default, case-insensitive) multiplies row i
  !                 by d(i); any other value multiplies column j by d(j).
  !
  ! NOTE(review): the routine is currently switched off. Right after the
  ! device synchronisation it unconditionally reports
  ! psb_err_missing_override_method_ and leaves through the error handler,
  ! so the scaling code below is never executed. The reason is not visible
  ! in this file, hence the early exit is kept as is.
  !
  use psb_base_mod
  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_scal
  implicit none
  class(psb_z_hll_sparse_mat), intent(inout) :: a
  complex(psb_dpk_), intent(in)              :: d(:)
  integer(psb_ipk_), intent(out)             :: info
  character, intent(in), optional            :: side

  integer(psb_ipk_)  :: err_act, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir
  character(len=20)  :: name='scal'
  character          :: side_
  logical            :: left
  logical, parameter :: debug=.false.

  info = psb_success_
  ! ierr is handed to psb_errpush: make sure it never carries garbage.
  ierr(:) = 0
  call psb_erractionsave(err_act)

  if (a%is_dev()) call a%sync()

  ! Early exit: method reported as not available (see header note).
  info = psb_err_missing_override_method_
  call psb_errpush(info,name,i_err=ierr)
  goto 9999

  ! ------------------------------------------------------------------
  ! Disabled implementation (unreachable while the exit above is there)
  ! ------------------------------------------------------------------
  info  = psb_success_
  side_ = 'L'
  if (present(side)) then
    side_ = psb_toupper(side)
  end if
  left = (side_ == 'L')

  ld = size(d)
  ! The row count drives the loop over hacks for both sides.
  m  = a%get_nrows()
  if (left) then
    if (ld < m) then
      info = psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = ld
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if
  else
    n = a%get_ncols()
    if (ld < n) then
      info = psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = ld
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if
  end if

  ! Walk the matrix one hack (group of at most hksz rows) at a time;
  ! hkoffs(j)+1 is the first entry of hack j inside ja/val.
  hksz = a%get_hksz()
  j    = 1
  do i=1,m,hksz
    ir    = min(hksz,m-i+1)
    mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz
    k     = a%hkoffs(j) + 1
    call psb_z_hll_scal_inner(i,ir,mxrwl,a%irn(i),&
         & a%ja(k),hksz,a%val(k),hksz,&
         & left,d,info)
    if (info /= psb_success_) goto 9999
    j = j + 1
  end do
  ! Same post-update call used by psb_z_hll_scals after changing val.
  call a%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

contains

  ! Scale one hack, seen as an (ldv x *) panel.
  !   ir   global index of the first row of the hack
  !   m    number of rows in the hack
  !   n    not referenced here
  !   irn  number of stored entries for each row of the hack
  subroutine psb_z_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info)
    integer(psb_ipk_), intent(in)    :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*)
    complex(psb_dpk_), intent(in)    :: d(*)
    complex(psb_dpk_), intent(inout) :: val(ldv,*)
    logical, intent(in)              :: left
    integer(psb_ipk_), intent(out)   :: info

    integer(psb_ipk_) :: i, j, jc

    info = psb_success_

    if (left) then
      ! Row scaling: local row i corresponds to global row ir+i-1.
      do i=1,m
        do j=1, irn(i)
          val(i,j) = val(i,j)*d(ir+i-1)
        end do
      end do
    else
      ! Column scaling: the factor is selected by the column index.
      do i=1,m
        do j=1, irn(i)
          jc = ja(i,j)
          val(i,j) = val(i,j)*d(jc)
        end do
      end do
    end if

  end subroutine psb_z_hll_scal_inner

end subroutine psb_z_hll_scal
subroutine psb_z_hll_scals(d,a,info)
  !
  ! Multiply every stored coefficient of the HLL matrix A by the scalar D.
  !
  !   d     (in)    scalar factor
  !   a     (inout) matrix to be scaled
  !   info  (out)   always psb_success_: no step in here reports a failure
  !
  use psb_base_mod
  use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_scals
  implicit none
  class(psb_z_hll_sparse_mat), intent(inout) :: a
  complex(psb_dpk_), intent(in)              :: d
  integer(psb_ipk_), intent(out)             :: info

  integer(psb_ipk_) :: err_act

  info = psb_success_
  call psb_erractionsave(err_act)

  ! Bring the data up to date before touching val
  if (a%is_dev()) then
    call a%sync()
  end if

  ! A matrix flagged as unit-diagonal is turned into a plain one first
  if (a%is_unit()) call a%make_nonunit()

  a%val(:) = a%val(:) * d
  call a%set_host()

  call psb_erractionrestore(err_act)

end subroutine psb_z_hll_scals
subroutine psb_z_mv_dia_from_coo(a,b,info)
  !
  ! Build the DIA matrix A from the COO matrix B, then release B.
  !
  !   a     (inout) destination matrix
  !   b     (inout) source matrix; fixed first if not already ordered by
  !                 rows, freed once the copy has succeeded
  !   info  (out)   psb_success_ on success; the code returned by b%fix if
  !                 that step fails; psb_err_alloc_dealloc_ if the copy fails
  !
  use psb_base_mod
  use psb_z_dia_mat_mod, psb_protect_name => psb_z_mv_dia_from_coo
  implicit none

  class(psb_z_dia_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  if (.not.b%is_by_rows()) then
    call b%fix(info)
    if (info /= psb_success_) return
  end if

  call a%cp_from_coo(b,info)
  if (info /= 0) then
    ! Any copy failure is reported as an allocation problem
    info = psb_err_alloc_dealloc_
    return
  end if

  call b%free()

end subroutine psb_z_mv_dia_from_coo
subroutine psb_z_mv_dia_to_coo(a,b,info)
  !
  ! Copy the DIA matrix A into the COO matrix B, then release A.
  !
  !   a     (inout) source matrix, freed only after a successful copy
  !   b     (inout) destination matrix
  !   info  (out)   psb_success_, or psb_err_alloc_dealloc_ if the copy fails
  !
  use psb_base_mod
  use psb_z_dia_mat_mod, psb_protect_name => psb_z_mv_dia_to_coo
  implicit none

  class(psb_z_dia_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  call a%cp_to_coo(b,info)
  if (info == 0) then
    call a%free()
  else
    info = psb_err_alloc_dealloc_
  end if

end subroutine psb_z_mv_dia_to_coo
are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
subroutine psb_z_mv_ell_from_coo(a,b,info)
  !
  ! Build the ELL matrix A from the COO matrix B, then release B.
  !
  !   a     (inout) destination matrix
  !   b     (inout) source matrix; fixed first if not already ordered by
  !                 rows, freed only after the copy has succeeded
  !   info  (out)   psb_success_ on success, otherwise the code returned
  !                 by the failing step (b%fix or a%cp_from_coo)
  !
  use psb_base_mod
  use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_from_coo
  implicit none

  class(psb_z_ell_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  if (.not.b%is_by_rows()) call b%fix(info)
  if (info /= psb_success_) return

  call a%cp_from_coo(b,info)
  ! Keep the source intact when the copy did not go through, so that the
  ! caller does not end up with neither A nor B.
  if (info /= psb_success_) return

  call b%free()

end subroutine psb_z_mv_ell_from_coo
subroutine psb_z_mv_ell_from_fmt(a,b,info)
  !
  ! Move the contents of B, whatever its storage format, into the ELL
  ! matrix A.
  !
  !   B is COO   -> delegated to a%mv_from_coo
  !   B is ELL   -> base-type data is assigned, the component arrays are
  !                 handed over with move_alloc, B is freed
  !   otherwise  -> B is moved to a temporary COO matrix which is then
  !                 moved into A
  !
  !   info  (out)  psb_success_ unless a delegated call reports otherwise
  !
  use psb_base_mod
  use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_from_fmt
  implicit none

  class(psb_z_ell_sparse_mat), intent(inout)  :: a
  class(psb_z_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)              :: info

  type(psb_z_coo_sparse_mat) :: coo_buf

  info = psb_success_

  select type (b)
  type is (psb_z_coo_sparse_mat)
    call a%mv_from_coo(b,info)

  type is (psb_z_ell_sparse_mat)
    if (b%is_dev()) then
      call b%sync()
    end if
    a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
    ! No data is copied: ownership of each array goes from B to A
    call move_alloc(b%val,   a%val)
    call move_alloc(b%ja,    a%ja)
    call move_alloc(b%idiag, a%idiag)
    call move_alloc(b%irn,   a%irn)
    call b%free()
    call a%set_host()

  class default
    call b%mv_to_coo(coo_buf,info)
    if (info == psb_success_) then
      call a%mv_from_coo(coo_buf,info)
    end if
  end select

end subroutine psb_z_mv_ell_from_fmt
subroutine psb_z_mv_ell_to_coo(a,b,info)
  !
  ! Move the ELL matrix A into the COO matrix B, releasing the pieces of A
  ! as soon as they are no longer needed.
  !
  !   a     (inout) source matrix, freed on success
  !   b     (inout) destination matrix
  !   info  (out)   code returned by the final b%fix on the normal path;
  !                 psb_err_alloc_dealloc_ if any (de)allocation fails
  !
  use psb_base_mod
  use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_to_coo
  implicit none

  class(psb_z_ell_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  !locals
  integer(psb_ipk_) :: nza, nr, i, j, k

  info = psb_success_
  if (a%is_dev()) call a%sync()

  nr  = a%get_nrows()
  nza = a%get_nzeros()

  ! Taking a path slightly slower but with less memory footprint:
  ! indices and coefficients are transferred in two separate sweeps so
  ! that a%ja can be released before b%val is allocated.

  ! idiag is not needed by the conversion. Guard the release: a bare
  ! DEALLOCATE on an unallocated array would abort the program instead
  ! of reporting through info.
  if (allocated(a%idiag)) deallocate(a%idiag, stat=info)
  if (info /= 0) goto 9999

  b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat

  call psb_realloc(nza,b%ia,info)
  if (info == 0) call psb_realloc(nza,b%ja,info)
  if (info /= 0) goto 9999

  ! First sweep: row and column indices
  k = 0
  do i=1, nr
    do j=1, a%irn(i)
      k = k + 1
      b%ia(k) = i
      b%ja(k) = a%ja(i,j)
    end do
  end do
  deallocate(a%ja, stat=info)

  if (info == 0) call psb_realloc(nza,b%val,info)
  if (info /= 0) goto 9999

  ! Second sweep: coefficients, in the same order as the indices
  k = 0
  do i=1, nr
    do j=1, a%irn(i)
      k = k + 1
      b%val(k) = a%val(i,j)
    end do
  end do

  call a%free()
  call b%set_nzeros(nza)
  call b%set_host()
  call b%fix(info)
  return

9999 continue
  info = psb_err_alloc_dealloc_
  return
end subroutine psb_z_mv_ell_to_coo
subroutine psb_z_mv_ell_to_fmt(a,b,info)
  !
  ! Move the ELL matrix A into B, whatever the storage format of B.
  !
  !   B is COO   -> delegated to a%mv_to_coo
  !   B is ELL   -> base-type data is assigned, the component arrays are
  !                 handed over with move_alloc, A is freed
  !   otherwise  -> A is moved to a temporary COO matrix which is then
  !                 moved into B
  !
  !   info  (out)  psb_success_ unless a delegated call reports otherwise
  !
  use psb_base_mod
  use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_to_fmt
  implicit none

  class(psb_z_ell_sparse_mat), intent(inout)  :: a
  class(psb_z_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)              :: info

  type(psb_z_coo_sparse_mat) :: coo_buf

  info = psb_success_

  select type (b)
  type is (psb_z_coo_sparse_mat)
    call a%mv_to_coo(b,info)

  ! Need to fix trivial copies!
  type is (psb_z_ell_sparse_mat)
    if (a%is_dev()) then
      call a%sync()
    end if
    b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
    ! No data is copied: ownership of each array goes from A to B
    call move_alloc(a%val,   b%val)
    call move_alloc(a%ja,    b%ja)
    call move_alloc(a%idiag, b%idiag)
    call move_alloc(a%irn,   b%irn)
    call a%free()
    call b%set_host()

  class default
    call a%mv_to_coo(coo_buf,info)
    if (info == psb_success_) then
      call b%mv_from_coo(coo_buf,info)
    end if
  end select

end subroutine psb_z_mv_ell_to_fmt
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
subroutine psb_z_mv_hdia_from_coo(a,b,info)
  !
  ! Build the HDIA matrix A from the COO matrix B, then release B.
  !
  !   a     (inout) destination matrix
  !   b     (inout) source matrix; fixed first if not already ordered by
  !                 rows, freed once the copy has succeeded
  !   info  (out)   psb_success_ on success; the code returned by b%fix if
  !                 that step fails; psb_err_alloc_dealloc_ if the copy fails
  !
  use psb_base_mod
  use psb_z_hdia_mat_mod, psb_protect_name => psb_z_mv_hdia_from_coo
  implicit none

  class(psb_z_hdia_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout)  :: b
  integer(psb_ipk_), intent(out)              :: info

  info = psb_success_

  if (.not.(b%is_by_rows())) then
    call b%fix(info)
    if (info /= psb_success_) return
  end if

  call a%cp_from_coo(b,info)
  if (info /= 0) then
    ! Any copy failure is reported as an allocation problem
    info = psb_err_alloc_dealloc_
    return
  end if

  call b%free()

end subroutine psb_z_mv_hdia_from_coo
subroutine psb_z_mv_hdia_to_coo(a,b,info)
  !
  ! Copy the HDIA matrix A into the COO matrix B, then release A.
  !
  !   a     (inout) source matrix, freed only after a successful copy
  !   b     (inout) destination matrix
  !   info  (out)   psb_success_, or psb_err_alloc_dealloc_ if the copy fails
  !
  use psb_base_mod
  use psb_z_hdia_mat_mod, psb_protect_name => psb_z_mv_hdia_to_coo
  implicit none

  class(psb_z_hdia_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout)  :: b
  integer(psb_ipk_), intent(out)              :: info

  info = psb_success_

  call a%cp_to_coo(b,info)
  if (info == 0) then
    call a%free()
  else
    info = psb_err_alloc_dealloc_
  end if

end subroutine psb_z_mv_hdia_to_coo
subroutine psb_z_mv_hll_from_coo(a,b,info)
  !
  ! Build the HLL matrix A from the COO matrix B, then release B.
  !
  !   a     (inout) destination matrix
  !   b     (inout) source matrix; fixed first if not already ordered by
  !                 rows, freed once the conversion has succeeded
  !   info  (out)   psb_success_ on success; the code returned by b%fix if
  !                 that step fails; psb_err_alloc_dealloc_ if the
  !                 conversion fails
  !
  use psb_base_mod
  use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_from_coo
  implicit none

  class(psb_z_hll_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  !locals
  integer(psb_ipk_) :: hksz

  info = psb_success_

  if (.not.b%is_by_rows()) call b%fix(info)
  ! Do not convert from a matrix that could not be put in row order;
  ! same check as in the other *_from_coo movers.
  if (info /= psb_success_) return

  ! Hack size taken from the library-wide setting
  hksz = psi_get_hksz()
  call psi_convert_hll_from_coo(a,hksz,b,info)
  if (info /= 0) goto 9999

  call b%free()
  return

9999 continue
  info = psb_err_alloc_dealloc_
  return

end subroutine psb_z_mv_hll_from_coo
Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
subroutine psb_z_mv_hll_from_fmt(a,b,info)
  !
  ! Move the contents of B, whatever its storage format, into the HLL
  ! matrix A.
  !
  !   B is COO   -> delegated to a%mv_from_coo
  !   B is HLL   -> base-type data, hksz and nzt are assigned, the
  !                 component arrays are handed over with move_alloc,
  !                 B is freed
  !   otherwise  -> B is moved to a temporary COO matrix which is then
  !                 moved into A
  !
  !   info  (out)  psb_success_ unless a delegated call reports otherwise
  !
  use psb_base_mod
  use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_from_fmt
  implicit none

  class(psb_z_hll_sparse_mat), intent(inout)  :: a
  class(psb_z_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)              :: info

  type(psb_z_coo_sparse_mat) :: coo_buf

  info = psb_success_

  select type (b)
  type is (psb_z_coo_sparse_mat)
    call a%mv_from_coo(b,info)

  type is (psb_z_hll_sparse_mat)
    if (b%is_dev()) then
      call b%sync()
    end if
    a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
    ! Scalar descriptors first, then the arrays (no data is copied)
    a%hksz = b%hksz
    a%nzt  = b%nzt
    call move_alloc(b%val,    a%val)
    call move_alloc(b%ja,     a%ja)
    call move_alloc(b%hkoffs, a%hkoffs)
    call move_alloc(b%idiag,  a%idiag)
    call move_alloc(b%irn,    a%irn)
    call b%free()
    call a%set_host()

  class default
    call b%mv_to_coo(coo_buf,info)
    if (info == psb_success_) then
      call a%mv_from_coo(coo_buf,info)
    end if
  end select

end subroutine psb_z_mv_hll_from_fmt
subroutine psb_z_mv_hll_to_coo(a,b,info)
  !
  ! Copy the HLL matrix A into the COO matrix B, then release A.
  !
  !   a     (inout) source matrix, freed only after a successful copy
  !   b     (inout) destination matrix
  !   info  (out)   psb_success_, or psb_err_alloc_dealloc_ if the copy fails
  !
  use psb_base_mod
  use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_to_coo
  implicit none

  class(psb_z_hll_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)             :: info

  info = psb_success_

  call a%cp_to_coo(b,info)
  if (info == psb_success_) then
    call a%free()
  else
    info = psb_err_alloc_dealloc_
  end if

end subroutine psb_z_mv_hll_to_coo
subroutine psb_z_mv_hll_to_fmt(a,b,info)
  !
  ! Move the HLL matrix A into B, whatever the storage format of B.
  !
  !   B is COO   -> delegated to a%mv_to_coo
  !   B is HLL   -> base-type data, hksz and nzt are assigned, the
  !                 component arrays are handed over with move_alloc,
  !                 A is freed
  !   otherwise  -> A is moved to a temporary COO matrix which is then
  !                 moved into B
  !
  !   info  (out)  psb_success_ unless a delegated call reports otherwise
  !
  use psb_base_mod
  use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_to_fmt
  implicit none

  class(psb_z_hll_sparse_mat), intent(inout)  :: a
  class(psb_z_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)              :: info

  !locals
  type(psb_z_coo_sparse_mat) :: tmp

  info = psb_success_

  select type (b)
  type is (psb_z_coo_sparse_mat)
    call a%mv_to_coo(b,info)

  ! Need to fix trivial copies!
  type is (psb_z_hll_sparse_mat)
    if (a%is_dev()) call a%sync()
    b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
    call move_alloc(a%irn,    b%irn)
    call move_alloc(a%hkoffs, b%hkoffs)
    call move_alloc(a%idiag,  b%idiag)
    call move_alloc(a%ja,     b%ja)
    call move_alloc(a%val,    b%val)
    b%hksz = a%hksz
    ! nzt must follow the data as well, exactly as psb_z_mv_hll_from_fmt
    ! does in the opposite direction; otherwise B keeps a stale value.
    b%nzt  = a%nzt
    call a%free()
    call b%set_host()

  class default
    call a%mv_to_coo(tmp,info)
    if (info == psb_success_) call b%mv_from_coo(tmp,info)
  end select

end subroutine psb_z_mv_hll_to_fmt
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Build the DIA representation of a from the COO matrix tmp.
+!
+! Steps: copy the base-class part of tmp into a, let
+! psi_dia_offset_from_coo work out the diagonals in use (nd of them, with
+! the lookup table d and the offsets in a%offset), size a%offset and
+! a%data(nr,nd) accordingly, then scatter the coefficients with
+! psi_xtr_dia_from_coo.
+!
+! a    - DIA matrix to fill; offset, data and nzeros are (re)built.
+! tmp  - source matrix in COO format.
+! info - psb_success_ on success, otherwise the first non-zero status
+!        returned by an allocation or by one of the helpers. The routine
+!        returns at that point, leaving a partially built.
+subroutine psi_c_convert_dia_from_coo(a,tmp,info)
+  use psb_base_mod
+  use psb_c_dia_mat_mod, psb_protect_name => psi_c_convert_dia_from_coo
+  use psi_ext_util_mod
+  implicit none
+  class(psb_c_dia_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)    :: tmp
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_)             :: ndiag,nd
+  integer(psb_ipk_),allocatable :: d(:)
+  integer(psb_ipk_)             :: nc,nr,nza
+
+  info = psb_success_
+  nr  = tmp%get_nrows()
+  nc  = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+  ! If it is sorted then we can lessen memory impact
+  a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat
+
+  ! An nr x nc matrix has at most nr+nc-1 distinct diagonals.
+  ndiag = nr+nc-1
+  allocate(d(ndiag),stat=info)
+  if (info /= 0) return
+  call psb_realloc(ndiag,a%offset,info)
+  if (info /= 0) return
+
+  call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, &
+       & nd,d,a%offset,info,initd=.true.,cleard=.false.)
+  ! Check here: the following psb_realloc would overwrite info.
+  if (info /= 0) return
+
+  ! Shrink the offsets to the diagonals actually present.
+  call psb_realloc(nd,a%offset,info)
+  if (info /= 0) return
+  call psb_realloc(nr,nd,a%data,info)
+  if (info /= 0) return
+  a%nzeros = nza
+
+  call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,&
+       & d,nr,nd,a%data,info,initdata=.true.)
+  ! Check here: the deallocate status below would overwrite info.
+  if (info /= 0) return
+
+  deallocate(d,stat=info)
+
+end subroutine psi_c_convert_dia_from_coo
diff --git a/ext/impl/psi_c_convert_ell_from_coo.f90 b/ext/impl/psi_c_convert_ell_from_coo.f90
new file mode 100644
index 00000000..b4e0c7e4
--- /dev/null
+++ b/ext/impl/psi_c_convert_ell_from_coo.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+!
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+! Build the ELL representation of a from the COO matrix tmp.
+!
+! a        - ELL matrix to fill; irn, idiag, ja, val and nzt are (re)built.
+! tmp      - source matrix in COO format.
+! info     - psb_success_, or the status of the psb_realloc that failed.
+! hacksize - optional; when greater than 1 the leading dimension of
+!            ja/val is rounded up to a multiple of it, otherwise it is
+!            the number of rows.
+!
+! NOTE(review): a%idiag(i) is only written by the extraction routine for
+! rows that contain a diagonal entry; for the other rows it keeps
+! whatever psb_realloc left there - confirm consumers cope with that.
+subroutine psi_c_convert_ell_from_coo(a,tmp,info,hacksize)
+
+  use psb_base_mod
+  use psb_c_ell_mat_mod, psb_protect_name => psi_c_convert_ell_from_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_c_ell_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)    :: tmp
+  integer(psb_ipk_), intent(out)             :: info
+  integer(psb_ipk_), intent(in), optional    :: hacksize
+
+  !locals
+  integer(psb_ipk_) :: nza, nr, i, nc, nzm, ir, hsz_, ldv
+
+  info = psb_success_
+
+  nr  = tmp%get_nrows()
+  nc  = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+
+  hsz_ = 1
+  if (present(hacksize)) then
+    if (hacksize> 1) hsz_ = hacksize
+  end if
+  ! Make ldv a multiple of hacksize
+  ldv = ((nr+hsz_-1)/hsz_)*hsz_
+
+  ! If it is sorted then we can lessen memory impact
+  a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat
+
+  ! First compute the number of nonzeros in each row.
+  call psb_realloc(nr,a%irn,info)
+  if (info /= psb_success_) return
+  a%irn = 0
+  do i=1, nza
+    ir = tmp%ia(i)
+    a%irn(ir) = a%irn(ir) + 1
+  end do
+  ! nzm: longest row, i.e. the width of the ELL arrays; nzt: total count.
+  nzm = 0
+  a%nzt = 0
+  do i=1,nr
+    nzm = max(nzm,a%irn(i))
+    a%nzt = a%nzt + a%irn(i)
+  end do
+  ! Allocate and extract.
+  call psb_realloc(nr,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info)
+  if (info /= psb_success_) return
+
+  ! The extraction below only writes rows 1:nr. When ldv was rounded up
+  ! the extra rows would otherwise hold arbitrary data: give them zero
+  ! coefficients and an in-range column index (nzm > 0 implies nc >= 1,
+  ! so min(nr,nc) >= 1 whenever the section is non-empty).
+  if (ldv > nr) then
+    a%ja(nr+1:ldv,1:nzm)  = min(nr,nc)
+    a%val(nr+1:ldv,1:nzm) = czero
+  end if
+
+  call psi_c_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,&
+       & a%ja,a%val,a%irn,a%idiag,ldv)
+
+end subroutine psi_c_convert_ell_from_coo
+
diff --git a/ext/impl/psi_c_convert_hll_from_coo.f90 b/ext/impl/psi_c_convert_hll_from_coo.f90
new file mode 100644
index 00000000..2ebb86a6
--- /dev/null
+++ b/ext/impl/psi_c_convert_hll_from_coo.f90
@@ -0,0 +1,122 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+!
are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Build the HLL (hacked ELL) representation of a from the COO matrix tmp.
+!
+! Rows are grouped in chunks of hksz rows; each chunk is stored as a
+! small ELL block of hksz x (longest row in the chunk) entries, and
+! a%hkoffs(j) is the offset of chunk j inside a%ja / a%val.
+!
+! a    - HLL matrix to fill; irn, hkoffs, idiag, ja, val, nzt and the
+!        hack size are (re)built.
+! hksz - number of rows per chunk.
+!        NOTE(review): used as a divisor and as a loop stride, so it is
+!        assumed to be positive - confirm callers guarantee this.
+! tmp  - source matrix in COO format; must report is_by_rows().
+! info - psb_success_ on success, -98765 when tmp is not ordered by
+!        rows, otherwise the status of the psb_realloc that failed.
+subroutine psi_c_convert_hll_from_coo(a,hksz,tmp,info)
+  use psb_base_mod
+  use psb_c_hll_mat_mod, psb_protect_name => psi_c_convert_hll_from_coo
+  use psi_ext_util_mod
+  implicit none
+  class(psb_c_hll_sparse_mat), intent(inout) :: a
+  class(psb_c_coo_sparse_mat), intent(in)    :: tmp
+  integer(psb_ipk_), intent(in)              :: hksz
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_) :: nza, nr, i, j, nc, isz, irs
+  integer(psb_ipk_) :: nzm, ir, k, hk, mxrwl, noffs, kc
+
+
+  ! The chunk-by-chunk copy below slices tmp%ia/ja/val contiguously,
+  ! which is only meaningful when the entries are ordered by rows.
+  if (.not.tmp%is_by_rows()) then
+    info = -98765
+    return
+  end if
+
+
+  nr  = tmp%get_nrows()
+  nc  = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+  ! If it is sorted then we can lessen memory impact
+  a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat
+
+  ! First compute the number of nonzeros in each row.
+  call psb_realloc(nr,a%irn,info)
+  if (info /= 0) return
+  a%irn = 0
+  do i=1, nza
+    a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1
+  end do
+
+  a%nzt = nza
+  ! Second. Figure out the block offsets.
+  call a%set_hksz(hksz)
+  noffs = (nr+hksz-1)/hksz
+  call psb_realloc(noffs+1,a%hkoffs,info)
+  if (info /= 0) return
+  a%hkoffs(1) = 0
+  j=1
+  do i=1,nr,hksz
+    ir    = min(hksz,nr-i+1)
+    mxrwl = a%irn(i)
+    do k=1,ir-1
+      mxrwl = max(mxrwl,a%irn(i+k))
+    end do
+    a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
+    j = j + 1
+  end do
+
+  !
+  ! At this point a%hkoffs(noffs+1) contains the allocation
+  ! size a%ja a%val.
+  !
+  isz = a%hkoffs(noffs+1)
+  call psb_realloc(nr,a%idiag,info)
+  if (info == 0) call psb_realloc(isz,a%ja,info)
+  if (info == 0) call psb_realloc(isz,a%val,info)
+  if (info /= 0) return
+  ! Init last chunk of data: it may hold fewer than hksz real rows, and
+  ! the copy below only writes the real ones.
+  ! With no rows at all there is no last chunk (noffs == 0), and
+  ! a%hkoffs(noffs) would be an out-of-bounds reference.
+  if (noffs > 0) then
+    nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs)
+    a%val(isz-(nzm-1):isz) = czero
+    a%ja(isz-(nzm-1):isz)  = nr
+  end if
+  !
+  ! Now copy everything, noting the position of the diagonal.
+  !
+  kc = 1
+  k  = 1
+  do i=1, nr,hksz
+    ir    = min(hksz,nr-i+1)
+    irs   = (i-1)/hksz
+    hk    = irs + 1
+    isz   = (a%hkoffs(hk+1)-a%hkoffs(hk))
+    mxrwl = isz/hksz
+    ! From here on nza is the number of entries of the current chunk.
+    nza   = sum(a%irn(i:i+ir-1))
+    call psi_c_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),&
+         & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),&
+         & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),&
+         & a%idiag(i:i+ir-1),hksz)
+    k  = k + isz
+    kc = kc + nza
+
+  enddo
+
+  ! Third copy the other stuff
+  if (info /= 0) return
+  call a%set_sorted(.true.)
+
+end subroutine psi_c_convert_hll_from_coo
diff --git a/ext/impl/psi_c_xtr_coo_from_dia.f90 b/ext/impl/psi_c_xtr_coo_from_dia.f90
new file mode 100644
index 00000000..eab82a11
--- /dev/null
+++ b/ext/impl/psi_c_xtr_coo_from_dia.f90
@@ -0,0 +1,80 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+!
Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+! Expand a block of diagonal (DIA) storage into coordinate triples.
+!
+! For each stored diagonal j the routine walks a range of local rows i
+! and emits (i+rdisp, i+rdisp+offsets(j), data(i,j)). Every slot in the
+! range is emitted; values are not tested against zero.
+!
+! nr, nc   - row/column limits used to bound the row range.
+! ia,ja,val- output triples; no check is made that they are large enough.
+! nz       - reset to zero on entry; on exit the number of triples written.
+! nrd, ncd - dimensions of data.
+! data     - diagonal storage, one diagonal per column.
+! offsets  - offset (column minus row) of each stored diagonal.
+! info     - always psb_success_.
+! rdisp    - optional row displacement added to the local row index
+!            (default 0).
+!
+! NOTE(review): the upper row bound is min(nr,nc)-rdisp (minus the offset
+! when it is non-negative); for non-square nr/nc this is tighter than the
+! separate row and column limits - confirm this is intended.
+subroutine psi_c_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp)
+  use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero
+
+  implicit none
+
+  integer(psb_ipk_), intent(in)    :: nr,nc, nrd,ncd, offsets(:)
+  integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz
+  complex(psb_spk_), intent(inout) :: val(:)
+  complex(psb_spk_), intent(in)    :: data(nrd,ncd)
+  integer(psb_ipk_), intent(out)   :: info
+  integer(psb_ipk_), intent(in), optional :: rdisp
+
+  !locals
+  integer(psb_ipk_) :: shift, rowlim, lowbase
+  integer(psb_ipk_) :: irow, jd, first, last, off
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  if (present(rdisp)) then
+    shift = rdisp
+  else
+    shift = 0
+  end if
+
+  if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, shift
+  rowlim  = min(nr-shift,nc-shift)
+  lowbase = 1-shift
+  nz = 0
+  do jd=1, ncd
+    off = offsets(jd)
+    if (off < 0) then
+      ! Sub-diagonal: skip the leading rows whose column would be < 1.
+      first = max(1, lowbase - off)
+      last  = min(nrd, rowlim)
+    else
+      ! Main or super-diagonal: stop early at the right-hand edge.
+      first = 1
+      last  = min(nrd, rowlim - off)
+    end if
+    if (debug) write(0,*) ' Loop J',jd,first,last, off
+    do irow=first,last
+      if (debug) write(0,*) ' Loop I',irow,irow+shift,irow+shift+off
+      nz      = nz + 1
+      ia(nz)  = irow + shift
+      ja(nz)  = irow + shift + off
+      val(nz) = data(irow,jd)
+    enddo
+  end do
+
+end subroutine psi_c_xtr_coo_from_dia
+
diff --git a/ext/impl/psi_c_xtr_dia_from_coo.f90 b/ext/impl/psi_c_xtr_dia_from_coo.f90
new file mode 100644
index 00000000..f72a03df
--- /dev/null
+++ b/ext/impl/psi_c_xtr_dia_from_coo.f90
@@ -0,0 +1,69 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+!
2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+! Scatter coordinate triples into diagonal (DIA) storage.
+!
+! Entry (ia(i),ja(i),val(i)) lies on the diagonal with offset
+! k = ja(i)-ia(i); d(nr+k) gives the column of data holding that
+! diagonal, and the value is stored at row ia(i)-rdisp of that column.
+!
+! nr       - used to index the lookup table d (position nr+k).
+! nc       - only referenced by the debug trace.
+! nz       - number of triples to process.
+! ia,ja,val- input triples.
+! d        - lookup table from diagonal offset to column of data.
+! nrd, ncd - dimensions of data.
+! data     - output diagonal storage.
+! info     - always psb_success_.
+! initdata - optional; when .true. (the default) data is zeroed first.
+! rdisp    - optional row displacement subtracted from ia(i) (default 0).
+subroutine psi_c_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,&
+     & initdata, rdisp)
+  use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero
+
+  implicit none
+  integer(psb_ipk_), intent(in)  :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:)
+  complex(psb_spk_), intent(in)  :: val(:)
+  complex(psb_spk_), intent(out) :: data(nrd,ncd)
+  integer(psb_ipk_), intent(out) :: info
+  logical, intent(in), optional  :: initdata
+  integer(psb_ipk_), intent(in), optional :: rdisp
+
+  !locals
+  logical            :: do_init
+  integer(psb_ipk_)  :: shift
+  integer(psb_ipk_)  :: p, row, dcol, koff
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+
+  ! Resolve the optional arguments to their effective values.
+  if (present(initdata)) then
+    do_init = initdata
+  else
+    do_init = .true.
+  end if
+  if (present(rdisp)) then
+    shift = rdisp
+  else
+    shift = 0
+  end if
+
+  if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,do_init, shift
+
+  if (do_init) data(:,:) = czero
+
+  do p=1,nz
+    row  = ia(p)
+    koff = ja(p) - row
+    dcol = d(nr+koff)
+    if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(p),ja(p),koff,row-shift,dcol
+    data(row-shift,dcol) = val(p)
+  enddo
+
+end subroutine psi_c_xtr_dia_from_coo
diff --git a/ext/impl/psi_c_xtr_ell_from_coo.f90 b/ext/impl/psi_c_xtr_ell_from_coo.f90
new file mode 100644
index 00000000..706e6c1f
--- /dev/null
+++ b/ext/impl/psi_c_xtr_ell_from_coo.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Copy a run of row-ordered COO entries into an ELL block.
+!
+! The entries in iac/jac/valc are consumed sequentially: the first irn(1)
+! go to row 1 of the block, the next irn(2) to row 2, and so on. Each row
+! is then padded up to mxrwl entries with a zero coefficient and a column
+! index taken from a real entry.
+!
+! i      - global index of the first row; only referenced by the
+!          commented-out consistency check.
+! nr     - number of rows to fill.
+! mxrwl  - width of the block (entries per row after padding).
+! iac, jac, valc - row indices, column indices and values of the entries.
+! ja, val- output block, leading dimension ld.
+! irn    - number of entries of each row.
+! diag   - diag(ii) is set to the position inside row ii of the entry
+!          whose row and column index coincide; it is not touched for
+!          rows without such an entry.
+! ld     - leading dimension of ja and val.
+subroutine psi_c_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, &
+     & ja,val,irn,diag,ld)
+  use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero
+
+  implicit none
+  integer(psb_ipk_) :: i,nr,mxrwl,ld
+  integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*)
+  complex(psb_spk_) :: valc(*), val(ld,*)
+
+  integer(psb_ipk_) :: ii,jj, kc,nc, ir, ic
+  kc = 1
+  do ii = 1, nr
+    nc = irn(ii)
+    do jj=1,nc
+      !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1
+      ir = iac(kc)
+      ic = jac(kc)
+      if (ir == ic) diag(ii) = jj
+      ja(ii,jj)  = ic
+      val(ii,jj) = valc(kc)
+      kc = kc + 1
+    end do
+    ! An empty row has no column index of its own to pad with, so borrow
+    ! the first one available. This is only needed, and only safe, when
+    ! there is something to pad: mxrwl > 0 means at least one row of the
+    ! block has an entry, hence jac(1) exists. With mxrwl == 0 the input
+    ! may be empty and jac(1) must not be read.
+    if ((nc == 0).and.(mxrwl > 0)) ic = jac(1)
+    do jj = nc+1,mxrwl
+      ja(ii,jj)  = ic
+      val(ii,jj) = czero
+    end do
+  end do
+end subroutine psi_c_xtr_ell_from_coo
+
diff --git a/ext/impl/psi_d_convert_dia_from_coo.f90 b/ext/impl/psi_d_convert_dia_from_coo.f90
new file mode 100644
index 00000000..5f821967
--- /dev/null
+++ b/ext/impl/psi_d_convert_dia_from_coo.f90
@@ -0,0 +1,73 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3.
The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Build the DIA representation of a from the COO matrix tmp.
+!
+! Steps: copy the base-class part of tmp into a, let
+! psi_dia_offset_from_coo work out the diagonals in use (nd of them, with
+! the lookup table d and the offsets in a%offset), size a%offset and
+! a%data(nr,nd) accordingly, then scatter the coefficients with
+! psi_xtr_dia_from_coo.
+!
+! a    - DIA matrix to fill; offset, data and nzeros are (re)built.
+! tmp  - source matrix in COO format.
+! info - psb_success_ on success, otherwise the first non-zero status
+!        returned by an allocation or by one of the helpers. The routine
+!        returns at that point, leaving a partially built.
+subroutine psi_d_convert_dia_from_coo(a,tmp,info)
+  use psb_base_mod
+  use psb_d_dia_mat_mod, psb_protect_name => psi_d_convert_dia_from_coo
+  use psi_ext_util_mod
+  implicit none
+  class(psb_d_dia_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: tmp
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_)             :: ndiag,nd
+  integer(psb_ipk_),allocatable :: d(:)
+  integer(psb_ipk_)             :: nc,nr,nza
+
+  info = psb_success_
+  nr  = tmp%get_nrows()
+  nc  = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+  ! If it is sorted then we can lessen memory impact
+  a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat
+
+  ! An nr x nc matrix has at most nr+nc-1 distinct diagonals.
+  ndiag = nr+nc-1
+  allocate(d(ndiag),stat=info)
+  if (info /= 0) return
+  call psb_realloc(ndiag,a%offset,info)
+  if (info /= 0) return
+
+  call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, &
+       & nd,d,a%offset,info,initd=.true.,cleard=.false.)
+  ! Check here: the following psb_realloc would overwrite info.
+  if (info /= 0) return
+
+  ! Shrink the offsets to the diagonals actually present.
+  call psb_realloc(nd,a%offset,info)
+  if (info /= 0) return
+  call psb_realloc(nr,nd,a%data,info)
+  if (info /= 0) return
+  a%nzeros = nza
+
+  call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,&
+       & d,nr,nd,a%data,info,initdata=.true.)
+  ! Check here: the deallocate status below would overwrite info.
+  if (info /= 0) return
+
+  deallocate(d,stat=info)
+
+end subroutine psi_d_convert_dia_from_coo
diff --git a/ext/impl/psi_d_convert_ell_from_coo.f90 b/ext/impl/psi_d_convert_ell_from_coo.f90
new file mode 100644
index 00000000..51471c19
--- /dev/null
+++ b/ext/impl/psi_d_convert_ell_from_coo.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! Build the ELL representation of a from the COO matrix tmp.
+!
+! a        - ELL matrix to fill; irn, idiag, ja, val and nzt are (re)built.
+! tmp      - source matrix in COO format.
+! info     - psb_success_, or the status of the psb_realloc that failed.
+! hacksize - optional; when greater than 1 the leading dimension of
+!            ja/val is rounded up to a multiple of it, otherwise it is
+!            the number of rows.
+!
+! NOTE(review): a%idiag(i) is only written by the extraction routine for
+! rows that contain a diagonal entry; for the other rows it keeps
+! whatever psb_realloc left there - confirm consumers cope with that.
+subroutine psi_d_convert_ell_from_coo(a,tmp,info,hacksize)
+
+  use psb_base_mod
+  use psb_d_ell_mat_mod, psb_protect_name => psi_d_convert_ell_from_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_d_ell_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: tmp
+  integer(psb_ipk_), intent(out)             :: info
+  integer(psb_ipk_), intent(in), optional    :: hacksize
+
+  !locals
+  integer(psb_ipk_) :: nza, nr, i, nc, nzm, ir, hsz_, ldv
+
+  info = psb_success_
+
+  nr  = tmp%get_nrows()
+  nc  = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+
+  hsz_ = 1
+  if (present(hacksize)) then
+    if (hacksize> 1) hsz_ = hacksize
+  end if
+  ! Make ldv a multiple of hacksize
+  ldv = ((nr+hsz_-1)/hsz_)*hsz_
+
+  ! If it is sorted then we can lessen memory impact
+  a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat
+
+  ! First compute the number of nonzeros in each row.
+  call psb_realloc(nr,a%irn,info)
+  if (info /= psb_success_) return
+  a%irn = 0
+  do i=1, nza
+    ir = tmp%ia(i)
+    a%irn(ir) = a%irn(ir) + 1
+  end do
+  ! nzm: longest row, i.e. the width of the ELL arrays; nzt: total count.
+  nzm = 0
+  a%nzt = 0
+  do i=1,nr
+    nzm = max(nzm,a%irn(i))
+    a%nzt = a%nzt + a%irn(i)
+  end do
+  ! Allocate and extract.
+  call psb_realloc(nr,a%idiag,info)
+  if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info)
+  if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info)
+  if (info /= psb_success_) return
+
+  ! The extraction below only writes rows 1:nr. When ldv was rounded up
+  ! the extra rows would otherwise hold arbitrary data: give them zero
+  ! coefficients and an in-range column index (nzm > 0 implies nc >= 1,
+  ! so min(nr,nc) >= 1 whenever the section is non-empty).
+  if (ldv > nr) then
+    a%ja(nr+1:ldv,1:nzm)  = min(nr,nc)
+    a%val(nr+1:ldv,1:nzm) = dzero
+  end if
+
+  call psi_d_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,&
+       & a%ja,a%val,a%irn,a%idiag,ldv)
+
+end subroutine psi_d_convert_ell_from_coo
+
diff --git a/ext/impl/psi_d_convert_hll_from_coo.f90 b/ext/impl/psi_d_convert_hll_from_coo.f90
new file mode 100644
index 00000000..cb07e52c
--- /dev/null
+++ b/ext/impl/psi_d_convert_hll_from_coo.f90
@@ -0,0 +1,122 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+!
Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+! Build the HLL (hacked ELL) representation of a from the COO matrix tmp.
+!
+! Rows are grouped in chunks of hksz rows; each chunk is stored as a
+! small ELL block of hksz x (longest row in the chunk) entries, and
+! a%hkoffs(j) is the offset of chunk j inside a%ja / a%val.
+!
+! a    - HLL matrix to fill; irn, hkoffs, idiag, ja, val, nzt and the
+!        hack size are (re)built.
+! hksz - number of rows per chunk.
+!        NOTE(review): used as a divisor and as a loop stride, so it is
+!        assumed to be positive - confirm callers guarantee this.
+! tmp  - source matrix in COO format; must report is_by_rows().
+! info - psb_success_ on success, -98765 when tmp is not ordered by
+!        rows, otherwise the status of the psb_realloc that failed.
+subroutine psi_d_convert_hll_from_coo(a,hksz,tmp,info)
+  use psb_base_mod
+  use psb_d_hll_mat_mod, psb_protect_name => psi_d_convert_hll_from_coo
+  use psi_ext_util_mod
+  implicit none
+  class(psb_d_hll_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: tmp
+  integer(psb_ipk_), intent(in)              :: hksz
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  integer(psb_ipk_) :: nza, nr, i, j, nc, isz, irs
+  integer(psb_ipk_) :: nzm, ir, k, hk, mxrwl, noffs, kc
+
+
+  ! The chunk-by-chunk copy below slices tmp%ia/ja/val contiguously,
+  ! which is only meaningful when the entries are ordered by rows.
+  if (.not.tmp%is_by_rows()) then
+    info = -98765
+    return
+  end if
+
+
+  nr  = tmp%get_nrows()
+  nc  = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+  ! If it is sorted then we can lessen memory impact
+  a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat
+
+  ! First compute the number of nonzeros in each row.
+  call psb_realloc(nr,a%irn,info)
+  if (info /= 0) return
+  a%irn = 0
+  do i=1, nza
+    a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1
+  end do
+
+  a%nzt = nza
+  ! Second. Figure out the block offsets.
+  call a%set_hksz(hksz)
+  noffs = (nr+hksz-1)/hksz
+  call psb_realloc(noffs+1,a%hkoffs,info)
+  if (info /= 0) return
+  a%hkoffs(1) = 0
+  j=1
+  do i=1,nr,hksz
+    ir    = min(hksz,nr-i+1)
+    mxrwl = a%irn(i)
+    do k=1,ir-1
+      mxrwl = max(mxrwl,a%irn(i+k))
+    end do
+    a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
+    j = j + 1
+  end do
+
+  !
+  ! At this point a%hkoffs(noffs+1) contains the allocation
+  ! size a%ja a%val.
+  !
+  isz = a%hkoffs(noffs+1)
+  call psb_realloc(nr,a%idiag,info)
+  if (info == 0) call psb_realloc(isz,a%ja,info)
+  if (info == 0) call psb_realloc(isz,a%val,info)
+  if (info /= 0) return
+  ! Init last chunk of data: it may hold fewer than hksz real rows, and
+  ! the copy below only writes the real ones.
+  ! With no rows at all there is no last chunk (noffs == 0), and
+  ! a%hkoffs(noffs) would be an out-of-bounds reference.
+  if (noffs > 0) then
+    nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs)
+    a%val(isz-(nzm-1):isz) = dzero
+    a%ja(isz-(nzm-1):isz)  = nr
+  end if
+  !
+  ! Now copy everything, noting the position of the diagonal.
+  !
+  kc = 1
+  k  = 1
+  do i=1, nr,hksz
+    ir    = min(hksz,nr-i+1)
+    irs   = (i-1)/hksz
+    hk    = irs + 1
+    isz   = (a%hkoffs(hk+1)-a%hkoffs(hk))
+    mxrwl = isz/hksz
+    ! From here on nza is the number of entries of the current chunk.
+    nza   = sum(a%irn(i:i+ir-1))
+    call psi_d_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),&
+         & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),&
+         & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),&
+         & a%idiag(i:i+ir-1),hksz)
+    k  = k + isz
+    kc = kc + nza
+
+  enddo
+
+  ! Third copy the other stuff
+  if (info /= 0) return
+  call a%set_sorted(.true.)
+
+end subroutine psi_d_convert_hll_from_coo
diff --git a/ext/impl/psi_d_xtr_coo_from_dia.f90 b/ext/impl/psi_d_xtr_coo_from_dia.f90
new file mode 100644
index 00000000..5fc98b82
--- /dev/null
+++ b/ext/impl/psi_d_xtr_coo_from_dia.f90
@@ -0,0 +1,80 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+! Expand a block of diagonal (DIA) storage into coordinate triples.
+!
+! For each stored diagonal j the routine walks a range of local rows i
+! and emits (i+rdisp, i+rdisp+offsets(j), data(i,j)). Every slot in the
+! range is emitted; values are not tested against zero.
+!
+! nr, nc   - row/column limits used to bound the row range.
+! ia,ja,val- output triples; no check is made that they are large enough.
+! nz       - reset to zero on entry; on exit the number of triples written.
+! nrd, ncd - dimensions of data.
+! data     - diagonal storage, one diagonal per column.
+! offsets  - offset (column minus row) of each stored diagonal.
+! info     - always psb_success_.
+! rdisp    - optional row displacement added to the local row index
+!            (default 0).
+!
+! NOTE(review): the upper row bound is min(nr,nc)-rdisp (minus the offset
+! when it is non-negative); for non-square nr/nc this is tighter than the
+! separate row and column limits - confirm this is intended.
+subroutine psi_d_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp)
+  use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero
+
+  implicit none
+
+  integer(psb_ipk_), intent(in)    :: nr,nc, nrd,ncd, offsets(:)
+  integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz
+  real(psb_dpk_), intent(inout)    :: val(:)
+  real(psb_dpk_), intent(in)       :: data(nrd,ncd)
+  integer(psb_ipk_), intent(out)   :: info
+  integer(psb_ipk_), intent(in), optional :: rdisp
+
+  !locals
+  integer(psb_ipk_) :: shift, rowlim, lowbase
+  integer(psb_ipk_) :: irow, jd, first, last, off
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  if (present(rdisp)) then
+    shift = rdisp
+  else
+    shift = 0
+  end if
+
+  if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, shift
+  rowlim  = min(nr-shift,nc-shift)
+  lowbase = 1-shift
+  nz = 0
+  do jd=1, ncd
+    off = offsets(jd)
+    if (off < 0) then
+      ! Sub-diagonal: skip the leading rows whose column would be < 1.
+      first = max(1, lowbase - off)
+      last  = min(nrd, rowlim)
+    else
+      ! Main or super-diagonal: stop early at the right-hand edge.
+      first = 1
+      last  = min(nrd, rowlim - off)
+    end if
+    if (debug) write(0,*) ' Loop J',jd,first,last, off
+    do irow=first,last
+      if (debug) write(0,*) ' Loop I',irow,irow+shift,irow+shift+off
+      nz      = nz + 1
+      ia(nz)  = irow + shift
+      ja(nz)  = irow + shift + off
+      val(nz) = data(irow,jd)
+    enddo
+  end do
+
+end subroutine psi_d_xtr_coo_from_dia
+
diff --git a/ext/impl/psi_d_xtr_dia_from_coo.f90 b/ext/impl/psi_d_xtr_dia_from_coo.f90
new file mode 100644
index 00000000..cd95b64e
--- /dev/null
+++ b/ext/impl/psi_d_xtr_dia_from_coo.f90
@@ -0,0 +1,69 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psi_d_xtr_dia_from_coo
+!   Scatter coordinate entries into diagonal (DIA) storage.
+!   Entry k, with row ia(k) and column ja(k), is stored at
+!       data( ia(k)-rdisp , d(nr + ja(k)-ia(k)) )
+!   so d maps "nr + diagonal offset" to a column of data. When two
+!   entries land on the same slot the later one overwrites the earlier.
+!
+!   nr          (in)  shift applied to the diagonal offset when indexing d
+!   nc          (in)  only printed by the debug trace
+!   nz          (in)  number of entries read from ia, ja, val
+!   ia, ja, val (in)  row indices, column indices and values
+!   d           (in)  diagonal-to-column lookup table
+!   nrd, ncd    (in)  shape of data
+!   data        (out) data(nrd,ncd); filled with dzero first unless
+!                     initdata is present and .false.
+!   info        (out) always psb_success_
+!   initdata    (in, optional) default .true.
+!   rdisp       (in, optional) row displacement, default 0
+!
+subroutine psi_d_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,&
+     & initdata, rdisp)
+  use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero
+
+  implicit none
+  integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:)
+  real(psb_dpk_), intent(in) :: val(:)
+  real(psb_dpk_), intent(out) :: data(nrd,ncd)
+  integer(psb_ipk_), intent(out) :: info
+  logical, intent(in), optional :: initdata
+  integer(psb_ipk_), intent(in), optional :: rdisp
+
+  ! locals
+  logical, parameter :: debug=.false.
+  logical :: clear_first
+  integer(psb_ipk_) :: row_shift
+  integer(psb_ipk_) :: idx, lrow, dcol, koff
+
+  info = psb_success_
+
+  ! Resolve the optional arguments to their effective values.
+  if (present(initdata)) then
+    clear_first = initdata
+  else
+    clear_first = .true.
+  end if
+  if (present(rdisp)) then
+    row_shift = rdisp
+  else
+    row_shift = 0
+  end if
+
+  if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,clear_first, row_shift
+
+  if (clear_first) data(:,:) = dzero
+
+  do idx = 1, nz
+    koff = ja(idx) - ia(idx)      ! diagonal offset of this entry
+    lrow = ia(idx) - row_shift    ! row inside data
+    dcol = d(nr+koff)             ! column of data holding that diagonal
+    if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(idx),ja(idx),koff,lrow,dcol
+    data(lrow,dcol) = val(idx)
+  end do
+
+end subroutine psi_d_xtr_dia_from_coo
diff --git a/ext/impl/psi_d_xtr_ell_from_coo.f90 b/ext/impl/psi_d_xtr_ell_from_coo.f90
new file mode 100644
index 00000000..ec520797
--- /dev/null
+++ b/ext/impl/psi_d_xtr_ell_from_coo.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. 
Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psi_d_xtr_ell_from_coo
+!   Copy a run of coordinate entries, grouped row by row, into an
+!   ELLPACK-style slab ja(ld,*) / val(ld,*).
+!   Row ii of the slab receives the next irn(ii) entries of iac/jac/valc.
+!   Positions irn(ii)+1..mxrwl of that row are padded with value dzero and
+!   a column index taken from a real entry.
+!
+!   i      global index of the first row; used only by the commented-out
+!          consistency check
+!   nr     number of rows to fill
+!   mxrwl  number of slab columns to fill for every row
+!   iac, jac, valc  row indices, column indices, values of the entries
+!   ja, val         slab with leading dimension ld (written)
+!   irn    irn(ii) = number of entries belonging to row ii
+!   diag   diag(ii) = slab column holding the entry with row == column,
+!          or 0 when row ii has no such entry (written)
+!   ld     leading dimension of ja and val
+!
+subroutine psi_d_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, &
+     & ja,val,irn,diag,ld)
+  use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero
+
+  implicit none
+  integer(psb_ipk_) :: i,nr,mxrwl,ld
+  integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*)
+  real(psb_dpk_) :: valc(*), val(ld,*)
+
+  integer(psb_ipk_) :: ii,jj, kc,nc, ir, ic
+  kc = 1
+  do ii = 1, nr
+    nc = irn(ii)
+    ! Give the diagonal marker a defined value even when the row has no
+    ! diagonal entry; otherwise the slot keeps whatever it held on entry.
+    diag(ii) = 0
+    do jj=1,nc
+      !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1
+      ir = iac(kc)
+      ic = jac(kc)
+      if (ir == ic) diag(ii) = jj
+      ja(ii,jj) = ic
+      val(ii,jj) = valc(kc)
+      kc = kc + 1
+    end do
+    ! Padding needs a valid column index. A non-empty row reuses its last
+    ! column. An empty row takes the first column of the chunk. That read
+    ! is done only when padding is actually required (mxrwl > 0), because
+    ! a chunk with no entries at all has no jac(1) to read.
+    if ((nc == 0) .and. (mxrwl > 0)) ic = jac(1)
+    do jj = nc+1,mxrwl
+      ja(ii,jj) = ic
+      val(ii,jj) = dzero
+    end do
+  end do
+end subroutine psi_d_xtr_ell_from_coo
+
diff --git a/ext/impl/psi_s_convert_dia_from_coo.f90 b/ext/impl/psi_s_convert_dia_from_coo.f90
new file mode 100644
index 00000000..d2f58778
--- /dev/null
+++ b/ext/impl/psi_s_convert_dia_from_coo.f90
@@ -0,0 +1,73 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+!
+! psi_s_convert_dia_from_coo
+!   Build the diagonal-format matrix a from the coordinate matrix tmp.
+!   Steps:
+!     1. copy the parent-type component of tmp into a;
+!     2. psi_dia_offset_from_coo returns the diagonal count nd, fills
+!        a%offset and the lookup table d;
+!     3. a%offset is resized to nd and a%data to (nr,nd);
+!     4. psi_xtr_dia_from_coo scatters the values into a%data.
+!
+!   a    (inout)  matrix being built
+!   tmp  (in)     source coordinate matrix
+!   info (out)    psb_success_ on success, otherwise the first non-zero
+!                 status returned by an allocation or by a helper
+!
+subroutine psi_s_convert_dia_from_coo(a,tmp,info)
+  use psb_base_mod
+  use psb_s_dia_mat_mod, psb_protect_name => psi_s_convert_dia_from_coo
+  use psi_ext_util_mod
+  implicit none
+  class(psb_s_dia_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in) :: tmp
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  integer(psb_ipk_) :: ndiag,nd
+  integer(psb_ipk_),allocatable :: d(:)
+  integer(psb_ipk_) :: nc,nr,nza
+
+  info = psb_success_
+  nr = tmp%get_nrows()
+  nc = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+  ! Copy the parent-type component of tmp into a.
+  a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat
+
+  ! One slot per possible diagonal offset, -(nr-1)..(nc-1). Clamped so
+  ! that an empty matrix asks for length 0 rather than -1.
+  ndiag = max(nr+nc-1, 0)
+  allocate(d(ndiag),stat=info)
+  if (info /= 0) return
+  call psb_realloc(ndiag,a%offset,info)
+  if (info /= 0) return
+
+  call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, &
+       & nd,d,a%offset,info,initd=.true.,cleard=.false.)
+  ! Stop here on failure: the next call reuses info and would hide it.
+  if (info /= 0) return
+
+  call psb_realloc(nd,a%offset,info)
+  if (info /= 0) return
+  call psb_realloc(nr,nd,a%data,info)
+  if (info /= 0) return
+  a%nzeros = nza
+
+  call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,&
+       & d,nr,nd,a%data,info,initdata=.true.)
+  ! Same reason: deallocate below stores its own status in info.
+  ! d is a local allocatable, so it is released automatically on return.
+  if (info /= 0) return
+
+  deallocate(d,stat=info)
+  if (info /= 0) return
+
+end subroutine psi_s_convert_dia_from_coo
diff --git a/ext/impl/psi_s_convert_ell_from_coo.f90 b/ext/impl/psi_s_convert_ell_from_coo.f90
new file mode 100644
index 00000000..ecdd9b1e
--- /dev/null
+++ b/ext/impl/psi_s_convert_ell_from_coo.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psi_s_convert_ell_from_coo
+!   Build the ELLPACK-format matrix a from the coordinate matrix tmp.
+!   a%irn receives the entry count of every row, a%nzt their total.
+!   a%ja and a%val are sized (ldim, widest), where widest is the largest
+!   row count and ldim is the row count rounded up to a multiple of the
+!   hack size. The entries are then copied by psi_s_xtr_ell_from_coo.
+!   NOTE(review): that helper consumes entries row by row, so tmp is
+!   presumably expected to be ordered by rows; no check is made here.
+!
+!   a        (inout)        matrix being built
+!   tmp      (in)           source coordinate matrix
+!   info     (out)          psb_success_, or the status of a failed
+!                           psb_realloc
+!   hacksize (in, optional) used when greater than 1, otherwise 1
+!
+subroutine psi_s_convert_ell_from_coo(a,tmp,info,hacksize)
+
+  use psb_base_mod
+  use psb_s_ell_mat_mod, psb_protect_name => psi_s_convert_ell_from_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_s_ell_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in) :: tmp
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_), intent(in), optional :: hacksize
+
+  ! locals
+  integer(psb_ipk_) :: nrows, ncols, nnz, pos, row
+  integer(psb_ipk_) :: widest, hack, ldim
+
+  info = psb_success_
+
+  nrows = tmp%get_nrows()
+  ncols = tmp%get_ncols()
+  nnz = tmp%get_nzeros()
+
+  ! Effective hack size: the caller's value when it exceeds 1, else 1.
+  hack = 1
+  if (present(hacksize)) hack = max(hack, hacksize)
+  ! Leading dimension: nrows rounded up to a multiple of hack.
+  ldim = ((nrows+hack-1)/hack)*hack
+
+  ! Copy the parent-type component of tmp into a.
+  a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat
+
+  ! Count the entries of every row.
+  call psb_realloc(nrows,a%irn,info)
+  if (info /= psb_success_) return
+  a%irn = 0
+  do pos = 1, nnz
+    a%irn(tmp%ia(pos)) = a%irn(tmp%ia(pos)) + 1
+  end do
+
+  ! Longest row and total entry count.
+  widest = 0
+  a%nzt = 0
+  do row = 1, nrows
+    if (a%irn(row) > widest) widest = a%irn(row)
+    a%nzt = a%nzt + a%irn(row)
+  end do
+
+  ! Size the output arrays, giving up at the first failure.
+  call psb_realloc(nrows,a%idiag,info)
+  if (info /= psb_success_) return
+  call psb_realloc(ldim,widest,a%ja,info)
+  if (info /= psb_success_) return
+  call psb_realloc(ldim,widest,a%val,info)
+  if (info /= psb_success_) return
+
+  call psi_s_xtr_ell_from_coo(ione,nrows,widest,tmp%ia,tmp%ja,tmp%val,&
+       & a%ja,a%val,a%irn,a%idiag,ldim)
+
+end subroutine psi_s_convert_ell_from_coo
+
diff --git a/ext/impl/psi_s_convert_hll_from_coo.f90 b/ext/impl/psi_s_convert_hll_from_coo.f90
new file mode 100644
index 00000000..dcf6c4e2
--- /dev/null
+++ b/ext/impl/psi_s_convert_hll_from_coo.f90
@@ -0,0 +1,122 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psi_s_convert_hll_from_coo
+!   Build the hacked-ELLPACK matrix a from the coordinate matrix tmp.
+!   Rows are grouped in hacks of hksz consecutive rows. Hack j occupies
+!   positions a%hkoffs(j)+1 .. a%hkoffs(j+1) of a%ja / a%val, a slab of
+!   hksz * (longest row in the hack) elements that is filled by
+!   psi_s_xtr_ell_from_coo with leading dimension hksz.
+!
+!   a    (inout)  matrix being built
+!   hksz (in)     hack size. NOTE(review): used as divisor and loop
+!                 stride, so it is assumed to be >= 1; not validated.
+!   tmp  (in)     source coordinate matrix; must report is_by_rows()
+!   info (out)    psb_success_ on success, -98765 when tmp is not ordered
+!                 by rows, otherwise the status of a failed psb_realloc
+!
+subroutine psi_s_convert_hll_from_coo(a,hksz,tmp,info)
+  use psb_base_mod
+  use psb_s_hll_mat_mod, psb_protect_name => psi_s_convert_hll_from_coo
+  use psi_ext_util_mod
+  implicit none
+  class(psb_s_hll_sparse_mat), intent(inout) :: a
+  class(psb_s_coo_sparse_mat), intent(in) :: tmp
+  integer(psb_ipk_), intent(in) :: hksz
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  integer(psb_ipk_) :: nza, nr, i,j, nc, isz
+  integer(psb_ipk_) :: nzm, ir, k, hk, mxrwl, noffs, kc, nzh
+
+  info = psb_success_
+
+  ! The copy loop below takes consecutive runs of tmp as whole rows.
+  if (.not.tmp%is_by_rows()) then
+    info = -98765
+    return
+  end if
+
+  nr = tmp%get_nrows()
+  nc = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+  ! Copy the parent-type component of tmp into a.
+  a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat
+
+  ! First compute the number of nonzeros in each row.
+  call psb_realloc(nr,a%irn,info)
+  if (info /= 0) return
+  a%irn = 0
+  do i=1, nza
+    a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1
+  end do
+
+  a%nzt = nza
+  ! Second. Figure out the block offsets.
+  call a%set_hksz(hksz)
+  noffs = (nr+hksz-1)/hksz
+  call psb_realloc(noffs+1,a%hkoffs,info)
+  if (info /= 0) return
+  a%hkoffs(1) = 0
+  j=1
+  do i=1,nr,hksz
+    ir = min(hksz,nr-i+1)
+    mxrwl = a%irn(i)
+    do k=1,ir-1
+      mxrwl = max(mxrwl,a%irn(i+k))
+    end do
+    a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
+    j = j + 1
+  end do
+
+  !
+  ! At this point a%hkoffs(noffs+1) contains the allocation
+  ! size of a%ja and a%val.
+  !
+  isz = a%hkoffs(noffs+1)
+  call psb_realloc(nr,a%idiag,info)
+  if (info == 0) call psb_realloc(isz,a%ja,info)
+  if (info == 0) call psb_realloc(isz,a%val,info)
+  if (info /= 0) return
+  ! Pre-fill the slab of the last hack: when it holds fewer than hksz
+  ! rows, the copy below leaves its unused rows untouched. Skipped for a
+  ! matrix with no rows, where noffs == 0 and a%hkoffs(noffs) would be
+  ! an out-of-bounds read.
+  if (noffs > 0) then
+    nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs)
+    a%val(isz-(nzm-1):isz) = szero
+    a%ja(isz-(nzm-1):isz) = nr
+  end if
+  !
+  ! Now copy everything, noting the position of the diagonal.
+  !
+  kc = 1
+  k = 1
+  do i=1, nr,hksz
+    ir = min(hksz,nr-i+1)
+    hk = (i-1)/hksz + 1
+    isz = (a%hkoffs(hk+1)-a%hkoffs(hk))
+    mxrwl = isz/hksz
+    ! Number of entries belonging to the rows of this hack.
+    nzh = sum(a%irn(i:i+ir-1))
+    call psi_s_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nzh-1),&
+         & tmp%ja(kc:kc+nzh-1),tmp%val(kc:kc+nzh-1),&
+         & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),&
+         & a%idiag(i:i+ir-1),hksz)
+    k = k + isz
+    kc = kc + nzh
+
+  enddo
+
+  ! Third, record the final state.
+  call a%set_sorted(.true.)
+
+end subroutine psi_s_convert_hll_from_coo
diff --git a/ext/impl/psi_s_xtr_coo_from_dia.f90 b/ext/impl/psi_s_xtr_coo_from_dia.f90
new file mode 100644
index 00000000..3745365b
--- /dev/null
+++ b/ext/impl/psi_s_xtr_coo_from_dia.f90
@@ -0,0 +1,80 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psi_s_xtr_coo_from_dia
+!   Expand a block of diagonal (DIA) storage into coordinate triples.
+!   Slot data(i,j) is emitted as the entry at
+!       row    = i + rdisp
+!       column = i + rdisp + offsets(j)
+!   Only slots with 1 <= row <= nr and 1 <= column <= nc are emitted, and
+!   every such slot is copied, explicit zeros included.
+!
+!   nr, nc     (in)     dimensions used to clip rows and columns
+!   ia, ja, val(inout)  row index, column index and value of each emitted
+!                       entry, in positions 1..nz. No capacity check is
+!                       made: the caller must provide enough room.
+!   nz         (inout)  reset to 0 on entry; number of emitted entries on exit
+!   nrd, ncd   (in)     shape of data
+!   data       (in)     data(nrd,ncd), one column per stored diagonal
+!   offsets    (in)     offsets(j) = column index minus row index on diagonal j
+!   info       (out)    always psb_success_
+!   rdisp      (in, optional) row displacement of the block, default 0
+!
+subroutine psi_s_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp)
+  use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_
+
+  implicit none
+
+  integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:)
+  integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz
+  real(psb_spk_), intent(inout) :: val(:)
+  real(psb_spk_), intent(in) :: data(nrd,ncd)
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_), intent(in), optional :: rdisp
+
+  !locals
+  integer(psb_ipk_) :: rdisp_
+  integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic
+  logical, parameter :: debug=.false.
+
+  info = psb_success_
+  rdisp_ = 0
+  if (present(rdisp)) rdisp_ = rdisp
+
+  if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_
+  nz = 0
+  do j=1, ncd
+    ! Local index i is valid when all of the following hold:
+    !   1 <= i <= nrd                      (inside data)
+    !   1 <= i+rdisp_ <= nr                (row inside the matrix)
+    !   1 <= i+rdisp_+offsets(j) <= nc     (column inside the matrix)
+    ! Row and column limits are applied separately. Clipping both with
+    ! min(nr,nc) gives the same range when nr == nc, but skips valid
+    ! slots of a rectangular matrix.
+    ir1 = max(1, 1 - rdisp_, 1 - rdisp_ - offsets(j))
+    ir2 = min(nrd, nr - rdisp_, nc - rdisp_ - offsets(j))
+    if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j)
+    do i=ir1,ir2
+      ir = i + rdisp_
+      ic = i + rdisp_ + offsets(j)
+      if (debug) write(0,*) ' Loop I',i,ir,ic
+      nz = nz + 1
+      ia(nz) = ir
+      ja(nz) = ic
+      val(nz) = data(i,j)
+    enddo
+  end do
+
+end subroutine psi_s_xtr_coo_from_dia
+
diff --git a/ext/impl/psi_s_xtr_dia_from_coo.f90 b/ext/impl/psi_s_xtr_dia_from_coo.f90
new file mode 100644
index 00000000..a8ee7c4b
--- /dev/null
+++ b/ext/impl/psi_s_xtr_dia_from_coo.f90
@@ -0,0 +1,69 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+!
+! psi_s_xtr_dia_from_coo
+!   Scatter coordinate entries into diagonal (DIA) storage.
+!   Entry k, with row ia(k) and column ja(k), is stored at
+!       data( ia(k)-rdisp , d(nr + ja(k)-ia(k)) )
+!   so d maps "nr + diagonal offset" to a column of data. When two
+!   entries land on the same slot the later one overwrites the earlier.
+!
+!   nr          (in)  shift applied to the diagonal offset when indexing d
+!   nc          (in)  only printed by the debug trace
+!   nz          (in)  number of entries read from ia, ja, val
+!   ia, ja, val (in)  row indices, column indices and values
+!   d           (in)  diagonal-to-column lookup table
+!   nrd, ncd    (in)  shape of data
+!   data        (out) data(nrd,ncd); filled with szero first unless
+!                     initdata is present and .false.
+!   info        (out) always psb_success_
+!   initdata    (in, optional) default .true.
+!   rdisp       (in, optional) row displacement, default 0
+!
+subroutine psi_s_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,&
+     & initdata, rdisp)
+  use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero
+
+  implicit none
+  integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:)
+  real(psb_spk_), intent(in) :: val(:)
+  real(psb_spk_), intent(out) :: data(nrd,ncd)
+  integer(psb_ipk_), intent(out) :: info
+  logical, intent(in), optional :: initdata
+  integer(psb_ipk_), intent(in), optional :: rdisp
+
+  ! locals
+  logical, parameter :: debug=.false.
+  logical :: clear_first
+  integer(psb_ipk_) :: row_shift
+  integer(psb_ipk_) :: idx, lrow, dcol, koff
+
+  info = psb_success_
+
+  ! Resolve the optional arguments to their effective values.
+  if (present(initdata)) then
+    clear_first = initdata
+  else
+    clear_first = .true.
+  end if
+  if (present(rdisp)) then
+    row_shift = rdisp
+  else
+    row_shift = 0
+  end if
+
+  if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,clear_first, row_shift
+
+  if (clear_first) data(:,:) = szero
+
+  do idx = 1, nz
+    koff = ja(idx) - ia(idx)      ! diagonal offset of this entry
+    lrow = ia(idx) - row_shift    ! row inside data
+    dcol = d(nr+koff)             ! column of data holding that diagonal
+    if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(idx),ja(idx),koff,lrow,dcol
+    data(lrow,dcol) = val(idx)
+  end do
+
+end subroutine psi_s_xtr_dia_from_coo
diff --git a/ext/impl/psi_s_xtr_ell_from_coo.f90 b/ext/impl/psi_s_xtr_ell_from_coo.f90
new file mode 100644
index 00000000..0bac2ec0
--- /dev/null
+++ b/ext/impl/psi_s_xtr_ell_from_coo.f90
@@ -0,0 +1,63 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. 
Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psi_s_xtr_ell_from_coo
+!   Copy a run of coordinate entries, grouped row by row, into an
+!   ELLPACK-style slab ja(ld,*) / val(ld,*).
+!   Row ii of the slab receives the next irn(ii) entries of iac/jac/valc.
+!   Positions irn(ii)+1..mxrwl of that row are padded with value szero and
+!   a column index taken from a real entry.
+!
+!   i      global index of the first row; used only by the commented-out
+!          consistency check
+!   nr     number of rows to fill
+!   mxrwl  number of slab columns to fill for every row
+!   iac, jac, valc  row indices, column indices, values of the entries
+!   ja, val         slab with leading dimension ld (written)
+!   irn    irn(ii) = number of entries belonging to row ii
+!   diag   diag(ii) = slab column holding the entry with row == column,
+!          or 0 when row ii has no such entry (written)
+!   ld     leading dimension of ja and val
+!
+subroutine psi_s_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, &
+     & ja,val,irn,diag,ld)
+  use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero
+
+  implicit none
+  integer(psb_ipk_) :: i,nr,mxrwl,ld
+  integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*)
+  real(psb_spk_) :: valc(*), val(ld,*)
+
+  integer(psb_ipk_) :: ii,jj, kc,nc, ir, ic
+  kc = 1
+  do ii = 1, nr
+    nc = irn(ii)
+    ! Give the diagonal marker a defined value even when the row has no
+    ! diagonal entry; otherwise the slot keeps whatever it held on entry.
+    diag(ii) = 0
+    do jj=1,nc
+      !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1
+      ir = iac(kc)
+      ic = jac(kc)
+      if (ir == ic) diag(ii) = jj
+      ja(ii,jj) = ic
+      val(ii,jj) = valc(kc)
+      kc = kc + 1
+    end do
+    ! Padding needs a valid column index. A non-empty row reuses its last
+    ! column. An empty row takes the first column of the chunk. That read
+    ! is done only when padding is actually required (mxrwl > 0), because
+    ! a chunk with no entries at all has no jac(1) to read.
+    if ((nc == 0) .and. (mxrwl > 0)) ic = jac(1)
+    do jj = nc+1,mxrwl
+      ja(ii,jj) = ic
+      val(ii,jj) = szero
+    end do
+  end do
+end subroutine psi_s_xtr_ell_from_coo
+
diff --git a/ext/impl/psi_z_convert_dia_from_coo.f90 b/ext/impl/psi_z_convert_dia_from_coo.f90
new file mode 100644
index 00000000..ddc9d2fd
--- /dev/null
+++ b/ext/impl/psi_z_convert_dia_from_coo.f90
@@ -0,0 +1,73 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+! 
+!
+! psi_z_convert_dia_from_coo
+!   Build the diagonal-format matrix a from the coordinate matrix tmp.
+!   Steps:
+!     1. copy the parent-type component of tmp into a;
+!     2. psi_dia_offset_from_coo returns the diagonal count nd, fills
+!        a%offset and the lookup table d;
+!     3. a%offset is resized to nd and a%data to (nr,nd);
+!     4. psi_xtr_dia_from_coo scatters the values into a%data.
+!
+!   a    (inout)  matrix being built
+!   tmp  (in)     source coordinate matrix
+!   info (out)    psb_success_ on success, otherwise the first non-zero
+!                 status returned by an allocation or by a helper
+!
+subroutine psi_z_convert_dia_from_coo(a,tmp,info)
+  use psb_base_mod
+  use psb_z_dia_mat_mod, psb_protect_name => psi_z_convert_dia_from_coo
+  use psi_ext_util_mod
+  implicit none
+  class(psb_z_dia_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(in) :: tmp
+  integer(psb_ipk_), intent(out) :: info
+
+  !locals
+  integer(psb_ipk_) :: ndiag,nd
+  integer(psb_ipk_),allocatable :: d(:)
+  integer(psb_ipk_) :: nc,nr,nza
+
+  info = psb_success_
+  nr = tmp%get_nrows()
+  nc = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+  ! Copy the parent-type component of tmp into a.
+  a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat
+
+  ! One slot per possible diagonal offset, -(nr-1)..(nc-1). Clamped so
+  ! that an empty matrix asks for length 0 rather than -1.
+  ndiag = max(nr+nc-1, 0)
+  allocate(d(ndiag),stat=info)
+  if (info /= 0) return
+  call psb_realloc(ndiag,a%offset,info)
+  if (info /= 0) return
+
+  call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, &
+       & nd,d,a%offset,info,initd=.true.,cleard=.false.)
+  ! Stop here on failure: the next call reuses info and would hide it.
+  if (info /= 0) return
+
+  call psb_realloc(nd,a%offset,info)
+  if (info /= 0) return
+  call psb_realloc(nr,nd,a%data,info)
+  if (info /= 0) return
+  a%nzeros = nza
+
+  call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,&
+       & d,nr,nd,a%data,info,initdata=.true.)
+  ! Same reason: deallocate below stores its own status in info.
+  ! d is a local allocatable, so it is released automatically on return.
+  if (info /= 0) return
+
+  deallocate(d,stat=info)
+  if (info /= 0) return
+
+end subroutine psi_z_convert_dia_from_coo
diff --git a/ext/impl/psi_z_convert_ell_from_coo.f90 b/ext/impl/psi_z_convert_ell_from_coo.f90
new file mode 100644
index 00000000..3d37c11f
--- /dev/null
+++ b/ext/impl/psi_z_convert_ell_from_coo.f90
@@ -0,0 +1,87 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! 
notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! psi_z_convert_ell_from_coo
+!   Build the ELLPACK-format matrix a from the coordinate matrix tmp.
+!   a%irn receives the entry count of every row, a%nzt their total.
+!   a%ja and a%val are sized (ldim, widest), where widest is the largest
+!   row count and ldim is the row count rounded up to a multiple of the
+!   hack size. The entries are then copied by psi_z_xtr_ell_from_coo.
+!   NOTE(review): that helper is not shown here; it presumably consumes
+!   entries row by row, so tmp is expected to be ordered by rows. No
+!   check is made here.
+!
+!   a        (inout)        matrix being built
+!   tmp      (in)           source coordinate matrix
+!   info     (out)          psb_success_, or the status of a failed
+!                           psb_realloc
+!   hacksize (in, optional) used when greater than 1, otherwise 1
+!
+subroutine psi_z_convert_ell_from_coo(a,tmp,info,hacksize)
+
+  use psb_base_mod
+  use psb_z_ell_mat_mod, psb_protect_name => psi_z_convert_ell_from_coo
+  use psi_ext_util_mod
+  implicit none
+
+  class(psb_z_ell_sparse_mat), intent(inout) :: a
+  class(psb_z_coo_sparse_mat), intent(in) :: tmp
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_), intent(in), optional :: hacksize
+
+  ! locals
+  integer(psb_ipk_) :: nrows, ncols, nnz, pos, row
+  integer(psb_ipk_) :: widest, hack, ldim
+
+  info = psb_success_
+
+  nrows = tmp%get_nrows()
+  ncols = tmp%get_ncols()
+  nnz = tmp%get_nzeros()
+
+  ! Effective hack size: the caller's value when it exceeds 1, else 1.
+  hack = 1
+  if (present(hacksize)) hack = max(hack, hacksize)
+  ! Leading dimension: nrows rounded up to a multiple of hack.
+  ldim = ((nrows+hack-1)/hack)*hack
+
+  ! Copy the parent-type component of tmp into a.
+  a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat
+
+  ! Count the entries of every row.
+  call psb_realloc(nrows,a%irn,info)
+  if (info /= psb_success_) return
+  a%irn = 0
+  do pos = 1, nnz
+    a%irn(tmp%ia(pos)) = a%irn(tmp%ia(pos)) + 1
+  end do
+
+  ! Longest row and total entry count.
+  widest = 0
+  a%nzt = 0
+  do row = 1, nrows
+    if (a%irn(row) > widest) widest = a%irn(row)
+    a%nzt = a%nzt + a%irn(row)
+  end do
+
+  ! Size the output arrays, giving up at the first failure.
+  call psb_realloc(nrows,a%idiag,info)
+  if (info /= psb_success_) return
+  call psb_realloc(ldim,widest,a%ja,info)
+  if (info /= psb_success_) return
+  call psb_realloc(ldim,widest,a%val,info)
+  if (info /= psb_success_) return
+
+  call psi_z_xtr_ell_from_coo(ione,nrows,widest,tmp%ia,tmp%ja,tmp%val,&
+       & a%ja,a%val,a%irn,a%idiag,ldim)
+
+end subroutine psi_z_convert_ell_from_coo
+
diff --git a/ext/impl/psi_z_convert_hll_from_coo.f90 b/ext/impl/psi_z_convert_hll_from_coo.f90
new file mode 100644
index 00000000..bc9fdde1
--- /dev/null
+++ b/ext/impl/psi_z_convert_hll_from_coo.f90
@@ -0,0 +1,122 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. 
!
! psi_z_convert_hll_from_coo
!
!   Fill the hacked-ELLPACK components of A (a%irn, a%idiag, a%hkoffs,
!   a%ja, a%val, a%nzt) from the COO matrix TMP. Rows are grouped in
!   blocks ("hacks") of hksz rows; every block is stored as a small
!   ELLPACK matrix with leading dimension hksz and as many columns as
!   the longest row of that block.
!
!   Arguments:
!     a     (inout) HLL matrix being built; its base-class part is
!                   copied from tmp.
!     hksz  (in)    number of rows per block. It is used as a divisor and
!                   as a loop stride, so it must be >= 1; this is not
!                   checked here.
!     tmp   (in)    source COO matrix; must report is_by_rows().
!     info  (out)   psb_success_ on success, -98765 when tmp is not
!                   ordered by rows, otherwise the code returned by the
!                   failing psb_realloc.
!
subroutine psi_z_convert_hll_from_coo(a,hksz,tmp,info)
  use psb_base_mod
  use psb_z_hll_mat_mod, psb_protect_name => psi_z_convert_hll_from_coo
  use psi_ext_util_mod
  implicit none
  class(psb_z_hll_sparse_mat), intent(inout) :: a
  class(psb_z_coo_sparse_mat), intent(in)    :: tmp
  integer(psb_ipk_), intent(in)              :: hksz
  integer(psb_ipk_), intent(out)             :: info

  ! Locals
  integer(psb_ipk_) :: nza, nr, i, j, k, kc, ir, hk, isz
  integer(psb_ipk_) :: nzm, mxrwl, noffs, nzblk

  ! info is intent(out): give it a defined value on every path.
  info = psb_success_

  if (.not.tmp%is_by_rows()) then
    info = -98765
    return
  end if

  nr  = tmp%get_nrows()
  nza = tmp%get_nzeros()

  ! Copy the base-class state (dimensions, flags) from the source.
  a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat

  ! First compute the number of nonzeros in each row.
  call psb_realloc(nr,a%irn,info)
  if (info /= psb_success_) return
  a%irn = 0
  do i = 1, nza
    a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1
  end do

  a%nzt = nza

  ! Second. Figure out the block offsets: block j occupies positions
  ! hkoffs(j)+1 : hkoffs(j+1) of a%ja / a%val, i.e. (longest row of the
  ! block) * hksz entries, even when the last block has fewer rows.
  call a%set_hksz(hksz)
  noffs = (nr+hksz-1)/hksz
  call psb_realloc(noffs+1,a%hkoffs,info)
  if (info /= psb_success_) return
  a%hkoffs(1) = 0
  j = 1
  do i = 1, nr, hksz
    ir    = min(hksz,nr-i+1)
    mxrwl = a%irn(i)
    do k = 1, ir-1
      mxrwl = max(mxrwl,a%irn(i+k))
    end do
    a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz
    j = j + 1
  end do

  !
  ! At this point a%hkoffs(noffs+1) contains the allocation
  ! size a%ja a%val.
  !
  isz = a%hkoffs(noffs+1)
  call psb_realloc(nr,a%idiag,info)
  if (info == psb_success_) call psb_realloc(isz,a%ja,info)
  if (info == psb_success_) call psb_realloc(isz,a%val,info)
  if (info /= psb_success_) return

  ! The extraction routine assigns idiag only for rows that actually hold
  ! a diagonal entry; give every other row a defined value.
  ! NOTE(review): 0 is presumably what consumers of idiag treat as
  ! "no diagonal stored" - confirm.
  a%idiag = 0

  ! Init last chunk of data: the last block may hold fewer than hksz
  ! rows, and the extraction below only writes the rows that exist.
  ! With an empty matrix there is no block at all (noffs == 0) and
  ! a%hkoffs(noffs) would be out of bounds.
  if (noffs > 0) then
    nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs)
    a%val(isz-(nzm-1):isz) = zzero
    a%ja(isz-(nzm-1):isz)  = nr
  end if

  !
  ! Now copy everything, noting the position of the diagonal.
  ! kc runs over the COO entries, k over the HLL storage.
  !
  kc = 1
  k  = 1
  do i = 1, nr, hksz
    ir    = min(hksz,nr-i+1)
    hk    = (i-1)/hksz + 1
    isz   = (a%hkoffs(hk+1)-a%hkoffs(hk))
    mxrwl = isz/hksz
    nzblk = sum(a%irn(i:i+ir-1))
    call psi_z_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nzblk-1),&
         & tmp%ja(kc:kc+nzblk-1),tmp%val(kc:kc+nzblk-1),&
         & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),&
         & a%idiag(i:i+ir-1),hksz)
    k  = k  + isz
    kc = kc + nzblk
  end do

  ! Third copy the other stuff
  if (info /= psb_success_) return
  call a%set_sorted(.true.)

end subroutine psi_z_convert_hll_from_coo
!
! psi_z_xtr_coo_from_dia
!
!   Expand diagonal storage into coordinate triples. For every stored
!   diagonal j (column j of data, offset offsets(j)) and every local row
!   i in the computed range, one entry
!       ( i+rdisp , i+rdisp+offsets(j) , data(i,j) )
!   is appended to ia / ja / val. All positions of the range are emitted,
!   whatever their value.
!
!   Arguments:
!     nr, nc   (in)    matrix dimensions used to bound the row range.
!     ia,ja,val(inout) output triples; written from position 1.
!     nz       (inout) reset to 0 on entry; number of triples on exit.
!     nrd, ncd (in)    dimensions of data.
!     data     (in)    diagonals, one per column.
!     offsets  (in)    column-minus-row offset of each diagonal.
!     info     (out)   always psb_success_.
!     rdisp    (in, optional) displacement added to the local row index;
!                      0 when absent.
!
!   The upper row bound is derived from min(nr,nc)-rdisp for both signs
!   of the offset. No check is made that ia / ja / val are large enough.
!
subroutine psi_z_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp)
  use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero

  implicit none

  integer(psb_ipk_), intent(in)           :: nr,nc, nrd,ncd, offsets(:)
  integer(psb_ipk_), intent(inout)        :: ia(:), ja(:),nz
  complex(psb_dpk_), intent(inout)        :: val(:)
  complex(psb_dpk_), intent(in)           :: data(nrd,ncd)
  integer(psb_ipk_), intent(out)          :: info
  integer(psb_ipk_), intent(in), optional :: rdisp

  ! Locals
  integer(psb_ipk_) :: shift, toprow, lowbase
  integer(psb_ipk_) :: jd, il, first, last, off, grow, gcol
  logical, parameter :: debug=.false.

  info = psb_success_

  if (present(rdisp)) then
    shift = rdisp
  else
    shift = 0
  end if

  if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, shift

  ! Loop-invariant parts of the row bounds.
  toprow  = min(nr-shift,nc-shift)
  lowbase = 1-shift

  nz = 0
  do jd = 1, ncd
    off = offsets(jd)
    if (off < 0) then
      ! Sub-diagonal: skip the leading rows whose column would be < 1.
      first = max(1, lowbase - off)
      last  = min(nrd, toprow)
    else
      ! Main or super-diagonal: cut the trailing rows.
      first = 1
      last  = min(nrd, toprow - off)
    end if
    if (debug) write(0,*) ' Loop J',jd,first,last, offsets(jd)
    do il = first, last
      grow = il + shift
      gcol = il + shift + off
      if (debug) write(0,*) ' Loop I',il,grow,gcol
      nz      = nz + 1
      ia(nz)  = grow
      ja(nz)  = gcol
      val(nz) = data(il,jd)
    end do
  end do

end subroutine psi_z_xtr_coo_from_dia
!
! psi_z_xtr_dia_from_coo
!
!   Scatter coordinate entries into diagonal storage. Entry i, at row
!   ia(i) and column ja(i), is written to
!       data( ia(i)-rdisp , d(nr + ja(i)-ia(i)) )
!   so d maps "nr + (column - row)" to the column of data that holds that
!   diagonal. A later entry landing on the same position overwrites the
!   earlier one.
!
!   Arguments:
!     nr       (in)  row count, used as the bias when indexing d.
!     nc       (in)  only reported in the debug trace.
!     nz       (in)  number of entries to scatter.
!     ia,ja,val(in)  coordinate entries.
!     d        (in)  diagonal lookup table (see above).
!     nrd, ncd (in)  dimensions of data.
!     data     (out) diagonal storage.
!     info     (out) always psb_success_.
!     initdata (in, optional) when absent or .true., data is set to zero
!                    before scattering.
!     rdisp    (in, optional) displacement subtracted from the row index;
!                    0 when absent.
!
!   No range check is made on the computed indices into data or d.
!
subroutine psi_z_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,&
     & initdata, rdisp)
  use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero

  implicit none
  integer(psb_ipk_), intent(in)           :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:)
  complex(psb_dpk_), intent(in)           :: val(:)
  complex(psb_dpk_), intent(out)          :: data(nrd,ncd)
  integer(psb_ipk_), intent(out)          :: info
  logical, intent(in), optional           :: initdata
  integer(psb_ipk_), intent(in), optional :: rdisp

  ! Locals
  logical           :: clear_first
  integer(psb_ipk_) :: shift
  integer(psb_ipk_) :: ient, lrow, koff, jdiag
  logical, parameter :: debug=.false.

  info = psb_success_

  ! Resolve the optional arguments to their defaults.
  if (present(initdata)) then
    clear_first = initdata
  else
    clear_first = .true.
  end if
  if (present(rdisp)) then
    shift = rdisp
  else
    shift = 0
  end if

  if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,clear_first, shift

  if (clear_first) data(1:nrd,1:ncd) = zzero

  do ient = 1, nz
    koff  = ja(ient) - ia(ient)     ! diagonal offset of this entry
    jdiag = d(nr+koff)              ! column of data holding that diagonal
    lrow  = ia(ient) - shift        ! row inside data
    if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(ient),ja(ient),koff,lrow,jdiag
    data(lrow,jdiag) = val(ient)
  end do

end subroutine psi_z_xtr_dia_from_coo
!
! psi_z_xtr_ell_from_coo
!
!   Copy a row-ordered run of coordinate entries into ELLPACK-style
!   storage. Row ii (1..nr) receives the next irn(ii) entries of
!   iac / jac / valc, in storage order, in ja(ii,1:irn(ii)) and
!   val(ii,1:irn(ii)); positions irn(ii)+1:mxrwl are padded with a zero
!   coefficient and a column index taken from an existing entry.
!
!   Arguments (no intent is declared; roles as used by the code):
!     i      not referenced by the executable code.
!     nr     number of rows to fill.
!     mxrwl  number of columns of ja / val to fill for every row.
!     iac, jac, valc   coordinate entries, consumed sequentially.
!     ja, val          output arrays with leading dimension ld.
!     irn    number of entries of each row.
!     diag   diag(ii) is set to the position jj inside row ii of an entry
!            whose row index equals its column index; it is left
!            untouched for rows without such an entry.
!     ld     leading dimension of ja / val.
!
subroutine psi_z_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, &
     & ja,val,irn,diag,ld)
  use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero

  implicit none
  integer(psb_ipk_) :: i,nr,mxrwl,ld
  integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*)
  complex(psb_dpk_) :: valc(*), val(ld,*)

  integer(psb_ipk_) :: ii,jj, kc,nc, ir, ic

  kc = 1
  do ii = 1, nr
    nc = irn(ii)
    do jj = 1, nc
      ir = iac(kc)
      ic = jac(kc)
      if (ir == ic) diag(ii) = jj
      ja(ii,jj)  = ic
      val(ii,jj) = valc(kc)
      kc = kc + 1
    end do
    if (nc < mxrwl) then
      ! Padding is needed. Reuse the last column index copied for this
      ! row; for an empty row fall back on the first available entry.
      ! Since mxrwl > 0 here, at least one row of this call is expected
      ! to hold an entry, so jac(1) exists. It is deliberately NOT read
      ! when nothing has to be padded, because jac may then be an empty
      ! array section.
      if (nc == 0) ic = jac(1)
      do jj = nc+1, mxrwl
        ja(ii,jj)  = ic
        val(ii,jj) = zzero
      end do
    end if
  end do
end subroutine psi_z_xtr_ell_from_coo
module psb_c_dia_mat_mod

  use psb_c_base_mat_mod

  !
  !> \class psb_c_dia_sparse_mat
  !! \brief Single precision complex sparse matrix stored in (extended)
  !!        DIA format.
  !!
  !! Only the getters and free() are implemented in this module (see the
  !! CONTAINS section). Every psb_c_dia_* / psb_c_*_dia_* procedure bound
  !! below is an external procedure: this module only declares its
  !! explicit interface.
  !!
  !! Bindings that are commented out are not overridden by this type.
  !
  type, extends(psb_c_base_sparse_mat) :: psb_c_dia_sparse_mat
    !
    ! DIA format, extended.
    !
    ! offset(:) : integer array; presumably one entry per stored
    !             diagonal -- TODO confirm against the conversion code.
    ! nzeros    : value reported by get_nzeros().
    ! data(:,:) : complex coefficients; presumably one column per
    !             stored diagonal -- TODO confirm.
    !
    integer(psb_ipk_), allocatable :: offset(:)
    integer(psb_ipk_) :: nzeros
    complex(psb_spk_), allocatable :: data(:,:)

  contains
    ! procedure, pass(a) :: get_size => c_dia_get_size
    procedure, pass(a) :: get_nzeros => c_dia_get_nzeros
    procedure, nopass  :: get_fmt => c_dia_get_fmt
    procedure, pass(a) :: sizeof => c_dia_sizeof
    procedure, pass(a) :: csmm => psb_c_dia_csmm
    procedure, pass(a) :: csmv => psb_c_dia_csmv
    ! procedure, pass(a) :: inner_cssm => psb_c_dia_cssm
    ! procedure, pass(a) :: inner_cssv => psb_c_dia_cssv
    procedure, pass(a) :: scals => psb_c_dia_scals
    procedure, pass(a) :: scalv => psb_c_dia_scal
    procedure, pass(a) :: maxval => psb_c_dia_maxval
    procedure, pass(a) :: rowsum => psb_c_dia_rowsum
    procedure, pass(a) :: arwsum => psb_c_dia_arwsum
    procedure, pass(a) :: colsum => psb_c_dia_colsum
    procedure, pass(a) :: aclsum => psb_c_dia_aclsum
    procedure, pass(a) :: reallocate_nz => psb_c_dia_reallocate_nz
    procedure, pass(a) :: allocate_mnnz => psb_c_dia_allocate_mnnz
    procedure, pass(a) :: cp_to_coo => psb_c_cp_dia_to_coo
    procedure, pass(a) :: cp_from_coo => psb_c_cp_dia_from_coo
    ! procedure, pass(a) :: mv_to_coo => psb_c_mv_dia_to_coo
    procedure, pass(a) :: mv_from_coo => psb_c_mv_dia_from_coo
    ! procedure, pass(a) :: mv_to_fmt => psb_c_mv_dia_to_fmt
    ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_dia_from_fmt
    ! procedure, pass(a) :: csput_a => psb_c_dia_csput_a
    procedure, pass(a) :: get_diag => psb_c_dia_get_diag
    procedure, pass(a) :: csgetptn => psb_c_dia_csgetptn
    procedure, pass(a) :: csgetrow => psb_c_dia_csgetrow
    ! procedure, pass(a) :: get_nz_row => c_dia_get_nz_row
    procedure, pass(a) :: reinit => psb_c_dia_reinit
    ! procedure, pass(a) :: trim => psb_c_dia_trim
    procedure, pass(a) :: print => psb_c_dia_print
    procedure, pass(a) :: free => c_dia_free
    procedure, pass(a) :: mold => psb_c_dia_mold

  end type psb_c_dia_sparse_mat

  ! The module procedures are reachable only through the type bindings.
  private :: c_dia_get_nzeros, c_dia_free, c_dia_get_fmt, &
       & c_dia_sizeof !, c_dia_get_size, c_dia_get_nz_row

  ! ---------------------------------------------------------------
  ! Storage management (external implementations)
  ! ---------------------------------------------------------------

  ! Bound as reallocate_nz; nz is the requested number of nonzeros.
  interface
    subroutine psb_c_dia_reallocate_nz(nz,a)
      import :: psb_c_dia_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in) :: nz
      class(psb_c_dia_sparse_mat), intent(inout) :: a
    end subroutine psb_c_dia_reallocate_nz
  end interface

  ! Bound as reinit; clear is an optional flag interpreted by the
  ! implementation.
  interface
    subroutine psb_c_dia_reinit(a,clear)
      import :: psb_c_dia_sparse_mat
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      logical, intent(in), optional :: clear
    end subroutine psb_c_dia_reinit
  end interface

  ! Interface only: the trim binding in the type is commented out.
  interface
    subroutine psb_c_dia_trim(a)
      import :: psb_c_dia_sparse_mat
      class(psb_c_dia_sparse_mat), intent(inout) :: a
    end subroutine psb_c_dia_trim
  end interface

  ! Bound as mold; b is the allocatable polymorphic output, info the
  ! return code.
  interface
    subroutine psb_c_dia_mold(a,b,info)
      import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_dia_mold
  end interface

  ! Bound as allocate_mnnz; m and n are the matrix dimensions, nz an
  ! optional nonzero count.
  interface
    subroutine psb_c_dia_allocate_mnnz(m,n,a,nz)
      import :: psb_c_dia_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in) :: m,n
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(in), optional :: nz
    end subroutine psb_c_dia_allocate_mnnz
  end interface

  ! Bound as print; iout is the output unit, the remaining arguments
  ! are optional and interpreted by the implementation.
  interface
    subroutine psb_c_dia_print(iout,a,iv,head,ivr,ivc)
      import :: psb_c_dia_sparse_mat, psb_ipk_, psb_lpk_
      integer(psb_ipk_), intent(in) :: iout
      class(psb_c_dia_sparse_mat), intent(in) :: a
      integer(psb_lpk_), intent(in), optional :: iv(:)
      character(len=*), optional :: head
      integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
    end subroutine psb_c_dia_print
  end interface

  ! ---------------------------------------------------------------
  ! Format conversions (external implementations)
  !
  ! cp_* take the source as intent(in); mv_* take both operands as
  ! intent(inout). Only cp_to_coo, cp_from_coo and mv_from_coo are
  ! bound in the type; the others are declared here but unbound.
  ! ---------------------------------------------------------------

  interface
    subroutine psb_c_cp_dia_to_coo(a,b,info)
      import :: psb_c_coo_sparse_mat, psb_c_dia_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      class(psb_c_coo_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_cp_dia_to_coo
  end interface

  interface
    subroutine psb_c_cp_dia_from_coo(a,b,info)
      import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      class(psb_c_coo_sparse_mat), intent(in) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_cp_dia_from_coo
  end interface

  interface
    subroutine psb_c_cp_dia_to_fmt(a,b,info)
      import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      class(psb_c_base_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_cp_dia_to_fmt
  end interface

  interface
    subroutine psb_c_cp_dia_from_fmt(a,b,info)
      import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      class(psb_c_base_sparse_mat), intent(in) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_cp_dia_from_fmt
  end interface

  interface
    subroutine psb_c_mv_dia_to_coo(a,b,info)
      import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      class(psb_c_coo_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_mv_dia_to_coo
  end interface

  interface
    subroutine psb_c_mv_dia_from_coo(a,b,info)
      import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      class(psb_c_coo_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_mv_dia_from_coo
  end interface

  interface
    subroutine psb_c_mv_dia_to_fmt(a,b,info)
      import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      class(psb_c_base_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_mv_dia_to_fmt
  end interface

  interface
    subroutine psb_c_mv_dia_from_fmt(a,b,info)
      import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      class(psb_c_base_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_mv_dia_from_fmt
  end interface

  ! ---------------------------------------------------------------
  ! Coefficient insertion / extraction (external implementations)
  ! ---------------------------------------------------------------

  ! Interface only: the csput_a binding in the type is commented out.
  interface
    subroutine psb_c_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      complex(psb_spk_), intent(in) :: val(:)
      integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),&
           & imin,imax,jmin,jmax
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_dia_csput_a
  end interface

  ! Bound as csgetptn: same argument list as csgetrow below, minus the
  ! coefficient array val and the chksz flag.
  interface
    subroutine psb_c_dia_csgetptn(imin,imax,a,nz,ia,ja,info,&
         & jmin,jmax,iren,append,nzin,rscale,cscale)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      integer(psb_ipk_), intent(in) :: imin,imax
      integer(psb_ipk_), intent(out) :: nz
      integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
      integer(psb_ipk_),intent(out) :: info
      logical, intent(in), optional :: append
      integer(psb_ipk_), intent(in), optional :: iren(:)
      integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
      logical, intent(in), optional :: rscale,cscale
    end subroutine psb_c_dia_csgetptn
  end interface

  ! Bound as csgetrow: returns nz and the allocatable arrays ia, ja,
  ! val for the row range imin:imax.
  interface
    subroutine psb_c_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
         & jmin,jmax,iren,append,nzin,rscale,cscale,chksz)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      integer(psb_ipk_), intent(in) :: imin,imax
      integer(psb_ipk_), intent(out) :: nz
      integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
      complex(psb_spk_), allocatable, intent(inout) :: val(:)
      integer(psb_ipk_),intent(out) :: info
      logical, intent(in), optional :: append
      integer(psb_ipk_), intent(in), optional :: iren(:)
      integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
      logical, intent(in), optional :: rscale,cscale,chksz
    end subroutine psb_c_dia_csgetrow
  end interface

  ! Interface only: not bound in the type.
  interface
    subroutine psb_c_dia_csgetblk(imin,imax,a,b,info,&
         & jmin,jmax,iren,append,rscale,cscale)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      class(psb_c_coo_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(in) :: imin,imax
      integer(psb_ipk_),intent(out) :: info
      logical, intent(in), optional :: append
      integer(psb_ipk_), intent(in), optional :: iren(:)
      integer(psb_ipk_), intent(in), optional :: jmin,jmax
      logical, intent(in), optional :: rscale,cscale
    end subroutine psb_c_dia_csgetblk
  end interface

  ! ---------------------------------------------------------------
  ! Computational kernels (external implementations)
  ! ---------------------------------------------------------------

  ! Interfaces only: the inner_cssv / inner_cssm bindings in the type
  ! are commented out.
  interface
    subroutine psb_c_dia_cssv(alpha,a,x,beta,y,info,trans)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(in) :: alpha, beta, x(:)
      complex(psb_spk_), intent(inout) :: y(:)
      integer(psb_ipk_), intent(out) :: info
      character, optional, intent(in) :: trans
    end subroutine psb_c_dia_cssv
    subroutine psb_c_dia_cssm(alpha,a,x,beta,y,info,trans)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
      complex(psb_spk_), intent(inout) :: y(:,:)
      integer(psb_ipk_), intent(out) :: info
      character, optional, intent(in) :: trans
    end subroutine psb_c_dia_cssm
  end interface

  ! Bound as csmv (rank 1 x, y) and csmm (rank 2 x, y); y is updated
  ! in place, trans is an optional one-character selector.
  interface
    subroutine psb_c_dia_csmv(alpha,a,x,beta,y,info,trans)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(in) :: alpha, beta, x(:)
      complex(psb_spk_), intent(inout) :: y(:)
      integer(psb_ipk_), intent(out) :: info
      character, optional, intent(in) :: trans
    end subroutine psb_c_dia_csmv
    subroutine psb_c_dia_csmm(alpha,a,x,beta,y,info,trans)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
      complex(psb_spk_), intent(inout) :: y(:,:)
      integer(psb_ipk_), intent(out) :: info
      character, optional, intent(in) :: trans
    end subroutine psb_c_dia_csmm
  end interface

  ! Bound as maxval; the result is a single precision real.
  interface
    function psb_c_dia_maxval(a) result(res)
      import :: psb_c_dia_sparse_mat, psb_spk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      real(psb_spk_) :: res
    end function psb_c_dia_maxval
  end interface

  ! Interface only: csnmi is not bound in the type.
  interface
    function psb_c_dia_csnmi(a) result(res)
      import :: psb_c_dia_sparse_mat, psb_spk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      real(psb_spk_) :: res
    end function psb_c_dia_csnmi
  end interface

  ! Interface only: csnm1 is not bound in the type.
  interface
    function psb_c_dia_csnm1(a) result(res)
      import :: psb_c_dia_sparse_mat, psb_spk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      real(psb_spk_) :: res
    end function psb_c_dia_csnm1
  end interface

  ! Bound as rowsum / colsum (complex output d) and arwsum / aclsum
  ! (real output d).
  interface
    subroutine psb_c_dia_rowsum(d,a)
      import :: psb_c_dia_sparse_mat, psb_spk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(out) :: d(:)
    end subroutine psb_c_dia_rowsum
  end interface

  interface
    subroutine psb_c_dia_arwsum(d,a)
      import :: psb_c_dia_sparse_mat, psb_spk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      real(psb_spk_), intent(out) :: d(:)
    end subroutine psb_c_dia_arwsum
  end interface

  interface
    subroutine psb_c_dia_colsum(d,a)
      import :: psb_c_dia_sparse_mat, psb_spk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(out) :: d(:)
    end subroutine psb_c_dia_colsum
  end interface

  interface
    subroutine psb_c_dia_aclsum(d,a)
      import :: psb_c_dia_sparse_mat, psb_spk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      real(psb_spk_), intent(out) :: d(:)
    end subroutine psb_c_dia_aclsum
  end interface

  ! Bound as get_diag; d receives the output, info the return code.
  interface
    subroutine psb_c_dia_get_diag(a,d,info)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(out) :: d(:)
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_dia_get_diag
  end interface

  ! Bound as scalv: scaling by the vector d; side is an optional
  ! one-character selector interpreted by the implementation.
  interface
    subroutine psb_c_dia_scal(d,a,info,side)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      complex(psb_spk_), intent(in) :: d(:)
      integer(psb_ipk_), intent(out) :: info
      character, intent(in), optional :: side
    end subroutine psb_c_dia_scal
  end interface

  ! Bound as scals: scaling by the scalar d.
  interface
    subroutine psb_c_dia_scals(d,a,info)
      import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      complex(psb_spk_), intent(in) :: d
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_dia_scals
  end interface

  ! Generic name for the low-level COO -> DIA conversion helper; this
  ! module contributes the single precision complex specific.
  interface psi_convert_dia_from_coo
    subroutine psi_c_convert_dia_from_coo(a,tmp,info)
      import :: psb_c_dia_sparse_mat, psb_ipk_, psb_c_coo_sparse_mat
      implicit none
      class(psb_c_dia_sparse_mat), intent(inout) :: a
      class(psb_c_coo_sparse_mat), intent(in) :: tmp
      integer(psb_ipk_), intent(out) :: info
    end subroutine psi_c_convert_dia_from_coo
  end interface


contains

  ! == ===================================
  !
  ! Getters
  !
  ! == ===================================

  !
  !> Function sizeof
  !! \memberof psb_c_dia_sparse_mat
  !! \brief Memory occupation in bytes.
  !!
  !! Adds 2*psb_sizeof_sp bytes per entry of data(:,:) and psb_sizeof_ip
  !! bytes per entry of offset(:) to a fixed 8-byte allowance. The
  !! allowance is kept from the original code and presumably accounts
  !! for the scalar component -- TODO confirm.
  !!
  !! Components that are not allocated contribute nothing: calling
  !! SIZE on an unallocated array is not standard conforming, and this
  !! function can be reached on a freed or never-built matrix.
  !! Array sizes are taken in kind psb_epk_ so that the byte count
  !! cannot overflow a default integer.
  !
  function c_dia_sizeof(a) result(res)
    implicit none
    class(psb_c_dia_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: res

    ! If the object reports itself as being on the device, synchronise
    ! before inspecting the host-side components.
    if (a%is_dev()) call a%sync()

    res = 8
    if (allocated(a%data)) &
         & res = res + (2*psb_sizeof_sp) * size(a%data,kind=psb_epk_)
    if (allocated(a%offset)) &
         & res = res + psb_sizeof_ip * size(a%offset,kind=psb_epk_)

  end function c_dia_sizeof

  !
  !> Function get_fmt
  !! \memberof psb_c_dia_sparse_mat
  !! \brief Short format name: 'DIA', blank padded to 5 characters.
  !
  function c_dia_get_fmt() result(res)
    implicit none
    character(len=5) :: res
    res = 'DIA'
  end function c_dia_get_fmt

  !
  !> Function get_nzeros
  !! \memberof psb_c_dia_sparse_mat
  !! \brief Returns the nzeros component.
  !
  function c_dia_get_nzeros(a) result(res)
    implicit none
    class(psb_c_dia_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: res
    res = a%nzeros
  end function c_dia_get_nzeros

  ! The two functions below are disabled in the original source and are
  ! kept as found. They refer to components (ja, val, irn) that this
  ! type does not declare.

  ! function c_dia_get_size(a) result(res)
  !   implicit none
  !   class(psb_c_dia_sparse_mat), intent(in) :: a
  !   integer(psb_ipk_) :: res

  !   res = -1

  !   if (allocated(a%ja)) then
  !     if (res >= 0) then
  !       res = min(res,size(a%ja))
  !     else
  !       res = size(a%ja)
  !     end if
  !   end if
  !   if (allocated(a%val)) then
  !     if (res >= 0) then
  !       res = min(res,size(a%val))
  !     else
  !       res = size(a%val)
  !     end if
  !   end if

  ! end function c_dia_get_size


  ! function c_dia_get_nz_row(idx,a) result(res)

  !   implicit none

  !   class(psb_c_dia_sparse_mat), intent(in) :: a
  !   integer(psb_ipk_), intent(in) :: idx
  !   integer(psb_ipk_) :: res

  !   res = 0

  !   if ((1<=idx).and.(idx<=a%get_nrows())) then
  !     res = a%irn(idx)
  !   end if

  ! end function c_dia_get_nz_row



  ! ! == ===================================
  ! !
  ! ! Data management
  ! !
  ! ! == ===================================

  !
  !> Function free
  !! \memberof psb_c_dia_sparse_mat
  !! \brief Release the storage and mark the object as empty.
  !!
  !! Deallocates data and offset when allocated, then calls set_null()
  !! and resets the row and column counts to zero. The nzeros component
  !! is left untouched, as in the original code.
  !
  subroutine c_dia_free(a)
    implicit none

    class(psb_c_dia_sparse_mat), intent(inout) :: a

    if (allocated(a%data)) deallocate(a%data)
    if (allocated(a%offset)) deallocate(a%offset)
    call a%set_null()
    call a%set_nrows(izero)
    call a%set_ncols(izero)

    return

  end subroutine c_dia_free


end module psb_c_dia_mat_mod
diff --git a/ext/psb_c_dns_mat_mod.f90 b/ext/psb_c_dns_mat_mod.f90 new file mode 100644 index 00000000..5e5a191d --- /dev/null +++ b/ext/psb_c_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +!
module psb_c_dns_mat_mod

  use psb_c_base_mat_mod

  !
  !> \class psb_c_dns_sparse_mat
  !! \brief Single precision complex matrix held in plain dense storage.
  !!
  !! Only the getters and free() are implemented in this module (see the
  !! CONTAINS section). Every psb_c_dns_* / psb_c_*_dns_* procedure bound
  !! below is an external procedure: this module only declares its
  !! explicit interface.
  !
  type, extends(psb_c_base_sparse_mat) :: psb_c_dns_sparse_mat
    !
    ! DNS format: a very simple dense matrix storage
    !    psb_spk_ :     kind for single precision reals (and complexes)
    !    psb_ipk_:      kind for normal integers.
    !    psb_sizeof_sp: variable holding size in bytes of
    !                   a single precision real
    !    psb_sizeof_ip: size in bytes of an integer
    !
    !    psb_realloc(n,v,info)    Reallocate: does what it says
    !    psb_realloc(m,n,a,info)  on rank 1 and 2 arrays, may start
    !                             from unallocated
    !
    ! nnz      : value reported by get_nzeros().
    ! val(:,:) : the dense coefficient array; the first index is the
    !            row (see get_nz_row, which scans val(idx,:)).
    !
    integer(psb_ipk_) :: nnz
    complex(psb_spk_), allocatable :: val(:,:)

  contains
    procedure, pass(a) :: get_size => c_dns_get_size
    procedure, pass(a) :: get_nzeros => c_dns_get_nzeros
    procedure, nopass  :: get_fmt => c_dns_get_fmt
    procedure, pass(a) :: sizeof => c_dns_sizeof
    procedure, pass(a) :: csmv => psb_c_dns_csmv
    procedure, pass(a) :: csmm => psb_c_dns_csmm
    procedure, pass(a) :: csnmi => psb_c_dns_csnmi
    procedure, pass(a) :: reallocate_nz => psb_c_dns_reallocate_nz
    procedure, pass(a) :: allocate_mnnz => psb_c_dns_allocate_mnnz
    procedure, pass(a) :: cp_to_coo => psb_c_cp_dns_to_coo
    procedure, pass(a) :: cp_from_coo => psb_c_cp_dns_from_coo
    procedure, pass(a) :: mv_to_coo => psb_c_mv_dns_to_coo
    procedure, pass(a) :: mv_from_coo => psb_c_mv_dns_from_coo
    procedure, pass(a) :: get_diag => psb_c_dns_get_diag
    procedure, pass(a) :: csgetrow => psb_c_dns_csgetrow
    procedure, pass(a) :: get_nz_row => c_dns_get_nz_row
    procedure, pass(a) :: trim => psb_c_dns_trim
    procedure, pass(a) :: free => c_dns_free
    procedure, pass(a) :: mold => psb_c_dns_mold

  end type psb_c_dns_sparse_mat

  ! The module procedures are reachable only through the type bindings.
  private :: c_dns_get_nzeros, c_dns_free, c_dns_get_fmt, &
       & c_dns_get_size, c_dns_sizeof, c_dns_get_nz_row

  !
  !
  !> Function reallocate_nz
  !! \memberof psb_c_dns_sparse_mat
  !! \brief One--parameter version of (re)allocate
  !!
  !! \param nz number of nonzeros to allocate for
  !!           i.e. makes sure that the internal storage
  !!           allows for NZ coefficients and their indices.
  !
  interface
    subroutine psb_c_dns_reallocate_nz(nz,a)
      import :: psb_c_dns_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in) :: nz
      class(psb_c_dns_sparse_mat), intent(inout) :: a
    end subroutine psb_c_dns_reallocate_nz
  end interface

  !> Function trim
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Memory trim
  !! Make sure the memory allocation of the sparse matrix is as tight as
  !! possible given the actual number of nonzeros it contains.
  !
  interface
    subroutine psb_c_dns_trim(a)
      import :: psb_c_dns_sparse_mat
      class(psb_c_dns_sparse_mat), intent(inout) :: a
    end subroutine psb_c_dns_trim
  end interface

  !
  !> Function mold:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Allocate a class(psb_c_dns_sparse_mat) with the
  !!        same dynamic type as the input.
  !!        This is equivalent to allocate( mold= ) and is provided
  !!        for those compilers not yet supporting mold.
  !! \param b The output variable
  !! \param info return code
  !
  interface
    subroutine psb_c_dns_mold(a,b,info)
      import :: psb_c_dns_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
      class(psb_c_dns_sparse_mat), intent(in) :: a
      class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_dns_mold
  end interface

  !
  !
  !> Function allocate_mnnz
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Three-parameters version of allocate
  !!
  !! \param m number of rows
  !! \param n number of cols
  !! \param nz [estimated internally] number of nonzeros to allocate for
  !
  interface
    subroutine psb_c_dns_allocate_mnnz(m,n,a,nz)
      import :: psb_c_dns_sparse_mat, psb_ipk_
      integer(psb_ipk_), intent(in) :: m,n
      class(psb_c_dns_sparse_mat), intent(inout) :: a
      integer(psb_ipk_), intent(in), optional :: nz
    end subroutine psb_c_dns_allocate_mnnz
  end interface

  !
  !> Function cp_to_coo:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Copy and convert to psb_c_coo_sparse_mat
  !!        Invoked from the source object.
  !! \param b The output variable
  !! \param info return code
  !
  interface
    subroutine psb_c_cp_dns_to_coo(a,b,info)
      import :: psb_c_coo_sparse_mat, psb_c_dns_sparse_mat, psb_ipk_
      class(psb_c_dns_sparse_mat), intent(in) :: a
      class(psb_c_coo_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_cp_dns_to_coo
  end interface

  !
  !> Function cp_from_coo:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Copy and convert from psb_c_coo_sparse_mat
  !!        Invoked from the target object.
  !! \param b The input variable
  !! \param info return code
  !
  interface
    subroutine psb_c_cp_dns_from_coo(a,b,info)
      import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
      class(psb_c_dns_sparse_mat), intent(inout) :: a
      class(psb_c_coo_sparse_mat), intent(in) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_cp_dns_from_coo
  end interface

  !
  !> Function mv_to_coo:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Convert to psb_c_coo_sparse_mat, freeing the source.
  !!        Invoked from the source object.
  !! \param b The output variable
  !! \param info return code
  !
  interface
    subroutine psb_c_mv_dns_to_coo(a,b,info)
      import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
      class(psb_c_dns_sparse_mat), intent(inout) :: a
      class(psb_c_coo_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_mv_dns_to_coo
  end interface

  !
  !> Function mv_from_coo:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Convert from psb_c_coo_sparse_mat, freeing the source.
  !!        Invoked from the target object.
  !! \param b The input variable
  !! \param info return code
  !
  interface
    subroutine psb_c_mv_dns_from_coo(a,b,info)
      import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
      class(psb_c_dns_sparse_mat), intent(inout) :: a
      class(psb_c_coo_sparse_mat), intent(inout) :: b
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_mv_dns_from_coo
  end interface

  !
  !
  !> Function csgetrow:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Get a (subset of) row(s)
  !!
  !! getrow is the basic method by which the other (getblk, clip) can
  !! be implemented.
  !!
  !! Returns the set
  !!    NZ, IA(1:nz), JA(1:nz), VAL(1:NZ)
  !! each identifying the position of a nonzero in A
  !! i.e.
  !!    VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ))
  !! with IMIN<=IA(:)<=IMAX
  !! with JMIN<=JA(:)<=JMAX
  !! IA,JA are reallocated as necessary.
  !!
  !! \param imin the minimum row index we are interested in
  !! \param imax the maximum row index we are interested in
  !! \param nz the number of output coefficients
  !! \param ia(:) the output row indices
  !! \param ja(:) the output col indices
  !! \param val(:) the output coefficients
  !! \param info return code
  !! \param jmin [1] minimum col index
  !! \param jmax [a\%get_ncols()] maximum col index
  !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
  !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
  !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
  !!        ( iren cannot be specified with rscale/cscale)
  !! \param append [false] append to ia,ja
  !! \param nzin [none] if append, then first new entry should go in entry nzin+1
  !! \param chksz optional logical flag; its meaning is defined by the
  !!        implementation and is not visible from this interface
  !!        (TODO: document).
  !!
  !
  interface
    subroutine psb_c_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
         & jmin,jmax,iren,append,nzin,rscale,cscale,chksz)
      import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dns_sparse_mat), intent(in) :: a
      integer(psb_ipk_), intent(in) :: imin,imax
      integer(psb_ipk_), intent(out) :: nz
      integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
      complex(psb_spk_), allocatable, intent(inout) :: val(:)
      integer(psb_ipk_),intent(out) :: info
      logical, intent(in), optional :: append
      integer(psb_ipk_), intent(in), optional :: iren(:)
      integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
      logical, intent(in), optional :: rscale,cscale,chksz
    end subroutine psb_c_dns_csgetrow
  end interface



  !> Function csmv:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Product by a dense rank 1 array.
  !!
  !! Compute
  !!    Y = alpha*op(A)*X + beta*Y
  !!
  !! \param alpha Scaling factor for Ax
  !! \param A the input sparse matrix
  !! \param x(:) the input dense X
  !! \param beta Scaling factor for y
  !! \param y(:) the input/output dense Y
  !! \param info return code
  !! \param trans [N] Whether to use A (N), its transpose (T)
  !!        or its conjugate transpose (C)
  !!
  !
  interface
    subroutine psb_c_dns_csmv(alpha,a,x,beta,y,info,trans)
      import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dns_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(in) :: alpha, beta, x(:)
      complex(psb_spk_), intent(inout) :: y(:)
      integer(psb_ipk_), intent(out) :: info
      character, optional, intent(in) :: trans
    end subroutine psb_c_dns_csmv
  end interface

  !> Function csmm:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Product by a dense rank 2 array.
  !!
  !! Compute
  !!    Y = alpha*op(A)*X + beta*Y
  !!
  !! \param alpha Scaling factor for Ax
  !! \param A the input sparse matrix
  !! \param x(:,:) the input dense X
  !! \param beta Scaling factor for y
  !! \param y(:,:) the input/output dense Y
  !! \param info return code
  !! \param trans [N] Whether to use A (N), its transpose (T)
  !!        or its conjugate transpose (C)
  !!
  !
  interface
    subroutine psb_c_dns_csmm(alpha,a,x,beta,y,info,trans)
      import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dns_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
      complex(psb_spk_), intent(inout) :: y(:,:)
      integer(psb_ipk_), intent(out) :: info
      character, optional, intent(in) :: trans
    end subroutine psb_c_dns_csmm
  end interface

  !
  !
  !> Function csnmi:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Operator infinity norm
  !!    CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2))
  !!
  !
  interface
    function psb_c_dns_csnmi(a) result(res)
      import :: psb_c_dns_sparse_mat, psb_spk_
      class(psb_c_dns_sparse_mat), intent(in) :: a
      real(psb_spk_) :: res
    end function psb_c_dns_csnmi
  end interface

  !
  !> Function get_diag:
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Extract the diagonal of A.
  !!
  !!    D(i) = A(i,i), i=1:min(nrows,ncols)
  !!
  !! \param d(:) The output diagonal
  !! \param info return code.
  !
  interface
    subroutine psb_c_dns_get_diag(a,d,info)
      import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_
      class(psb_c_dns_sparse_mat), intent(in) :: a
      complex(psb_spk_), intent(out) :: d(:)
      integer(psb_ipk_), intent(out) :: info
    end subroutine psb_c_dns_get_diag
  end interface


contains

  !
  !> Function sizeof
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Memory occupation in bytes
  !!
  !! Counts psb_sizeof_ip bytes for the scalar nnz plus 2*psb_sizeof_sp
  !! bytes for every single precision complex entry of val(:,:).
  !! An unallocated val contributes nothing: calling SIZE on an
  !! unallocated array is not standard conforming, and this function
  !! can be reached on a freed or never-built matrix.
  !! The array size is taken in kind psb_epk_ so that the byte count of
  !! a large dense matrix cannot overflow a default integer.
  !
  function c_dns_sizeof(a) result(res)
    implicit none
    class(psb_c_dns_sparse_mat), intent(in) :: a
    integer(psb_epk_) :: res

    res = psb_sizeof_ip
    if (allocated(a%val)) &
         & res = res + (2*psb_sizeof_sp) * size(a%val,kind=psb_epk_)

  end function c_dns_sizeof

  !
  !> Function get_fmt
  !! \memberof psb_c_dns_sparse_mat
  !! \brief return a short descriptive name (e.g. COO CSR etc.)
  !!
  !! Returns 'DNS', blank padded to 5 characters.
  !
  function c_dns_get_fmt() result(res)
    implicit none
    character(len=5) :: res
    res = 'DNS'
  end function c_dns_get_fmt

  !
  !> Function get_nzeros
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Current number of nonzero entries (the nnz component)
  !
  function c_dns_get_nzeros(a) result(res)
    implicit none
    class(psb_c_dns_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: res
    res = a%nnz
  end function c_dns_get_nzeros

  !
  !> Function get_size
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Maximum number of nonzeros the current structure can hold
  !  this is fixed once you initialize the matrix, with dense storage
  !  you can hold up to MxN entries.
  !  Returns 0 when val is not allocated, since SIZE must not be
  !  applied to an unallocated array.
  function c_dns_get_size(a) result(res)
    implicit none
    class(psb_c_dns_sparse_mat), intent(in) :: a
    integer(psb_ipk_) :: res

    res = 0
    if (allocated(a%val)) res = size(a%val)

  end function c_dns_get_size


  !
  !> Function get_nz_row.
  !! \memberof psb_c_dns_sparse_mat
  !! \brief How many nonzeros in a row?
  !!
  !! Counts the entries of val(idx,:) that differ from zero; returns 0
  !! when idx is outside 1:get_nrows().
  !!
  !! \param idx The row to search.
  !!
  !
  function c_dns_get_nz_row(idx,a) result(res)

    implicit none

    class(psb_c_dns_sparse_mat), intent(in) :: a
    integer(psb_ipk_), intent(in) :: idx
    integer(psb_ipk_) :: res

    res = 0

    if ((1<=idx).and.(idx<=a%get_nrows())) then
      ! Compare against the single precision complex zero, i.e. the
      ! constant matching the type of val, instead of the double
      ! precision real dzero.
      res = count(a%val(idx,:) /= czero)
    end if

  end function c_dns_get_nz_row

  !
  !> Function free
  !! \memberof psb_c_dns_sparse_mat
  !! \brief Release the storage and mark the object as empty.
  !!
  !! Deallocates val when allocated, resets nnz to 0, then calls
  !! set_null() and resets the row and column counts to zero.
  !
  subroutine c_dns_free(a)
    implicit none

    class(psb_c_dns_sparse_mat), intent(inout) :: a

    if (allocated(a%val)) deallocate(a%val)
    a%nnz = 0


    !
    ! Mark the object as empty just in case
    !
    call a%set_null()
    call a%set_nrows(izero)
    call a%set_ncols(izero)

    return

  end subroutine c_dns_free


end module psb_c_dns_mat_mod
diff --git a/ext/psb_c_ell_mat_mod.f90 b/ext/psb_c_ell_mat_mod.f90 new file mode 100644 index 00000000..6954946f --- /dev/null +++ b/ext/psb_c_ell_mat_mod.f90 @@ -0,0 +1,552 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! 
Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_ell_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. 
IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + complex(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => c_ell_is_by_rows + procedure, pass(a) :: get_size => c_ell_get_size + procedure, pass(a) :: get_nzeros => c_ell_get_nzeros + procedure, nopass :: get_fmt => c_ell_get_fmt + procedure, pass(a) :: sizeof => c_ell_sizeof + procedure, pass(a) :: csmm => psb_c_ell_csmm + procedure, pass(a) :: csmv => psb_c_ell_csmv + procedure, pass(a) :: inner_cssm => psb_c_ell_cssm + procedure, pass(a) :: inner_cssv => psb_c_ell_cssv + procedure, pass(a) :: scals => psb_c_ell_scals + procedure, pass(a) :: scalv => psb_c_ell_scal + procedure, pass(a) :: maxval => psb_c_ell_maxval + procedure, pass(a) :: csnmi => psb_c_ell_csnmi + procedure, pass(a) :: csnm1 => psb_c_ell_csnm1 + procedure, pass(a) :: rowsum => psb_c_ell_rowsum + procedure, pass(a) :: arwsum => psb_c_ell_arwsum + procedure, pass(a) :: colsum => psb_c_ell_colsum + procedure, pass(a) :: aclsum => psb_c_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_c_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_c_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_c_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_c_mv_ell_from_fmt + procedure, pass(a) 
:: csput_a => psb_c_ell_csput_a + procedure, pass(a) :: get_diag => psb_c_ell_get_diag + procedure, pass(a) :: csgetptn => psb_c_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_c_ell_csgetrow + procedure, pass(a) :: get_nz_row => c_ell_get_nz_row + procedure, pass(a) :: reinit => psb_c_ell_reinit + procedure, pass(a) :: trim => psb_c_ell_trim + procedure, pass(a) :: print => psb_c_ell_print + procedure, pass(a) :: free => c_ell_free + procedure, pass(a) :: mold => psb_c_ell_mold + procedure, pass(a) :: get_nrm => c_ell_get_nrm + + end type psb_c_ell_sparse_mat + + private :: c_ell_get_nzeros, c_ell_free, c_ell_get_fmt, & + & c_ell_get_size, c_ell_sizeof, c_ell_get_nz_row, & + & c_ell_is_by_rows + + interface + subroutine psb_c_ell_reallocate_nz(nz,a) + import :: psb_c_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_ell_sparse_mat), intent(inout) :: a + end subroutine psb_c_ell_reallocate_nz + end interface + + interface + subroutine psb_c_ell_reinit(a,clear) + import :: psb_c_ell_sparse_mat + class(psb_c_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_ell_reinit + end interface + + interface + subroutine psb_c_ell_trim(a) + import :: psb_c_ell_sparse_mat + class(psb_c_ell_sparse_mat), intent(inout) :: a + end subroutine psb_c_ell_trim + end interface + + interface + subroutine psb_c_ell_mold(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_mold + end interface + + interface + subroutine psb_c_ell_allocate_mnnz(m,n,a,nz) + import :: psb_c_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_ell_allocate_mnnz + end interface + + interface + subroutine 
psb_c_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_ell_print + end interface + + interface + subroutine psb_c_cp_ell_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_ell_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_to_coo + end interface + + interface + subroutine psb_c_cp_ell_from_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_from_coo + end interface + + interface + subroutine psb_c_cp_ell_to_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_to_fmt + end interface + + interface + subroutine psb_c_cp_ell_from_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_from_fmt + end interface + + interface + subroutine psb_c_mv_ell_to_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_to_coo + end interface + + interface + subroutine 
psb_c_mv_ell_from_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_from_coo + end interface + + interface + subroutine psb_c_mv_ell_to_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_to_fmt + end interface + + interface + subroutine psb_c_mv_ell_from_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_from_fmt + end interface + + interface + subroutine psb_c_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_csput_a + end interface + + interface + subroutine psb_c_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_ell_csgetptn + end interface + 
+ interface + subroutine psb_c_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_ell_csgetrow + end interface + + interface + subroutine psb_c_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_ell_csgetblk + end interface + + interface + subroutine psb_c_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_cssv + subroutine psb_c_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + 
integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_cssm + end interface + + interface + subroutine psb_c_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_csmv + subroutine psb_c_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_csmm + end interface + + + interface + function psb_c_ell_maxval(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_maxval + end interface + + interface + function psb_c_ell_csnmi(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_csnmi + end interface + + interface + function psb_c_ell_csnm1(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_csnm1 + end interface + + interface + subroutine psb_c_ell_rowsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_rowsum + end interface + + interface + subroutine psb_c_ell_arwsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_arwsum + 
end interface + + interface + subroutine psb_c_ell_colsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_colsum + end interface + + interface + subroutine psb_c_ell_aclsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_aclsum + end interface + + interface + subroutine psb_c_ell_get_diag(a,d,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_get_diag + end interface + + interface + subroutine psb_c_ell_scal(d,a,info,side) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_ell_scal + end interface + + interface + subroutine psb_c_ell_scals(d,a,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_scals + end interface + + interface + subroutine psi_c_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_c_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== ===================================
+
+
+  !
+  ! c_ell_is_by_rows
+  !   Always answers .true.; the argument A is only used to select
+  !   the type-bound procedure.
+  !
+  function c_ell_is_by_rows(a) result(res)
+    implicit none
+    class(psb_c_ell_sparse_mat), intent(in) :: a
+    logical  :: res
+    res = .true.
+  end function c_ell_is_by_rows
+
+  !
+  ! c_ell_sizeof
+  !   Storage estimate for the matrix: a fixed base of 8 plus the
+  !   footprint of VAL (two psb_sizeof_sp units per complex entry)
+  !   and of the integer arrays IRN, IDIAG and JA (psb_sizeof_ip
+  !   units per entry).
+  !
+  !   Components that are not allocated contribute nothing; calling
+  !   SIZE on an unallocated array is not permitted by the standard.
+  !   Element counts are requested directly in kind psb_epk_ so that
+  !   the products are formed in the wide kind of the result rather
+  !   than in default integer.
+  !
+  function c_ell_sizeof(a) result(res)
+    implicit none
+    class(psb_c_ell_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+
+    res = 8
+    if (allocated(a%val))   res = res + (2*psb_sizeof_sp) * size(a%val,kind=psb_epk_)
+    if (allocated(a%irn))   res = res + psb_sizeof_ip * size(a%irn,kind=psb_epk_)
+    if (allocated(a%idiag)) res = res + psb_sizeof_ip * size(a%idiag,kind=psb_epk_)
+    if (allocated(a%ja))    res = res + psb_sizeof_ip * size(a%ja,kind=psb_epk_)
+
+  end function c_ell_sizeof
+
+  !
+  ! c_ell_get_fmt
+  !   Format tag: 'ELL', blank padded to the 5-character result.
+  !
+  function c_ell_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'ELL'
+  end function c_ell_get_fmt
+
+  !
+  ! c_ell_get_nrm
+  !   Extent of VAL along its second dimension; 0 when VAL is not
+  !   allocated.
+  !   NOTE(review): presumably this is the padded row length of the
+  !   ELL storage - confirm against the routines that build VAL.
+  !
+  function c_ell_get_nrm(a) result(res)
+    implicit none
+    class(psb_c_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = 0
+    if (allocated(a%val)) res = size(a%val,2)
+  end function c_ell_get_nrm
+
+  !
+  ! c_ell_get_nzeros
+  !   Returns the NZT component unchanged.
+  !
+  function c_ell_get_nzeros(a) result(res)
+    implicit none
+    class(psb_c_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzt
+  end function c_ell_get_nzeros
+
+  !
+  ! c_ell_get_size
+  !   Number of entries that fit in the current storage: the smaller
+  !   of SIZE(JA) and SIZE(VAL) when both are allocated, the size of
+  !   whichever one is allocated otherwise, and -1 when neither is.
+  !
+  function c_ell_get_size(a) result(res)
+    implicit none
+    class(psb_c_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = -1
+    if (a%is_dev()) call a%sync()
+
+    ! RES is still -1 here, so JA (if present) simply sets it.
+    if (allocated(a%ja)) res = size(a%ja)
+
+    if (allocated(a%val)) then
+      if (res >= 0) then
+        res = min(res,size(a%val))
+      else
+        res = size(a%val)
+      end if
+    end if
+
+  end function c_ell_get_size
+
+
+  !
+  ! c_ell_get_nz_row
+  !   Entry IDX of IRN, i.e. the count stored for row IDX.
+  !   Returns 0 when IDX lies outside 1..get_nrows().
+  !
+  function  c_ell_get_nz_row(idx,a) result(res)
+
+    implicit none
+
+    class(psb_c_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(in)           :: idx
+    integer(psb_ipk_)                       :: res
+
+    res = 0
+    if (a%is_dev()) call a%sync()
+
+    if ((1<=idx).and.(idx<=a%get_nrows())) then
+      res = a%irn(idx)
+    end if
+
+  end function c_ell_get_nz_row
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  !
== ===================================
+
+  !
+  ! c_ell_free
+  !   Releases whichever of IDIAG, IRN, JA and VAL are allocated,
+  !   zeroes the stored nonzero count, then resets the object through
+  !   set_null() and sets both dimensions to izero.
+  !   Safe to call on a matrix that was never built: every
+  !   deallocation is guarded by ALLOCATED.
+  !
+  subroutine  c_ell_free(a)
+    implicit none
+
+    class(psb_c_ell_sparse_mat), intent(inout) :: a
+
+    if (allocated(a%idiag)) deallocate(a%idiag)
+    if (allocated(a%irn))   deallocate(a%irn)
+    if (allocated(a%ja))    deallocate(a%ja)
+    if (allocated(a%val))   deallocate(a%val)
+
+    ! get_nzeros() reports NZT; do not let it outlive the storage.
+    a%nzt = 0
+
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+
+    return
+
+  end subroutine c_ell_free
+
+
+end module psb_c_ell_mat_mod
diff --git a/ext/psb_c_hdia_mat_mod.f90 b/ext/psb_c_hdia_mat_mod.f90
new file mode 100644
index 00000000..fbac05de
--- /dev/null
+++ b/ext/psb_c_hdia_mat_mod.f90
@@ -0,0 +1,534 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_c_hdia_mat_mod + + use psb_c_base_mat_mod + + + type, extends(psb_c_base_sparse_mat) :: psb_c_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + complex(psb_spk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => c_hdia_get_size + procedure, pass(a) :: get_nzeros => c_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => c_hdia_set_nzeros + procedure, nopass :: get_fmt => c_hdia_get_fmt + procedure, pass(a) :: sizeof => c_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_c_hdia_csmm + procedure, pass(a) :: csmv => psb_c_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_c_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_c_hdia_cssv + ! procedure, pass(a) :: scals => psb_c_hdia_scals + ! procedure, pass(a) :: scalv => psb_c_hdia_scal + ! procedure, pass(a) :: maxval => psb_c_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_c_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_c_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_c_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_c_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_c_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_c_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_c_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_c_cp_hdia_to_fmt + ! 
procedure, pass(a) :: cp_from_fmt => psb_c_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_c_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_c_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_c_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_c_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_c_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => c_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_c_hdia_reinit + ! procedure, pass(a) :: trim => psb_c_hdia_trim + procedure, pass(a) :: print => psb_c_hdia_print + procedure, pass(a) :: free => c_hdia_free + procedure, pass(a) :: mold => psb_c_hdia_mold + + end type psb_c_hdia_sparse_mat + + private :: c_hdia_get_nzeros, c_hdia_set_nzeros, c_hdia_free, & + & c_hdia_get_fmt, c_hdia_sizeof +!!$ & +!!$ & c_hdia_get_nz_row c_hdia_get_size, + +!!$ interface +!!$ subroutine psb_c_hdia_reallocate_nz(nz,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_reinit(a,clear) +!!$ import :: psb_c_hdia_sparse_mat +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_c_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_trim(a) +!!$ import :: psb_c_hdia_sparse_mat +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdia_trim +!!$ end interface + + interface + subroutine psb_c_hdia_mold(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + 
integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdia_mold + end interface + + interface + subroutine psb_c_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_c_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hdia_allocate_mnnz + end interface + + interface + subroutine psb_c_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_hdia_print + end interface + + interface + subroutine psb_c_cp_hdia_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_hdia_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdia_to_coo + end interface + + interface + subroutine psb_c_cp_hdia_from_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ 
class(psb_c_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_c_mv_hdia_to_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdia_to_coo + end interface + + interface + subroutine psb_c_mv_hdia_from_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine 
psb_c_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ complex(psb_spk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_c_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), 
optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_cssv +!!$ subroutine psb_c_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_cssm +!!$ end interface + + interface + subroutine psb_c_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hdia_csmv +!!$ subroutine psb_c_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_c_hdia_maxval(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function 
psb_c_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_c_hdia_csnmi(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_c_hdia_csnm1(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_rowsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_arwsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_colsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_aclsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_get_diag(a,d,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_scal(d,a,info,side) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ 
+!!$      class(psb_c_hdia_sparse_mat), intent(inout) :: a
+!!$      complex(psb_spk_), intent(in) :: d(:)
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$      character, intent(in), optional :: side
+!!$    end subroutine psb_c_hdia_scal
+!!$  end interface
+
+!!$  interface
+!!$    subroutine psb_c_hdia_scals(d,a,info)
+!!$      import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$      class(psb_c_hdia_sparse_mat), intent(inout) :: a
+!!$      complex(psb_spk_), intent(in) :: d
+!!$      integer(psb_ipk_), intent(out) :: info
+!!$    end subroutine psb_c_hdia_scals
+!!$  end interface
+!!$
+
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+
+  !
+  ! c_hdia_sizeof
+  !   Storage estimate for the matrix: the footprint of the integer
+  !   arrays hackOffsets and diaOffsets (psb_sizeof_ip units per
+  !   entry) plus that of VAL (two psb_sizeof_sp units per complex
+  !   entry).
+  !
+  !   Element counts come from psb_size (psb_realloc_mod) and are
+  !   widened to psb_epk_ before multiplying, so each product is
+  !   formed in the kind of the result.
+  !
+  function c_hdia_sizeof(a) result(res)
+    use psb_realloc_mod, only : psb_size
+    implicit none
+    class(psb_c_hdia_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+
+    res = 0
+    res = res + int(psb_size(a%hackOffsets),psb_epk_) * psb_sizeof_ip
+    res = res + int(psb_size(a%diaOffsets),psb_epk_) * psb_sizeof_ip
+    res = res + int(psb_size(a%val),psb_epk_) * (2*psb_sizeof_sp)
+
+  end function c_hdia_sizeof
+
+  !
+  ! c_hdia_get_fmt
+  !   Format tag: 'HDIA', blank padded to the 5-character result.
+  !
+  function c_hdia_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HDIA'
+  end function c_hdia_get_fmt
+
+  !
+  ! c_hdia_get_nzeros
+  !   Returns the NZEROS component unchanged.
+  !
+  function c_hdia_get_nzeros(a) result(res)
+    implicit none
+    class(psb_c_hdia_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzeros
+  end function c_hdia_get_nzeros
+
+  !
+  ! c_hdia_set_nzeros
+  !   Stores NZ in the NZEROS component; no validation is performed.
+  !
+  subroutine c_hdia_set_nzeros(a,nz)
+    implicit none
+    class(psb_c_hdia_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: nz
+    a%nzeros = nz
+  end subroutine c_hdia_set_nzeros
+
+  ! function c_hdia_get_size(a) result(res)
+  !   implicit none
+  !   class(psb_c_hdia_sparse_mat), intent(in) :: a
+  !   integer(psb_ipk_) :: res
+
+  !   res = -1
+
+  !   if (allocated(a%ja)) then
+  !     if (res >= 0) then
+  !       res = min(res,size(a%ja))
+  !     else
+  !       res = size(a%ja)
+  !     end if
+  !   end if
+  !   if (allocated(a%val)) then
+  !     if (res >= 0) then
+  !
res = min(res,size(a%val))
+  !     else
+  !       res = size(a%val)
+  !     end if
+  !   end if
+
+  ! end function c_hdia_get_size
+
+
+  ! function c_hdia_get_nz_row(idx,a) result(res)
+
+  !   implicit none
+
+  !   class(psb_c_hdia_sparse_mat), intent(in) :: a
+  !   integer(psb_ipk_), intent(in) :: idx
+  !   integer(psb_ipk_) :: res
+
+  !   res = 0
+
+  !   if ((1<=idx).and.(idx<=a%get_nrows())) then
+  !     res = a%irn(idx)
+  !   end if
+
+  ! end function c_hdia_get_nz_row
+
+
+
+  ! ! == ===================================
+  ! !
+  ! !
+  ! !
+  ! ! Data management
+  ! !
+  ! !
+  ! !
+  ! !
+  ! !
+  ! ! == ===================================
+
+  !
+  ! c_hdia_free
+  !   Releases whichever of hackOffsets, diaOffsets and VAL are
+  !   allocated, zeroes the hack and nonzero counters, then resets the
+  !   object through set_null() and sets both dimensions to izero.
+  !
+  !   Each DEALLOCATE carries stat=info so that a failure does not
+  !   stop the program; the status is intentionally not inspected
+  !   because the routine has no way to report it to the caller.
+  !
+  subroutine  c_hdia_free(a)
+    implicit none
+
+    class(psb_c_hdia_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+
+    if (allocated(a%hackOffsets))&
+         & deallocate(a%hackOffsets,stat=info)
+    if (allocated(a%diaOffsets))&
+         & deallocate(a%diaOffsets,stat=info)
+    if (allocated(a%val))&
+         & deallocate(a%val,stat=info)
+
+    a%nhacks = 0
+    ! get_nzeros() reports NZEROS; do not let it outlive the storage.
+    a%nzeros = 0
+
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+
+    return
+
+  end subroutine c_hdia_free
+
+
+end module psb_c_hdia_mat_mod
diff --git a/ext/psb_c_hll_mat_mod.f90 b/ext/psb_c_hll_mat_mod.f90
new file mode 100644
index 00000000..966b60f5
--- /dev/null
+++ b/ext/psb_c_hll_mat_mod.f90
@@ -0,0 +1,564 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+!
not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+module psb_c_hll_mat_mod
+
+  use psb_c_base_mat_mod
+  use psi_ext_util_mod
+
+  type, extends(psb_c_base_sparse_mat) :: psb_c_hll_sparse_mat
+    !
+    ! HLL format. (Hacked ELL)
+    ! A modification of ELL.
+    ! Basic idea: pack and pad data in blocks of HCK rows;
+    ! this reduces the impact of a lone, very long row.
+    ! Notes:
+    ! 1. JA holds the column indices, padded with the row index.
+    ! 2. VAL holds the coefficients, padded with zeros
+    ! 3. IDIAG holds the position of the diagonal element
+    ! or 0 if it is not there, but is only relevant for
+    ! triangular matrices. In particular, a unit triangular matrix
+    ! will have IDIAG==0.
+    ! 4. IRN holds the actual number of nonzeros stored in each row
+    ! 5. Within a row, the indices are sorted for use of SV.
+    ! 6. hksz: hack size (multiple of 32)
+    ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val
+    !
+    !
+    ! 
+    integer(psb_ipk_) :: hksz, nzt   ! hksz: hack size; nzt: value reported by get_nzeros
+    integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:)
+    complex(psb_spk_), allocatable :: val(:)
+
+  contains
+    ! c_hll_* targets are implemented in this module; psb_c_* targets only have interfaces below.
+    procedure, pass(a) :: get_hksz => c_hll_get_hksz
+    procedure, pass(a) :: set_hksz => c_hll_set_hksz
+    procedure, pass(a) :: get_size => c_hll_get_size
+    procedure, pass(a) :: set_nzeros => c_hll_set_nzeros
+    procedure, pass(a) :: get_nzeros => c_hll_get_nzeros
+    procedure, nopass :: get_fmt => c_hll_get_fmt
+    procedure, pass(a) :: sizeof => c_hll_sizeof
+    procedure, pass(a) :: csmm => psb_c_hll_csmm
+    procedure, pass(a) :: csmv => psb_c_hll_csmv
+    procedure, pass(a) :: inner_cssm => psb_c_hll_cssm
+    procedure, pass(a) :: inner_cssv => psb_c_hll_cssv
+    procedure, pass(a) :: scals => psb_c_hll_scals
+    procedure, pass(a) :: scalv => psb_c_hll_scal
+    procedure, pass(a) :: maxval => psb_c_hll_maxval
+    procedure, pass(a) :: csnmi => psb_c_hll_csnmi
+    procedure, pass(a) :: csnm1 => psb_c_hll_csnm1
+    procedure, pass(a) :: rowsum => psb_c_hll_rowsum
+    procedure, pass(a) :: arwsum => psb_c_hll_arwsum
+    procedure, pass(a) :: colsum => psb_c_hll_colsum
+    procedure, pass(a) :: aclsum => psb_c_hll_aclsum
+    procedure, pass(a) :: reallocate_nz => psb_c_hll_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_c_hll_allocate_mnnz
+    procedure, pass(a) :: cp_to_coo => psb_c_cp_hll_to_coo
+    procedure, pass(a) :: cp_from_coo => psb_c_cp_hll_from_coo
+    procedure, pass(a) :: cp_to_fmt => psb_c_cp_hll_to_fmt
+    procedure, pass(a) :: cp_from_fmt => psb_c_cp_hll_from_fmt
+    procedure, pass(a) :: mv_to_coo => psb_c_mv_hll_to_coo
+    procedure, pass(a) :: mv_from_coo => psb_c_mv_hll_from_coo
+    procedure, pass(a) :: mv_to_fmt => psb_c_mv_hll_to_fmt
+    procedure, pass(a) :: mv_from_fmt => psb_c_mv_hll_from_fmt
+    procedure, pass(a) :: csput_a => psb_c_hll_csput_a
+    procedure, pass(a) :: get_diag => psb_c_hll_get_diag
+    procedure, pass(a) :: csgetptn => psb_c_hll_csgetptn
+    procedure, pass(a) :: csgetrow => psb_c_hll_csgetrow
+    procedure, pass(a) :: get_nz_row => c_hll_get_nz_row
+    procedure, pass(a) :: reinit => psb_c_hll_reinit
+    procedure, pass(a) :: print => psb_c_hll_print
+    procedure, pass(a) :: free => c_hll_free
+    procedure, pass(a) :: mold => psb_c_hll_mold
+
+  end type psb_c_hll_sparse_mat
+
+  private :: c_hll_get_nzeros, c_hll_free, c_hll_get_fmt, &
+       & c_hll_get_size, c_hll_sizeof, c_hll_get_nz_row, &
+       & c_hll_set_nzeros, c_hll_get_hksz, c_hll_set_hksz
+  ! Explicit interfaces follow; the procedure bodies are not part of this module.
+  interface
+    subroutine psb_c_hll_reallocate_nz(nz,a)
+      import :: psb_c_hll_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+    end subroutine psb_c_hll_reallocate_nz
+  end interface
+
+  interface
+    subroutine psb_c_hll_reinit(a,clear)
+      import :: psb_c_hll_sparse_mat
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      logical, intent(in), optional :: clear
+    end subroutine psb_c_hll_reinit
+  end interface
+
+  interface
+    subroutine psb_c_hll_mold(a,b,info)
+      import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_hll_mold
+  end interface
+
+  interface
+    subroutine psb_c_hll_allocate_mnnz(m,n,a,nz)
+      import :: psb_c_hll_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_c_hll_allocate_mnnz
+  end interface
+
+  interface
+    subroutine psb_c_hll_print(iout,a,iv,head,ivr,ivc)
+      import :: psb_c_hll_sparse_mat, psb_ipk_, psb_lpk_
+      integer(psb_ipk_), intent(in) :: iout
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      integer(psb_lpk_), intent(in), optional :: iv(:)
+      character(len=*), optional :: head
+      integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+    end subroutine psb_c_hll_print
+  end interface
+  ! Conversions: cp_* take a read-only source (intent(in)); mv_* declare both a and b intent(inout).
+  interface
+    subroutine psb_c_cp_hll_to_coo(a,b,info)
+      import :: psb_c_coo_sparse_mat, psb_c_hll_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      class(psb_c_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cp_hll_to_coo
+  end interface
+
+  interface
+    subroutine psb_c_cp_hll_from_coo(a,b,info)
+      import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      class(psb_c_coo_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cp_hll_from_coo
+  end interface
+
+  interface
+    subroutine psb_c_cp_hll_to_fmt(a,b,info)
+      import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      class(psb_c_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cp_hll_to_fmt
+  end interface
+
+  interface
+    subroutine psb_c_cp_hll_from_fmt(a,b,info)
+      import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      class(psb_c_base_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cp_hll_from_fmt
+  end interface
+
+  interface
+    subroutine psb_c_mv_hll_to_coo(a,b,info)
+      import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      class(psb_c_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_mv_hll_to_coo
+  end interface
+
+  interface
+    subroutine psb_c_mv_hll_from_coo(a,b,info)
+      import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      class(psb_c_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_mv_hll_from_coo
+  end interface
+
+  interface
+    subroutine psb_c_mv_hll_to_fmt(a,b,info)
+      import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      class(psb_c_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_mv_hll_to_fmt
+  end interface
+
+  interface
+    subroutine psb_c_mv_hll_from_fmt(a,b,info)
+      import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      class(psb_c_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_mv_hll_from_fmt
+  end interface
+
+  interface
+    subroutine psb_c_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      complex(psb_spk_), intent(in) :: val(:)
+      integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),&
+           & imin,imax,jmin,jmax
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_hll_csput_a
+  end interface
+
+  interface
+    subroutine psb_c_hll_csgetptn(imin,imax,a,nz,ia,ja,info,&
+         & jmin,jmax,iren,append,nzin,rscale,cscale)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: imin,imax
+      integer(psb_ipk_), intent(out) :: nz
+      integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
+      integer(psb_ipk_),intent(out) :: info
+      logical, intent(in), optional :: append
+      integer(psb_ipk_), intent(in), optional :: iren(:)
+      integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
+      logical, intent(in), optional :: rscale,cscale
+    end subroutine psb_c_hll_csgetptn
+  end interface
+
+  interface
+    subroutine psb_c_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
+         & jmin,jmax,iren,append,nzin,rscale,cscale,chksz)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: imin,imax
+      integer(psb_ipk_), intent(out) :: nz
+      integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
+      complex(psb_spk_), allocatable, intent(inout) :: val(:)
+      integer(psb_ipk_),intent(out) :: info
+      logical, intent(in), optional :: append
+      integer(psb_ipk_), intent(in), optional :: iren(:)
+      integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
+      logical, intent(in), optional :: rscale,cscale,chksz
+    end subroutine psb_c_hll_csgetrow
+  end interface
+  ! NOTE(review): psb_c_hll_csgetblk is declared here but is not bound in psb_c_hll_sparse_mat.
+  interface
+    subroutine psb_c_hll_csgetblk(imin,imax,a,b,info,&
+         & jmin,jmax,iren,append,rscale,cscale)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      class(psb_c_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(in) :: imin,imax
+      integer(psb_ipk_),intent(out) :: info
+      logical, intent(in), optional :: append
+      integer(psb_ipk_), intent(in), optional :: iren(:)
+      integer(psb_ipk_), intent(in), optional :: jmin,jmax
+      logical, intent(in), optional :: rscale,cscale
+    end subroutine psb_c_hll_csgetblk
+  end interface
+  ! cssv/cssm and csmv/csmm come in pairs: rank-1 x,y for the *v form, rank-2 for the *m form.
+  interface
+    subroutine psb_c_hll_cssv(alpha,a,x,beta,y,info,trans)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(in) :: alpha, beta, x(:)
+      complex(psb_spk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_c_hll_cssv
+    subroutine psb_c_hll_cssm(alpha,a,x,beta,y,info,trans)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+      complex(psb_spk_), intent(inout) :: y(:,:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_c_hll_cssm
+  end interface
+
+  interface
+    subroutine psb_c_hll_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(in) :: alpha, beta, x(:)
+      complex(psb_spk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_c_hll_csmv
+    subroutine psb_c_hll_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+      complex(psb_spk_), intent(inout) :: y(:,:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_c_hll_csmm
+  end interface
+
+  ! The three functions below return a real(psb_spk_) scalar even though the matrix is complex.
+  interface
+    function psb_c_hll_maxval(a) result(res)
+      import :: psb_c_hll_sparse_mat, psb_spk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_) :: res
+    end function psb_c_hll_maxval
+  end interface
+
+  interface
+    function psb_c_hll_csnmi(a) result(res)
+      import :: psb_c_hll_sparse_mat, psb_spk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_) :: res
+    end function psb_c_hll_csnmi
+  end interface
+
+  interface
+    function psb_c_hll_csnm1(a) result(res)
+      import :: psb_c_hll_sparse_mat, psb_spk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_) :: res
+    end function psb_c_hll_csnm1
+  end interface
+  ! rowsum/colsum fill a complex vector d; arwsum/aclsum fill a real vector d.
+  interface
+    subroutine psb_c_hll_rowsum(d,a)
+      import :: psb_c_hll_sparse_mat, psb_spk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(out) :: d(:)
+    end subroutine psb_c_hll_rowsum
+  end interface
+
+  interface
+    subroutine psb_c_hll_arwsum(d,a)
+      import :: psb_c_hll_sparse_mat, psb_spk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(out) :: d(:)
+    end subroutine psb_c_hll_arwsum
+  end interface
+
+  interface
+    subroutine psb_c_hll_colsum(d,a)
+      import :: psb_c_hll_sparse_mat, psb_spk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(out) :: d(:)
+    end subroutine psb_c_hll_colsum
+  end interface
+
+  interface
+    subroutine psb_c_hll_aclsum(d,a)
+      import :: psb_c_hll_sparse_mat, psb_spk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(out) :: d(:)
+    end subroutine psb_c_hll_aclsum
+  end interface
+
+  interface
+    subroutine psb_c_hll_get_diag(a,d,info)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(in) :: a
+      complex(psb_spk_), intent(out) :: d(:)
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_hll_get_diag
+  end interface
+  ! scal takes a vector d (bound as scalv); scals takes a scalar d.
+  interface
+    subroutine psb_c_hll_scal(d,a,info,side)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      complex(psb_spk_), intent(in) :: d(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, intent(in), optional :: side
+    end subroutine psb_c_hll_scal
+  end interface
+
+  interface
+    subroutine psb_c_hll_scals(d,a,info)
+      import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      complex(psb_spk_), intent(in) :: d
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_hll_scals
+  end interface
+  ! Generic name psi_convert_hll_from_coo with its specific for psb_c_hll_sparse_mat.
+  interface psi_convert_hll_from_coo
+    subroutine psi_c_convert_hll_from_coo(a,hksz,tmp,info)
+      import :: psb_c_hll_sparse_mat, psb_ipk_, psb_c_coo_sparse_mat
+      implicit none
+      class(psb_c_hll_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in) :: hksz
+      class(psb_c_coo_sparse_mat), intent(in) :: tmp
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psi_c_convert_hll_from_coo
+  end interface
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+
+  ! Memory estimate for the matrix, in the units of psb_sizeof_sp/psb_sizeof_ip (presumably bytes).
+  function c_hll_sizeof(a) result(res)
+    implicit none
+    class(psb_c_hll_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    if (a%is_dev()) call a%sync()
+    res = 8   ! fixed part; presumably the two scalar components hksz and nzt -- TODO confirm
+    if (allocated(a%val)) res = res + (2*psb_sizeof_sp) * size(a%val)   ! size() needs an allocated array
+    if (allocated(a%irn)) res = res + psb_sizeof_ip * size(a%irn)
+    if (allocated(a%idiag)) res = res + psb_sizeof_ip * size(a%idiag)
+    if (allocated(a%ja)) res = res + psb_sizeof_ip * size(a%ja)
+    if (allocated(a%hkoffs)) res = res + psb_sizeof_ip * size(a%hkoffs)
+
+  end function c_hll_sizeof
+  ! Format tag of this storage class: 'HLL', blank-padded to the declared length 5.
+  function c_hll_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HLL'
+  end function c_hll_get_fmt
+  ! Store n as the nonzero count later returned by c_hll_get_nzeros.
+  subroutine c_hll_set_nzeros(a,n)
+    implicit none
+    class(psb_c_hll_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: n
+
+    a%nzt = n
+  end subroutine c_hll_set_nzeros
+  ! Return the stored nonzero count a%nzt.
+  function c_hll_get_nzeros(a) result(res)
+    implicit none
+    class(psb_c_hll_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzt
+  end function c_hll_get_nzeros
+  ! Usable capacity: the smaller of size(ja) and size(val), or -1 when neither is allocated.
+  function c_hll_get_size(a) result(res)
+    implicit none
+    class(psb_c_hll_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    if (a%is_dev()) call a%sync()
+    ! -1 marks "no array seen yet"; it is also the result if ja and val are both unallocated.
+    res = -1
+
+    if (allocated(a%ja)) then
+      if (res >= 0) then   ! never taken for ja (res is still -1); kept for symmetry with val
+        res = min(res,size(a%ja))
+      else
+        res = size(a%ja)
+      end if
+    end if
+    if (allocated(a%val)) then
+      if (res >= 0) then
+        res = min(res,size(a%val))
+      else
+        res = size(a%val)
+      end if
+    end if
+
+  end function c_hll_get_size
+
+
+  ! Number of entries recorded for row idx (a%irn(idx)); 0 when idx is outside 1..get_nrows().
+  function c_hll_get_nz_row(idx,a) result(res)
+
+    implicit none
+
+    class(psb_c_hll_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(in) :: idx
+    integer(psb_ipk_) :: res
+
+    res = 0
+
+    if ((1<=idx).and.(idx<=a%get_nrows())) then
+      res = a%irn(idx)
+    end if
+
+  end function c_hll_get_nz_row
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+
+  subroutine c_hll_free(a)
+    implicit none
+    ! Release every allocatable component of the matrix a and reset its state.
+    class(psb_c_hll_sparse_mat), intent(inout) :: a
+    ! Each array is tested on its own allocation status before it is released.
+    if (allocated(a%idiag)) deallocate(a%idiag)
+    if (allocated(a%irn)) deallocate(a%irn)
+    if (allocated(a%ja)) deallocate(a%ja)
+    if (allocated(a%val)) deallocate(a%val)
+    if (allocated(a%hkoffs)) deallocate(a%hkoffs)   ! guard on hkoffs itself: a%val is already gone here
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+    call a%set_hksz(izero)
+    ! NOTE(review): a%nzt is not reset here, so get_nzeros keeps its previous value after free.
+    return
+
+  end subroutine c_hll_free
+  ! Set the hack size stored in a%hksz.
+  subroutine c_hll_set_hksz(a,n)
+    implicit none
+    class(psb_c_hll_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: n
+
+    a%hksz = n
+  end subroutine c_hll_set_hksz
+  ! Return the hack size stored in a%hksz.
+  function c_hll_get_hksz(a) result(res)
+    implicit none
+    class(psb_c_hll_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = a%hksz
+
+  end function c_hll_get_hksz
+
+end module psb_c_hll_mat_mod
diff --git a/ext/psb_d_dia_mat_mod.f90 b/ext/psb_d_dia_mat_mod.f90
new file mode 100644
index 00000000..7df615ac
--- /dev/null
+++ b/ext/psb_d_dia_mat_mod.f90
@@ -0,0 +1,513 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! 
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_dia_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => d_dia_get_size + procedure, pass(a) :: get_nzeros => d_dia_get_nzeros + procedure, nopass :: get_fmt => d_dia_get_fmt + procedure, pass(a) :: sizeof => d_dia_sizeof + procedure, pass(a) :: csmm => psb_d_dia_csmm + procedure, pass(a) :: csmv => psb_d_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_dia_cssm + ! 
procedure, pass(a) :: inner_cssv => psb_d_dia_cssv + procedure, pass(a) :: scals => psb_d_dia_scals + procedure, pass(a) :: scalv => psb_d_dia_scal + procedure, pass(a) :: maxval => psb_d_dia_maxval + procedure, pass(a) :: rowsum => psb_d_dia_rowsum + procedure, pass(a) :: arwsum => psb_d_dia_arwsum + procedure, pass(a) :: colsum => psb_d_dia_colsum + procedure, pass(a) :: aclsum => psb_d_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_d_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_d_dia_csput_a + procedure, pass(a) :: get_diag => psb_d_dia_get_diag + procedure, pass(a) :: csgetptn => psb_d_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_d_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => d_dia_get_nz_row + procedure, pass(a) :: reinit => psb_d_dia_reinit + ! 
procedure, pass(a) :: trim => psb_d_dia_trim + procedure, pass(a) :: print => psb_d_dia_print + procedure, pass(a) :: free => d_dia_free + procedure, pass(a) :: mold => psb_d_dia_mold + + end type psb_d_dia_sparse_mat + + private :: d_dia_get_nzeros, d_dia_free, d_dia_get_fmt, & + & d_dia_sizeof !, d_dia_get_size, d_dia_get_nz_row + + interface + subroutine psb_d_dia_reallocate_nz(nz,a) + import :: psb_d_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dia_sparse_mat), intent(inout) :: a + end subroutine psb_d_dia_reallocate_nz + end interface + + interface + subroutine psb_d_dia_reinit(a,clear) + import :: psb_d_dia_sparse_mat + class(psb_d_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_dia_reinit + end interface + + interface + subroutine psb_d_dia_trim(a) + import :: psb_d_dia_sparse_mat + class(psb_d_dia_sparse_mat), intent(inout) :: a + end subroutine psb_d_dia_trim + end interface + + interface + subroutine psb_d_dia_mold(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_mold + end interface + + interface + subroutine psb_d_dia_allocate_mnnz(m,n,a,nz) + import :: psb_d_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_dia_allocate_mnnz + end interface + + interface + subroutine psb_d_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_dia_print + end 
interface + + interface + subroutine psb_d_cp_dia_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_dia_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_to_coo + end interface + + interface + subroutine psb_d_cp_dia_from_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_from_coo + end interface + + interface + subroutine psb_d_cp_dia_to_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_to_fmt + end interface + + interface + subroutine psb_d_cp_dia_from_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_from_fmt + end interface + + interface + subroutine psb_d_mv_dia_to_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_to_coo + end interface + + interface + subroutine psb_d_mv_dia_from_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_from_coo + end interface + + interface + subroutine psb_d_mv_dia_to_fmt(a,b,info) + import :: 
psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_to_fmt + end interface + + interface + subroutine psb_d_mv_dia_from_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_from_fmt + end interface + + interface + subroutine psb_d_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_csput_a + end interface + + interface + subroutine psb_d_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_dia_csgetptn + end interface + + interface + subroutine psb_d_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: 
ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_dia_csgetrow + end interface + + interface + subroutine psb_d_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_dia_csgetblk + end interface + + interface + subroutine psb_d_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_cssv + subroutine psb_d_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_cssm + end interface + + interface + subroutine psb_d_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) 
:: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_csmv + subroutine psb_d_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_csmm + end interface + + + interface + function psb_d_dia_maxval(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_maxval + end interface + + interface + function psb_d_dia_csnmi(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_csnmi + end interface + + interface + function psb_d_dia_csnm1(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_csnm1 + end interface + + interface + subroutine psb_d_dia_rowsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_rowsum + end interface + + interface + subroutine psb_d_dia_arwsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_arwsum + end interface + + interface + subroutine psb_d_dia_colsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_colsum + end interface + + interface + subroutine psb_d_dia_aclsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), 
intent(out) :: d(:) + end subroutine psb_d_dia_aclsum + end interface + + interface + subroutine psb_d_dia_get_diag(a,d,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_get_diag + end interface + + interface + subroutine psb_d_dia_scal(d,a,info,side) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_dia_scal + end interface + + interface + subroutine psb_d_dia_scals(d,a,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_d_convert_dia_from_coo(a,tmp,info) + import :: psb_d_dia_sparse_mat, psb_ipk_, psb_d_coo_sparse_mat + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_d_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== =================================== + + + function d_dia_sizeof(a) result(res) + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + ! Bytes used: 8 fixed, plus data/offset only when allocated; sizes taken in psb_epk_ so the count cannot wrap + res = 8 + if (allocated(a%data)) res = res + psb_sizeof_dp * size(a%data,kind=psb_epk_) + if (allocated(a%offset)) res = res + psb_sizeof_ip * size(a%offset,kind=psb_epk_) + end function d_dia_sizeof + + function d_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function d_dia_get_fmt + + function d_dia_get_nzeros(a) result(res) + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function d_dia_get_nzeros + + ! function d_dia_get_size(a) result(res) + ! implicit none + ! class(psb_d_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function d_dia_get_size + + + ! function d_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_d_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function d_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! 
== =================================== + + subroutine d_dia_free(a) + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_dia_free + + +end module psb_d_dia_mat_mod diff --git a/ext/psb_d_dns_mat_mod.f90 b/ext/psb_d_dns_mat_mod.f90 new file mode 100644 index 00000000..f8c977bc --- /dev/null +++ b/ext/psb_d_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_d_dns_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_dpk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + real(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => d_dns_get_size + procedure, pass(a) :: get_nzeros => d_dns_get_nzeros + procedure, nopass :: get_fmt => d_dns_get_fmt + procedure, pass(a) :: sizeof => d_dns_sizeof + procedure, pass(a) :: csmv => psb_d_dns_csmv + procedure, pass(a) :: csmm => psb_d_dns_csmm + procedure, pass(a) :: csnmi => psb_d_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_d_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_d_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_d_dns_get_diag + procedure, pass(a) :: csgetrow => psb_d_dns_csgetrow + procedure, pass(a) :: get_nz_row => d_dns_get_nz_row + procedure, pass(a) :: trim => psb_d_dns_trim + procedure, pass(a) :: free => d_dns_free + procedure, pass(a) :: mold => psb_d_dns_mold + + end type psb_d_dns_sparse_mat + + private :: d_dns_get_nzeros, d_dns_free, d_dns_get_fmt, & + & d_dns_get_size, d_dns_sizeof, d_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! 
\memberof psb_d_dns_sparse_mat + !! \brief One-parameter version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_d_dns_reallocate_nz(nz,a) + import :: psb_d_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dns_sparse_mat), intent(inout) :: a + end subroutine psb_d_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_d_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_d_dns_trim(a) + import :: psb_d_dns_sparse_mat + class(psb_d_dns_sparse_mat), intent(inout) :: a + end subroutine psb_d_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_d_dns_sparse_mat + !! \brief Allocate a class(psb_d_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_dns_mold(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_d_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! 
+ interface + subroutine psb_d_dns_allocate_mnnz(m,n,a,nz) + import :: psb_d_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Copy and convert to psb_d_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_cp_dns_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_dns_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Copy and convert from psb_d_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_d_cp_dns_from_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Convert to psb_d_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_mv_dns_to_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_d_dns_sparse_mat + !! 
\brief Convert from psb_d_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_d_mv_dns_from_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_d_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the maximum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! 
+ interface + subroutine psb_d_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_d_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_d_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_d_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! 
\param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_d_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_d_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_d_dns_csnmi(a) result(res) + import :: psb_d_dns_sparse_mat, psb_dpk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_d_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i,i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_d_dns_get_diag(a,d,info) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_d_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function d_dns_sizeof(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + ! nnz scalar first; val counted only when allocated, sized in psb_epk_ so the count cannot wrap + res = psb_sizeof_ip + if (allocated(a%val)) res = res + psb_sizeof_dp * size(a%val,kind=psb_epk_) + + end function d_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_d_dns_sparse_mat + !! \brief return a short descriptive name (e.g. 
COO CSR etc.) + ! 
+ function d_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function d_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_d_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function d_dns_get_nzeros(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function d_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_d_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries; returns -1 when val is not allocated + function d_dns_get_size(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (allocated(a%val)) res = size(a%val) + end function d_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_d_dns_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function d_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function d_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_d_dns_sparse_mat + !! Name says all + + subroutine d_dns_free(a) + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_dns_free + + +end module psb_d_dns_mat_mod diff --git a/ext/psb_d_ell_mat_mod.f90 b/ext/psb_d_ell_mat_mod.f90 new file mode 100644 index 00000000..cc945baf --- /dev/null +++ b/ext/psb_d_ell_mat_mod.f90 @@ -0,0 +1,552 @@ +! 
Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! 
Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_ell_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. 
IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => d_ell_is_by_rows + procedure, pass(a) :: get_size => d_ell_get_size + procedure, pass(a) :: get_nzeros => d_ell_get_nzeros + procedure, nopass :: get_fmt => d_ell_get_fmt + procedure, pass(a) :: sizeof => d_ell_sizeof + procedure, pass(a) :: csmm => psb_d_ell_csmm + procedure, pass(a) :: csmv => psb_d_ell_csmv + procedure, pass(a) :: inner_cssm => psb_d_ell_cssm + procedure, pass(a) :: inner_cssv => psb_d_ell_cssv + procedure, pass(a) :: scals => psb_d_ell_scals + procedure, pass(a) :: scalv => psb_d_ell_scal + procedure, pass(a) :: maxval => psb_d_ell_maxval + procedure, pass(a) :: csnmi => psb_d_ell_csnmi + procedure, pass(a) :: csnm1 => psb_d_ell_csnm1 + procedure, pass(a) :: rowsum => psb_d_ell_rowsum + procedure, pass(a) :: arwsum => psb_d_ell_arwsum + procedure, pass(a) :: colsum => psb_d_ell_colsum + procedure, pass(a) :: aclsum => psb_d_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_d_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_d_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_d_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_d_mv_ell_from_fmt + procedure, pass(a) :: 
csput_a => psb_d_ell_csput_a + procedure, pass(a) :: get_diag => psb_d_ell_get_diag + procedure, pass(a) :: csgetptn => psb_d_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_d_ell_csgetrow + procedure, pass(a) :: get_nz_row => d_ell_get_nz_row + procedure, pass(a) :: reinit => psb_d_ell_reinit + procedure, pass(a) :: trim => psb_d_ell_trim + procedure, pass(a) :: print => psb_d_ell_print + procedure, pass(a) :: free => d_ell_free + procedure, pass(a) :: mold => psb_d_ell_mold + procedure, pass(a) :: get_nrm => d_ell_get_nrm + + end type psb_d_ell_sparse_mat + + private :: d_ell_get_nzeros, d_ell_free, d_ell_get_fmt, & + & d_ell_get_size, d_ell_sizeof, d_ell_get_nz_row, & + & d_ell_is_by_rows + + interface + subroutine psb_d_ell_reallocate_nz(nz,a) + import :: psb_d_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_ell_sparse_mat), intent(inout) :: a + end subroutine psb_d_ell_reallocate_nz + end interface + + interface + subroutine psb_d_ell_reinit(a,clear) + import :: psb_d_ell_sparse_mat + class(psb_d_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_ell_reinit + end interface + + interface + subroutine psb_d_ell_trim(a) + import :: psb_d_ell_sparse_mat + class(psb_d_ell_sparse_mat), intent(inout) :: a + end subroutine psb_d_ell_trim + end interface + + interface + subroutine psb_d_ell_mold(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_mold + end interface + + interface + subroutine psb_d_ell_allocate_mnnz(m,n,a,nz) + import :: psb_d_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_ell_allocate_mnnz + end interface + + interface + subroutine 
psb_d_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_ell_print + end interface + + interface + subroutine psb_d_cp_ell_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_ell_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_to_coo + end interface + + interface + subroutine psb_d_cp_ell_from_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_from_coo + end interface + + interface + subroutine psb_d_cp_ell_to_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_to_fmt + end interface + + interface + subroutine psb_d_cp_ell_from_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_from_fmt + end interface + + interface + subroutine psb_d_mv_ell_to_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_to_coo + end interface + + interface + subroutine 
psb_d_mv_ell_from_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_from_coo + end interface + + interface + subroutine psb_d_mv_ell_to_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_to_fmt + end interface + + interface + subroutine psb_d_mv_ell_from_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_from_fmt + end interface + + interface + subroutine psb_d_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_csput_a + end interface + + interface + subroutine psb_d_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_ell_csgetptn + end interface + + 
interface + subroutine psb_d_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_ell_csgetrow + end interface + + interface + subroutine psb_d_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_ell_csgetblk + end interface + + interface + subroutine psb_d_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_cssv + subroutine psb_d_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), 
intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_cssm + end interface + + interface + subroutine psb_d_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_csmv + subroutine psb_d_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_csmm + end interface + + + interface + function psb_d_ell_maxval(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_maxval + end interface + + interface + function psb_d_ell_csnmi(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_csnmi + end interface + + interface + function psb_d_ell_csnm1(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_csnm1 + end interface + + interface + subroutine psb_d_ell_rowsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_rowsum + end interface + + interface + subroutine psb_d_ell_arwsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_arwsum + end interface + + interface + 
subroutine psb_d_ell_colsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_colsum + end interface + + interface + subroutine psb_d_ell_aclsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_aclsum + end interface + + interface + subroutine psb_d_ell_get_diag(a,d,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_get_diag + end interface + + interface + subroutine psb_d_ell_scal(d,a,info,side) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_ell_scal + end interface + + interface + subroutine psb_d_ell_scals(d,a,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_scals + end interface + + interface + subroutine psi_d_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_d_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== ===================================
+
+
+  !
+  ! Storage-order query: this format always answers .true.
+  !
+  function d_ell_is_by_rows(a) result(res)
+    implicit none
+    class(psb_d_ell_sparse_mat), intent(in) :: a
+    logical  :: res
+    res = .true.
+  end function d_ell_is_by_rows
+
+  !
+  ! Memory footprint in bytes: a fixed 8 plus the bytes held by
+  ! VAL, IRN, IDIAG and JA. Calls a%sync() first when a%is_dev()
+  ! is true.
+  ! Components that are not allocated contribute nothing; SIZE must
+  ! not be applied to an unallocated array. The element counts are
+  ! taken with kind=psb_epk_ so that the products are evaluated in
+  ! the (wide) kind of the result.
+  !
+  function d_ell_sizeof(a) result(res)
+    implicit none
+    class(psb_d_ell_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    if (a%is_dev()) call a%sync()
+    res = 8
+    if (allocated(a%val)) &
+         & res = res + psb_sizeof_dp * size(a%val,kind=psb_epk_)
+    if (allocated(a%irn)) &
+         & res = res + psb_sizeof_ip * size(a%irn,kind=psb_epk_)
+    if (allocated(a%idiag)) &
+         & res = res + psb_sizeof_ip * size(a%idiag,kind=psb_epk_)
+    if (allocated(a%ja)) &
+         & res = res + psb_sizeof_ip * size(a%ja,kind=psb_epk_)
+
+  end function d_ell_sizeof
+
+  !
+  ! Format name, returned blank-padded in a character(len=5).
+  !
+  function d_ell_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'ELL'
+  end function d_ell_get_fmt
+
+  !
+  ! Second extent of VAL, or 0 when VAL is not allocated.
+  !
+  function d_ell_get_nrm(a) result(res)
+    implicit none
+    class(psb_d_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = 0
+    if (allocated(a%val)) res = size(a%val,2,kind=psb_ipk_)
+  end function d_ell_get_nrm
+
+  !
+  ! Returns the stored nonzero counter NZT.
+  !
+  function d_ell_get_nzeros(a) result(res)
+    implicit none
+    class(psb_d_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzt
+  end function d_ell_get_nzeros
+
+  !
+  ! Number of entries that fit in the current storage: the smaller
+  ! of size(JA) and size(VAL) when both are allocated, the size of
+  ! whichever one is allocated otherwise, and -1 when neither is.
+  ! Calls a%sync() first when a%is_dev() is true.
+  !
+  function d_ell_get_size(a) result(res)
+    implicit none
+    class(psb_d_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = -1
+    if (a%is_dev()) call a%sync()
+
+    if (allocated(a%ja)) res = size(a%ja,kind=psb_ipk_)
+    if (allocated(a%val)) then
+      if (res >= 0) then
+        ! kind=psb_ipk_ keeps both MIN arguments of the same kind
+        res = min(res,size(a%val,kind=psb_ipk_))
+      else
+        res = size(a%val,kind=psb_ipk_)
+      end if
+    end if
+
+  end function d_ell_get_size
+
+
+  !
+  ! Number of nonzeros recorded for row IDX, read from IRN(IDX).
+  ! Returns 0 when IDX is outside 1..get_nrows() or when IRN is not
+  ! allocated. Calls a%sync() first when a%is_dev() is true.
+  !
+  function d_ell_get_nz_row(idx,a) result(res)
+
+    implicit none
+
+    class(psb_d_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(in) :: idx
+    integer(psb_ipk_) :: res
+
+    res = 0
+    if (a%is_dev()) call a%sync()
+
+    ! Nested tests: Fortran does not guarantee short-circuit
+    ! evaluation, so IRN is only touched once known to be allocated.
+    if (allocated(a%irn)) then
+      if ((1<=idx).and.(idx<=a%get_nrows())) then
+        res = a%irn(idx)
+      end if
+    end if
+
+  end function d_ell_get_nz_row
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+
+  !
+  ! Release all storage held by the matrix and return it to an
+  ! empty state: IDIAG, IRN, JA and VAL are deallocated when
+  ! allocated, the nonzero counter NZT is zeroed so that
+  ! get_nzeros() does not report a stale count, and the
+  ! state/dimensions are reset through set_null/set_nrows/set_ncols.
+  !
+  subroutine d_ell_free(a)
+    implicit none
+
+    class(psb_d_ell_sparse_mat), intent(inout) :: a
+
+    if (allocated(a%idiag)) deallocate(a%idiag)
+    if (allocated(a%irn)) deallocate(a%irn)
+    if (allocated(a%ja)) deallocate(a%ja)
+    if (allocated(a%val)) deallocate(a%val)
+    a%nzt = 0
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+
+    return
+
+  end subroutine d_ell_free
+
+
+end module psb_d_ell_mat_mod
diff --git a/ext/psb_d_hdia_mat_mod.f90 b/ext/psb_d_hdia_mat_mod.f90
new file mode 100644
index 00000000..25bc6898
--- /dev/null
+++ b/ext/psb_d_hdia_mat_mod.f90
@@ -0,0 +1,534 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+module psb_d_hdia_mat_mod
+
+  use psb_d_base_mat_mod
+
+
+  type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat
+    !
+    ! HDIA format
+    !
+    ! Storage: two integer index arrays (hackOffsets, diaOffsets) and
+    ! one coefficient array (val), plus scalar bookkeeping.
+    ! NOTE(review): the names suggest a "hacked" (row-blocked)
+    ! diagonal layout with hacksize rows per block -- confirm against
+    ! the conversion routines, which are not part of this module.
+    !
+    integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:)
+    real(psb_dpk_), allocatable :: val(:)
+
+
+    ! Both counters start at zero so that get_nzeros() returns a
+    ! defined value on an object that has not been built yet.
+    integer(psb_ipk_) :: nhacks = 0, nzeros = 0
+    integer(psb_ipk_) :: hacksize = 32
+    integer(psb_epk_) :: dim=0
+
+  contains
+    ! Bindings left commented out below are not provided by this type.
+    ! procedure, pass(a) :: get_size => d_hdia_get_size
+    procedure, pass(a) :: get_nzeros => d_hdia_get_nzeros
+    procedure, pass(a) :: set_nzeros => d_hdia_set_nzeros
+    procedure, nopass :: get_fmt => d_hdia_get_fmt
+    procedure, pass(a) :: sizeof => d_hdia_sizeof
+    ! procedure, pass(a) :: csmm => psb_d_hdia_csmm
+    procedure, pass(a) :: csmv => psb_d_hdia_csmv
+    ! procedure, pass(a) :: inner_cssm => psb_d_hdia_cssm
+    ! procedure, pass(a) :: inner_cssv => psb_d_hdia_cssv
+    ! procedure, pass(a) :: scals => psb_d_hdia_scals
+    ! procedure, pass(a) :: scalv => psb_d_hdia_scal
+    ! procedure, pass(a) :: maxval => psb_d_hdia_maxval
+    ! procedure, pass(a) :: csnmi => psb_d_hdia_csnmi
+    ! procedure, pass(a) :: csnm1 => psb_d_hdia_csnm1
+    ! procedure, pass(a) :: rowsum => psb_d_hdia_rowsum
+    ! procedure, pass(a) :: arwsum => psb_d_hdia_arwsum
+    ! procedure, pass(a) :: colsum => psb_d_hdia_colsum
+    ! procedure, pass(a) :: aclsum => psb_d_hdia_aclsum
+    ! procedure, pass(a) :: reallocate_nz => psb_d_hdia_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_d_hdia_allocate_mnnz
+    procedure, pass(a) :: cp_to_coo => psb_d_cp_hdia_to_coo
+    procedure, pass(a) :: cp_from_coo => psb_d_cp_hdia_from_coo
+    ! procedure, pass(a) :: cp_to_fmt => psb_d_cp_hdia_to_fmt
+    ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_hdia_from_fmt
+    procedure, pass(a) :: mv_to_coo => psb_d_mv_hdia_to_coo
+    procedure, pass(a) :: mv_from_coo => psb_d_mv_hdia_from_coo
+    ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_hdia_to_fmt
+    ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_hdia_from_fmt
+    ! procedure, pass(a) :: csput_a => psb_d_hdia_csput_a
+    ! procedure, pass(a) :: get_diag => psb_d_hdia_get_diag
+    ! procedure, pass(a) :: csgetptn => psb_d_hdia_csgetptn
+    ! procedure, pass(a) :: csgetrow => psb_d_hdia_csgetrow
+    ! procedure, pass(a) :: get_nz_row => d_hdia_get_nz_row
+    ! procedure, pass(a) :: reinit => psb_d_hdia_reinit
+    ! procedure, pass(a) :: trim => psb_d_hdia_trim
+    procedure, pass(a) :: print => psb_d_hdia_print
+    procedure, pass(a) :: free => d_hdia_free
+    procedure, pass(a) :: mold => psb_d_hdia_mold
+
+  end type psb_d_hdia_sparse_mat
+
+  private :: d_hdia_get_nzeros, d_hdia_set_nzeros, d_hdia_free, &
+       & d_hdia_get_fmt, d_hdia_sizeof
+!!$ &
+!!$ & d_hdia_get_nz_row d_hdia_get_size,
+
+!!$ interface
+!!$ subroutine psb_d_hdia_reallocate_nz(nz,a)
+!!$ import :: psb_d_hdia_sparse_mat, psb_ipk_
+!!$ integer(psb_ipk_), intent(in) :: nz
+!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a
+!!$ end subroutine psb_d_hdia_reallocate_nz
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_d_hdia_reinit(a,clear)
+!!$ import :: psb_d_hdia_sparse_mat
+!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a
+!!$ logical, intent(in), optional :: clear
+!!$ end subroutine psb_d_hdia_reinit
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_d_hdia_trim(a)
+!!$ import :: psb_d_hdia_sparse_mat
+!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a
+!!$ end subroutine psb_d_hdia_trim
+!!$ end interface
+
+  interface
+    subroutine psb_d_hdia_mold(a,b,info)
+      import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+      class(psb_d_hdia_sparse_mat), intent(in) :: a
+      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ 
integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdia_mold + end interface + + interface + subroutine psb_d_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_d_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hdia_allocate_mnnz + end interface + + interface + subroutine psb_d_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_hdia_print + end interface + + interface + subroutine psb_d_cp_hdia_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_hdia_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdia_to_coo + end interface + + interface + subroutine psb_d_cp_hdia_from_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ 
class(psb_d_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_d_mv_hdia_to_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdia_to_coo + end interface + + interface + subroutine psb_d_mv_hdia_from_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine 
psb_d_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ real(psb_dpk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_d_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), 
optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_cssv +!!$ subroutine psb_d_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_cssm +!!$ end interface + + interface + subroutine psb_d_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hdia_csmv +!!$ subroutine psb_d_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_d_hdia_maxval(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_maxval +!!$ end interface 
+!!$ +!!$ interface +!!$ function psb_d_hdia_csnmi(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_d_hdia_csnm1(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_rowsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_arwsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_colsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_aclsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_get_diag(a,d,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_scal(d,a,info,side) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), 
intent(inout) :: a
+!!$ real(psb_dpk_), intent(in) :: d(:)
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character, intent(in), optional :: side
+!!$ end subroutine psb_d_hdia_scal
+!!$ end interface
+
+!!$ interface
+!!$ subroutine psb_d_hdia_scals(d,a,info)
+!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_
+!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a
+!!$ real(psb_dpk_), intent(in) :: d
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ end subroutine psb_d_hdia_scals
+!!$ end interface
+!!$
+
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+
+  !
+  ! Memory footprint in bytes of the three array components
+  ! (hackOffsets, diaOffsets, val); scalar components are not
+  ! counted. Calls a%sync() first when a%is_dev() is true.
+  ! Array lengths are obtained through psb_size.
+  ! NOTE(review): psb_size presumably returns 0 for an unallocated
+  ! array -- confirm in psb_realloc_mod.
+  !
+  function d_hdia_sizeof(a) result(res)
+    use psb_realloc_mod, only : psb_size
+    implicit none
+    class(psb_d_hdia_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+    res = 0
+
+    res = res + psb_size(a%hackOffsets)*psb_sizeof_ip
+    res = res + psb_size(a%diaOffsets)*psb_sizeof_ip
+    res = res + psb_size(a%val) * psb_sizeof_dp
+
+  end function d_hdia_sizeof
+
+  !
+  ! Format name, returned blank-padded in a character(len=5).
+  !
+  function d_hdia_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HDIA'
+  end function d_hdia_get_fmt
+
+  !
+  ! Returns the stored nonzero counter NZEROS.
+  !
+  function d_hdia_get_nzeros(a) result(res)
+    implicit none
+    class(psb_d_hdia_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzeros
+  end function d_hdia_get_nzeros
+
+  !
+  ! Overwrites the stored nonzero counter NZEROS with NZ.
+  !
+  subroutine d_hdia_set_nzeros(a,nz)
+    implicit none
+    class(psb_d_hdia_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: nz
+    a%nzeros = nz
+  end subroutine d_hdia_set_nzeros
+
+  ! function d_hdia_get_size(a) result(res)
+  ! implicit none
+  ! class(psb_d_hdia_sparse_mat), intent(in) :: a
+  ! integer(psb_ipk_) :: res
+
+  ! res = -1
+
+  ! if (allocated(a%ja)) then
+  ! if (res >= 0) then
+  ! res = min(res,size(a%ja))
+  ! else
+  ! res = size(a%ja)
+  ! end if
+  ! end if
+  ! if (allocated(a%val)) then
+  ! if (res >= 0) then
+  ! res = min(res,size(a%val))
+  ! else
+  ! res = size(a%val)
+  ! end if
+  ! end if
+
+  ! end function d_hdia_get_size
+
+
+  ! function d_hdia_get_nz_row(idx,a) result(res)
+
+  ! implicit none
+
+  ! class(psb_d_hdia_sparse_mat), intent(in) :: a
+  ! integer(psb_ipk_), intent(in) :: idx
+  ! integer(psb_ipk_) :: res
+
+  ! res = 0
+
+  ! if ((1<=idx).and.(idx<=a%get_nrows())) then
+  ! res = a%irn(idx)
+  ! end if
+
+  ! end function d_hdia_get_nz_row
+
+
+
+  ! ! == ===================================
+  ! !
+  ! !
+  ! !
+  ! ! Data management
+  ! !
+  ! !
+  ! !
+  ! !
+  ! !
+  ! ! == ===================================
+
+  !
+  ! Release all storage held by the matrix and return it to an
+  ! empty state: hackOffsets, diaOffsets and val are deallocated
+  ! when allocated, the NHACKS and NZEROS counters are zeroed, and
+  ! the state/dimensions are reset through
+  ! set_null/set_nrows/set_ncols.
+  ! Deallocation is best-effort: the STAT value is captured so a
+  ! failure does not abort the program, but it is not reported.
+  !
+  subroutine d_hdia_free(a)
+    implicit none
+
+    class(psb_d_hdia_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+
+    if (allocated(a%hackOffsets))&
+         & deallocate(a%hackOffsets,stat=info)
+    if (allocated(a%diaOffsets))&
+         & deallocate(a%diaOffsets,stat=info)
+    if (allocated(a%val))&
+         & deallocate(a%val,stat=info)
+    a%nhacks=0
+    ! keep get_nzeros() consistent with the now-empty storage
+    a%nzeros=0
+
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+
+    return
+
+  end subroutine d_hdia_free
+
+
+end module psb_d_hdia_mat_mod
diff --git a/ext/psb_d_hll_mat_mod.f90 b/ext/psb_d_hll_mat_mod.f90
new file mode 100644
index 00000000..acc3b312
--- /dev/null
+++ b/ext/psb_d_hll_mat_mod.f90
@@ -0,0 +1,564 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! 
software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_hll_mat_mod + + use psb_d_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! 
+ integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + ! d_hll_* targets are defined in this module; psb_d_* targets are declared by its interface bodies. + procedure, pass(a) :: get_hksz => d_hll_get_hksz + procedure, pass(a) :: set_hksz => d_hll_set_hksz + procedure, pass(a) :: get_size => d_hll_get_size + procedure, pass(a) :: set_nzeros => d_hll_set_nzeros + procedure, pass(a) :: get_nzeros => d_hll_get_nzeros + procedure, nopass :: get_fmt => d_hll_get_fmt + procedure, pass(a) :: sizeof => d_hll_sizeof + procedure, pass(a) :: csmm => psb_d_hll_csmm + procedure, pass(a) :: csmv => psb_d_hll_csmv + procedure, pass(a) :: inner_cssm => psb_d_hll_cssm + procedure, pass(a) :: inner_cssv => psb_d_hll_cssv + procedure, pass(a) :: scals => psb_d_hll_scals + procedure, pass(a) :: scalv => psb_d_hll_scal + procedure, pass(a) :: maxval => psb_d_hll_maxval + procedure, pass(a) :: csnmi => psb_d_hll_csnmi + procedure, pass(a) :: csnm1 => psb_d_hll_csnm1 + procedure, pass(a) :: rowsum => psb_d_hll_rowsum + procedure, pass(a) :: arwsum => psb_d_hll_arwsum + procedure, pass(a) :: colsum => psb_d_hll_colsum + procedure, pass(a) :: aclsum => psb_d_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_d_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_d_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_d_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_d_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_d_hll_csput_a + procedure, pass(a) :: get_diag => psb_d_hll_get_diag + procedure, pass(a) :: csgetptn => psb_d_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_d_hll_csgetrow + procedure, 
pass(a) :: get_nz_row => d_hll_get_nz_row + procedure, pass(a) :: reinit => psb_d_hll_reinit + procedure, pass(a) :: print => psb_d_hll_print + procedure, pass(a) :: free => d_hll_free + procedure, pass(a) :: mold => psb_d_hll_mold + + end type psb_d_hll_sparse_mat + ! The module-local implementations stay private; they are reachable through the type-bound names. + private :: d_hll_get_nzeros, d_hll_free, d_hll_get_fmt, & + & d_hll_get_size, d_hll_sizeof, d_hll_get_nz_row, & + & d_hll_set_nzeros, d_hll_get_hksz, d_hll_set_hksz + ! Interface bodies for procedures that are not defined in this module. + interface + subroutine psb_d_hll_reallocate_nz(nz,a) + import :: psb_d_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hll_sparse_mat), intent(inout) :: a + end subroutine psb_d_hll_reallocate_nz + end interface + + interface + subroutine psb_d_hll_reinit(a,clear) + import :: psb_d_hll_sparse_mat + class(psb_d_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_hll_reinit + end interface + + interface + subroutine psb_d_hll_mold(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_mold + end interface + + interface + subroutine psb_d_hll_allocate_mnnz(m,n,a,nz) + import :: psb_d_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hll_allocate_mnnz + end interface + ! NOTE(review): head has no intent attribute, unlike the other arguments; confirm this is intended. + interface + subroutine psb_d_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_hll_print + end interface + + interface + subroutine psb_d_cp_hll_to_coo(a,b,info) + import :: 
psb_d_coo_sparse_mat, psb_d_hll_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_to_coo + end interface + + interface + subroutine psb_d_cp_hll_from_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_from_coo + end interface + + interface + subroutine psb_d_cp_hll_to_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_to_fmt + end interface + + interface + subroutine psb_d_cp_hll_from_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_from_fmt + end interface + ! The cp_* interfaces leave one operand intent(in); the mv_* interfaces take both a and b as intent(inout). + interface + subroutine psb_d_mv_hll_to_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_to_coo + end interface + + interface + subroutine psb_d_mv_hll_from_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_from_coo + end interface + + interface + subroutine psb_d_mv_hll_to_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) 
:: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_to_fmt + end interface + + interface + subroutine psb_d_mv_hll_from_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_from_fmt + end interface + + interface + subroutine psb_d_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_csput_a + end interface + + interface + subroutine psb_d_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_hll_csgetptn + end interface + ! Same arguments as psb_d_hll_csgetptn, plus the val array and the optional chksz flag. + interface + subroutine psb_d_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) 
:: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_hll_csgetrow + end interface + ! NOTE(review): psb_d_hll_sparse_mat has no binding that targets psb_d_hll_csgetblk; confirm it is still needed. + interface + subroutine psb_d_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_hll_csgetblk + end interface + ! The type binds inner_cssv to psb_d_hll_cssv (rank-1 x, y) and inner_cssm to psb_d_hll_cssm (rank-2 x, y). + interface + subroutine psb_d_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_cssv + subroutine psb_d_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_cssm + end interface + + interface + subroutine psb_d_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end 
subroutine psb_d_hll_csmv + subroutine psb_d_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_csmm + end interface + + ! The three functions below return one real(psb_dpk_) value; the four *sum subroutines fill intent(out) d(:). + interface + function psb_d_hll_maxval(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_maxval + end interface + + interface + function psb_d_hll_csnmi(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_csnmi + end interface + + interface + function psb_d_hll_csnm1(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_csnm1 + end interface + + interface + subroutine psb_d_hll_rowsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_rowsum + end interface + + interface + subroutine psb_d_hll_arwsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_arwsum + end interface + + interface + subroutine psb_d_hll_colsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_colsum + end interface + + interface + subroutine psb_d_hll_aclsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_aclsum + end interface + + interface + subroutine 
psb_d_hll_get_diag(a,d,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_get_diag + end interface + ! psb_d_hll_scal takes an array d(:) and an optional side; psb_d_hll_scals takes a scalar d. + interface + subroutine psb_d_hll_scal(d,a,info,side) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_hll_scal + end interface + + interface + subroutine psb_d_hll_scals(d,a,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_scals + end interface + ! Generic interface; the specific declared here operates on psb_d_hll_sparse_mat. + interface psi_convert_hll_from_coo + subroutine psi_d_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_d_hll_sparse_mat, psb_ipk_, psb_d_coo_sparse_mat + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_d_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== =================================== + ! Storage footprint of the matrix: a fixed 8 plus the element size (psb_sizeof_dp or + ! psb_sizeof_ip) times the length of every array component that is currently allocated. + function d_hll_sizeof(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + if (allocated(a%val)) res = res + psb_sizeof_dp * size(a%val) + if (allocated(a%irn)) res = res + psb_sizeof_ip * size(a%irn) + if (allocated(a%idiag)) res = res + psb_sizeof_ip * size(a%idiag) + if (allocated(a%ja)) res = res + psb_sizeof_ip * size(a%ja) + if (allocated(a%hkoffs)) res = res + psb_sizeof_ip * size(a%hkoffs) + ! The guards matter because size() is not defined for an unallocated array. + end function d_hll_sizeof + ! Format tag of this class: 'HLL', blank padded to the declared length of 5. + function d_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function d_hll_get_fmt + ! Store n in the nzt component. + subroutine d_hll_set_nzeros(a,n) + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine d_hll_set_nzeros + ! Return the nzt component. + function d_hll_get_nzeros(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function d_hll_get_nzeros + ! Smallest of size(ja) and size(val) among the ones allocated; -1 when neither is allocated. + function d_hll_get_size(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function d_hll_get_size + + ! Value of irn(idx), or 0 when idx lies outside 1..get_nrows() or irn has no + ! element idx (including the case where irn is not allocated). + function d_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (.not.allocated(a%irn)) return + if ((1<=idx).and.(idx<=a%get_nrows()).and.(idx<=size(a%irn))) then + res = a%irn(idx) + end if + + end function d_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! 
== =================================== + ! Release all allocated arrays, call set_null, and zero the row, column and hack sizes. + subroutine d_hll_free(a) + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + ! Every array is tested with its own allocated() status before being released. + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%hkoffs)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine d_hll_free + ! Store n as the hack size (component hksz). + subroutine d_hll_set_hksz(a,n) + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine d_hll_set_hksz + ! Return the stored hack size (component hksz). + function d_hll_get_hksz(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function d_hll_get_hksz + +end module psb_d_hll_mat_mod diff --git a/ext/psb_ext_mod.F90 b/ext/psb_ext_mod.F90 new file mode 100644 index 00000000..b1dbdb59 --- /dev/null +++ b/ext/psb_ext_mod.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! 
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +! Collects the extension storage-format modules so that client code needs a single use statement. +module psb_ext_mod + use psb_const_mod + use psi_ext_util_mod + + use psb_s_dns_mat_mod + use psb_d_dns_mat_mod + use psb_c_dns_mat_mod + use psb_z_dns_mat_mod + + use psb_s_ell_mat_mod + use psb_d_ell_mat_mod + use psb_c_ell_mat_mod + use psb_z_ell_mat_mod + + use psb_s_hll_mat_mod + use psb_d_hll_mat_mod + use psb_c_hll_mat_mod + use psb_z_hll_mat_mod + + use psb_s_dia_mat_mod + use psb_d_dia_mat_mod + use psb_c_dia_mat_mod + use psb_z_dia_mat_mod + + use psb_s_hdia_mat_mod + use psb_d_hdia_mat_mod + use psb_c_hdia_mat_mod + use psb_z_hdia_mat_mod + +#ifdef HAVE_RSB + use psb_d_rsb_mat_mod +#endif +end module psb_ext_mod diff --git a/ext/psb_s_dia_mat_mod.f90 b/ext/psb_s_dia_mat_mod.f90 new file mode 100644 index 00000000..3a11d959 --- /dev/null +++ b/ext/psb_s_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. 
Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_dia_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_spk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => s_dia_get_size + procedure, pass(a) :: get_nzeros => s_dia_get_nzeros + procedure, nopass :: get_fmt => s_dia_get_fmt + procedure, pass(a) :: sizeof => s_dia_sizeof + procedure, pass(a) :: csmm => psb_s_dia_csmm + procedure, pass(a) :: csmv => psb_s_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_s_dia_cssm + ! 
procedure, pass(a) :: inner_cssv => psb_s_dia_cssv + procedure, pass(a) :: scals => psb_s_dia_scals + procedure, pass(a) :: scalv => psb_s_dia_scal + procedure, pass(a) :: maxval => psb_s_dia_maxval + procedure, pass(a) :: rowsum => psb_s_dia_rowsum + procedure, pass(a) :: arwsum => psb_s_dia_arwsum + procedure, pass(a) :: colsum => psb_s_dia_colsum + procedure, pass(a) :: aclsum => psb_s_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_s_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_s_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_s_dia_csput_a + procedure, pass(a) :: get_diag => psb_s_dia_get_diag + procedure, pass(a) :: csgetptn => psb_s_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_s_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => s_dia_get_nz_row + procedure, pass(a) :: reinit => psb_s_dia_reinit + ! 
procedure, pass(a) :: trim => psb_s_dia_trim + procedure, pass(a) :: print => psb_s_dia_print + procedure, pass(a) :: free => s_dia_free + procedure, pass(a) :: mold => psb_s_dia_mold + + end type psb_s_dia_sparse_mat + + private :: s_dia_get_nzeros, s_dia_free, s_dia_get_fmt, & + & s_dia_sizeof !, s_dia_get_size, s_dia_get_nz_row + + interface + subroutine psb_s_dia_reallocate_nz(nz,a) + import :: psb_s_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dia_sparse_mat), intent(inout) :: a + end subroutine psb_s_dia_reallocate_nz + end interface + + interface + subroutine psb_s_dia_reinit(a,clear) + import :: psb_s_dia_sparse_mat + class(psb_s_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_dia_reinit + end interface + + interface + subroutine psb_s_dia_trim(a) + import :: psb_s_dia_sparse_mat + class(psb_s_dia_sparse_mat), intent(inout) :: a + end subroutine psb_s_dia_trim + end interface + + interface + subroutine psb_s_dia_mold(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_mold + end interface + + interface + subroutine psb_s_dia_allocate_mnnz(m,n,a,nz) + import :: psb_s_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_dia_allocate_mnnz + end interface + + interface + subroutine psb_s_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_dia_print + end 
interface + + interface + subroutine psb_s_cp_dia_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_dia_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_to_coo + end interface + + interface + subroutine psb_s_cp_dia_from_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_from_coo + end interface + + interface + subroutine psb_s_cp_dia_to_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_to_fmt + end interface + + interface + subroutine psb_s_cp_dia_from_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_from_fmt + end interface + + interface + subroutine psb_s_mv_dia_to_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_to_coo + end interface + + interface + subroutine psb_s_mv_dia_from_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_from_coo + end interface + + interface + subroutine psb_s_mv_dia_to_fmt(a,b,info) + import :: 
psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_to_fmt + end interface + + interface + subroutine psb_s_mv_dia_from_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_from_fmt + end interface + + interface + subroutine psb_s_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_csput_a + end interface + + interface + subroutine psb_s_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_dia_csgetptn + end interface + + interface + subroutine psb_s_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: 
ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_dia_csgetrow + end interface + + interface + subroutine psb_s_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_dia_csgetblk + end interface + + interface + subroutine psb_s_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_cssv + subroutine psb_s_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_cssm + end interface + + interface + subroutine psb_s_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) 
:: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_csmv + subroutine psb_s_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_csmm + end interface + + + interface + function psb_s_dia_maxval(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_maxval + end interface + + interface + function psb_s_dia_csnmi(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_csnmi + end interface + + interface + function psb_s_dia_csnm1(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_csnm1 + end interface + + interface + subroutine psb_s_dia_rowsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_rowsum + end interface + + interface + subroutine psb_s_dia_arwsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_arwsum + end interface + + interface + subroutine psb_s_dia_colsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_colsum + end interface + + interface + subroutine psb_s_dia_aclsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), 
intent(out) :: d(:) + end subroutine psb_s_dia_aclsum + end interface + + interface + subroutine psb_s_dia_get_diag(a,d,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_get_diag + end interface + + interface + subroutine psb_s_dia_scal(d,a,info,side) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_dia_scal + end interface + + interface + subroutine psb_s_dia_scals(d,a,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_s_convert_dia_from_coo(a,tmp,info) + import :: psb_s_dia_sparse_mat, psb_ipk_, psb_s_coo_sparse_mat + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_s_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== =================================== + + + function s_dia_sizeof(a) result(res) + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function s_dia_sizeof + + function s_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function s_dia_get_fmt + + function s_dia_get_nzeros(a) result(res) + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function s_dia_get_nzeros + + ! function s_dia_get_size(a) result(res) + ! implicit none + ! class(psb_s_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function s_dia_get_size + + + ! function s_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_s_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function s_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! 
== =================================== + + subroutine s_dia_free(a) + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_dia_free + + +end module psb_s_dia_mat_mod diff --git a/ext/psb_s_dns_mat_mod.f90 b/ext/psb_s_dns_mat_mod.f90 new file mode 100644 index 00000000..e9ea5f26 --- /dev/null +++ b/ext/psb_s_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! 
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_s_dns_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_spk_ : kind for single precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_sp: variable holding size in bytes of + ! a single precision real + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + real(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => s_dns_get_size + procedure, pass(a) :: get_nzeros => s_dns_get_nzeros + procedure, nopass :: get_fmt => s_dns_get_fmt + procedure, pass(a) :: sizeof => s_dns_sizeof + procedure, pass(a) :: csmv => psb_s_dns_csmv + procedure, pass(a) :: csmm => psb_s_dns_csmm + procedure, pass(a) :: csnmi => psb_s_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_s_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_s_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_s_dns_get_diag + procedure, pass(a) :: csgetrow => psb_s_dns_csgetrow + procedure, pass(a) :: get_nz_row => s_dns_get_nz_row + procedure, pass(a) :: trim => psb_s_dns_trim + procedure, pass(a) :: free => s_dns_free + procedure, pass(a) :: mold => psb_s_dns_mold + + end type psb_s_dns_sparse_mat + + private :: s_dns_get_nzeros, s_dns_free, s_dns_get_fmt, & + & s_dns_get_size, s_dns_sizeof, s_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !!
\memberof psb_s_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_s_dns_reallocate_nz(nz,a) + import :: psb_s_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dns_sparse_mat), intent(inout) :: a + end subroutine psb_s_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_s_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_s_dns_trim(a) + import :: psb_s_dns_sparse_mat + class(psb_s_dns_sparse_mat), intent(inout) :: a + end subroutine psb_s_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_s_dns_sparse_mat + !! \brief Allocate a class(psb_s_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_dns_mold(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_s_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! 
+ interface + subroutine psb_s_dns_allocate_mnnz(m,n,a,nz) + import :: psb_s_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Copy and convert to psb_s_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_cp_dns_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_dns_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Copy and convert from psb_s_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_s_cp_dns_from_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Convert to psb_s_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_mv_dns_to_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_s_dns_sparse_mat + !! 
\brief Convert from psb_s_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_s_mv_dns_from_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_s_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the others (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the maximum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + !
+ interface + subroutine psb_s_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_s_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_s_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_s_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! 
\param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_s_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_s_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_s_dns_csnmi(a) result(res) + import :: psb_s_dns_sparse_mat, psb_spk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_s_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i,i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_s_dns_get_diag(a,d,info) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_s_dns_sparse_mat + !! \brief Memory occupation in bytes: the VAL coefficients plus the NNZ counter + ! + function s_dns_sizeof(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_sp * size(a%val) ! val(:,:) is real(psb_spk_), so use the single precision size + res = res + psb_sizeof_ip ! the integer(psb_ipk_) component nnz + + end function s_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_s_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + !
+ function s_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function s_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_s_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function s_dns_get_nzeros(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function s_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_s_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function s_dns_get_size(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function s_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_s_dns_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search; an out-of-range index yields 0. + !! + ! + function s_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= szero) ! compare in the kind of val, real(psb_spk_) + end if + + end function s_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_s_dns_sparse_mat + !! Name says all + + subroutine s_dns_free(a) + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_dns_free + + +end module psb_s_dns_mat_mod diff --git a/ext/psb_s_ell_mat_mod.f90 b/ext/psb_s_ell_mat_mod.f90 new file mode 100644 index 00000000..9e3dd8b4 --- /dev/null +++ b/ext/psb_s_ell_mat_mod.f90 @@ -0,0 +1,552 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +!
Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_ell_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. 
IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => s_ell_is_by_rows + procedure, pass(a) :: get_size => s_ell_get_size + procedure, pass(a) :: get_nzeros => s_ell_get_nzeros + procedure, nopass :: get_fmt => s_ell_get_fmt + procedure, pass(a) :: sizeof => s_ell_sizeof + procedure, pass(a) :: csmm => psb_s_ell_csmm + procedure, pass(a) :: csmv => psb_s_ell_csmv + procedure, pass(a) :: inner_cssm => psb_s_ell_cssm + procedure, pass(a) :: inner_cssv => psb_s_ell_cssv + procedure, pass(a) :: scals => psb_s_ell_scals + procedure, pass(a) :: scalv => psb_s_ell_scal + procedure, pass(a) :: maxval => psb_s_ell_maxval + procedure, pass(a) :: csnmi => psb_s_ell_csnmi + procedure, pass(a) :: csnm1 => psb_s_ell_csnm1 + procedure, pass(a) :: rowsum => psb_s_ell_rowsum + procedure, pass(a) :: arwsum => psb_s_ell_arwsum + procedure, pass(a) :: colsum => psb_s_ell_colsum + procedure, pass(a) :: aclsum => psb_s_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_s_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_s_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_s_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_s_mv_ell_from_fmt + procedure, pass(a) :: 
csput_a => psb_s_ell_csput_a + procedure, pass(a) :: get_diag => psb_s_ell_get_diag + procedure, pass(a) :: csgetptn => psb_s_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_s_ell_csgetrow + procedure, pass(a) :: get_nz_row => s_ell_get_nz_row + procedure, pass(a) :: reinit => psb_s_ell_reinit + procedure, pass(a) :: trim => psb_s_ell_trim + procedure, pass(a) :: print => psb_s_ell_print + procedure, pass(a) :: free => s_ell_free + procedure, pass(a) :: mold => psb_s_ell_mold + procedure, pass(a) :: get_nrm => s_ell_get_nrm + + end type psb_s_ell_sparse_mat + + private :: s_ell_get_nzeros, s_ell_free, s_ell_get_fmt, & + & s_ell_get_size, s_ell_sizeof, s_ell_get_nz_row, & + & s_ell_is_by_rows + + interface + subroutine psb_s_ell_reallocate_nz(nz,a) + import :: psb_s_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_ell_sparse_mat), intent(inout) :: a + end subroutine psb_s_ell_reallocate_nz + end interface + + interface + subroutine psb_s_ell_reinit(a,clear) + import :: psb_s_ell_sparse_mat + class(psb_s_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_ell_reinit + end interface + + interface + subroutine psb_s_ell_trim(a) + import :: psb_s_ell_sparse_mat + class(psb_s_ell_sparse_mat), intent(inout) :: a + end subroutine psb_s_ell_trim + end interface + + interface + subroutine psb_s_ell_mold(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_mold + end interface + + interface + subroutine psb_s_ell_allocate_mnnz(m,n,a,nz) + import :: psb_s_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_ell_allocate_mnnz + end interface + + interface + subroutine 
psb_s_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_ell_print + end interface + + interface + subroutine psb_s_cp_ell_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_ell_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_to_coo + end interface + + interface + subroutine psb_s_cp_ell_from_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_from_coo + end interface + + interface + subroutine psb_s_cp_ell_to_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_to_fmt + end interface + + interface + subroutine psb_s_cp_ell_from_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_from_fmt + end interface + + interface + subroutine psb_s_mv_ell_to_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_to_coo + end interface + + interface + subroutine 
psb_s_mv_ell_from_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_from_coo + end interface + + interface + subroutine psb_s_mv_ell_to_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_to_fmt + end interface + + interface + subroutine psb_s_mv_ell_from_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_from_fmt + end interface + + interface + subroutine psb_s_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_csput_a + end interface + + interface + subroutine psb_s_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_ell_csgetptn + end interface + + 
interface + subroutine psb_s_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_ell_csgetrow + end interface + + interface + subroutine psb_s_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_ell_csgetblk + end interface + + interface + subroutine psb_s_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_cssv + subroutine psb_s_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), 
intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_cssm + end interface + + interface + subroutine psb_s_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_csmv + subroutine psb_s_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_csmm + end interface + + + interface + function psb_s_ell_maxval(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_maxval + end interface + + interface + function psb_s_ell_csnmi(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_csnmi + end interface + + interface + function psb_s_ell_csnm1(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_csnm1 + end interface + + interface + subroutine psb_s_ell_rowsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_rowsum + end interface + + interface + subroutine psb_s_ell_arwsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_arwsum + end interface + + interface + 
subroutine psb_s_ell_colsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_colsum + end interface + + interface + subroutine psb_s_ell_aclsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_aclsum + end interface + + interface + subroutine psb_s_ell_get_diag(a,d,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_get_diag + end interface + + interface + subroutine psb_s_ell_scal(d,a,info,side) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_ell_scal + end interface + + interface + subroutine psb_s_ell_scals(d,a,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_scals + end interface + + interface + subroutine psi_s_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_s_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== ===================================
+
+
+  !
+  ! Storage-order query: this format always answers .true.
+  !
+  function s_ell_is_by_rows(a) result(res)
+    implicit none
+    class(psb_s_ell_sparse_mat), intent(in) :: a
+    logical  :: res
+    res = .true.
+  end function s_ell_is_by_rows
+
+  !
+  ! Storage occupied by the matrix: a fixed overhead of 8 plus, for each
+  ! of VAL, IRN, IDIAG and JA, element size times element count.
+  ! Components that are not allocated contribute nothing (SIZE of an
+  ! unallocated array is undefined), and the element counts are taken
+  ! directly in kind psb_epk_ so that the products cannot overflow a
+  ! narrower integer kind before being accumulated.
+  !
+  function s_ell_sizeof(a) result(res)
+    implicit none
+    class(psb_s_ell_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    if (a%is_dev()) call a%sync()
+    res = 8
+    if (allocated(a%val)) &
+         & res = res + psb_sizeof_sp * size(a%val,kind=psb_epk_)
+    if (allocated(a%irn)) &
+         & res = res + psb_sizeof_ip * size(a%irn,kind=psb_epk_)
+    if (allocated(a%idiag)) &
+         & res = res + psb_sizeof_ip * size(a%idiag,kind=psb_epk_)
+    if (allocated(a%ja)) &
+         & res = res + psb_sizeof_ip * size(a%ja,kind=psb_epk_)
+
+  end function s_ell_sizeof
+
+  !
+  ! Format tag of this class.
+  !
+  function s_ell_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'ELL'
+  end function s_ell_get_fmt
+
+  !
+  ! Extent of VAL along its second dimension; 0 when VAL is not
+  ! allocated.
+  !
+  function s_ell_get_nrm(a) result(res)
+    implicit none
+    class(psb_s_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = 0
+    if (allocated(a%val)) res = size(a%val,2,kind=psb_ipk_)
+  end function s_ell_get_nrm
+
+  !
+  ! Number of nonzeros as recorded in the NZT component.
+  !
+  function s_ell_get_nzeros(a) result(res)
+    implicit none
+    class(psb_s_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzt
+  end function s_ell_get_nzeros
+
+  !
+  ! Usable capacity: the smaller of the total sizes of JA and VAL,
+  ! considering only those that are allocated; -1 when neither is.
+  ! SIZE is requested in kind psb_ipk_ so that both MIN arguments have
+  ! the same kind whatever psb_ipk_ is configured to be.
+  !
+  function s_ell_get_size(a) result(res)
+    implicit none
+    class(psb_s_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = -1
+    if (a%is_dev()) call a%sync()
+
+    if (allocated(a%ja)) res = size(a%ja,kind=psb_ipk_)
+    if (allocated(a%val)) then
+      if (res >= 0) then
+        res = min(res,size(a%val,kind=psb_ipk_))
+      else
+        res = size(a%val,kind=psb_ipk_)
+      end if
+    end if
+
+  end function s_ell_get_size
+
+
+  !
+  ! Entry IDX of IRN, i.e. the number of nonzeros stored in row IDX.
+  ! Returns 0 when IDX is outside 1..get_nrows() or IRN is not
+  ! allocated.
+  !
+  function s_ell_get_nz_row(idx,a) result(res)
+
+    implicit none
+
+    class(psb_s_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(in)           :: idx
+    integer(psb_ipk_) :: res
+
+    res = 0
+    if (a%is_dev()) call a%sync()
+
+    ! .and. does not short-circuit, so the allocation test is separate.
+    if (.not.allocated(a%irn)) return
+    if ((1<=idx).and.(idx<=a%get_nrows())) then
+      res = a%irn(idx)
+    end if
+
+  end function s_ell_get_nz_row
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+
+  !
+  ! Release all storage held by the matrix and return it to an empty
+  ! state: the four allocatable components are deallocated if present,
+  ! the nonzero counter is cleared so that get_nzeros does not report
+  ! a stale value, and the base-class state and dimensions are reset.
+  !
+  subroutine  s_ell_free(a)
+    implicit none
+
+    class(psb_s_ell_sparse_mat), intent(inout) :: a
+
+    if (allocated(a%idiag)) deallocate(a%idiag)
+    if (allocated(a%irn)) deallocate(a%irn)
+    if (allocated(a%ja)) deallocate(a%ja)
+    if (allocated(a%val)) deallocate(a%val)
+    a%nzt = 0
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+
+    return
+
+  end subroutine s_ell_free
+
+
+end module psb_s_ell_mat_mod
diff --git a/ext/psb_s_hdia_mat_mod.f90 b/ext/psb_s_hdia_mat_mod.f90
new file mode 100644
index 00000000..b7b2b110
--- /dev/null
+++ b/ext/psb_s_hdia_mat_mod.f90
@@ -0,0 +1,534 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+module psb_s_hdia_mat_mod
+
+  use psb_s_base_mat_mod
+
+
+  type, extends(psb_s_base_sparse_mat) :: psb_s_hdia_sparse_mat
+    !
+    ! HDIA format
+    !
+    ! Single precision real sparse matrix class extending the base
+    ! sparse matrix type. Only a subset of the base-class methods is
+    ! overridden here; the bindings left commented out below are not
+    ! provided by this class.
+    !
+    ! Components:
+    !   hackOffsets, diaOffsets : integer index arrays
+    !   val                     : coefficient storage
+    !   nhacks                  : hack counter, cleared by free
+    !   nzeros                  : value handled by get_nzeros/set_nzeros
+    !   hacksize                : defaults to 32
+    !   dim                     : defaults to 0
+    ! NOTE(review): the exact layout encoded by hackOffsets/diaOffsets
+    ! and the meaning of dim are defined by the conversion routines,
+    ! which are not part of this module - confirm there.
+    !
+    integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:)
+    real(psb_spk_), allocatable :: val(:)
+
+
+    ! Default-initialised so that a freshly declared object reports
+    ! zero nonzeros instead of an undefined value.
+    integer(psb_ipk_) :: nhacks = 0, nzeros = 0
+    integer(psb_ipk_) :: hacksize = 32
+    integer(psb_epk_) :: dim=0
+
+  contains
+    ! procedure, pass(a) :: get_size => s_hdia_get_size
+    procedure, pass(a) :: get_nzeros => s_hdia_get_nzeros
+    procedure, pass(a) :: set_nzeros => s_hdia_set_nzeros
+    procedure, nopass :: get_fmt => s_hdia_get_fmt
+    procedure, pass(a) :: sizeof => s_hdia_sizeof
+    ! procedure, pass(a) :: csmm => psb_s_hdia_csmm
+    procedure, pass(a) :: csmv => psb_s_hdia_csmv
+    ! procedure, pass(a) :: inner_cssm => psb_s_hdia_cssm
+    ! procedure, pass(a) :: inner_cssv => psb_s_hdia_cssv
+    ! procedure, pass(a) :: scals => psb_s_hdia_scals
+    ! procedure, pass(a) :: scalv => psb_s_hdia_scal
+    ! procedure, pass(a) :: maxval => psb_s_hdia_maxval
+    ! procedure, pass(a) :: csnmi => psb_s_hdia_csnmi
+    ! procedure, pass(a) :: csnm1 => psb_s_hdia_csnm1
+    ! procedure, pass(a) :: rowsum => psb_s_hdia_rowsum
+    ! procedure, pass(a) :: arwsum => psb_s_hdia_arwsum
+    ! procedure, pass(a) :: colsum => psb_s_hdia_colsum
+    ! procedure, pass(a) :: aclsum => psb_s_hdia_aclsum
+    ! procedure, pass(a) :: reallocate_nz => psb_s_hdia_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_s_hdia_allocate_mnnz
+    procedure, pass(a) :: cp_to_coo => psb_s_cp_hdia_to_coo
+    procedure, pass(a) :: cp_from_coo => psb_s_cp_hdia_from_coo
+    ! procedure, pass(a) :: cp_to_fmt => psb_s_cp_hdia_to_fmt
+    ! procedure, pass(a) :: cp_from_fmt => psb_s_cp_hdia_from_fmt
+    procedure, pass(a) :: mv_to_coo => psb_s_mv_hdia_to_coo
+    procedure, pass(a) :: mv_from_coo => psb_s_mv_hdia_from_coo
+    ! procedure, pass(a) :: mv_to_fmt => psb_s_mv_hdia_to_fmt
+    ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_hdia_from_fmt
+    ! procedure, pass(a) :: csput_a => psb_s_hdia_csput_a
+    ! procedure, pass(a) :: get_diag => psb_s_hdia_get_diag
+    ! procedure, pass(a) :: csgetptn => psb_s_hdia_csgetptn
+    ! procedure, pass(a) :: csgetrow => psb_s_hdia_csgetrow
+    ! procedure, pass(a) :: get_nz_row => s_hdia_get_nz_row
+    ! procedure, pass(a) :: reinit => psb_s_hdia_reinit
+    ! procedure, pass(a) :: trim => psb_s_hdia_trim
+    procedure, pass(a) :: print => psb_s_hdia_print
+    procedure, pass(a) :: free => s_hdia_free
+    procedure, pass(a) :: mold => psb_s_hdia_mold
+
+  end type psb_s_hdia_sparse_mat
+
+  ! Module procedures reachable only through the type-bound bindings.
+  private :: s_hdia_get_nzeros, s_hdia_set_nzeros, s_hdia_free, &
+       & s_hdia_get_fmt, s_hdia_sizeof
+!!$ &
+!!$ & s_hdia_get_nz_row s_hdia_get_size,
+
+!!$ interface
+!!$ subroutine psb_s_hdia_reallocate_nz(nz,a)
+!!$ import :: psb_s_hdia_sparse_mat, psb_ipk_
+!!$ integer(psb_ipk_), intent(in) :: nz
+!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a
+!!$ end subroutine psb_s_hdia_reallocate_nz
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_reinit(a,clear)
+!!$ import :: psb_s_hdia_sparse_mat
+!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a
+!!$ logical, intent(in), optional :: clear
+!!$ end subroutine psb_s_hdia_reinit
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_trim(a)
+!!$ import :: psb_s_hdia_sparse_mat
+!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a
+!!$ end subroutine psb_s_hdia_trim
+!!$ end interface
+
+  !
+  ! Explicit interfaces for the methods implemented outside this
+  ! module.
+  !
+  interface
+    subroutine psb_s_hdia_mold(a,b,info)
+      import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_hdia_sparse_mat), intent(in) :: a
+      class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_hdia_mold
+  end interface
+
+  interface
+    subroutine psb_s_hdia_allocate_mnnz(m,n,a,nz)
+      import :: psb_s_hdia_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_s_hdia_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_s_hdia_allocate_mnnz
+  end interface
+
+  interface
+    subroutine psb_s_hdia_print(iout,a,iv,head,ivr,ivc)
+      import :: psb_s_hdia_sparse_mat, psb_ipk_, psb_lpk_
+      integer(psb_ipk_), intent(in) :: iout
+      class(psb_s_hdia_sparse_mat), intent(in) :: a
+      integer(psb_lpk_), intent(in), optional :: iv(:)
+      character(len=*), optional :: head
+      integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+    end subroutine psb_s_hdia_print
+  end interface
+
+  interface
+    subroutine psb_s_cp_hdia_to_coo(a,b,info)
+      import :: psb_s_coo_sparse_mat, psb_s_hdia_sparse_mat, psb_ipk_
+      class(psb_s_hdia_sparse_mat), intent(in) :: a
+      class(psb_s_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cp_hdia_to_coo
+  end interface
+
+  interface
+    subroutine psb_s_cp_hdia_from_coo(a,b,info)
+      import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_hdia_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cp_hdia_from_coo
+  end interface
+
+!!$ interface
+!!$ subroutine psb_s_cp_hdia_to_fmt(a,b,info)
+!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ class(psb_s_base_sparse_mat), intent(inout) :: b
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ end subroutine psb_s_cp_hdia_to_fmt
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_cp_hdia_from_fmt(a,b,info)
+!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a
+!!$ class(psb_s_base_sparse_mat), intent(in) :: b
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ end subroutine psb_s_cp_hdia_from_fmt
+!!$ end interface
+
+  interface
+    subroutine psb_s_mv_hdia_to_coo(a,b,info)
+      import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_hdia_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_mv_hdia_to_coo
+  end interface
+
+  interface
+    subroutine psb_s_mv_hdia_from_coo(a,b,info)
+      import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_hdia_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_mv_hdia_from_coo
+  end interface
+
+!!$ interface
+!!$ subroutine psb_s_mv_hdia_to_fmt(a,b,info)
+!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a
+!!$ class(psb_s_base_sparse_mat), intent(inout) :: b
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ end subroutine psb_s_mv_hdia_to_fmt
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_mv_hdia_from_fmt(a,b,info)
+!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a
+!!$ class(psb_s_base_sparse_mat), intent(inout) :: b
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ end subroutine psb_s_mv_hdia_from_fmt
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a
+!!$ real(psb_spk_), intent(in) :: val(:)
+!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),&
+!!$ & imin,imax,jmin,jmax
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ end subroutine psb_s_hdia_csput_a
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,&
+!!$ & jmin,jmax,iren,append,nzin,rscale,cscale)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ integer(psb_ipk_), intent(in) :: imin,imax
+!!$ integer(psb_ipk_), intent(out) :: nz
+!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
+!!$ integer(psb_ipk_),intent(out) :: info
+!!$ logical, intent(in), optional :: append
+!!$ integer(psb_ipk_), intent(in), optional :: iren(:)
+!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
+!!$ logical, intent(in), optional :: rscale,cscale
+!!$ end subroutine psb_s_hdia_csgetptn
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
+!!$ & jmin,jmax,iren,append,nzin,rscale,cscale)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ integer(psb_ipk_), intent(in) :: imin,imax
+!!$ integer(psb_ipk_), intent(out) :: nz
+!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
+!!$ real(psb_spk_), allocatable, intent(inout) :: val(:)
+!!$ integer(psb_ipk_),intent(out) :: info
+!!$ logical, intent(in), optional :: append
+!!$ integer(psb_ipk_), intent(in), optional :: iren(:)
+!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
+!!$ logical, intent(in), optional :: rscale,cscale
+!!$ end subroutine psb_s_hdia_csgetrow
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_csgetblk(imin,imax,a,b,info,&
+!!$ & jmin,jmax,iren,append,rscale,cscale)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ class(psb_s_coo_sparse_mat), intent(inout) :: b
+!!$ integer(psb_ipk_), intent(in) :: imin,imax
+!!$ integer(psb_ipk_),intent(out) :: info
+!!$ logical, intent(in), optional :: append
+!!$ integer(psb_ipk_), intent(in), optional :: iren(:)
+!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax
+!!$ logical, intent(in), optional :: rscale,cscale
+!!$ end subroutine psb_s_hdia_csgetblk
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_cssv(alpha,a,x,beta,y,info,trans)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:)
+!!$ real(psb_spk_), intent(inout) :: y(:)
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character, optional, intent(in) :: trans
+!!$ end subroutine psb_s_hdia_cssv
+!!$ subroutine psb_s_hdia_cssm(alpha,a,x,beta,y,info,trans)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+!!$ real(psb_spk_), intent(inout) :: y(:,:)
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character, optional, intent(in) :: trans
+!!$ end subroutine psb_s_hdia_cssm
+!!$ end interface
+
+  interface
+    subroutine psb_s_hdia_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hdia_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in) :: alpha, beta, x(:)
+      real(psb_spk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_s_hdia_csmv
+!!$ subroutine psb_s_hdia_csmm(alpha,a,x,beta,y,info,trans)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+!!$ real(psb_spk_), intent(inout) :: y(:,:)
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character, optional, intent(in) :: trans
+!!$ end subroutine psb_s_hdia_csmm
+  end interface
+
+
+!!$ interface
+!!$ function psb_s_hdia_maxval(a) result(res)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_) :: res
+!!$ end function psb_s_hdia_maxval
+!!$ end interface
+!!$
+!!$ interface
+!!$ function psb_s_hdia_csnmi(a) result(res)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_) :: res
+!!$ end function psb_s_hdia_csnmi
+!!$ end interface
+!!$
+!!$ interface
+!!$ function psb_s_hdia_csnm1(a) result(res)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_) :: res
+!!$ end function psb_s_hdia_csnm1
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_rowsum(d,a)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_), intent(out) :: d(:)
+!!$ end subroutine psb_s_hdia_rowsum
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_arwsum(d,a)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_), intent(out) :: d(:)
+!!$ end subroutine psb_s_hdia_arwsum
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_colsum(d,a)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_), intent(out) :: d(:)
+!!$ end subroutine psb_s_hdia_colsum
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_aclsum(d,a)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_), intent(out) :: d(:)
+!!$ end subroutine psb_s_hdia_aclsum
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_get_diag(a,d,info)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a
+!!$ real(psb_spk_), intent(out) :: d(:)
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ end subroutine psb_s_hdia_get_diag
+!!$ end interface
+!!$
+!!$ interface
+!!$ subroutine psb_s_hdia_scal(d,a,info,side)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a
+!!$ real(psb_spk_), intent(in) :: d(:)
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character, intent(in), optional :: side
+!!$ end subroutine psb_s_hdia_scal
+!!$ end interface
+
+!!$ interface
+!!$ subroutine psb_s_hdia_scals(d,a,info)
+!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_
+!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a
+!!$ real(psb_spk_), intent(in) :: d
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ end subroutine psb_s_hdia_scals
+!!$ end interface
+!!$
+
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+
+  !
+  ! Storage occupied by the matrix: element size times psb_size() of
+  ! each of hackOffsets, diaOffsets and val.
+  !
+  function s_hdia_sizeof(a) result(res)
+    use psb_realloc_mod, only : psb_size
+    implicit none
+    class(psb_s_hdia_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+    res = 0
+
+    res = res + psb_size(a%hackOffsets)*psb_sizeof_ip
+    res = res + psb_size(a%diaOffsets)*psb_sizeof_ip
+    res = res + psb_size(a%val) * psb_sizeof_sp
+
+  end function s_hdia_sizeof
+
+  !
+  ! Format tag of this class.
+  !
+  function s_hdia_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HDIA'
+  end function s_hdia_get_fmt
+
+  !
+  ! Number of nonzeros as recorded in the nzeros component.
+  !
+  function s_hdia_get_nzeros(a) result(res)
+    implicit none
+    class(psb_s_hdia_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzeros
+  end function s_hdia_get_nzeros
+
+  !
+  ! Record NZ as the number of nonzeros.
+  !
+  subroutine s_hdia_set_nzeros(a,nz)
+    implicit none
+    class(psb_s_hdia_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: nz
+    a%nzeros = nz
+  end subroutine s_hdia_set_nzeros
+
+  ! function s_hdia_get_size(a) result(res)
+  ! implicit none
+  ! class(psb_s_hdia_sparse_mat), intent(in) :: a
+  ! integer(psb_ipk_) :: res
+
+  ! res = -1
+
+  ! if (allocated(a%ja)) then
+  ! if (res >= 0) then
+  ! res = min(res,size(a%ja))
+  ! else
+  ! res = size(a%ja)
+  ! end if
+  ! end if
+  ! if (allocated(a%val)) then
+  ! if (res >= 0) then
+  ! res = min(res,size(a%val))
+  ! else
+  ! res = size(a%val)
+  ! end if
+  ! end if
+
+  ! end function s_hdia_get_size
+
+
+  ! function s_hdia_get_nz_row(idx,a) result(res)
+
+  ! implicit none
+
+  ! class(psb_s_hdia_sparse_mat), intent(in) :: a
+  ! integer(psb_ipk_), intent(in) :: idx
+  ! integer(psb_ipk_) :: res
+
+  ! res = 0
+
+  ! if ((1<=idx).and.(idx<=a%get_nrows())) then
+  ! res = a%irn(idx)
+  ! end if
+
+  ! end function s_hdia_get_nz_row
+
+
+
+  ! ! == ===================================
+  ! !
+  ! !
+  ! !
+  ! ! Data management
+  ! !
+  ! !
+  ! !
+  ! !
+  ! !
+  ! ! == ===================================
+
+  !
+  ! Release all storage held by the matrix and return it to an empty
+  ! state. Every counter that describes the released storage is
+  ! cleared, so get_nzeros reports 0 afterwards, and the base-class
+  ! state and dimensions are reset. hacksize is left untouched.
+  !
+  subroutine s_hdia_free(a)
+    implicit none
+
+    class(psb_s_hdia_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+
+    ! stat= keeps a failed deallocation from aborting the program;
+    ! the status itself is not examined.
+    if (allocated(a%hackOffsets))&
+         & deallocate(a%hackOffsets,stat=info)
+    if (allocated(a%diaOffsets))&
+         & deallocate(a%diaOffsets,stat=info)
+    if (allocated(a%val))&
+         & deallocate(a%val,stat=info)
+    a%nhacks=0
+    a%nzeros=0
+    ! back to the default declared in the type
+    a%dim=0
+
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+
+    return
+
+  end subroutine s_hdia_free
+
+
+end module psb_s_hdia_mat_mod
diff --git a/ext/psb_s_hll_mat_mod.f90 b/ext/psb_s_hll_mat_mod.f90
new file mode 100644
index 00000000..735091c8
--- /dev/null
+++ b/ext/psb_s_hll_mat_mod.f90
@@ -0,0 +1,564 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! 
software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+! Module psb_s_hll_mat_mod: declares the single-precision real HLL sparse
+! matrix type, the explicit interfaces of the procedures bound to it that
+! are implemented outside this module (names starting with psb_s_), and,
+! after "contains", the small accessors implemented here (names starting
+! with s_hll_).
+module psb_s_hll_mat_mod
+
+  use psb_s_base_mat_mod
+  use psi_ext_util_mod
+
+  type, extends(psb_s_base_sparse_mat) :: psb_s_hll_sparse_mat
+    !
+    ! HLL format. (Hacked ELL)
+    ! A modification of ELL.
+    ! Basic idea: pack and pad data in blocks of HCK rows;
+    ! this reduces the impact of a lone, very long row.
+    ! Notes:
+    ! 1. JA holds the column indices, padded with the row index.
+    ! 2. VAL holds the coefficients, padded with zeros
+    ! 3. IDIAG holds the position of the diagonal element
+    ! or 0 if it is not there, but is only relevant for
+    ! triangular matrices. In particular, a unit triangular matrix
+    ! will have IDIAG==0.
+    ! 4. IRN holds the actual number of nonzeros stored in each row
+    ! 5. Within a row, the indices are sorted for use of SV.
+    ! 6. hksz: hack size (multiple of 32)
+    ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val
+    !
+    ! nzt is the value stored by set_nzeros and returned by get_nzeros.
+    !
+    integer(psb_ipk_) :: hksz, nzt
+    integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:)
+    real(psb_spk_), allocatable :: val(:)
+
+  contains
+
+    ! Bindings whose target starts with s_hll_ are implemented in the
+    ! "contains" part of this module; bindings whose target starts with
+    ! psb_s_ only have an explicit interface below.
+    procedure, pass(a) :: get_hksz => s_hll_get_hksz
+    procedure, pass(a) :: set_hksz => s_hll_set_hksz
+    procedure, pass(a) :: get_size => s_hll_get_size
+    procedure, pass(a) :: set_nzeros => s_hll_set_nzeros
+    procedure, pass(a) :: get_nzeros => s_hll_get_nzeros
+    procedure, nopass :: get_fmt => s_hll_get_fmt
+    procedure, pass(a) :: sizeof => s_hll_sizeof
+    procedure, pass(a) :: csmm => psb_s_hll_csmm
+    procedure, pass(a) :: csmv => psb_s_hll_csmv
+    procedure, pass(a) :: inner_cssm => psb_s_hll_cssm
+    procedure, pass(a) :: inner_cssv => psb_s_hll_cssv
+    procedure, pass(a) :: scals => psb_s_hll_scals
+    procedure, pass(a) :: scalv => psb_s_hll_scal
+    procedure, pass(a) :: maxval => psb_s_hll_maxval
+    procedure, pass(a) :: csnmi => psb_s_hll_csnmi
+    procedure, pass(a) :: csnm1 => psb_s_hll_csnm1
+    procedure, pass(a) :: rowsum => psb_s_hll_rowsum
+    procedure, pass(a) :: arwsum => psb_s_hll_arwsum
+    procedure, pass(a) :: colsum => psb_s_hll_colsum
+    procedure, pass(a) :: aclsum => psb_s_hll_aclsum
+    procedure, pass(a) :: reallocate_nz => psb_s_hll_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_s_hll_allocate_mnnz
+    procedure, pass(a) :: cp_to_coo => psb_s_cp_hll_to_coo
+    procedure, pass(a) :: cp_from_coo => psb_s_cp_hll_from_coo
+    procedure, pass(a) :: cp_to_fmt => psb_s_cp_hll_to_fmt
+    procedure, pass(a) :: cp_from_fmt => psb_s_cp_hll_from_fmt
+    procedure, pass(a) :: mv_to_coo => psb_s_mv_hll_to_coo
+    procedure, pass(a) :: mv_from_coo => psb_s_mv_hll_from_coo
+    procedure, pass(a) :: mv_to_fmt => psb_s_mv_hll_to_fmt
+    procedure, pass(a) :: mv_from_fmt => psb_s_mv_hll_from_fmt
+    procedure, pass(a) :: csput_a => psb_s_hll_csput_a
+    procedure, pass(a) :: get_diag => psb_s_hll_get_diag
+    procedure, pass(a) :: csgetptn => psb_s_hll_csgetptn
+    procedure, pass(a) :: csgetrow => psb_s_hll_csgetrow
+    procedure, pass(a) :: get_nz_row => s_hll_get_nz_row
+    procedure, pass(a) :: reinit => psb_s_hll_reinit
+    procedure, pass(a) :: print => psb_s_hll_print
+    procedure, pass(a) :: free => s_hll_free
+    procedure, pass(a) :: mold => psb_s_hll_mold
+
+  end type psb_s_hll_sparse_mat
+
+  ! The module-level implementations are reachable only through the
+  ! type-bound names above.
+  private :: s_hll_get_nzeros, s_hll_free, s_hll_get_fmt, &
+       & s_hll_get_size, s_hll_sizeof, s_hll_get_nz_row, &
+       & s_hll_set_nzeros, s_hll_get_hksz, s_hll_set_hksz
+
+  ! ---- Storage management (externally implemented) ----
+
+  ! Bound as reallocate_nz; nz is the requested size.
+  interface
+    subroutine psb_s_hll_reallocate_nz(nz,a)
+      import :: psb_s_hll_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+    end subroutine psb_s_hll_reallocate_nz
+  end interface
+
+  ! Bound as reinit; clear is optional.
+  interface
+    subroutine psb_s_hll_reinit(a,clear)
+      import :: psb_s_hll_sparse_mat
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      logical, intent(in), optional :: clear
+    end subroutine psb_s_hll_reinit
+  end interface
+
+  ! Bound as mold; b is an allocatable polymorphic base-class object.
+  ! NOTE(review): presumably b is allocated with the dynamic type of a;
+  ! confirm against the implementation.
+  interface
+    subroutine psb_s_hll_mold(a,b,info)
+      import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_hll_mold
+  end interface
+
+  ! Bound as allocate_mnnz; m,n are mandatory, nz is optional.
+  interface
+    subroutine psb_s_hll_allocate_mnnz(m,n,a,nz)
+      import :: psb_s_hll_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_s_hll_allocate_mnnz
+  end interface
+
+  ! Bound as print; iout is presumably the output unit (confirm).
+  ! The index vectors iv, ivr, ivc use the psb_lpk_ integer kind.
+  interface
+    subroutine psb_s_hll_print(iout,a,iv,head,ivr,ivc)
+      import :: psb_s_hll_sparse_mat, psb_ipk_, psb_lpk_
+      integer(psb_ipk_), intent(in) :: iout
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      integer(psb_lpk_), intent(in), optional :: iv(:)
+      character(len=*), optional :: head
+      integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:)
+    end subroutine psb_s_hll_print
+  end interface
+
+  ! ---- Format conversions (externally implemented) ----
+  ! In the cp_* interfaces the source operand is intent(in); in the mv_*
+  ! interfaces both operands are intent(inout).
+  ! NOTE(review): presumably mv_* may release the source storage; confirm.
+
+  interface
+    subroutine psb_s_cp_hll_to_coo(a,b,info)
+      import :: psb_s_coo_sparse_mat, psb_s_hll_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      class(psb_s_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cp_hll_to_coo
+  end interface
+
+  interface
+    subroutine psb_s_cp_hll_from_coo(a,b,info)
+      import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cp_hll_from_coo
+  end interface
+
+  interface
+    subroutine psb_s_cp_hll_to_fmt(a,b,info)
+      import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      class(psb_s_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cp_hll_to_fmt
+  end interface
+
+  interface
+    subroutine psb_s_cp_hll_from_fmt(a,b,info)
+      import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      class(psb_s_base_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cp_hll_from_fmt
+  end interface
+
+  interface
+    subroutine psb_s_mv_hll_to_coo(a,b,info)
+      import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_mv_hll_to_coo
+  end interface
+
+  interface
+    subroutine psb_s_mv_hll_from_coo(a,b,info)
+      import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_mv_hll_from_coo
+  end interface
+
+  interface
+    subroutine psb_s_mv_hll_to_fmt(a,b,info)
+      import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      class(psb_s_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_mv_hll_to_fmt
+  end interface
+
+  interface
+    subroutine psb_s_mv_hll_from_fmt(a,b,info)
+      import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      class(psb_s_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_mv_hll_from_fmt
+  end interface
+
+  ! ---- Entry insertion and extraction (externally implemented) ----
+
+  ! Bound as csput_a; takes nz, the coordinate arrays ia/ja with values
+  ! val, and the index bounds imin..imax, jmin..jmax.
+  interface
+    subroutine psb_s_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in) :: val(:)
+      integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),&
+           & imin,imax,jmin,jmax
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_hll_csput_a
+  end interface
+
+  ! Bound as csgetptn; returns indices only (ia, ja), no values.
+  interface
+    subroutine psb_s_hll_csgetptn(imin,imax,a,nz,ia,ja,info,&
+         & jmin,jmax,iren,append,nzin,rscale,cscale)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: imin,imax
+      integer(psb_ipk_), intent(out) :: nz
+      integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
+      integer(psb_ipk_),intent(out) :: info
+      logical, intent(in), optional :: append
+      integer(psb_ipk_), intent(in), optional :: iren(:)
+      integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
+      logical, intent(in), optional :: rscale,cscale
+    end subroutine psb_s_hll_csgetptn
+  end interface
+
+  ! Bound as csgetrow; like csgetptn but also returns val and accepts the
+  ! extra optional flag chksz.
+  interface
+    subroutine psb_s_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
+         & jmin,jmax,iren,append,nzin,rscale,cscale,chksz)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: imin,imax
+      integer(psb_ipk_), intent(out) :: nz
+      integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
+      real(psb_spk_), allocatable, intent(inout) :: val(:)
+      integer(psb_ipk_),intent(out) :: info
+      logical, intent(in), optional :: append
+      integer(psb_ipk_), intent(in), optional :: iren(:)
+      integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
+      logical, intent(in), optional :: rscale,cscale,chksz
+    end subroutine psb_s_hll_csgetrow
+  end interface
+
+  ! NOTE(review): this interface is declared, but the type above has no
+  ! type-bound procedure that refers to psb_s_hll_csgetblk.
+  interface
+    subroutine psb_s_hll_csgetblk(imin,imax,a,b,info,&
+         & jmin,jmax,iren,append,rscale,cscale)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      class(psb_s_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(in) :: imin,imax
+      integer(psb_ipk_),intent(out) :: info
+      logical, intent(in), optional :: append
+      integer(psb_ipk_), intent(in), optional :: iren(:)
+      integer(psb_ipk_), intent(in), optional :: jmin,jmax
+      logical, intent(in), optional :: rscale,cscale
+    end subroutine psb_s_hll_csgetblk
+  end interface
+
+  ! ---- Computational kernels (externally implemented) ----
+  ! Each kernel takes scalars alpha and beta, an input x, an in/out y and
+  ! an optional trans character, in rank-1 (…v) and rank-2 (…m) variants.
+  ! NOTE(review): presumably cssv/cssm are triangular solves and
+  ! csmv/csmm are matrix products; confirm against the implementations.
+
+  ! Bound as inner_cssv and inner_cssm.
+  interface
+    subroutine psb_s_hll_cssv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in) :: alpha, beta, x(:)
+      real(psb_spk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_s_hll_cssv
+    subroutine psb_s_hll_cssm(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+      real(psb_spk_), intent(inout) :: y(:,:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_s_hll_cssm
+  end interface
+
+  ! Bound as csmv and csmm.
+  interface
+    subroutine psb_s_hll_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in) :: alpha, beta, x(:)
+      real(psb_spk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_s_hll_csmv
+    subroutine psb_s_hll_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+      real(psb_spk_), intent(inout) :: y(:,:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_s_hll_csmm
+  end interface
+
+
+  ! ---- Scalar reductions (externally implemented) ----
+  ! Each returns one real(psb_spk_) value computed from a.
+  ! NOTE(review): by their names, csnmi/csnm1 are presumably the infinity
+  ! norm and the 1-norm; confirm against the implementations.
+
+  interface
+    function psb_s_hll_maxval(a) result(res)
+      import :: psb_s_hll_sparse_mat, psb_spk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_) :: res
+    end function psb_s_hll_maxval
+  end interface
+
+  interface
+    function psb_s_hll_csnmi(a) result(res)
+      import :: psb_s_hll_sparse_mat, psb_spk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_) :: res
+    end function psb_s_hll_csnmi
+  end interface
+
+  interface
+    function psb_s_hll_csnm1(a) result(res)
+      import :: psb_s_hll_sparse_mat, psb_spk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_) :: res
+    end function psb_s_hll_csnm1
+  end interface
+
+  ! ---- Vector-valued reductions (externally implemented) ----
+  ! Each fills the intent(out) vector d from a.
+  ! NOTE(review): presumably arwsum/aclsum are the absolute-value variants
+  ! of rowsum/colsum; confirm against the implementations.
+
+  interface
+    subroutine psb_s_hll_rowsum(d,a)
+      import :: psb_s_hll_sparse_mat, psb_spk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(out) :: d(:)
+    end subroutine psb_s_hll_rowsum
+  end interface
+
+  interface
+    subroutine psb_s_hll_arwsum(d,a)
+      import :: psb_s_hll_sparse_mat, psb_spk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(out) :: d(:)
+    end subroutine psb_s_hll_arwsum
+  end interface
+
+  interface
+    subroutine psb_s_hll_colsum(d,a)
+      import :: psb_s_hll_sparse_mat, psb_spk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(out) :: d(:)
+    end subroutine psb_s_hll_colsum
+  end interface
+
+  interface
+    subroutine psb_s_hll_aclsum(d,a)
+      import :: psb_s_hll_sparse_mat, psb_spk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(out) :: d(:)
+    end subroutine psb_s_hll_aclsum
+  end interface
+
+  ! Bound as get_diag; d receives the result, info the status.
+  interface
+    subroutine psb_s_hll_get_diag(a,d,info)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(in) :: a
+      real(psb_spk_), intent(out) :: d(:)
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_hll_get_diag
+  end interface
+
+  ! ---- Scaling (externally implemented) ----
+
+  ! Bound as scalv; d is a vector and side an optional character.
+  interface
+    subroutine psb_s_hll_scal(d,a,info,side)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in) :: d(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, intent(in), optional :: side
+    end subroutine psb_s_hll_scal
+  end interface
+
+  ! Bound as scals; d is a scalar.
+  interface
+    subroutine psb_s_hll_scals(d,a,info)
+      import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      real(psb_spk_), intent(in) :: d
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_hll_scals
+  end interface
+
+  ! Generic name psi_convert_hll_from_coo with one specific procedure for
+  ! this data type; it takes the hack size hksz and a COO matrix tmp.
+  interface psi_convert_hll_from_coo
+    subroutine psi_s_convert_hll_from_coo(a,hksz,tmp,info)
+      import :: psb_s_hll_sparse_mat, psb_ipk_, psb_s_coo_sparse_mat
+      implicit none
+      class(psb_s_hll_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in) :: hksz
+      class(psb_s_coo_sparse_mat), intent(in) :: tmp
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psi_s_convert_hll_from_coo
+  end interface
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+
+
+  !
+  ! s_hll_sizeof: size in bytes of the HLL storage held by a.
+  !
+  ! Starts from a fixed allowance of 8 bytes (presumably the two scalar
+  ! components hksz and nzt -- TODO confirm) and adds the byte size of
+  ! each component array that is currently allocated.  A component that
+  ! is not allocated (before the first allocation, or after free())
+  ! contributes nothing: the SIZE intrinsic must not be applied to an
+  ! unallocated allocatable array.
+  !
+  function s_hll_sizeof(a) result(res)
+    implicit none
+    class(psb_s_hll_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    ! sync() is invoked whenever is_dev() reports true, before the
+    ! component arrays are inspected.
+    if (a%is_dev()) call a%sync()
+
+    res = 8
+    if (allocated(a%val))    res = res + psb_sizeof_sp * size(a%val)
+    if (allocated(a%irn))    res = res + psb_sizeof_ip * size(a%irn)
+    if (allocated(a%idiag))  res = res + psb_sizeof_ip * size(a%idiag)
+    if (allocated(a%ja))     res = res + psb_sizeof_ip * size(a%ja)
+    if (allocated(a%hkoffs)) res = res + psb_sizeof_ip * size(a%hkoffs)
+
+  end function s_hll_sizeof
+
+  !
+  ! s_hll_get_fmt: format identifier, 'HLL' blank-padded to 5 characters.
+  !
+  function s_hll_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'HLL'
+  end function s_hll_get_fmt
+
+  !
+  ! s_hll_set_nzeros: store n in the nzt component.
+  !
+  subroutine s_hll_set_nzeros(a,n)
+    implicit none
+    class(psb_s_hll_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: n
+
+    a%nzt = n
+  end subroutine s_hll_set_nzeros
+
+  !
+  ! s_hll_get_nzeros: return the nzt component.
+  !
+  function s_hll_get_nzeros(a) result(res)
+    implicit none
+    class(psb_s_hll_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzt
+  end function s_hll_get_nzeros
+
+  !
+  ! s_hll_get_size: number of entries available in the ja/val storage.
+  !
+  ! Returns the smaller of size(ja) and size(val) when both are allocated,
+  ! the size of whichever one is allocated when only one is, and -1 when
+  ! neither is.
+  !
+  function s_hll_get_size(a) result(res)
+    implicit none
+    class(psb_s_hll_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    if (a%is_dev()) call a%sync()
+
+    ! -1 marks "nothing seen yet"; ja is inspected first, so no min() is
+    ! needed for it.
+    res = -1
+    if (allocated(a%ja)) res = size(a%ja)
+
+    if (allocated(a%val)) then
+      if (res >= 0) then
+        res = min(res,size(a%val))
+      else
+        res = size(a%val)
+      end if
+    end if
+
+  end function s_hll_get_size
+
+
+
+  !
+  ! s_hll_get_nz_row: number of nonzeros stored in row idx, read from
+  ! irn(idx); 0 when idx is outside 1..get_nrows().
+  !
+  function s_hll_get_nz_row(idx,a) result(res)
+
+    implicit none
+
+    class(psb_s_hll_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(in) :: idx
+    integer(psb_ipk_) :: res
+
+    res = 0
+
+    if ((1<=idx).and.(idx<=a%get_nrows())) then
+      res = a%irn(idx)
+    end if
+
+  end function s_hll_get_nz_row
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+
+  !
+  ! s_hll_free: release all storage owned by an HLL matrix.
+  !
+  !   a  (inout)  matrix whose allocatable components idiag, irn, ja,
+  !               val and hkoffs are deallocated when allocated.
+  !
+  ! Afterwards set_null() has been called and the row count, column count
+  ! and hack size are set to izero.
+  !
+  subroutine s_hll_free(a)
+    implicit none
+
+    class(psb_s_hll_sparse_mat), intent(inout) :: a
+
+    if (allocated(a%idiag))  deallocate(a%idiag)
+    if (allocated(a%irn))    deallocate(a%irn)
+    if (allocated(a%ja))     deallocate(a%ja)
+    if (allocated(a%val))    deallocate(a%val)
+    ! The guard must test hkoffs itself.  Testing val here (already
+    ! deallocated by the previous statement) meant hkoffs was never
+    ! released.
+    if (allocated(a%hkoffs)) deallocate(a%hkoffs)
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+    call a%set_hksz(izero)
+
+    return
+
+  end subroutine s_hll_free
+
+  !
+  ! s_hll_set_hksz: store n as the hack size (hksz component).
+  !
+  subroutine s_hll_set_hksz(a,n)
+    implicit none
+    class(psb_s_hll_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: n
+
+    a%hksz = n
+  end subroutine s_hll_set_hksz
+
+  !
+  ! s_hll_get_hksz: return the hack size (hksz component).
+  !
+  function s_hll_get_hksz(a) result(res)
+    implicit none
+    class(psb_s_hll_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = a%hksz
+
+  end function s_hll_get_hksz
+
+end module psb_s_hll_mat_mod
diff --git a/ext/psb_z_dia_mat_mod.f90 b/ext/psb_z_dia_mat_mod.f90
new file mode 100644
index 00000000..76d071af
--- /dev/null
+++ b/ext/psb_z_dia_mat_mod.f90
@@ -0,0 +1,513 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! 
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_dia_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + complex(psb_dpk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => z_dia_get_size + procedure, pass(a) :: get_nzeros => z_dia_get_nzeros + procedure, nopass :: get_fmt => z_dia_get_fmt + procedure, pass(a) :: sizeof => z_dia_sizeof + procedure, pass(a) :: csmm => psb_z_dia_csmm + procedure, pass(a) :: csmv => psb_z_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_z_dia_cssm + ! 
procedure, pass(a) :: inner_cssv => psb_z_dia_cssv + procedure, pass(a) :: scals => psb_z_dia_scals + procedure, pass(a) :: scalv => psb_z_dia_scal + procedure, pass(a) :: maxval => psb_z_dia_maxval + procedure, pass(a) :: rowsum => psb_z_dia_rowsum + procedure, pass(a) :: arwsum => psb_z_dia_arwsum + procedure, pass(a) :: colsum => psb_z_dia_colsum + procedure, pass(a) :: aclsum => psb_z_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_z_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_z_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_z_dia_csput_a + procedure, pass(a) :: get_diag => psb_z_dia_get_diag + procedure, pass(a) :: csgetptn => psb_z_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_z_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => z_dia_get_nz_row + procedure, pass(a) :: reinit => psb_z_dia_reinit + ! 
procedure, pass(a) :: trim => psb_z_dia_trim + procedure, pass(a) :: print => psb_z_dia_print + procedure, pass(a) :: free => z_dia_free + procedure, pass(a) :: mold => psb_z_dia_mold + + end type psb_z_dia_sparse_mat + + private :: z_dia_get_nzeros, z_dia_free, z_dia_get_fmt, & + & z_dia_sizeof !, z_dia_get_size, z_dia_get_nz_row + + interface + subroutine psb_z_dia_reallocate_nz(nz,a) + import :: psb_z_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dia_sparse_mat), intent(inout) :: a + end subroutine psb_z_dia_reallocate_nz + end interface + + interface + subroutine psb_z_dia_reinit(a,clear) + import :: psb_z_dia_sparse_mat + class(psb_z_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_dia_reinit + end interface + + interface + subroutine psb_z_dia_trim(a) + import :: psb_z_dia_sparse_mat + class(psb_z_dia_sparse_mat), intent(inout) :: a + end subroutine psb_z_dia_trim + end interface + + interface + subroutine psb_z_dia_mold(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_mold + end interface + + interface + subroutine psb_z_dia_allocate_mnnz(m,n,a,nz) + import :: psb_z_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_dia_allocate_mnnz + end interface + + interface + subroutine psb_z_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_dia_print + end 
interface + + interface + subroutine psb_z_cp_dia_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_dia_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_to_coo + end interface + + interface + subroutine psb_z_cp_dia_from_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_from_coo + end interface + + interface + subroutine psb_z_cp_dia_to_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_to_fmt + end interface + + interface + subroutine psb_z_cp_dia_from_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_from_fmt + end interface + + interface + subroutine psb_z_mv_dia_to_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_to_coo + end interface + + interface + subroutine psb_z_mv_dia_from_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_from_coo + end interface + + interface + subroutine psb_z_mv_dia_to_fmt(a,b,info) + import :: 
psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_to_fmt + end interface + + interface + subroutine psb_z_mv_dia_from_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_from_fmt + end interface + + interface + subroutine psb_z_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_csput_a + end interface + + interface + subroutine psb_z_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_dia_csgetptn + end interface + + interface + subroutine psb_z_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) 
:: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_dia_csgetrow + end interface + + interface + subroutine psb_z_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_dia_csgetblk + end interface + + interface + subroutine psb_z_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_cssv + subroutine psb_z_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_cssm + end interface + + interface + subroutine psb_z_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + 
complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_csmv + subroutine psb_z_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_csmm + end interface + + + interface + function psb_z_dia_maxval(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_maxval + end interface + + interface + function psb_z_dia_csnmi(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_csnmi + end interface + + interface + function psb_z_dia_csnm1(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_csnm1 + end interface + + interface + subroutine psb_z_dia_rowsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_rowsum + end interface + + interface + subroutine psb_z_dia_arwsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_arwsum + end interface + + interface + subroutine psb_z_dia_colsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_colsum + end interface + + interface + subroutine psb_z_dia_aclsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + 
class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_aclsum + end interface + + interface + subroutine psb_z_dia_get_diag(a,d,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_get_diag + end interface + + interface + subroutine psb_z_dia_scal(d,a,info,side) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_dia_scal + end interface + + interface + subroutine psb_z_dia_scals(d,a,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_z_convert_dia_from_coo(a,tmp,info) + import :: psb_z_dia_sparse_mat, psb_ipk_, psb_z_coo_sparse_mat + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_z_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== ===================================
+
+
+  !
+  ! Function sizeof
+  !   Memory occupation in bytes of the DIA storage.
+  !   Components that are not currently allocated contribute nothing, so
+  !   the function may safely be called on a freed or never-built matrix.
+  !
+  function z_dia_sizeof(a) result(res)
+    implicit none
+    class(psb_z_dia_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    ! Bring the host copy up to date before inspecting it.
+    if (a%is_dev()) call a%sync()
+    ! Fixed overhead; presumably accounts for the scalar members -- TODO confirm.
+    res = 8
+    ! Entries of DATA are counted as two psb_sizeof_dp each
+    ! (complex coefficients, presumably -- the declaration is not in view here).
+    if (allocated(a%data)) &
+         & res = res + (2*psb_sizeof_dp) * size(a%data)
+    if (allocated(a%offset)) &
+         & res = res + psb_sizeof_ip * size(a%offset)
+
+  end function z_dia_sizeof
+
+  !
+  ! Function get_fmt
+  !   Short descriptive name of the storage format ('DIA', blank padded
+  !   to the 5 characters of the result).
+  !
+  function z_dia_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'DIA'
+  end function z_dia_get_fmt
+
+  !
+  ! Function get_nzeros
+  !   Current number of nonzero entries, as recorded in a%nzeros.
+  !
+  function z_dia_get_nzeros(a) result(res)
+    implicit none
+    class(psb_z_dia_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzeros
+  end function z_dia_get_nzeros
+
+  ! function z_dia_get_size(a) result(res)
+  !   implicit none
+  !   class(psb_z_dia_sparse_mat), intent(in) :: a
+  !   integer(psb_ipk_) :: res
+
+  !   res = -1
+
+  !   if (allocated(a%ja)) then
+  !     if (res >= 0) then
+  !       res = min(res,size(a%ja))
+  !     else
+  !       res = size(a%ja)
+  !     end if
+  !   end if
+  !   if (allocated(a%val)) then
+  !     if (res >= 0) then
+  !       res = min(res,size(a%val))
+  !     else
+  !       res = size(a%val)
+  !     end if
+  !   end if
+
+  ! end function z_dia_get_size
+
+
+  ! function z_dia_get_nz_row(idx,a) result(res)
+
+  !   implicit none
+
+  !   class(psb_z_dia_sparse_mat), intent(in) :: a
+  !   integer(psb_ipk_), intent(in) :: idx
+  !   integer(psb_ipk_) :: res
+
+  !   res = 0
+
+  !   if ((1<=idx).and.(idx<=a%get_nrows())) then
+  !     res = a%irn(idx)
+  !   end if
+
+  ! end function z_dia_get_nz_row
+
+
+
+  ! ! == ===================================
+  ! !
+  ! !
+  ! !
+  ! ! Data management
+  ! !
+  ! !
+  ! !
+  ! !
+  ! !
+  ! !
== ===================================
+
+  !
+  ! Subroutine free
+  !   Release the storage held by the matrix and mark the object as empty:
+  !   DATA and OFFSET are deallocated when allocated, the nonzero counter
+  !   is reset and the dimensions are set to zero.
+  !
+  subroutine z_dia_free(a)
+    implicit none
+
+    class(psb_z_dia_sparse_mat), intent(inout) :: a
+
+    if (allocated(a%data)) deallocate(a%data)
+    if (allocated(a%offset)) deallocate(a%offset)
+    ! Keep get_nzeros() consistent with the (now empty) storage.
+    a%nzeros = 0
+    !
+    ! Mark the object as empty just in case
+    !
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+
+    return
+
+  end subroutine z_dia_free
+
+
+end module psb_z_dia_mat_mod
diff --git a/ext/psb_z_dns_mat_mod.f90 b/ext/psb_z_dns_mat_mod.f90
new file mode 100644
index 00000000..6147057d
--- /dev/null
+++ b/ext/psb_z_dns_mat_mod.f90
@@ -0,0 +1,467 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+module psb_z_dns_mat_mod
+
+  use psb_z_base_mat_mod
+
+  type, extends(psb_z_base_sparse_mat) :: psb_z_dns_sparse_mat
+    !
+    ! DNS format: a very simple dense matrix storage
+    !  psb_dpk_ : kind for double precision reals; the coefficients
+    !             are complex(psb_dpk_), i.e. two such reals each
+    !  psb_ipk_: kind for normal integers.
+    !  psb_sizeof_dp: variable holding size in bytes of
+    !                 a double
+    !  psb_sizeof_ip: size in bytes of an integer
+    !
+    !  psb_realloc(n,v,info)    Reallocate: does what it says
+    !  psb_realloc(m,n,a,info)  on rank 1 and 2 arrays, may start
+    !                           from unallocated
+    !
+    !  NNZ: number of nonzero entries (what get_nzeros reports)
+    !  VAL: the dense coefficients, addressed as VAL(row,col)
+    !
+    integer(psb_ipk_) :: nnz = 0
+    complex(psb_dpk_), allocatable :: val(:,:)
+
+  contains
+    procedure, pass(a) :: get_size => z_dns_get_size
+    procedure, pass(a) :: get_nzeros => z_dns_get_nzeros
+    procedure, nopass :: get_fmt => z_dns_get_fmt
+    procedure, pass(a) :: sizeof => z_dns_sizeof
+    procedure, pass(a) :: csmv => psb_z_dns_csmv
+    procedure, pass(a) :: csmm => psb_z_dns_csmm
+    procedure, pass(a) :: csnmi => psb_z_dns_csnmi
+    procedure, pass(a) :: reallocate_nz => psb_z_dns_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_z_dns_allocate_mnnz
+    procedure, pass(a) :: cp_to_coo => psb_z_cp_dns_to_coo
+    procedure, pass(a) :: cp_from_coo => psb_z_cp_dns_from_coo
+    procedure, pass(a) :: mv_to_coo => psb_z_mv_dns_to_coo
+    procedure, pass(a) :: mv_from_coo => psb_z_mv_dns_from_coo
+    procedure, pass(a) :: get_diag => psb_z_dns_get_diag
+    procedure, pass(a) :: csgetrow => psb_z_dns_csgetrow
+    procedure, pass(a) :: get_nz_row => z_dns_get_nz_row
+    procedure, pass(a) :: trim => psb_z_dns_trim
+    procedure, pass(a) :: free => z_dns_free
+    procedure, pass(a) :: mold => psb_z_dns_mold
+
+  end type psb_z_dns_sparse_mat
+
+  private :: z_dns_get_nzeros, z_dns_free, z_dns_get_fmt, &
+       & z_dns_get_size, z_dns_sizeof, z_dns_get_nz_row
+
+  !
+  !
+  !> Function reallocate_nz
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief One-parameter version of (re)allocate
+  !!
+  !! \param nz number of nonzeros to allocate for
+  !!           i.e. makes sure that the internal storage
+  !!           allows for NZ coefficients and their indices.
+  !
+  interface
+    subroutine psb_z_dns_reallocate_nz(nz,a)
+      import :: psb_z_dns_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: nz
+      class(psb_z_dns_sparse_mat), intent(inout) :: a
+    end subroutine psb_z_dns_reallocate_nz
+  end interface
+
+  !> Function trim
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Memory trim
+  !! Make sure the memory allocation of the sparse matrix is as tight as
+  !! possible given the actual number of nonzeros it contains.
+  !
+  interface
+    subroutine psb_z_dns_trim(a)
+      import :: psb_z_dns_sparse_mat
+      class(psb_z_dns_sparse_mat), intent(inout) :: a
+    end subroutine psb_z_dns_trim
+  end interface
+
+  !
+  !> Function mold:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Allocate a class(psb_z_dns_sparse_mat) with the
+  !! same dynamic type as the input.
+  !! This is equivalent to allocate( mold= ) and is provided
+  !! for those compilers not yet supporting mold.
+  !! \param b The output variable
+  !! \param info return code
+  !
+  interface
+    subroutine psb_z_dns_mold(a,b,info)
+      import :: psb_z_dns_sparse_mat, psb_z_base_sparse_mat, psb_epk_, psb_ipk_
+      class(psb_z_dns_sparse_mat), intent(in) :: a
+      class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_z_dns_mold
+  end interface
+
+  !
+  !
+  !> Function allocate_mnnz
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Three-parameter version of allocate
+  !!
+  !! \param m number of rows
+  !! \param n number of cols
+  !! \param nz [estimated internally] number of nonzeros to allocate for
+  !
+  interface
+    subroutine psb_z_dns_allocate_mnnz(m,n,a,nz)
+      import :: psb_z_dns_sparse_mat, psb_ipk_
+      integer(psb_ipk_), intent(in) :: m,n
+      class(psb_z_dns_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_z_dns_allocate_mnnz
+  end interface
+
+  !
+  !> Function cp_to_coo:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Copy and convert to psb_z_coo_sparse_mat
+  !! Invoked from the source object.
+  !! \param b The output variable
+  !! \param info return code
+  !
+  interface
+    subroutine psb_z_cp_dns_to_coo(a,b,info)
+      import :: psb_z_coo_sparse_mat, psb_z_dns_sparse_mat, psb_ipk_
+      class(psb_z_dns_sparse_mat), intent(in) :: a
+      class(psb_z_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_z_cp_dns_to_coo
+  end interface
+
+  !
+  !> Function cp_from_coo:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Copy and convert from psb_z_coo_sparse_mat
+  !! Invoked from the target object.
+  !! \param b The input variable
+  !! \param info return code
+  !
+  interface
+    subroutine psb_z_cp_dns_from_coo(a,b,info)
+      import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_
+      class(psb_z_dns_sparse_mat), intent(inout) :: a
+      class(psb_z_coo_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_z_cp_dns_from_coo
+  end interface
+
+  !
+  !> Function mv_to_coo:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Convert to psb_z_coo_sparse_mat, freeing the source.
+  !! Invoked from the source object.
+  !! \param b The output variable
+  !! \param info return code
+  !
+  interface
+    subroutine psb_z_mv_dns_to_coo(a,b,info)
+      import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_
+      class(psb_z_dns_sparse_mat), intent(inout) :: a
+      class(psb_z_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_z_mv_dns_to_coo
+  end interface
+
+  !
+  !> Function mv_from_coo:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Convert from psb_z_coo_sparse_mat, freeing the source.
+  !! Invoked from the target object.
+  !! \param b The input variable
+  !! \param info return code
+  !
+  interface
+    subroutine psb_z_mv_dns_from_coo(a,b,info)
+      import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_
+      class(psb_z_dns_sparse_mat), intent(inout) :: a
+      class(psb_z_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_z_mv_dns_from_coo
+  end interface
+
+  !
+  !
+  !> Function csgetrow:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Get a (subset of) row(s)
+  !!
+  !! getrow is the basic method by which the other (getblk, clip) can
+  !! be implemented.
+  !!
+  !! Returns the set
+  !!    NZ, IA(1:nz), JA(1:nz), VAL(1:NZ)
+  !! each identifying the position of a nonzero in A
+  !! i.e.
+  !!    VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ))
+  !! with IMIN<=IA(:)<=IMAX
+  !! with JMIN<=JA(:)<=JMAX
+  !! IA,JA are reallocated as necessary.
+  !!
+  !! \param imin the minimum row index we are interested in
+  !! \param imax the maximum row index we are interested in
+  !! \param nz the number of output coefficients
+  !! \param ia(:) the output row indices
+  !! \param ja(:) the output col indices
+  !! \param val(:) the output coefficients
+  !! \param info return code
+  !! \param jmin [1] minimum col index
+  !! \param jmax [a\%get_ncols()] maximum col index
+  !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
+  !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
+  !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
+  !!        ( iren cannot be specified with rscale/cscale)
+  !! \param append [false] append to ia,ja
+  !! \param nzin [none] if append, then first new entry should go in entry nzin+1
+  !!
+  !
+  interface
+    subroutine psb_z_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
+         & jmin,jmax,iren,append,nzin,rscale,cscale,chksz)
+      import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_z_dns_sparse_mat), intent(in) :: a
+      integer(psb_ipk_), intent(in) :: imin,imax
+      integer(psb_ipk_), intent(out) :: nz
+      integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:)
+      complex(psb_dpk_), allocatable, intent(inout) :: val(:)
+      integer(psb_ipk_),intent(out) :: info
+      logical, intent(in), optional :: append
+      integer(psb_ipk_), intent(in), optional :: iren(:)
+      integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin
+      logical, intent(in), optional :: rscale,cscale,chksz
+    end subroutine psb_z_dns_csgetrow
+  end interface
+
+
+
+  !> Function csmv:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Product by a dense rank 1 array.
+  !!
+  !! Compute
+  !!    Y = alpha*op(A)*X + beta*Y
+  !!
+  !! \param alpha Scaling factor for Ax
+  !! \param A the input sparse matrix
+  !! \param x(:) the input dense X
+  !! \param beta Scaling factor for y
+  !! \param y(:) the input/output dense Y
+  !! \param info return code
+  !! \param trans [N] Whether to use A (N), its transpose (T)
+  !!        or its conjugate transpose (C)
+  !!
+  !
+  interface
+    subroutine psb_z_dns_csmv(alpha,a,x,beta,y,info,trans)
+      import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_z_dns_sparse_mat), intent(in) :: a
+      complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
+      complex(psb_dpk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_z_dns_csmv
+  end interface
+
+  !> Function csmm:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Product by a dense rank 2 array.
+  !!
+  !! Compute
+  !!    Y = alpha*op(A)*X + beta*Y
+  !!
+  !! \param alpha Scaling factor for Ax
+  !! \param A the input sparse matrix
+  !! \param x(:,:) the input dense X
+  !! \param beta Scaling factor for y
+  !! \param y(:,:) the input/output dense Y
+  !! \param info return code
+  !! \param trans [N] Whether to use A (N), its transpose (T)
+  !!        or its conjugate transpose (C)
+  !!
+  !
+  interface
+    subroutine psb_z_dns_csmm(alpha,a,x,beta,y,info,trans)
+      import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_z_dns_sparse_mat), intent(in) :: a
+      complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+      complex(psb_dpk_), intent(inout) :: y(:,:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_z_dns_csmm
+  end interface
+
+  !
+  !
+  !> Function csnmi:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Operator infinity norm
+  !!    CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2))
+  !!
+  !
+  interface
+    function psb_z_dns_csnmi(a) result(res)
+      import :: psb_z_dns_sparse_mat, psb_dpk_
+      class(psb_z_dns_sparse_mat), intent(in) :: a
+      real(psb_dpk_) :: res
+    end function psb_z_dns_csnmi
+  end interface
+
+  !
+  !> Function get_diag:
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Extract the diagonal of A.
+  !!
+  !!    D(i) = A(i,i), i=1:min(nrows,ncols)
+  !!
+  !! \param d(:) The output diagonal
+  !! \param info return code.
+  !
+  interface
+    subroutine psb_z_dns_get_diag(a,d,info)
+      import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_
+      class(psb_z_dns_sparse_mat), intent(in) :: a
+      complex(psb_dpk_), intent(out) :: d(:)
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_z_dns_get_diag
+  end interface
+
+
+contains
+
+  !
+  !> Function sizeof
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Memory occupation in bytes
+  !!
+  !! Counts one integer (the NNZ counter) plus, when VAL is allocated,
+  !! two psb_sizeof_dp per coefficient since the entries are complex.
+  !
+  function z_dns_sizeof(a) result(res)
+    implicit none
+    class(psb_z_dns_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    res = psb_sizeof_ip
+    ! SIZE() must not be applied to an unallocated array (e.g. after free).
+    if (allocated(a%val)) &
+         & res = res + (2*psb_sizeof_dp) * size(a%val)
+
+  end function z_dns_sizeof
+
+  !
+  !> Function get_fmt
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief return a short descriptive name (e.g. COO CSR etc.)
+  !
+  function z_dns_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'DNS'
+  end function z_dns_get_fmt
+
+  !
+  !> Function get_nzeros
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Current number of nonzero entries
+  !
+  function z_dns_get_nzeros(a) result(res)
+    implicit none
+    class(psb_z_dns_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nnz
+  end function z_dns_get_nzeros
+
+  !
+  !> Function get_size
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Maximum number of nonzeros the current structure can hold
+  !  this is fixed once you initialize the matrix, with dense storage
+  !  you can hold up to MxN entries; it is 0 while VAL is not allocated.
+  function z_dns_get_size(a) result(res)
+    implicit none
+    class(psb_z_dns_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = 0
+    if (allocated(a%val)) res = size(a%val)
+
+  end function z_dns_get_size
+
+
+  !
+  !> Function get_nz_row.
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief How many nonzeros in a row?
+  !!
+  !! \param idx The row to search.
+  !!
+  !! Returns 0 when IDX is outside 1:nrows or VAL is not allocated.
+  !
+  function z_dns_get_nz_row(idx,a) result(res)
+
+    implicit none
+
+    class(psb_z_dns_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(in) :: idx
+    integer(psb_ipk_) :: res
+
+    res = 0
+    if (.not.allocated(a%val)) return
+
+    if ((1<=idx).and.(idx<=a%get_nrows())) then
+      ! Mixed complex/real inequality: true for any entry other than (0,0).
+      res = count(a%val(idx,:) /= dzero)
+    end if
+
+  end function z_dns_get_nz_row
+
+  !
+  !> Function free
+  !! \memberof psb_z_dns_sparse_mat
+  !! \brief Release the storage and mark the matrix as empty.
+
+  subroutine z_dns_free(a)
+    implicit none
+
+    class(psb_z_dns_sparse_mat), intent(inout) :: a
+
+    if (allocated(a%val)) deallocate(a%val)
+    a%nnz = 0
+
+
+    !
+    ! Mark the object as empty just in case
+    !
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+
+    return
+
+  end subroutine z_dns_free
+
+
+end module psb_z_dns_mat_mod
diff --git a/ext/psb_z_ell_mat_mod.f90 b/ext/psb_z_ell_mat_mod.f90
new file mode 100644
index 00000000..0e1f0e00
--- /dev/null
+++ b/ext/psb_z_ell_mat_mod.f90
@@ -0,0 +1,552 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+!
Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_ell_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. 
IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + complex(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => z_ell_is_by_rows + procedure, pass(a) :: get_size => z_ell_get_size + procedure, pass(a) :: get_nzeros => z_ell_get_nzeros + procedure, nopass :: get_fmt => z_ell_get_fmt + procedure, pass(a) :: sizeof => z_ell_sizeof + procedure, pass(a) :: csmm => psb_z_ell_csmm + procedure, pass(a) :: csmv => psb_z_ell_csmv + procedure, pass(a) :: inner_cssm => psb_z_ell_cssm + procedure, pass(a) :: inner_cssv => psb_z_ell_cssv + procedure, pass(a) :: scals => psb_z_ell_scals + procedure, pass(a) :: scalv => psb_z_ell_scal + procedure, pass(a) :: maxval => psb_z_ell_maxval + procedure, pass(a) :: csnmi => psb_z_ell_csnmi + procedure, pass(a) :: csnm1 => psb_z_ell_csnm1 + procedure, pass(a) :: rowsum => psb_z_ell_rowsum + procedure, pass(a) :: arwsum => psb_z_ell_arwsum + procedure, pass(a) :: colsum => psb_z_ell_colsum + procedure, pass(a) :: aclsum => psb_z_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_z_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_z_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_z_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_z_mv_ell_from_fmt + procedure, pass(a) 
:: csput_a => psb_z_ell_csput_a + procedure, pass(a) :: get_diag => psb_z_ell_get_diag + procedure, pass(a) :: csgetptn => psb_z_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_z_ell_csgetrow + procedure, pass(a) :: get_nz_row => z_ell_get_nz_row + procedure, pass(a) :: reinit => psb_z_ell_reinit + procedure, pass(a) :: trim => psb_z_ell_trim + procedure, pass(a) :: print => psb_z_ell_print + procedure, pass(a) :: free => z_ell_free + procedure, pass(a) :: mold => psb_z_ell_mold + procedure, pass(a) :: get_nrm => z_ell_get_nrm + + end type psb_z_ell_sparse_mat + + private :: z_ell_get_nzeros, z_ell_free, z_ell_get_fmt, & + & z_ell_get_size, z_ell_sizeof, z_ell_get_nz_row, & + & z_ell_is_by_rows + + interface + subroutine psb_z_ell_reallocate_nz(nz,a) + import :: psb_z_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_ell_sparse_mat), intent(inout) :: a + end subroutine psb_z_ell_reallocate_nz + end interface + + interface + subroutine psb_z_ell_reinit(a,clear) + import :: psb_z_ell_sparse_mat + class(psb_z_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_ell_reinit + end interface + + interface + subroutine psb_z_ell_trim(a) + import :: psb_z_ell_sparse_mat + class(psb_z_ell_sparse_mat), intent(inout) :: a + end subroutine psb_z_ell_trim + end interface + + interface + subroutine psb_z_ell_mold(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_mold + end interface + + interface + subroutine psb_z_ell_allocate_mnnz(m,n,a,nz) + import :: psb_z_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_ell_allocate_mnnz + end interface + + interface + subroutine 
psb_z_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_ell_print + end interface + + interface + subroutine psb_z_cp_ell_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_ell_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_to_coo + end interface + + interface + subroutine psb_z_cp_ell_from_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_from_coo + end interface + + interface + subroutine psb_z_cp_ell_to_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_to_fmt + end interface + + interface + subroutine psb_z_cp_ell_from_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_from_fmt + end interface + + interface + subroutine psb_z_mv_ell_to_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_to_coo + end interface + + interface + subroutine 
psb_z_mv_ell_from_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_from_coo + end interface + + interface + subroutine psb_z_mv_ell_to_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_to_fmt + end interface + + interface + subroutine psb_z_mv_ell_from_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_from_fmt + end interface + + interface + subroutine psb_z_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_csput_a + end interface + + interface + subroutine psb_z_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_ell_csgetptn + end interface + 
+ interface + subroutine psb_z_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_ell_csgetrow + end interface + + interface + subroutine psb_z_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_ell_csgetblk + end interface + + interface + subroutine psb_z_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_cssv + subroutine psb_z_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + 
integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_cssm + end interface + + interface + subroutine psb_z_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_csmv + subroutine psb_z_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_csmm + end interface + + + interface + function psb_z_ell_maxval(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_maxval + end interface + + interface + function psb_z_ell_csnmi(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_csnmi + end interface + + interface + function psb_z_ell_csnm1(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_csnm1 + end interface + + interface + subroutine psb_z_ell_rowsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_rowsum + end interface + + interface + subroutine psb_z_ell_arwsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_arwsum + 
end interface + + interface + subroutine psb_z_ell_colsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_colsum + end interface + + interface + subroutine psb_z_ell_aclsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_aclsum + end interface + + interface + subroutine psb_z_ell_get_diag(a,d,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_get_diag + end interface + + interface + subroutine psb_z_ell_scal(d,a,info,side) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_ell_scal + end interface + + interface + subroutine psb_z_ell_scals(d,a,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_scals + end interface + + interface + subroutine psi_z_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_z_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== ===================================
+
+  ! Orientation query: this format always answers .true.
+  function z_ell_is_by_rows(a) result(res)
+    implicit none
+    class(psb_z_ell_sparse_mat), intent(in) :: a
+    logical  :: res
+    res = .true.
+  end function z_ell_is_by_rows
+
+  function z_ell_sizeof(a) result(res)   ! byte count: fixed 8 + allocated arrays
+    implicit none
+    class(psb_z_ell_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    if (a%is_dev()) call a%sync()
+    res = 8   ! fixed term; presumably the scalar components (TODO confirm)
+    if (allocated(a%val))   res = res + (2*psb_sizeof_dp) * size(a%val,kind=psb_epk_)
+    if (allocated(a%irn))   res = res + psb_sizeof_ip * size(a%irn,kind=psb_epk_)
+    if (allocated(a%idiag)) res = res + psb_sizeof_ip * size(a%idiag,kind=psb_epk_)
+    if (allocated(a%ja))    res = res + psb_sizeof_ip * size(a%ja,kind=psb_epk_)
+    ! Unallocated arrays add nothing; SIZE is taken in kind psb_epk_ so no default-integer product is formed
+  end function z_ell_sizeof
+
+  function z_ell_get_fmt() result(res)   ! format tag, blank-padded to 5 characters
+    implicit none
+    character(len=5) :: res
+    res = 'ELL'
+  end function z_ell_get_fmt
+
+  function z_ell_get_nrm(a) result(res)   ! extent of the second dimension of VAL
+    implicit none
+    class(psb_z_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = size(a%val,2)   ! NOTE(review): requires VAL to be allocated - confirm callers guarantee it
+  end function z_ell_get_nrm
+
+  function z_ell_get_nzeros(a) result(res)   ! value of the NZT counter
+    implicit none
+    class(psb_z_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzt
+  end function z_ell_get_nzeros
+
+  function z_ell_get_size(a) result(res)   ! smallest extent among allocated JA/VAL, -1 if neither
+    implicit none
+    class(psb_z_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = -1
+    if (a%is_dev()) call a%sync()
+    ! SIZE is requested in kind psb_ipk_ so that both MIN arguments share one kind
+    if (allocated(a%ja)) then
+      if (res >= 0) then   ! not taken at this point: RES is still -1
+        res = min(res,size(a%ja,kind=psb_ipk_))
+      else
+        res = size(a%ja,kind=psb_ipk_)
+      end if
+    end if
+    if (allocated(a%val)) then
+      if (res >= 0) then
+        res = min(res,size(a%val,kind=psb_ipk_))
+      else
+        res = size(a%val,kind=psb_ipk_)
+      end if
+    end if
+
+  end function z_ell_get_size
+
+
+  function z_ell_get_nz_row(idx,a) result(res)
+    ! Returns IRN(idx); 0 when IDX is outside 1..get_nrows() or IRN cannot be indexed at IDX
+    implicit none
+
+    class(psb_z_ell_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(in) :: idx
+    integer(psb_ipk_) :: res
+
+    res = 0
+    if (a%is_dev()) call a%sync()
+
+    if (allocated(a%irn)) then   ! SIZE below is only legal on an allocated array
+      if ((1<=idx).and.(idx<=a%get_nrows()).and.(idx<=size(a%irn))) res = a%irn(idx)
+    end if
+
+  end function z_ell_get_nz_row
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! 
== ===================================
+
+  subroutine z_ell_free(a)
+    implicit none
+    ! Deallocates IDIAG, IRN, JA and VAL, then calls set_null() and zeroes the row/column counts
+    class(psb_z_ell_sparse_mat), intent(inout) :: a
+
+    if (allocated(a%idiag)) deallocate(a%idiag)
+    if (allocated(a%irn)) deallocate(a%irn)
+    if (allocated(a%ja)) deallocate(a%ja)
+    if (allocated(a%val)) deallocate(a%val)
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+    a%nzt = 0   ! otherwise get_nzeros() keeps reporting the count of the released data
+    return
+
+  end subroutine z_ell_free
+
+
+end module psb_z_ell_mat_mod
diff --git a/ext/psb_z_hdia_mat_mod.f90 b/ext/psb_z_hdia_mat_mod.f90
new file mode 100644
index 00000000..e7c11321
--- /dev/null
+++ b/ext/psb_z_hdia_mat_mod.f90
@@ -0,0 +1,534 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_z_hdia_mat_mod + + use psb_z_base_mat_mod + + + type, extends(psb_z_base_sparse_mat) :: psb_z_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + complex(psb_dpk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => z_hdia_get_size + procedure, pass(a) :: get_nzeros => z_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => z_hdia_set_nzeros + procedure, nopass :: get_fmt => z_hdia_get_fmt + procedure, pass(a) :: sizeof => z_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_z_hdia_csmm + procedure, pass(a) :: csmv => psb_z_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_z_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_z_hdia_cssv + ! procedure, pass(a) :: scals => psb_z_hdia_scals + ! procedure, pass(a) :: scalv => psb_z_hdia_scal + ! procedure, pass(a) :: maxval => psb_z_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_z_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_z_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_z_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_z_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_z_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_z_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_z_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_z_cp_hdia_to_fmt + ! 
procedure, pass(a) :: cp_from_fmt => psb_z_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_z_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_z_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_z_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_z_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_z_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => z_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_z_hdia_reinit + ! procedure, pass(a) :: trim => psb_z_hdia_trim + procedure, pass(a) :: print => psb_z_hdia_print + procedure, pass(a) :: free => z_hdia_free + procedure, pass(a) :: mold => psb_z_hdia_mold + + end type psb_z_hdia_sparse_mat + + private :: z_hdia_get_nzeros, z_hdia_set_nzeros, z_hdia_free, & + & z_hdia_get_fmt, z_hdia_sizeof +!!$ & +!!$ & z_hdia_get_nz_row z_hdia_get_size, + +!!$ interface +!!$ subroutine psb_z_hdia_reallocate_nz(nz,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_reinit(a,clear) +!!$ import :: psb_z_hdia_sparse_mat +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_z_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_trim(a) +!!$ import :: psb_z_hdia_sparse_mat +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdia_trim +!!$ end interface + + interface + subroutine psb_z_hdia_mold(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + 
integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdia_mold + end interface + + interface + subroutine psb_z_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_z_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hdia_allocate_mnnz + end interface + + interface + subroutine psb_z_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_hdia_print + end interface + + interface + subroutine psb_z_cp_hdia_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_hdia_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdia_to_coo + end interface + + interface + subroutine psb_z_cp_hdia_from_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ 
class(psb_z_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_z_mv_hdia_to_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdia_to_coo + end interface + + interface + subroutine psb_z_mv_hdia_from_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine 
psb_z_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ complex(psb_dpk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_z_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), 
optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_cssv +!!$ subroutine psb_z_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_cssm +!!$ end interface + + interface + subroutine psb_z_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hdia_csmv +!!$ subroutine psb_z_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_z_hdia_maxval(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function 
psb_z_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_z_hdia_csnmi(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_z_hdia_csnm1(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_rowsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_arwsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_colsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_aclsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_get_diag(a,d,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_scal(d,a,info,side) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ 
+!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a
+!!$ complex(psb_dpk_), intent(in) :: d(:)
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character, intent(in), optional :: side
+!!$ end subroutine psb_z_hdia_scal
+!!$ end interface
+
+!!$ interface
+!!$ subroutine psb_z_hdia_scals(d,a,info)
+!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_
+!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a
+!!$ complex(psb_dpk_), intent(in) :: d
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ end subroutine psb_z_hdia_scals
+!!$ end interface
+!!$
+
+
+contains
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Getters
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+
+  function z_hdia_sizeof(a) result(res)
+    use psb_realloc_mod, only : psb_size
+    implicit none
+    class(psb_z_hdia_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+    ! Byte count of hackOffsets, diaOffsets and val; each extent is widened to psb_epk_ before scaling
+
+    if (a%is_dev()) call a%sync()
+    res = 0
+
+    res = res + int(psb_size(a%hackOffsets),psb_epk_) * psb_sizeof_ip
+    res = res + int(psb_size(a%diaOffsets),psb_epk_) * psb_sizeof_ip
+    res = res + int(psb_size(a%val),psb_epk_) * (2*psb_sizeof_dp)
+
+  end function z_hdia_sizeof
+
+  function z_hdia_get_fmt() result(res)   ! format tag, blank-padded to 5 characters
+    implicit none
+    character(len=5) :: res
+    res = 'HDIA'
+  end function z_hdia_get_fmt
+
+  function z_hdia_get_nzeros(a) result(res)   ! value of the NZEROS counter
+    implicit none
+    class(psb_z_hdia_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+    res = a%nzeros
+  end function z_hdia_get_nzeros
+
+  subroutine z_hdia_set_nzeros(a,nz)   ! stores NZ in the NZEROS counter, unchecked
+    implicit none
+    class(psb_z_hdia_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in) :: nz
+    a%nzeros = nz
+  end subroutine z_hdia_set_nzeros
+
+  ! function z_hdia_get_size(a) result(res)
+  ! implicit none
+  ! class(psb_z_hdia_sparse_mat), intent(in) :: a
+  ! integer(psb_ipk_) :: res
+
+  ! res = -1
+
+  ! if (allocated(a%ja)) then
+  ! if (res >= 0) then
+  ! res = min(res,size(a%ja))
+  ! else
+  ! res = size(a%ja)
+  ! end if
+  ! end if
+  ! if (allocated(a%val)) then
+  ! if (res >= 0) then
+  ! 
res = min(res,size(a%val))
+  ! else
+  ! res = size(a%val)
+  ! end if
+  ! end if
+
+  ! end function z_hdia_get_size
+
+
+  ! function z_hdia_get_nz_row(idx,a) result(res)
+
+  ! implicit none
+
+  ! class(psb_z_hdia_sparse_mat), intent(in) :: a
+  ! integer(psb_ipk_), intent(in) :: idx
+  ! integer(psb_ipk_) :: res
+
+  ! res = 0
+
+  ! if ((1<=idx).and.(idx<=a%get_nrows())) then
+  ! res = a%irn(idx)
+  ! end if
+
+  ! end function z_hdia_get_nz_row
+
+
+
+  ! ! == ===================================
+  ! !
+  ! !
+  ! !
+  ! ! Data management
+  ! !
+  ! !
+  ! !
+  ! !
+  ! !
+  ! ! == ===================================
+
+  subroutine z_hdia_free(a)
+    implicit none
+
+    class(psb_z_hdia_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+    ! Releases the three arrays; the deallocation status lands in INFO and is not acted upon
+    if (allocated(a%hackOffsets))&
+         & deallocate(a%hackOffsets,stat=info)
+    if (allocated(a%diaOffsets))&
+         & deallocate(a%diaOffsets,stat=info)
+    if (allocated(a%val))&
+         & deallocate(a%val,stat=info)
+    a%nhacks=0
+    a%nzeros=0   ! otherwise get_nzeros() keeps reporting the count of the released data
+    call a%set_null()
+    call a%set_nrows(izero)
+    call a%set_ncols(izero)
+
+    return
+
+  end subroutine z_hdia_free
+
+
+end module psb_z_hdia_mat_mod
diff --git a/ext/psb_z_hll_mat_mod.f90 b/ext/psb_z_hll_mat_mod.f90
new file mode 100644
index 00000000..98eb403f
--- /dev/null
+++ b/ext/psb_z_hll_mat_mod.f90
@@ -0,0 +1,564 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! 
not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_hll_mat_mod + + use psb_z_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! 
+ integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + complex(psb_dpk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => z_hll_get_hksz + procedure, pass(a) :: set_hksz => z_hll_set_hksz + procedure, pass(a) :: get_size => z_hll_get_size + procedure, pass(a) :: set_nzeros => z_hll_set_nzeros + procedure, pass(a) :: get_nzeros => z_hll_get_nzeros + procedure, nopass :: get_fmt => z_hll_get_fmt + procedure, pass(a) :: sizeof => z_hll_sizeof + procedure, pass(a) :: csmm => psb_z_hll_csmm + procedure, pass(a) :: csmv => psb_z_hll_csmv + procedure, pass(a) :: inner_cssm => psb_z_hll_cssm + procedure, pass(a) :: inner_cssv => psb_z_hll_cssv + procedure, pass(a) :: scals => psb_z_hll_scals + procedure, pass(a) :: scalv => psb_z_hll_scal + procedure, pass(a) :: maxval => psb_z_hll_maxval + procedure, pass(a) :: csnmi => psb_z_hll_csnmi + procedure, pass(a) :: csnm1 => psb_z_hll_csnm1 + procedure, pass(a) :: rowsum => psb_z_hll_rowsum + procedure, pass(a) :: arwsum => psb_z_hll_arwsum + procedure, pass(a) :: colsum => psb_z_hll_colsum + procedure, pass(a) :: aclsum => psb_z_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_z_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_z_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_z_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_z_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_z_hll_csput_a + procedure, pass(a) :: get_diag => psb_z_hll_get_diag + procedure, pass(a) :: csgetptn => psb_z_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_z_hll_csgetrow + 
procedure, pass(a) :: get_nz_row => z_hll_get_nz_row + procedure, pass(a) :: reinit => psb_z_hll_reinit + procedure, pass(a) :: print => psb_z_hll_print + procedure, pass(a) :: free => z_hll_free + procedure, pass(a) :: mold => psb_z_hll_mold + + end type psb_z_hll_sparse_mat + + private :: z_hll_get_nzeros, z_hll_free, z_hll_get_fmt, & + & z_hll_get_size, z_hll_sizeof, z_hll_get_nz_row, & + & z_hll_set_nzeros, z_hll_get_hksz, z_hll_set_hksz + + interface + subroutine psb_z_hll_reallocate_nz(nz,a) + import :: psb_z_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hll_sparse_mat), intent(inout) :: a + end subroutine psb_z_hll_reallocate_nz + end interface + + interface + subroutine psb_z_hll_reinit(a,clear) + import :: psb_z_hll_sparse_mat + class(psb_z_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_hll_reinit + end interface + + interface + subroutine psb_z_hll_mold(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_mold + end interface + + interface + subroutine psb_z_hll_allocate_mnnz(m,n,a,nz) + import :: psb_z_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hll_allocate_mnnz + end interface + + interface + subroutine psb_z_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_hll_print + end interface + + interface + subroutine psb_z_cp_hll_to_coo(a,b,info) + 
import :: psb_z_coo_sparse_mat, psb_z_hll_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_to_coo + end interface + + interface + subroutine psb_z_cp_hll_from_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_from_coo + end interface + + interface + subroutine psb_z_cp_hll_to_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_to_fmt + end interface + + interface + subroutine psb_z_cp_hll_from_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_from_fmt + end interface + + interface + subroutine psb_z_mv_hll_to_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_to_coo + end interface + + interface + subroutine psb_z_mv_hll_from_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_from_coo + end interface + + interface + subroutine psb_z_mv_hll_to_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), 
intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_to_fmt + end interface + + interface + subroutine psb_z_mv_hll_from_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_from_fmt + end interface + + interface + subroutine psb_z_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_csput_a + end interface + + interface + subroutine psb_z_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_hll_csgetptn + end interface + + interface + subroutine psb_z_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + 
integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_hll_csgetrow + end interface + + interface + subroutine psb_z_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_hll_csgetblk + end interface + + interface + subroutine psb_z_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_cssv + subroutine psb_z_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_cssm + end interface + + interface + subroutine psb_z_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + 
character, optional, intent(in) :: trans + end subroutine psb_z_hll_csmv + subroutine psb_z_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_csmm + end interface + + + interface + function psb_z_hll_maxval(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_maxval + end interface + + interface + function psb_z_hll_csnmi(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_csnmi + end interface + + interface + function psb_z_hll_csnm1(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_csnm1 + end interface + + interface + subroutine psb_z_hll_rowsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_rowsum + end interface + + interface + subroutine psb_z_hll_arwsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_arwsum + end interface + + interface + subroutine psb_z_hll_colsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_colsum + end interface + + interface + subroutine psb_z_hll_aclsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine 
psb_z_hll_aclsum + end interface + + interface + subroutine psb_z_hll_get_diag(a,d,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_get_diag + end interface + + interface + subroutine psb_z_hll_scal(d,a,info,side) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_hll_scal + end interface + + interface + subroutine psb_z_hll_scals(d,a,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_z_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_z_hll_sparse_mat, psb_ipk_, psb_z_coo_sparse_mat + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_z_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== =================================== + + + function z_hll_sizeof(a) result(res) ! Byte-size estimate of A; components that are not allocated contribute nothing + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + if (allocated(a%val)) res = res + (2*psb_sizeof_dp) * size(a%val) ! complex entries: two reals each + if (allocated(a%irn)) res = res + psb_sizeof_ip * size(a%irn) + if (allocated(a%idiag)) res = res + psb_sizeof_ip * size(a%idiag) + if (allocated(a%ja)) res = res + psb_sizeof_ip * size(a%ja) + if (allocated(a%hkoffs)) res = res + psb_sizeof_ip * size(a%hkoffs) + + end function z_hll_sizeof + + function z_hll_get_fmt() result(res) ! Format tag of this storage class + implicit none + character(len=5) :: res + res = 'HLL' + end function z_hll_get_fmt + + subroutine z_hll_set_nzeros(a,n) ! Record N as the nonzero count of A + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine z_hll_set_nzeros + + function z_hll_get_nzeros(a) result(res) ! Return the recorded nonzero count of A + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function z_hll_get_nzeros + + function z_hll_get_size(a) result(res) ! Smallest size among the allocated JA and VAL arrays, or -1 when neither is allocated + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function z_hll_get_size + + + + function z_hll_get_nz_row(idx,a) result(res) ! IRN(IDX) for a valid row index IDX, 0 otherwise + + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function z_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + !
== =================================== + + subroutine z_hll_free(a) ! Release every allocatable component of A, then reset it to the null state with zero rows, columns and hack size + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%hkoffs)) deallocate(a%hkoffs) ! test hkoffs itself: val has already been released by the previous statement + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine z_hll_free + + subroutine z_hll_set_hksz(a,n) ! Store N as the hack size of A + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine z_hll_set_hksz + + function z_hll_get_hksz(a) result(res) ! Return the hack size stored in A + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function z_hll_get_hksz + +end module psb_z_hll_mat_mod diff --git a/ext/psi_c_ext_util_mod.f90 b/ext/psi_c_ext_util_mod.f90 new file mode 100644 index 00000000..e58c0d93 --- /dev/null +++ b/ext/psi_c_ext_util_mod.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_c_ext_util_mod + + use psb_base_mod, only : psb_ipk_, psb_spk_ + + interface psi_xtr_dia_from_coo + subroutine psi_c_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata,rdisp) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd, ncd, ia(:), ja(:), d(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + end subroutine psi_c_xtr_dia_from_coo + end interface + + interface psi_xtr_ell_from_coo + subroutine psi_c_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,& + & valc,ja,val,irn,diag,ld) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + end subroutine psi_c_xtr_ell_from_coo + end interface psi_xtr_ell_from_coo + + interface psi_xtr_coo_from_dia + subroutine psi_c_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,& + & info,rdisp) + import :: psb_ipk_, psb_spk_ + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), 
intent(inout) :: ia(:), ja(:), nz + complex(psb_spk_), intent(inout) :: val(:) + complex(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + end subroutine psi_c_xtr_coo_from_dia + end interface + +end module psi_c_ext_util_mod diff --git a/ext/psi_d_ext_util_mod.f90 b/ext/psi_d_ext_util_mod.f90 new file mode 100644 index 00000000..07de8ad1 --- /dev/null +++ b/ext/psi_d_ext_util_mod.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_d_ext_util_mod + + use psb_base_mod, only : psb_ipk_, psb_dpk_ + + interface psi_xtr_dia_from_coo + subroutine psi_d_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata,rdisp) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd, ncd, ia(:), ja(:), d(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + end subroutine psi_d_xtr_dia_from_coo + end interface + + interface psi_xtr_ell_from_coo + subroutine psi_d_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,& + & valc,ja,val,irn,diag,ld) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + end subroutine psi_d_xtr_ell_from_coo + end interface psi_xtr_ell_from_coo + + interface psi_xtr_coo_from_dia + subroutine psi_d_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,& + & info,rdisp) + import :: psb_ipk_, psb_dpk_ + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), nz + real(psb_dpk_), intent(inout) :: val(:) + real(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + end subroutine psi_d_xtr_coo_from_dia + end interface + +end module psi_d_ext_util_mod diff --git a/ext/psi_ext_util_mod.f90 b/ext/psi_ext_util_mod.f90 new file mode 100644 index 00000000..afb2c749 --- /dev/null +++ b/ext/psi_ext_util_mod.f90 @@ -0,0 +1,41 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! 
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_ext_util_mod + + use psi_i_ext_util_mod + use psi_s_ext_util_mod + use psi_c_ext_util_mod + use psi_d_ext_util_mod + use psi_z_ext_util_mod + +end module psi_ext_util_mod diff --git a/ext/psi_i_ext_util_mod.f90 b/ext/psi_i_ext_util_mod.f90 new file mode 100644 index 00000000..ac073f1d --- /dev/null +++ b/ext/psi_i_ext_util_mod.f90 @@ -0,0 +1,175 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! 
Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_i_ext_util_mod + + use psb_base_mod, only : psb_ipk_ + ! + ! Hack size for HLL format. + ! + integer(psb_ipk_), parameter :: psb_hksz_def_ = 32 + integer(psb_ipk_), private, save :: psb_hksz = psb_hksz_def_ + logical, private, save :: psb_hll_use_vector = .true. 
+contains + + function psi_get_hksz() result(res) ! Return the current module-wide hack size + implicit none + integer(psb_ipk_) :: res + res = psb_hksz + end function psi_get_hksz + + subroutine psi_set_hksz(size) ! Set the module-wide hack size; non-positive values are ignored + implicit none + integer(psb_ipk_), intent(in) :: size + if (size > 0) psb_hksz = size + end subroutine psi_set_hksz + + subroutine psi_set_hll_vector(val) ! Set the HLL vector flag to VAL, or to .true. when VAL is absent + implicit none + logical, intent(in), optional :: val + if (present(val)) then + psb_hll_use_vector = val + else + psb_hll_use_vector = .true. + end if + + end subroutine psi_set_hll_vector + + function psi_get_hll_vector() result(res) ! Return the current HLL vector flag + implicit none + logical :: res + + res = psb_hll_use_vector + end function psi_get_hll_vector + + + ! + ! Compute offsets and allocation for DIAgonal storage. + ! Input: + ! nr,nc,nz,ia,ja: the matrix pattern in COO + ! Note: This routine is designed to be called + ! with either a full matrix or a horizontal stripe, + ! with the COO entries sorted in row major order, hence + ! it will handle the conversion of a strip, so it can + ! be used by both DIA and HDIA. In both cases NR and NC + ! *MUST* be the *GLOBAL* number of rows/columns, not those + ! of the strips, i.e. it must be that all entries in IA <= NR + ! and JA <= NC. + ! Output: + ! nd: number of nonzero diagonals + ! d: d(k) contains the index inside offset of diagonal k, + ! which is, if A(I,J) /= 0 then K=NR+J-I, or (optionally) 0. + ! *MUST* be allocated on the *global* size NR+NC-1 + ! + ! offset: for each of the ND nonzero diagonals, its offset J-I + ! + ! Notes: D and OFFSET together represent the set of diagonals; + ! D can be used outside to quickly find which entry of OFFSET + ! a given a(i,j) corresponds to, without doing a search. + ! + ! 1. Optionally init D vector to zeros + ! 2. Walk through the NZ pairs (I,J): + ! a. if it is a new diagonal add to a heap; + ! b. increase its population count stored in D(J-I+NR) + ! c. Keep track of maximum population count. + ! 3. Go through the ND diagonals, getting them K out of the heap in order: + ! a.
Set offset(i) to K-NR == J-I + ! b. Set D(K) = i or 0 (depending on cleard) + ! + ! Setting to 0 allows to reuse this function in a loop in a dry run + ! to estimate the allocation size for HDIA; without setting to 0 we + ! would need to zero the whole vector, resulting + ! in a quadratic overall cost. Outside this subroutine, it is possible + ! to selectively zero the entries in D by using the indices in OFFSET. + ! + ! + subroutine psi_dia_offset_from_coo(nr,nc,nz,ia,ja,nd,d,offset,info,& + & initd,cleard) + use psb_base_mod + + implicit none + + integer(psb_ipk_), intent(in) :: nr, nc, nz, ia(:), ja(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_), intent(out) :: offset(:) + integer(psb_ipk_), intent(out) :: nd + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initd,cleard + + type(psb_i_heap) :: heap + integer(psb_ipk_) :: k,i,j,ir,ic, ndiag, id + logical :: initd_, cleard_ + character(len=20) :: name + + info = psb_success_ + initd_ = .true. + if (present(initd)) initd_ = initd + cleard_ = .false.
+ if (present(cleard)) cleard_ = cleard + + if (initd_) d(:) = 0 + + ndiag = nr+nc-1 + if (size(d) psb_ckrylov_vect + use psb_linsolve_mod, psb_protect_name => psb_ckrylov_vect character(len=*) :: method Type(psb_cspmat_type), Intent(in) :: a diff --git a/krylov/psb_crgmres.f90 b/linsolve/impl/psb_crgmres.f90 similarity index 97% rename from krylov/psb_crgmres.f90 rename to linsolve/impl/psb_crgmres.f90 index 80aa34c3..e0c0b38d 100644 --- a/krylov/psb_crgmres.f90 +++ b/linsolve/impl/psb_crgmres.f90 @@ -110,8 +110,8 @@ subroutine psb_crgmres_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,irst,istop) use psb_base_mod use psb_prec_mod - use psb_c_krylov_conv_mod - use psb_krylov_mod + use psb_c_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_cspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a @@ -344,19 +344,16 @@ subroutine psb_crgmres_vect(a,prec,b,x,eps,desc_a,info,& goto 9999 end if - if (errnum <= eps*errden) exit restart + if ((errnum <= eps*errden).or.(itx >= litmax)) exit restart if (itrace_ > 0) & & call log_conv(methdname,me,itx,itrace_,errnum,errden,deps) call v(1)%scal(scal) !v(1) = v(1) * scal - if (itx >= litmax) exit restart - ! ! inner iterations ! 
- inner: Do i=1,nl itx = itx + 1 @@ -446,6 +443,8 @@ subroutine psb_crgmres_vect(a,prec,b,x,eps,desc_a,info,& call psb_geaxpby(cone,w,cone,x,desc_a,info) end if + if (itrace_ > 0) & + & call log_conv(methdname,me,itx,ione,errnum,errden,deps) exit restart end if @@ -472,10 +471,16 @@ subroutine psb_crgmres_vect(a,prec,b,x,eps,desc_a,info,& call prec%apply(w1,w,desc_a,info) call psb_geaxpby(cone,w,cone,x,desc_a,info) end if - + + if (itx >= litmax) then + if (itrace_ > 0) then + if (mod(itx,itrace_)/=0) & + & call log_conv(methdname,me,itx,ione,errnum,errden,deps) + end if + exit restart + end if + end do restart - if (itrace_ > 0) & - & call log_conv(methdname,me,itx,ione,errnum,errden,deps) call log_end(methdname,me,itx,itrace_,errnum,errden,deps,err=derr,iter=iter) if (present(err)) err = derr diff --git a/linsolve/impl/psb_crichardson.f90 b/linsolve/impl/psb_crichardson.f90 new file mode 100644 index 00000000..3eb4c263 --- /dev/null +++ b/linsolve/impl/psb_crichardson.f90 @@ -0,0 +1,216 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! File: psb_crichardson.f90 +! Interfaces for Richardson iterative methods. +! +! +! Subroutine: psb_crichardson +! +! Front-end for the Richardson iterations, complex version +! +! Arguments: +! +! a - type(psb_cspmat_type) Input: sparse matrix containing A. +! prec - class(psb_cprec_type) Input: preconditioner +! b - type(psb_c_vect_type) Input: vector containing the +! right hand side B +! x - type(psb_c_vect_type) Input/Output: vector containing the +! initial guess and final solution X. +! eps - real Input: Stopping tolerance; the iteration is +! stopped when the error +! estimate |err| <= eps +! +! desc_a - type(psb_desc_type). Input: The communication descriptor. +! info - integer. Output: Return code +! +! itmax - integer(optional) Input: maximum number of iterations to be +! performed. +! iter - integer(optional) Output: how many iterations have been +! performed. +! err - real (optional) Output: error estimate on exit +! itrace - integer(optional) Input: print an informational message +! with the error estimate every itrace +! iterations +! istop - integer(optional) Input: stopping criterion, or how +! to estimate the error. +! 1: err = |r|/(|a||x|+|b|) +! 2: err = |r|/|b| +! where r is the (preconditioned, recursive +! estimate of) residual +! 
+Subroutine psb_crichardson_vect(a,prec,b,x,eps,desc_a,info,&
+     & itmax,iter,err,itrace,istop)
+  !
+  ! Preconditioned Richardson iteration on complex(psb_spk_) data.
+  !
+  ! Every sweep applies the preconditioner to the current residual R
+  ! (result in Z), accumulates Z into X with psb_geaxpby, and rebuilds R
+  ! from B, A and X with psb_geaxpby + psb_spmm.  The loop runs at most
+  ! ITMAX sweeps and leaves as soon as psb_check_conv returns .true..
+  !
+  ! Arguments:
+  !   a       Input         sparse matrix.
+  !   prec    Input/Output  preconditioner; its workspace is allocated
+  !                         here (and released on normal exit) only when
+  !                         it is not already allocated on entry.
+  !   b       Input         right hand side; b%v must be allocated.
+  !   x       Input/Output  initial guess on entry, last iterate on exit;
+  !                         x%v must be allocated.
+  !   eps     Input         tolerance handed to psb_init_conv.
+  !   desc_a  Input         communication descriptor.
+  !   info    Output        psb_success_ or a PSBLAS error code.
+  !   itmax   Input, optional   sweep limit, default 1000.
+  !   iter    Output, optional  forwarded to psb_end_conv.
+  !   err     Output, optional  error estimate returned by psb_end_conv.
+  !   itrace  Input, optional   trace setting, default -1.
+  !   istop   Input, optional   stopping criterion selector, default 2.
+  !
+  use psb_base_mod
+  use psb_prec_mod
+  use psb_c_linsolve_conv_mod
+  use psb_linsolve_mod, psb_protect_name => psb_crichardson_vect
+  implicit none
+
+  Type(psb_cspmat_type), Intent(in)        :: a
+  Type(psb_desc_type), Intent(in)          :: desc_a
+  class(psb_cprec_type), intent(inout)     :: prec
+  type(psb_c_vect_type), Intent(inout)     :: b
+  type(psb_c_vect_type), Intent(inout)     :: x
+  Real(psb_spk_), Intent(in)               :: eps
+  integer(psb_ipk_), intent(out)           :: info
+  integer(psb_ipk_), Optional, Intent(in)  :: itmax, itrace, istop
+  integer(psb_ipk_), Optional, Intent(out) :: iter
+  Real(psb_spk_), Optional, Intent(out)    :: err
+
+  logical             :: do_alloc_wrk
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: me,np,err_act
+  complex(psb_spk_), allocatable, target     :: aux(:)
+  type(psb_c_vect_type), allocatable, target :: wwrk(:)
+  type(psb_c_vect_type), pointer             :: r, z
+  real(psb_dpk_)      :: derr
+  integer(psb_ipk_)   :: itmax_, istop_, naux, itx, itrace_, n_col
+  integer(psb_lpk_)   :: mglob
+  type(psb_itconv_type) :: stopdat
+  character(len=20)   :: name
+  character(len=*), parameter :: methdname='RICHARDSON'
+
+  info = psb_success_
+  name = 'psb_crichardson'
+  call psb_erractionsave(err_act)
+
+  ctxt=desc_a%get_context()
+
+  call psb_info(ctxt, me, np)
+
+  if (present(itrace)) then
+    itrace_ = itrace
+  else
+    itrace_ = -1
+  end if
+
+  if (present(istop)) then
+    istop_ = istop
+  else
+    istop_ = 2
+  endif
+  if (present(itmax)) then
+    itmax_ = itmax
+  else
+    itmax_ = 1000
+  endif
+
+  ! Validate the vectors first: x%v is the mold for the preconditioner
+  ! workspace below, so it must not be touched while unallocated.
+  if (.not.allocated(b%v)) then
+    info = psb_err_invalid_vect_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+  if (.not.allocated(x%v)) then
+    info = psb_err_invalid_vect_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  do_alloc_wrk = .not.prec%is_allocated_wrk()
+  if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v,desc=desc_a)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='prec%allocate_wrk')
+    goto 9999
+  end if
+
+  mglob = desc_a%get_global_rows()
+  n_col = desc_a%get_local_cols()
+
+  call psb_chkvect(mglob,lone,x%get_nrows(),lone,lone,desc_a,info)
+  if (info == psb_success_)&
+       & call psb_chkvect(mglob,lone,b%get_nrows(),lone,lone,desc_a,info)
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_chkvect on X/B')
+    goto 9999
+  end if
+
+  ! Only two work vectors are needed: the residual and the
+  ! preconditioned residual.
+  naux=4*n_col
+  allocate(aux(naux), stat=info)
+  if (info == psb_success_) call psb_geall(wwrk,desc_a,info,n=2_psb_ipk_)
+  if (info == psb_success_) call psb_geasb(wwrk,desc_a,info,mold=x%v,scratch=.true.)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_non_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  r => wwrk(1)
+  z => wwrk(2)
+
+  ! Initial residual, built from B, A and the initial guess X.
+  call psb_geaxpby(cone,b,czero,r,desc_a,info)
+  if (info == psb_success_) call psb_spmm(-cone,a,x,cone,r,desc_a,info,work=aux)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_non_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+
+  call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
+  if (info /= psb_success_) Then
+    call psb_errpush(psb_err_from_subroutine_non_,name)
+    goto 9999
+  End If
+
+  loop: do itx=1,itmax_
+    ! Each step runs only if the previous one succeeded, so the first
+    ! failure is the one reported instead of being overwritten by the
+    ! status of a later call.
+    call prec%apply(r,z,desc_a,info,work=aux)
+    if (info == psb_success_) call psb_geaxpby(cone,z,cone,x,desc_a,info)
+    if (info == psb_success_) call psb_geaxpby(cone,b,czero,r,desc_a,info)
+    if (info == psb_success_) call psb_spmm(-cone,a,x,cone,r,desc_a,info,work=aux)
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_non_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    if (psb_check_conv(methdname,itx,x,r,desc_a,stopdat,info)) exit loop
+  end do loop
+  call psb_end_conv(methdname,itx,desc_a,stopdat,info,derr,iter)
+  if (present(err)) err = derr
+
+  if (info == psb_success_) call psb_gefree(wwrk,desc_a,info)
+  if (info == psb_success_) deallocate(aux,stat=info)
+  if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info)
+
+  if(info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err=trim(methdname))
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+  return
+
+end subroutine psb_crichardson_vect
+
diff --git a/krylov/psb_dbicg.f90 b/linsolve/impl/psb_dbicg.f90 similarity index 99% rename from krylov/psb_dbicg.f90 rename to linsolve/impl/psb_dbicg.f90 index 5ac94d3c..ebcc8eb9 100644 --- a/krylov/psb_dbicg.f90 +++ b/linsolve/impl/psb_dbicg.f90 @@ -98,8 +98,8 @@ subroutine psb_dbicg_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop) use psb_base_mod use psb_prec_mod - use psb_d_krylov_conv_mod - use psb_krylov_mod + use psb_d_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_dspmat_type), intent(in) :: a type(psb_desc_type), intent(in) :: desc_a diff --git a/krylov/psb_dcg.F90 b/linsolve/impl/psb_dcg.F90 similarity index 99% rename from krylov/psb_dcg.F90 rename to linsolve/impl/psb_dcg.F90 index 669573be..a5993731 100644 --- a/krylov/psb_dcg.F90 +++ b/linsolve/impl/psb_dcg.F90 @@ -99,8 +99,8 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop,cond) use psb_base_mod use psb_prec_mod - use psb_d_krylov_conv_mod - use psb_krylov_mod + use psb_d_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_dspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a @@ -295,7 +295,7 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& end do restart if (do_cond) then if (me == psb_root_) then -#if defined(HAVE_LAPACK) +#if defined(PSB_HAVE_LAPACK) call dstebz('A','E',istebz,dzero,dzero,0,0,-done,td,tu,& & ieg,nspl,eig,ibl,ispl,ewrk,iwrk,info) if (info < 0) then diff --git a/krylov/psb_dcgs.f90 b/linsolve/impl/psb_dcgs.f90 similarity index 99% rename from krylov/psb_dcgs.f90 rename to linsolve/impl/psb_dcgs.f90 index 78a3905c..5c2401d1 100644 --- a/krylov/psb_dcgs.f90 +++ b/linsolve/impl/psb_dcgs.f90 @@ -96,8 +96,8 @@ Subroutine psb_dcgs_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop) use psb_base_mod use
psb_prec_mod - use psb_d_krylov_conv_mod - use psb_krylov_mod + use psb_d_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_dspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a diff --git a/krylov/psb_dcgstab.f90 b/linsolve/impl/psb_dcgstab.f90 similarity index 99% rename from krylov/psb_dcgstab.f90 rename to linsolve/impl/psb_dcgstab.f90 index bec3329a..749015d7 100644 --- a/krylov/psb_dcgstab.f90 +++ b/linsolve/impl/psb_dcgstab.f90 @@ -96,8 +96,8 @@ Subroutine psb_dcgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,istop) use psb_base_mod use psb_prec_mod - use psb_d_krylov_conv_mod - use psb_krylov_mod + use psb_d_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_dspmat_type), intent(in) :: a class(psb_dprec_type), Intent(inout) :: prec diff --git a/krylov/psb_dcgstabl.f90 b/linsolve/impl/psb_dcgstabl.f90 similarity index 99% rename from krylov/psb_dcgstabl.f90 rename to linsolve/impl/psb_dcgstabl.f90 index 01641226..a2ae6164 100644 --- a/krylov/psb_dcgstabl.f90 +++ b/linsolve/impl/psb_dcgstabl.f90 @@ -107,8 +107,8 @@ Subroutine psb_dcgstabl_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,irst,istop) use psb_base_mod use psb_prec_mod - use psb_d_krylov_conv_mod - use psb_krylov_mod + use psb_d_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_dspmat_type), intent(in) :: a class(psb_dprec_type), Intent(inout) :: prec diff --git a/krylov/psb_dfcg.F90 b/linsolve/impl/psb_dfcg.F90 similarity index 99% rename from krylov/psb_dfcg.F90 rename to linsolve/impl/psb_dfcg.F90 index d3b2c9d2..cb741795 100644 --- a/krylov/psb_dfcg.F90 +++ b/linsolve/impl/psb_dfcg.F90 @@ -107,8 +107,8 @@ subroutine psb_dfcg_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop,cond) use psb_base_mod use psb_prec_mod - use psb_d_krylov_conv_mod - use psb_krylov_mod + use psb_d_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_dspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: 
desc_a diff --git a/krylov/psb_dgcr.f90 b/linsolve/impl/psb_dgcr.f90 similarity index 96% rename from krylov/psb_dgcr.f90 rename to linsolve/impl/psb_dgcr.f90 index b7480f84..cf2e2b0e 100644 --- a/krylov/psb_dgcr.f90 +++ b/linsolve/impl/psb_dgcr.f90 @@ -109,8 +109,8 @@ subroutine psb_dgcr_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace, irst, istop) use psb_base_mod use psb_prec_mod - use psb_d_krylov_conv_mod - use psb_krylov_mod + use psb_d_linsolve_conv_mod + use psb_linsolve_mod implicit none diff --git a/krylov/psb_dkrylov.f90 b/linsolve/impl/psb_dkrylov.f90 similarity index 98% rename from krylov/psb_dkrylov.f90 rename to linsolve/impl/psb_dkrylov.f90 index d5d40eaf..d858c228 100644 --- a/krylov/psb_dkrylov.f90 +++ b/linsolve/impl/psb_dkrylov.f90 @@ -42,6 +42,7 @@ ! ! methd - character The specific method; can take the values: ! CG +! FCG ! CGS ! BICG ! BICGSTAB @@ -83,7 +84,7 @@ Subroutine psb_dkrylov_vect(method,a,prec,b,x,eps,desc_a,info,& use psb_base_mod use psb_prec_mod,only : psb_dprec_type - use psb_krylov_mod, psb_protect_name => psb_dkrylov_vect + use psb_linsolve_mod, psb_protect_name => psb_dkrylov_vect character(len=*) :: method Type(psb_dspmat_type), Intent(in) :: a diff --git a/krylov/psb_drgmres.f90 b/linsolve/impl/psb_drgmres.f90 similarity index 97% rename from krylov/psb_drgmres.f90 rename to linsolve/impl/psb_drgmres.f90 index 1503748a..e0d78382 100644 --- a/krylov/psb_drgmres.f90 +++ b/linsolve/impl/psb_drgmres.f90 @@ -110,8 +110,8 @@ subroutine psb_drgmres_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,irst,istop) use psb_base_mod use psb_prec_mod - use psb_d_krylov_conv_mod - use psb_krylov_mod + use psb_d_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_dspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a @@ -344,19 +344,16 @@ subroutine psb_drgmres_vect(a,prec,b,x,eps,desc_a,info,& goto 9999 end if - if (errnum <= eps*errden) exit restart + if ((errnum <= eps*errden).or.(itx >= 
litmax)) exit restart if (itrace_ > 0) & & call log_conv(methdname,me,itx,itrace_,errnum,errden,deps) call v(1)%scal(scal) !v(1) = v(1) * scal - if (itx >= litmax) exit restart - ! ! inner iterations ! - inner: Do i=1,nl itx = itx + 1 @@ -446,6 +443,8 @@ subroutine psb_drgmres_vect(a,prec,b,x,eps,desc_a,info,& call psb_geaxpby(done,w,done,x,desc_a,info) end if + if (itrace_ > 0) & + & call log_conv(methdname,me,itx,ione,errnum,errden,deps) exit restart end if @@ -472,10 +471,16 @@ subroutine psb_drgmres_vect(a,prec,b,x,eps,desc_a,info,& call prec%apply(w1,w,desc_a,info) call psb_geaxpby(done,w,done,x,desc_a,info) end if - + + if (itx >= litmax) then + if (itrace_ > 0) then + if (mod(itx,itrace_)/=0) & + & call log_conv(methdname,me,itx,ione,errnum,errden,deps) + end if + exit restart + end if + end do restart - if (itrace_ > 0) & - & call log_conv(methdname,me,itx,ione,errnum,errden,deps) call log_end(methdname,me,itx,itrace_,errnum,errden,deps,err=derr,iter=iter) if (present(err)) err = derr diff --git a/linsolve/impl/psb_drichardson.f90 b/linsolve/impl/psb_drichardson.f90 new file mode 100644 index 00000000..b0a53a4b --- /dev/null +++ b/linsolve/impl/psb_drichardson.f90 @@ -0,0 +1,216 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! 
software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! File: psb_drichardson.f90 +! Interfaces for Richardson iterative methods. +! +! +! Subroutine: psb_drichardson +! +! Front-end for the Richardson iterations, real(psb_dpk_) version +! +! Arguments: +! +! a - type(psb_dspmat_type) Input: sparse matrix containing A. +! prec - class(psb_dprec_type) Input: preconditioner +! b - type(psb_d_vect_type) Input: vector containing the +! right hand side B +! x - type(psb_d_vect_type) Input/Output: vector containing the +! initial guess and final solution X. +! eps - real Input: Stopping tolerance; the iteration is +! stopped when the error +! estimate |err| <= eps +! +! desc_a - type(psb_desc_type). Input: The communication descriptor. +! info - integer. Output: Return code +! +! itmax - integer(optional) Input: maximum number of iterations to be +! performed. +! iter - integer(optional) Output: how many iterations have been +! performed. +! err - real (optional) Output: error estimate on exit +! itrace - integer(optional) Input: print an informational message +! with the error estimate every itrace +! iterations +! istop - integer(optional) Input: stopping criterion, or how +! to estimate the error. +! 
1: err = |r|/(|a||x|+|b|) +! 2: err = |r|/|b| +! where r is the (preconditioned, recursive +! estimate of) residual +!
+Subroutine psb_drichardson_vect(a,prec,b,x,eps,desc_a,info,&
+     & itmax,iter,err,itrace,istop)
+  !
+  ! Preconditioned Richardson iteration on real(psb_dpk_) data.
+  !
+  ! Every sweep applies the preconditioner to the current residual R
+  ! (result in Z), accumulates Z into X with psb_geaxpby, and rebuilds R
+  ! from B, A and X with psb_geaxpby + psb_spmm.  The loop runs at most
+  ! ITMAX sweeps and leaves as soon as psb_check_conv returns .true..
+  !
+  ! Arguments:
+  !   a       Input         sparse matrix.
+  !   prec    Input/Output  preconditioner; its workspace is allocated
+  !                         here (and released on normal exit) only when
+  !                         it is not already allocated on entry.
+  !   b       Input         right hand side; b%v must be allocated.
+  !   x       Input/Output  initial guess on entry, last iterate on exit;
+  !                         x%v must be allocated.
+  !   eps     Input         tolerance handed to psb_init_conv.
+  !   desc_a  Input         communication descriptor.
+  !   info    Output        psb_success_ or a PSBLAS error code.
+  !   itmax   Input, optional   sweep limit, default 1000.
+  !   iter    Output, optional  forwarded to psb_end_conv.
+  !   err     Output, optional  error estimate returned by psb_end_conv.
+  !   itrace  Input, optional   trace setting, default -1.
+  !   istop   Input, optional   stopping criterion selector, default 2.
+  !
+  use psb_base_mod
+  use psb_prec_mod
+  use psb_d_linsolve_conv_mod
+  use psb_linsolve_mod, psb_protect_name => psb_drichardson_vect
+  implicit none
+
+  Type(psb_dspmat_type), Intent(in)        :: a
+  Type(psb_desc_type), Intent(in)          :: desc_a
+  class(psb_dprec_type), intent(inout)     :: prec
+  type(psb_d_vect_type), Intent(inout)     :: b
+  type(psb_d_vect_type), Intent(inout)     :: x
+  Real(psb_dpk_), Intent(in)               :: eps
+  integer(psb_ipk_), intent(out)           :: info
+  integer(psb_ipk_), Optional, Intent(in)  :: itmax, itrace, istop
+  integer(psb_ipk_), Optional, Intent(out) :: iter
+  Real(psb_dpk_), Optional, Intent(out)    :: err
+
+  logical             :: do_alloc_wrk
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: me,np,err_act
+  real(psb_dpk_), allocatable, target        :: aux(:)
+  type(psb_d_vect_type), allocatable, target :: wwrk(:)
+  type(psb_d_vect_type), pointer             :: r, z
+  real(psb_dpk_)      :: derr
+  integer(psb_ipk_)   :: itmax_, istop_, naux, itx, itrace_, n_col
+  integer(psb_lpk_)   :: mglob
+  type(psb_itconv_type) :: stopdat
+  character(len=20)   :: name
+  character(len=*), parameter :: methdname='RICHARDSON'
+
+  info = psb_success_
+  name = 'psb_drichardson'
+  call psb_erractionsave(err_act)
+
+  ctxt=desc_a%get_context()
+
+  call psb_info(ctxt, me, np)
+
+  if (present(itrace)) then
+    itrace_ = itrace
+  else
+    itrace_ = -1
+  end if
+
+  if (present(istop)) then
+    istop_ = istop
+  else
+    istop_ = 2
+  endif
+  if (present(itmax)) then
+    itmax_ = itmax
+  else
+    itmax_ = 1000
+  endif
+
+  ! Validate the vectors first: x%v is the mold for the preconditioner
+  ! workspace below, so it must not be touched while unallocated.
+  if (.not.allocated(b%v)) then
+    info = psb_err_invalid_vect_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+  if (.not.allocated(x%v)) then
+    info = psb_err_invalid_vect_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  do_alloc_wrk = .not.prec%is_allocated_wrk()
+  if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v,desc=desc_a)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='prec%allocate_wrk')
+    goto 9999
+  end if
+
+  mglob = desc_a%get_global_rows()
+  n_col = desc_a%get_local_cols()
+
+  call psb_chkvect(mglob,lone,x%get_nrows(),lone,lone,desc_a,info)
+  if (info == psb_success_)&
+       & call psb_chkvect(mglob,lone,b%get_nrows(),lone,lone,desc_a,info)
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_chkvect on X/B')
+    goto 9999
+  end if
+
+  ! Only two work vectors are needed: the residual and the
+  ! preconditioned residual.
+  naux=4*n_col
+  allocate(aux(naux), stat=info)
+  if (info == psb_success_) call psb_geall(wwrk,desc_a,info,n=2_psb_ipk_)
+  if (info == psb_success_) call psb_geasb(wwrk,desc_a,info,mold=x%v,scratch=.true.)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_non_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  r => wwrk(1)
+  z => wwrk(2)
+
+  ! Initial residual, built from B, A and the initial guess X.
+  call psb_geaxpby(done,b,dzero,r,desc_a,info)
+  if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_non_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+
+  call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
+  if (info /= psb_success_) Then
+    call psb_errpush(psb_err_from_subroutine_non_,name)
+    goto 9999
+  End If
+
+  loop: do itx=1,itmax_
+    ! Each step runs only if the previous one succeeded, so the first
+    ! failure is the one reported instead of being overwritten by the
+    ! status of a later call.
+    call prec%apply(r,z,desc_a,info,work=aux)
+    if (info == psb_success_) call psb_geaxpby(done,z,done,x,desc_a,info)
+    if (info == psb_success_) call psb_geaxpby(done,b,dzero,r,desc_a,info)
+    if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux)
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_non_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    if (psb_check_conv(methdname,itx,x,r,desc_a,stopdat,info)) exit loop
+  end do loop
+  call psb_end_conv(methdname,itx,desc_a,stopdat,info,derr,iter)
+  if (present(err)) err = derr
+
+  if (info == psb_success_) call psb_gefree(wwrk,desc_a,info)
+  if (info == psb_success_) deallocate(aux,stat=info)
+  if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info)
+
+  if(info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err=trim(methdname))
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+  return
+
+end subroutine psb_drichardson_vect
+
diff --git a/krylov/psb_sbicg.f90 b/linsolve/impl/psb_sbicg.f90 similarity index 99% rename from krylov/psb_sbicg.f90 rename to linsolve/impl/psb_sbicg.f90 index 609d3a5f..1b5f1fb8 100644 --- a/krylov/psb_sbicg.f90 +++ b/linsolve/impl/psb_sbicg.f90 @@ -98,8 +98,8 @@ subroutine psb_sbicg_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop) use psb_base_mod use psb_prec_mod - use psb_s_krylov_conv_mod - use psb_krylov_mod + use psb_s_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_sspmat_type), intent(in) :: a type(psb_desc_type), intent(in) :: desc_a diff --git a/krylov/psb_scg.F90 b/linsolve/impl/psb_scg.F90 similarity index 99% rename from krylov/psb_scg.F90 rename to linsolve/impl/psb_scg.F90 index c16dbf6a..c116edc4 100644 --- a/krylov/psb_scg.F90 +++ b/linsolve/impl/psb_scg.F90 @@ -99,8 +99,8 @@ subroutine psb_scg_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop,cond) use psb_base_mod use psb_prec_mod - use psb_s_krylov_conv_mod - use psb_krylov_mod + use psb_s_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_sspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a @@ -295,7 +295,7 @@ subroutine psb_scg_vect(a,prec,b,x,eps,desc_a,info,& end do restart if (do_cond) then if (me == psb_root_) then -#if defined(HAVE_LAPACK) +#if defined(PSB_HAVE_LAPACK) call sstebz('A','E',istebz,szero,szero,0,0,-sone,td,tu,& & ieg,nspl,eig,ibl,ispl,ewrk,iwrk,info) if (info < 0) then diff --git a/krylov/psb_scgs.f90 b/linsolve/impl/psb_scgs.f90 similarity index 99% rename from krylov/psb_scgs.f90 rename to linsolve/impl/psb_scgs.f90 index 48fe5372..4034283e 100644 --- a/krylov/psb_scgs.f90 +++ b/linsolve/impl/psb_scgs.f90 @@ -96,8
+96,8 @@ Subroutine psb_scgs_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop) use psb_base_mod use psb_prec_mod - use psb_s_krylov_conv_mod - use psb_krylov_mod + use psb_s_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_sspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a diff --git a/krylov/psb_scgstab.f90 b/linsolve/impl/psb_scgstab.f90 similarity index 99% rename from krylov/psb_scgstab.f90 rename to linsolve/impl/psb_scgstab.f90 index 2a811b8d..a0b57fa8 100644 --- a/krylov/psb_scgstab.f90 +++ b/linsolve/impl/psb_scgstab.f90 @@ -96,8 +96,8 @@ Subroutine psb_scgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,istop) use psb_base_mod use psb_prec_mod - use psb_s_krylov_conv_mod - use psb_krylov_mod + use psb_s_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_sspmat_type), intent(in) :: a class(psb_sprec_type), Intent(inout) :: prec diff --git a/krylov/psb_scgstabl.f90 b/linsolve/impl/psb_scgstabl.f90 similarity index 99% rename from krylov/psb_scgstabl.f90 rename to linsolve/impl/psb_scgstabl.f90 index c2fc9833..e55d2746 100644 --- a/krylov/psb_scgstabl.f90 +++ b/linsolve/impl/psb_scgstabl.f90 @@ -107,8 +107,8 @@ Subroutine psb_scgstabl_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,irst,istop) use psb_base_mod use psb_prec_mod - use psb_s_krylov_conv_mod - use psb_krylov_mod + use psb_s_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_sspmat_type), intent(in) :: a class(psb_sprec_type), Intent(inout) :: prec diff --git a/krylov/psb_sfcg.F90 b/linsolve/impl/psb_sfcg.F90 similarity index 99% rename from krylov/psb_sfcg.F90 rename to linsolve/impl/psb_sfcg.F90 index 3a518bb2..449942cd 100644 --- a/krylov/psb_sfcg.F90 +++ b/linsolve/impl/psb_sfcg.F90 @@ -107,8 +107,8 @@ subroutine psb_sfcg_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop,cond) use psb_base_mod use psb_prec_mod - use psb_s_krylov_conv_mod - use psb_krylov_mod + use 
psb_s_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_sspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a diff --git a/krylov/psb_sgcr.f90 b/linsolve/impl/psb_sgcr.f90 similarity index 96% rename from krylov/psb_sgcr.f90 rename to linsolve/impl/psb_sgcr.f90 index dd0aca16..b2d09d3e 100644 --- a/krylov/psb_sgcr.f90 +++ b/linsolve/impl/psb_sgcr.f90 @@ -109,8 +109,8 @@ subroutine psb_sgcr_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace, irst, istop) use psb_base_mod use psb_prec_mod - use psb_s_krylov_conv_mod - use psb_krylov_mod + use psb_s_linsolve_conv_mod + use psb_linsolve_mod implicit none diff --git a/krylov/psb_skrylov.f90 b/linsolve/impl/psb_skrylov.f90 similarity index 98% rename from krylov/psb_skrylov.f90 rename to linsolve/impl/psb_skrylov.f90 index 39aecc36..1a612a12 100644 --- a/krylov/psb_skrylov.f90 +++ b/linsolve/impl/psb_skrylov.f90 @@ -42,6 +42,7 @@ ! ! methd - character The specific method; can take the values: ! CG +! FCG ! CGS ! BICG ! 
BICGSTAB @@ -83,7 +84,7 @@ Subroutine psb_skrylov_vect(method,a,prec,b,x,eps,desc_a,info,& use psb_base_mod use psb_prec_mod,only : psb_sprec_type - use psb_krylov_mod, psb_protect_name => psb_skrylov_vect + use psb_linsolve_mod, psb_protect_name => psb_skrylov_vect character(len=*) :: method Type(psb_sspmat_type), Intent(in) :: a diff --git a/krylov/psb_srgmres.f90 b/linsolve/impl/psb_srgmres.f90 similarity index 97% rename from krylov/psb_srgmres.f90 rename to linsolve/impl/psb_srgmres.f90 index 02836dd7..32129e35 100644 --- a/krylov/psb_srgmres.f90 +++ b/linsolve/impl/psb_srgmres.f90 @@ -110,8 +110,8 @@ subroutine psb_srgmres_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,irst,istop) use psb_base_mod use psb_prec_mod - use psb_s_krylov_conv_mod - use psb_krylov_mod + use psb_s_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_sspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a @@ -344,19 +344,16 @@ subroutine psb_srgmres_vect(a,prec,b,x,eps,desc_a,info,& goto 9999 end if - if (errnum <= eps*errden) exit restart + if ((errnum <= eps*errden).or.(itx >= litmax)) exit restart if (itrace_ > 0) & & call log_conv(methdname,me,itx,itrace_,errnum,errden,deps) call v(1)%scal(scal) !v(1) = v(1) * scal - if (itx >= litmax) exit restart - ! ! inner iterations ! 
- inner: Do i=1,nl itx = itx + 1 @@ -446,6 +443,8 @@ subroutine psb_srgmres_vect(a,prec,b,x,eps,desc_a,info,& call psb_geaxpby(sone,w,sone,x,desc_a,info) end if + if (itrace_ > 0) & + & call log_conv(methdname,me,itx,ione,errnum,errden,deps) exit restart end if @@ -472,10 +471,16 @@ subroutine psb_srgmres_vect(a,prec,b,x,eps,desc_a,info,& call prec%apply(w1,w,desc_a,info) call psb_geaxpby(sone,w,sone,x,desc_a,info) end if - + + if (itx >= litmax) then + if (itrace_ > 0) then + if (mod(itx,itrace_)/=0) & + & call log_conv(methdname,me,itx,ione,errnum,errden,deps) + end if + exit restart + end if + end do restart - if (itrace_ > 0) & - & call log_conv(methdname,me,itx,ione,errnum,errden,deps) call log_end(methdname,me,itx,itrace_,errnum,errden,deps,err=derr,iter=iter) if (present(err)) err = derr diff --git a/linsolve/impl/psb_srichardson.f90 b/linsolve/impl/psb_srichardson.f90 new file mode 100644 index 00000000..660778fc --- /dev/null +++ b/linsolve/impl/psb_srichardson.f90 @@ -0,0 +1,216 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! File: psb_srichardson.f90 +! Interfaces for Richardson iterative methods. +! +! +! Subroutine: psb_srichardson +! +! Front-end for the Richardson iterations, real(psb_spk_) version +! +! Arguments: +! +! a - type(psb_sspmat_type) Input: sparse matrix containing A. +! prec - class(psb_sprec_type) Input: preconditioner +! b - type(psb_s_vect_type) Input: vector containing the +! right hand side B +! x - type(psb_s_vect_type) Input/Output: vector containing the +! initial guess and final solution X. +! eps - real Input: Stopping tolerance; the iteration is +! stopped when the error +! estimate |err| <= eps +! +! desc_a - type(psb_desc_type). Input: The communication descriptor. +! info - integer. Output: Return code +! +! itmax - integer(optional) Input: maximum number of iterations to be +! performed. +! iter - integer(optional) Output: how many iterations have been +! performed. +! err - real (optional) Output: error estimate on exit +! itrace - integer(optional) Input: print an informational message +! with the error estimate every itrace +! iterations +! istop - integer(optional) Input: stopping criterion, or how +! to estimate the error. +! 1: err = |r|/(|a||x|+|b|) +! 2: err = |r|/|b| +! where r is the (preconditioned, recursive +! estimate of) residual +! 
+Subroutine psb_srichardson_vect(a,prec,b,x,eps,desc_a,info,&
+     & itmax,iter,err,itrace,istop)
+  !
+  ! Preconditioned Richardson iteration on real(psb_spk_) data.
+  !
+  ! Every sweep applies the preconditioner to the current residual R
+  ! (result in Z), accumulates Z into X with psb_geaxpby, and rebuilds R
+  ! from B, A and X with psb_geaxpby + psb_spmm.  The loop runs at most
+  ! ITMAX sweeps and leaves as soon as psb_check_conv returns .true..
+  !
+  ! Arguments:
+  !   a       Input         sparse matrix.
+  !   prec    Input/Output  preconditioner; its workspace is allocated
+  !                         here (and released on normal exit) only when
+  !                         it is not already allocated on entry.
+  !   b       Input         right hand side; b%v must be allocated.
+  !   x       Input/Output  initial guess on entry, last iterate on exit;
+  !                         x%v must be allocated.
+  !   eps     Input         tolerance handed to psb_init_conv.
+  !   desc_a  Input         communication descriptor.
+  !   info    Output        psb_success_ or a PSBLAS error code.
+  !   itmax   Input, optional   sweep limit, default 1000.
+  !   iter    Output, optional  forwarded to psb_end_conv.
+  !   err     Output, optional  error estimate returned by psb_end_conv.
+  !   itrace  Input, optional   trace setting, default -1.
+  !   istop   Input, optional   stopping criterion selector, default 2.
+  !
+  use psb_base_mod
+  use psb_prec_mod
+  use psb_s_linsolve_conv_mod
+  use psb_linsolve_mod, psb_protect_name => psb_srichardson_vect
+  implicit none
+
+  Type(psb_sspmat_type), Intent(in)        :: a
+  Type(psb_desc_type), Intent(in)          :: desc_a
+  class(psb_sprec_type), intent(inout)     :: prec
+  type(psb_s_vect_type), Intent(inout)     :: b
+  type(psb_s_vect_type), Intent(inout)     :: x
+  Real(psb_spk_), Intent(in)               :: eps
+  integer(psb_ipk_), intent(out)           :: info
+  integer(psb_ipk_), Optional, Intent(in)  :: itmax, itrace, istop
+  integer(psb_ipk_), Optional, Intent(out) :: iter
+  Real(psb_spk_), Optional, Intent(out)    :: err
+
+  logical             :: do_alloc_wrk
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: me,np,err_act
+  real(psb_spk_), allocatable, target        :: aux(:)
+  type(psb_s_vect_type), allocatable, target :: wwrk(:)
+  type(psb_s_vect_type), pointer             :: r, z
+  real(psb_dpk_)      :: derr
+  integer(psb_ipk_)   :: itmax_, istop_, naux, itx, itrace_, n_col
+  integer(psb_lpk_)   :: mglob
+  type(psb_itconv_type) :: stopdat
+  character(len=20)   :: name
+  character(len=*), parameter :: methdname='RICHARDSON'
+
+  info = psb_success_
+  name = 'psb_srichardson'
+  call psb_erractionsave(err_act)
+
+  ctxt=desc_a%get_context()
+
+  call psb_info(ctxt, me, np)
+
+  if (present(itrace)) then
+    itrace_ = itrace
+  else
+    itrace_ = -1
+  end if
+
+  if (present(istop)) then
+    istop_ = istop
+  else
+    istop_ = 2
+  endif
+  if (present(itmax)) then
+    itmax_ = itmax
+  else
+    itmax_ = 1000
+  endif
+
+  ! Validate the vectors first: x%v is the mold for the preconditioner
+  ! workspace below, so it must not be touched while unallocated.
+  if (.not.allocated(b%v)) then
+    info = psb_err_invalid_vect_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+  if (.not.allocated(x%v)) then
+    info = psb_err_invalid_vect_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  do_alloc_wrk = .not.prec%is_allocated_wrk()
+  if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v,desc=desc_a)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='prec%allocate_wrk')
+    goto 9999
+  end if
+
+  mglob = desc_a%get_global_rows()
+  n_col = desc_a%get_local_cols()
+
+  call psb_chkvect(mglob,lone,x%get_nrows(),lone,lone,desc_a,info)
+  if (info == psb_success_)&
+       & call psb_chkvect(mglob,lone,b%get_nrows(),lone,lone,desc_a,info)
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_chkvect on X/B')
+    goto 9999
+  end if
+
+  ! Only two work vectors are needed: the residual and the
+  ! preconditioned residual.
+  naux=4*n_col
+  allocate(aux(naux), stat=info)
+  if (info == psb_success_) call psb_geall(wwrk,desc_a,info,n=2_psb_ipk_)
+  if (info == psb_success_) call psb_geasb(wwrk,desc_a,info,mold=x%v,scratch=.true.)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_non_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  r => wwrk(1)
+  z => wwrk(2)
+
+  ! Initial residual, built from B, A and the initial guess X.
+  call psb_geaxpby(sone,b,szero,r,desc_a,info)
+  if (info == psb_success_) call psb_spmm(-sone,a,x,sone,r,desc_a,info,work=aux)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_non_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+
+  call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
+  if (info /= psb_success_) Then
+    call psb_errpush(psb_err_from_subroutine_non_,name)
+    goto 9999
+  End If
+
+  loop: do itx=1,itmax_
+    ! Each step runs only if the previous one succeeded, so the first
+    ! failure is the one reported instead of being overwritten by the
+    ! status of a later call.
+    call prec%apply(r,z,desc_a,info,work=aux)
+    if (info == psb_success_) call psb_geaxpby(sone,z,sone,x,desc_a,info)
+    if (info == psb_success_) call psb_geaxpby(sone,b,szero,r,desc_a,info)
+    if (info == psb_success_) call psb_spmm(-sone,a,x,sone,r,desc_a,info,work=aux)
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_non_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    if (psb_check_conv(methdname,itx,x,r,desc_a,stopdat,info)) exit loop
+  end do loop
+  call psb_end_conv(methdname,itx,desc_a,stopdat,info,derr,iter)
+  if (present(err)) err = derr
+
+  if (info == psb_success_) call psb_gefree(wwrk,desc_a,info)
+  if (info == psb_success_) deallocate(aux,stat=info)
+  if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info)
+
+  if(info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err=trim(methdname))
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+  return
+
+end subroutine psb_srichardson_vect
+
diff --git a/krylov/psb_zbicg.f90 b/linsolve/impl/psb_zbicg.f90 similarity index 99% rename from krylov/psb_zbicg.f90 rename to linsolve/impl/psb_zbicg.f90 index c22e499a..6a3e0215 100644 --- a/krylov/psb_zbicg.f90 +++ b/linsolve/impl/psb_zbicg.f90 @@ -98,8 +98,8 @@ subroutine psb_zbicg_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop) use psb_base_mod use psb_prec_mod - use psb_z_krylov_conv_mod - use psb_krylov_mod + use psb_z_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_zspmat_type), intent(in) :: a type(psb_desc_type), intent(in) :: desc_a diff --git a/krylov/psb_zcg.F90 b/linsolve/impl/psb_zcg.F90 similarity index 99% rename from krylov/psb_zcg.F90 rename to linsolve/impl/psb_zcg.F90 index a4a521d8..6ca5909c 100644 --- a/krylov/psb_zcg.F90 +++ b/linsolve/impl/psb_zcg.F90 @@ -99,8 +99,8 @@ subroutine psb_zcg_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop,cond) use psb_base_mod use psb_prec_mod - use psb_z_krylov_conv_mod - use psb_krylov_mod + use psb_z_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_zspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a diff --git a/krylov/psb_zcgs.f90 b/linsolve/impl/psb_zcgs.f90 similarity index 99% rename from krylov/psb_zcgs.f90 rename to linsolve/impl/psb_zcgs.f90 index 3ccce860..c5140545 100644 --- a/krylov/psb_zcgs.f90 +++ b/linsolve/impl/psb_zcgs.f90 @@ -96,8 +96,8 @@ Subroutine psb_zcgs_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop) use psb_base_mod use psb_prec_mod - use psb_z_krylov_conv_mod - use psb_krylov_mod + use psb_z_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_zspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a diff --git a/krylov/psb_zcgstab.f90 b/linsolve/impl/psb_zcgstab.f90 similarity index 99% rename
from krylov/psb_zcgstab.f90 rename to linsolve/impl/psb_zcgstab.f90 index 95ff129a..e04cd7c4 100644 --- a/krylov/psb_zcgstab.f90 +++ b/linsolve/impl/psb_zcgstab.f90 @@ -96,8 +96,8 @@ Subroutine psb_zcgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,istop) use psb_base_mod use psb_prec_mod - use psb_z_krylov_conv_mod - use psb_krylov_mod + use psb_z_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_zspmat_type), intent(in) :: a class(psb_zprec_type), Intent(inout) :: prec diff --git a/krylov/psb_zcgstabl.f90 b/linsolve/impl/psb_zcgstabl.f90 similarity index 99% rename from krylov/psb_zcgstabl.f90 rename to linsolve/impl/psb_zcgstabl.f90 index 2cf3a0e5..93abd056 100644 --- a/krylov/psb_zcgstabl.f90 +++ b/linsolve/impl/psb_zcgstabl.f90 @@ -107,8 +107,8 @@ Subroutine psb_zcgstabl_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,irst,istop) use psb_base_mod use psb_prec_mod - use psb_z_krylov_conv_mod - use psb_krylov_mod + use psb_z_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_zspmat_type), intent(in) :: a class(psb_zprec_type), Intent(inout) :: prec diff --git a/krylov/psb_zfcg.F90 b/linsolve/impl/psb_zfcg.F90 similarity index 99% rename from krylov/psb_zfcg.F90 rename to linsolve/impl/psb_zfcg.F90 index 3c26ad3d..1ab036fe 100644 --- a/krylov/psb_zfcg.F90 +++ b/linsolve/impl/psb_zfcg.F90 @@ -107,8 +107,8 @@ subroutine psb_zfcg_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,istop,cond) use psb_base_mod use psb_prec_mod - use psb_z_krylov_conv_mod - use psb_krylov_mod + use psb_z_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_zspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a diff --git a/krylov/psb_zgcr.f90 b/linsolve/impl/psb_zgcr.f90 similarity index 96% rename from krylov/psb_zgcr.f90 rename to linsolve/impl/psb_zgcr.f90 index 2399160c..e48fc4c1 100644 --- a/krylov/psb_zgcr.f90 +++ b/linsolve/impl/psb_zgcr.f90 @@ -109,8 +109,8 @@ subroutine 
psb_zgcr_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace, irst, istop) use psb_base_mod use psb_prec_mod - use psb_z_krylov_conv_mod - use psb_krylov_mod + use psb_z_linsolve_conv_mod + use psb_linsolve_mod implicit none diff --git a/krylov/psb_zkrylov.f90 b/linsolve/impl/psb_zkrylov.f90 similarity index 98% rename from krylov/psb_zkrylov.f90 rename to linsolve/impl/psb_zkrylov.f90 index a70cc98a..900e50b6 100644 --- a/krylov/psb_zkrylov.f90 +++ b/linsolve/impl/psb_zkrylov.f90 @@ -42,6 +42,7 @@ ! ! methd - character The specific method; can take the values: ! CG +! FCG ! CGS ! BICG ! BICGSTAB @@ -83,7 +84,7 @@ Subroutine psb_zkrylov_vect(method,a,prec,b,x,eps,desc_a,info,& use psb_base_mod use psb_prec_mod,only : psb_zprec_type - use psb_krylov_mod, psb_protect_name => psb_zkrylov_vect + use psb_linsolve_mod, psb_protect_name => psb_zkrylov_vect character(len=*) :: method Type(psb_zspmat_type), Intent(in) :: a diff --git a/krylov/psb_zrgmres.f90 b/linsolve/impl/psb_zrgmres.f90 similarity index 97% rename from krylov/psb_zrgmres.f90 rename to linsolve/impl/psb_zrgmres.f90 index 3aaf0032..c6bcc018 100644 --- a/krylov/psb_zrgmres.f90 +++ b/linsolve/impl/psb_zrgmres.f90 @@ -110,8 +110,8 @@ subroutine psb_zrgmres_vect(a,prec,b,x,eps,desc_a,info,& & itmax,iter,err,itrace,irst,istop) use psb_base_mod use psb_prec_mod - use psb_z_krylov_conv_mod - use psb_krylov_mod + use psb_z_linsolve_conv_mod + use psb_linsolve_mod implicit none type(psb_zspmat_type), intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a @@ -344,19 +344,16 @@ subroutine psb_zrgmres_vect(a,prec,b,x,eps,desc_a,info,& goto 9999 end if - if (errnum <= eps*errden) exit restart + if ((errnum <= eps*errden).or.(itx >= litmax)) exit restart if (itrace_ > 0) & & call log_conv(methdname,me,itx,itrace_,errnum,errden,deps) call v(1)%scal(scal) !v(1) = v(1) * scal - if (itx >= litmax) exit restart - ! ! inner iterations ! 
- inner: Do i=1,nl itx = itx + 1 @@ -446,6 +443,8 @@ subroutine psb_zrgmres_vect(a,prec,b,x,eps,desc_a,info,& call psb_geaxpby(zone,w,zone,x,desc_a,info) end if + if (itrace_ > 0) & + & call log_conv(methdname,me,itx,ione,errnum,errden,deps) exit restart end if @@ -472,10 +471,16 @@ subroutine psb_zrgmres_vect(a,prec,b,x,eps,desc_a,info,& call prec%apply(w1,w,desc_a,info) call psb_geaxpby(zone,w,zone,x,desc_a,info) end if - + + if (itx >= litmax) then + if (itrace_ > 0) then + if (mod(itx,itrace_)/=0) & + & call log_conv(methdname,me,itx,ione,errnum,errden,deps) + end if + exit restart + end if + end do restart - if (itrace_ > 0) & - & call log_conv(methdname,me,itx,ione,errnum,errden,deps) call log_end(methdname,me,itx,itrace_,errnum,errden,deps,err=derr,iter=iter) if (present(err)) err = derr diff --git a/linsolve/impl/psb_zrichardson.f90 b/linsolve/impl/psb_zrichardson.f90 new file mode 100644 index 00000000..4c2bc567 --- /dev/null +++ b/linsolve/impl/psb_zrichardson.f90 @@ -0,0 +1,216 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! File: psb_zrichardson.f90 +! Interfaces for Richardson iterative methods. +! +! +! Subroutine: psb_zrichardson +! +! Front-end for the Richardson iterations, complex version +! +! Arguments: +! +! a - type(psb_zspmat_type) Input: sparse matrix containing A. +! prec - class(psb_zprec_type) Input: preconditioner +! b - complex,dimension(:) Input: vector containing the +! right hand side B +! x - complex,dimension(:) Input/Output: vector containing the +! initial guess and final solution X. +! eps - real Input: Stopping tolerance; the iteration is +! stopped when the error +! estimate |err| <= eps +! +! desc_a - type(psb_desc_type). Input: The communication descriptor. +! info - integer. Output: Return code +! +! itmax - integer(optional) Input: maximum number of iterations to be +! performed. +! iter - integer(optional) Output: how many iterations have been +! performed. +! err - real (optional) Output: error estimate on exit +! itrace - integer(optional) Input: print an informational message +! with the error estimate every itrace +! iterations +! istop - integer(optional) Input: stopping criterion, or how +! to estimate the error. +! 1: err = |r|/(|a||x|+|b|) +! 2: err = |r|/|b| +! where r is the (preconditioned, recursive +! estimate of) residual +! 
+Subroutine psb_zrichardson_vect(a,prec,b,x,eps,desc_a,info,&
+     & itmax,iter,err,itrace,istop)
+  ! Preconditioned Richardson sweeps x = x + prec(b - A*x), until psb_check_conv is true or itmax is hit.
+  use psb_base_mod
+  use psb_prec_mod
+  use psb_z_linsolve_conv_mod
+  use psb_linsolve_mod, psb_protect_name => psb_zrichardson_vect
+  implicit none
+
+  Type(psb_zspmat_type), Intent(in)        :: a
+  Type(psb_desc_type), Intent(in)          :: desc_a
+  class(psb_zprec_type), intent(inout)     :: prec
+  type(psb_z_vect_type), Intent(inout)     :: b
+  type(psb_z_vect_type), Intent(inout)     :: x
+  Real(psb_dpk_), Intent(in)               :: eps
+  integer(psb_ipk_), intent(out)           :: info
+  integer(psb_ipk_), Optional, Intent(in)  :: itmax, itrace, istop
+  integer(psb_ipk_), Optional, Intent(out) :: iter
+  Real(psb_dpk_), Optional, Intent(out)    :: err
+
+  logical                                    :: do_alloc_wrk
+  type(psb_ctxt_type)                        :: ctxt
+  integer(psb_ipk_)                          :: me,np,err_act
+  complex(psb_dpk_), allocatable, target     :: aux(:)
+  type(psb_z_vect_type), allocatable, target :: wwrk(:)
+  type(psb_z_vect_type), pointer             :: r, z
+  real(psb_dpk_)                             :: derr
+  integer(psb_ipk_)                          :: itmax_, istop_, naux, itx, itrace_, n_col
+  integer(psb_lpk_)                          :: mglob
+  type(psb_itconv_type)                      :: stopdat
+  character(len=20)                          :: name
+  character(len=*), parameter                :: methdname='RICHARDSON'
+
+  info = psb_success_
+  name = 'psb_zrichardson'
+  call psb_erractionsave(err_act)
+
+  ctxt=desc_a%get_context()
+  call psb_info(ctxt, me, np)
+
+  ! Optional controls: start from the defaults, then override with what the caller supplied.
+  itrace_ = -1
+  istop_  = 2
+  itmax_  = 1000
+  if (present(itrace)) itrace_ = itrace
+  if (present(istop))  istop_  = istop
+  if (present(itmax))  itmax_  = itmax
+
+  ! Validate the vectors first: x%v is the mold for every work area allocated below.
+  if (.not.allocated(b%v)) then
+    info = psb_err_invalid_vect_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+  if (.not.allocated(x%v)) then
+    info = psb_err_invalid_vect_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  mglob = desc_a%get_global_rows()
+  n_col = desc_a%get_local_cols()
+
+  call psb_chkvect(mglob,lone,x%get_nrows(),lone,lone,desc_a,info)
+  if (info == psb_success_)&
+       & call psb_chkvect(mglob,lone,b%get_nrows(),lone,lone,desc_a,info)
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_chkvect on X/B')
+    goto 9999
+  end if
+
+  ! Allocate the preconditioner workspace only when it is not there yet; free it on exit in that case.
+  do_alloc_wrk = .not.prec%is_allocated_wrk()
+  if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v,desc=desc_a)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='prec%allocate_wrk')
+    goto 9999
+  end if
+
+  ! Only two work vectors are needed: the residual r and the preconditioned residual z.
+  naux=4*n_col
+  allocate(aux(naux), stat=info)
+  if (info == psb_success_) call psb_geall(wwrk,desc_a,info,n=2_psb_ipk_)
+  if (info == psb_success_) call psb_geasb(wwrk,desc_a,info,mold=x%v,scratch=.true.)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_non_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  r => wwrk(1)
+  z => wwrk(2)
+
+  ! Initial residual r = b - A*x
+  call psb_geaxpby(zone,b,zzero,r,desc_a,info)
+  if (info == psb_success_) call psb_spmm(-zone,a,x,zone,r,desc_a,info,work=aux)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_non_
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+
+  call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
+  if (info /= psb_success_) Then
+    call psb_errpush(psb_err_from_subroutine_non_,name)
+    goto 9999
+  End If
+
+  loop: do itx=1,itmax_
+    ! z = prec(r); x = x + z; r = b - A*x.  Stop at the first failing step instead of overwriting info.
+    call prec%apply(r,z,desc_a,info,work=aux)
+    if (info == psb_success_) call psb_geaxpby(zone,z,zone,x,desc_a,info)
+    if (info == psb_success_) call psb_geaxpby(zone,b,zzero,r,desc_a,info)
+    if (info == psb_success_) call psb_spmm(-zone,a,x,zone,r,desc_a,info,work=aux)
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_non_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    if (psb_check_conv(methdname,itx,x,r,desc_a,stopdat,info)) exit loop
+  end do loop
+  ! A DO loop that runs to completion leaves itx at itmax_+1: report the sweeps actually performed.
+  itx = min(itx,max(itmax_,0_psb_ipk_))
+  call psb_end_conv(methdname,itx,desc_a,stopdat,info,derr,iter)
+  if (present(err)) err = derr
+
+  if (info == psb_success_) call psb_gefree(wwrk,desc_a,info)
+  if (info == psb_success_) deallocate(aux,stat=info)
+  if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info)
+
+  if(info /= psb_success_) then
+    info = psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err=trim(methdname))
+    goto 9999
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+  return
+
+end subroutine psb_zrichardson_vect
+
diff --git a/krylov/psb_base_krylov_conv_mod.f90 b/linsolve/psb_base_linsolve_conv_mod.f90 similarity index 97% rename from krylov/psb_base_krylov_conv_mod.f90 rename to linsolve/psb_base_linsolve_conv_mod.f90 index be7723f5..8d36fb53 100644 --- a/krylov/psb_base_krylov_conv_mod.f90 +++ b/linsolve/psb_base_linsolve_conv_mod.f90 @@ -30,10 +30,10 @@ ! ! ! -! File: psb_krylov_mod.f90 -! Interfaces for Krylov subspace iterative methods. +! File: psb_base_linsolve_mod.f90 +! Interfaces for linear solvers' convergence handling. ! -Module psb_base_krylov_conv_mod +Module psb_base_linsolve_conv_mod use psb_const_mod @@ -166,4 +166,4 @@ contains end subroutine psb_d_end_conv -end module psb_base_krylov_conv_mod +end module psb_base_linsolve_conv_mod diff --git a/krylov/psb_c_krylov_conv_mod.f90 b/linsolve/psb_c_linsolve_conv_mod.f90 similarity index 98% rename from krylov/psb_c_krylov_conv_mod.f90 rename to linsolve/psb_c_linsolve_conv_mod.f90 index 85a2bca7..73db081e 100644 --- a/krylov/psb_c_krylov_conv_mod.f90 +++ b/linsolve/psb_c_linsolve_conv_mod.f90 @@ -30,12 +30,12 @@ ! ! ! -! File: psb_krylov_conv_mod.f90 -! Interfaces for Krylov subspace iterative methods. +! File: psb_linsolve_conv_mod.f90 +! Interfaces for linear solvers' convergence handling. !
-Module psb_c_krylov_conv_mod +Module psb_c_linsolve_conv_mod - use psb_base_krylov_conv_mod + use psb_base_linsolve_conv_mod interface psb_init_conv module procedure psb_c_init_conv, psb_c_init_conv_vect @@ -355,4 +355,4 @@ contains end function psb_c_check_conv_vect -end module psb_c_krylov_conv_mod +end module psb_c_linsolve_conv_mod diff --git a/krylov/psb_d_krylov_conv_mod.f90 b/linsolve/psb_d_linsolve_conv_mod.f90 similarity index 98% rename from krylov/psb_d_krylov_conv_mod.f90 rename to linsolve/psb_d_linsolve_conv_mod.f90 index 4f9b9f2e..ba59923f 100644 --- a/krylov/psb_d_krylov_conv_mod.f90 +++ b/linsolve/psb_d_linsolve_conv_mod.f90 @@ -30,12 +30,12 @@ ! ! ! -! File: psb_krylov_conv_mod.f90 -! Interfaces for Krylov subspace iterative methods. +! File: psb_linsolve_conv_mod.f90 +! Interfaces for linear solvers' convergence handling. ! -Module psb_d_krylov_conv_mod +Module psb_d_linsolve_conv_mod - use psb_base_krylov_conv_mod + use psb_base_linsolve_conv_mod interface psb_init_conv module procedure psb_d_init_conv, psb_d_init_conv_vect @@ -355,4 +355,4 @@ contains end function psb_d_check_conv_vect -end module psb_d_krylov_conv_mod +end module psb_d_linsolve_conv_mod diff --git a/krylov/psb_krylov_conv_mod.f90 b/linsolve/psb_linsolve_conv_mod.f90 similarity index 86% rename from krylov/psb_krylov_conv_mod.f90 rename to linsolve/psb_linsolve_conv_mod.f90 index 2ec83178..23d6c84c 100644 --- a/krylov/psb_krylov_conv_mod.f90 +++ b/linsolve/psb_linsolve_conv_mod.f90 @@ -30,12 +30,12 @@ ! ! ! -! File: psb_krylov_mod.f90 -! Interfaces for Krylov subspace iterative methods. +! File: psb_linsolve_conv_mod.f90 +! Interfaces for linear solvers' convergence handling. ! 
-module psb_krylov_conv_mod - use psb_s_krylov_conv_mod - use psb_d_krylov_conv_mod - use psb_c_krylov_conv_mod - use psb_z_krylov_conv_mod -end module psb_krylov_conv_mod +module psb_linsolve_conv_mod + use psb_s_linsolve_conv_mod + use psb_d_linsolve_conv_mod + use psb_c_linsolve_conv_mod + use psb_z_linsolve_conv_mod +end module psb_linsolve_conv_mod diff --git a/krylov/psb_krylov_mod.f90 b/linsolve/psb_linsolve_mod.f90 similarity index 60% rename from krylov/psb_krylov_mod.f90 rename to linsolve/psb_linsolve_mod.f90 index d8d4d904..c54326f3 100644 --- a/krylov/psb_krylov_mod.f90 +++ b/linsolve/psb_linsolve_mod.f90 @@ -30,10 +30,10 @@ ! ! ! -! File: psb_krylov_mod.f90 -! Interfaces for Krylov subspace iterative methods. +! File: psb_linsolve_mod.f90 +! Interfaces for linear solvers. ! -Module psb_krylov_mod +Module psb_linsolve_mod use psb_const_mod public @@ -127,4 +127,88 @@ Module psb_krylov_mod end interface -end module psb_krylov_mod
+  interface psb_richardson
+    ! Generic name for the Richardson solvers: four specifics with the same argument list, differing in data types.
+    Subroutine psb_srichardson_vect(a,prec,b,x,eps,desc_a,info,&
+         & itmax,iter,err,itrace,istop)
+      ! s: psb_sspmat_type matrix, psb_s_vect_type vectors, psb_sprec_type preconditioner, psb_spk_ eps/err
+      use psb_base_mod, only : psb_ipk_, psb_desc_type, psb_sspmat_type, &
+           & psb_spk_, psb_s_vect_type
+      use psb_prec_mod, only : psb_sprec_type
+
+      Type(psb_sspmat_type), Intent(in)        :: a
+      Type(psb_desc_type), Intent(in)          :: desc_a
+      class(psb_sprec_type), intent(inout)     :: prec
+      type(psb_s_vect_type), Intent(inout)     :: b
+      type(psb_s_vect_type), Intent(inout)     :: x
+      Real(psb_spk_), Intent(in)               :: eps
+      integer(psb_ipk_), intent(out)           :: info
+      integer(psb_ipk_), Optional, Intent(in)  :: itmax, itrace, istop
+      integer(psb_ipk_), Optional, Intent(out) :: iter
+      Real(psb_spk_), Optional, Intent(out)    :: err
+
+    end Subroutine psb_srichardson_vect
+
+    Subroutine psb_crichardson_vect(a,prec,b,x,eps,desc_a,info,&
+         & itmax,iter,err,itrace,istop)
+      ! c: psb_cspmat_type matrix, psb_c_vect_type vectors, psb_cprec_type preconditioner, psb_spk_ eps/err
+      use psb_base_mod, only : psb_ipk_, psb_desc_type, psb_cspmat_type, &
+           & psb_spk_, psb_c_vect_type
+      use psb_prec_mod, only : psb_cprec_type
+
+      Type(psb_cspmat_type), Intent(in)        :: a
+      Type(psb_desc_type), Intent(in)          :: desc_a
+      class(psb_cprec_type), intent(inout)     :: prec
+      type(psb_c_vect_type), Intent(inout)     :: b
+      type(psb_c_vect_type), Intent(inout)     :: x
+      Real(psb_spk_), Intent(in)               :: eps
+      integer(psb_ipk_), intent(out)           :: info
+      integer(psb_ipk_), Optional, Intent(in)  :: itmax, itrace, istop
+      integer(psb_ipk_), Optional, Intent(out) :: iter
+      Real(psb_spk_), Optional, Intent(out)    :: err
+
+    end Subroutine psb_crichardson_vect
+
+    Subroutine psb_drichardson_vect(a,prec,b,x,eps,desc_a,info,&
+         & itmax,iter,err,itrace,istop)
+      ! d: psb_dspmat_type matrix, psb_d_vect_type vectors, psb_dprec_type preconditioner, psb_dpk_ eps/err
+      use psb_base_mod, only : psb_ipk_, psb_desc_type, psb_dspmat_type, &
+           & psb_dpk_, psb_d_vect_type
+      use psb_prec_mod, only : psb_dprec_type
+
+      Type(psb_dspmat_type), Intent(in)        :: a
+      Type(psb_desc_type), Intent(in)          :: desc_a
+      class(psb_dprec_type), intent(inout)     :: prec
+      type(psb_d_vect_type), Intent(inout)     :: b
+      type(psb_d_vect_type), Intent(inout)     :: x
+      Real(psb_dpk_), Intent(in)               :: eps
+      integer(psb_ipk_), intent(out)           :: info
+      integer(psb_ipk_), Optional, Intent(in)  :: itmax, itrace, istop
+      integer(psb_ipk_), Optional, Intent(out) :: iter
+      Real(psb_dpk_), Optional, Intent(out)    :: err
+
+    end Subroutine psb_drichardson_vect
+
+    Subroutine psb_zrichardson_vect(a,prec,b,x,eps,desc_a,info,&
+         & itmax,iter,err,itrace,istop)
+      ! z: psb_zspmat_type matrix, psb_z_vect_type vectors, psb_zprec_type preconditioner, psb_dpk_ eps/err
+      use psb_base_mod, only : psb_ipk_, psb_desc_type, psb_zspmat_type, &
+           & psb_dpk_, psb_z_vect_type
+      use psb_prec_mod, only : psb_zprec_type
+
+      Type(psb_zspmat_type), Intent(in)        :: a
+      Type(psb_desc_type), Intent(in)          :: desc_a
+      class(psb_zprec_type), intent(inout)     :: prec
+      type(psb_z_vect_type), Intent(inout)     :: b
+      type(psb_z_vect_type), Intent(inout)     :: x
+      Real(psb_dpk_), Intent(in)               :: eps
+      integer(psb_ipk_), intent(out)           :: info
+      integer(psb_ipk_), Optional, Intent(in)  :: itmax, itrace, istop
+      integer(psb_ipk_), Optional, Intent(out) :: iter
+      Real(psb_dpk_), Optional, Intent(out)    :: err
+
+    end Subroutine psb_zrichardson_vect
+
+  end interface
+
+end module
psb_linsolve_mod diff --git a/krylov/psb_s_krylov_conv_mod.f90 b/linsolve/psb_s_linsolve_conv_mod.f90 similarity index 98% rename from krylov/psb_s_krylov_conv_mod.f90 rename to linsolve/psb_s_linsolve_conv_mod.f90 index 29713c37..5de6ebb6 100644 --- a/krylov/psb_s_krylov_conv_mod.f90 +++ b/linsolve/psb_s_linsolve_conv_mod.f90 @@ -30,12 +30,12 @@ ! ! ! -! File: psb_krylov_conv_mod.f90 -! Interfaces for Krylov subspace iterative methods. +! File: psb_linsolve_conv_mod.f90 +! Interfaces for linear solvers' convergence handling. ! -Module psb_s_krylov_conv_mod +Module psb_s_linsolve_conv_mod - use psb_base_krylov_conv_mod + use psb_base_linsolve_conv_mod interface psb_init_conv module procedure psb_s_init_conv, psb_s_init_conv_vect @@ -355,4 +355,4 @@ contains end function psb_s_check_conv_vect -end module psb_s_krylov_conv_mod +end module psb_s_linsolve_conv_mod diff --git a/krylov/psb_z_krylov_conv_mod.f90 b/linsolve/psb_z_linsolve_conv_mod.f90 similarity index 98% rename from krylov/psb_z_krylov_conv_mod.f90 rename to linsolve/psb_z_linsolve_conv_mod.f90 index fc88ccf6..6c6276ff 100644 --- a/krylov/psb_z_krylov_conv_mod.f90 +++ b/linsolve/psb_z_linsolve_conv_mod.f90 @@ -30,12 +30,12 @@ ! ! ! -! File: psb_krylov_conv_mod.f90 -! Interfaces for Krylov subspace iterative methods. +! File: psb_linsolve_conv_mod.f90 +! Interfaces for linear solvers' convergence handling. ! 
-Module psb_z_krylov_conv_mod +Module psb_z_linsolve_conv_mod - use psb_base_krylov_conv_mod + use psb_base_linsolve_conv_mod interface psb_init_conv module procedure psb_z_init_conv, psb_z_init_conv_vect @@ -355,4 +355,4 @@ contains end function psb_z_check_conv_vect -end module psb_z_krylov_conv_mod +end module psb_z_linsolve_conv_mod diff --git a/openacc/Makefile b/openacc/Makefile new file mode 100644 index 00000000..6b1f3fbb --- /dev/null +++ b/openacc/Makefile @@ -0,0 +1,89 @@
+include ../Make.inc
+# Builds $(LIBNAME) from the module objects compiled here plus the objects built in $(IMPLDIR).
+
+#CC=mpicc
+#FC=mpif90
+#FCOPT=-O0 -march=native
+#OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
+
+LIBDIR=../lib
+INCDIR=../include
+MODDIR=../modules
+IMPLDIR=./impl
+
+INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR)
+FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG).
+CINCLUDES=
+#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
+
+# Module objects compiled in this directory.
+FOBJS= psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
+	psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
+	psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
+	psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
+	psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
+	psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o \
+	psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \
+	psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o \
+	psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
+	psb_oacc_mod.o psb_oacc_env_mod.o
+
+
+LIBNAME=libpsb_openacc.a
+
+OBJS=$(COBJS) $(FOBJS)
+
+.PHONY: lib objs iobjs ilib clean veryclean cclean iclean
+lib: objs ilib
+	$(AR) $(LIBNAME) $(OBJS)
+	/bin/cp -p $(LIBNAME) $(LIBDIR)
+
+objs: $(OBJS) iobjs
+	/bin/cp -p *$(.mod) $(MODDIR)
+
+iobjs: $(OBJS)
+	$(MAKE) -C $(IMPLDIR) objs
+# ilib waits for iobjs so that a parallel make never runs two sub-makes in $(IMPLDIR) at once.
+ilib: $(OBJS) iobjs
+	$(MAKE) -C $(IMPLDIR) lib
+
+psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
+	psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
+	psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
+	psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
+	psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
+	psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o \
+	psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \
+	psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o \
+	psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
+	psb_oacc_env_mod.o
+
+psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
+	psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o psb_oacc_env_mod.o
+psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_oacc_env_mod.o
+psb_i_oacc_vect_mod.o: psb_oacc_env_mod.o
+
+
+psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o
+psb_d_oacc_csr_mat_mod.o psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o: psb_d_oacc_vect_mod.o
+psb_c_oacc_csr_mat_mod.o psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o: psb_c_oacc_vect_mod.o
+psb_z_oacc_csr_mat_mod.o psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o: psb_z_oacc_vect_mod.o
+
+
+
+clean: cclean iclean
+	/bin/rm -f $(FOBJS) *$(.mod) *.a *.smod
+veryclean: clean
+cclean:
+	/bin/rm -f $(COBJS)
+iclean:
+	$(MAKE) -C $(IMPLDIR) clean
+
+.c.o:
+	$(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@
+.f90.o:
+	$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@
+.F90.o:
+	$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@
+.cpp.o:
+	$(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile new file mode 100755 index 00000000..d28d45dd --- /dev/null +++ b/openacc/impl/Makefile @@ -0,0 +1,285 @@ +include ../../Make.inc +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +UP=.. +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. 
+LIBNAME=libpsb_openacc.a + +OBJS= \ +psb_s_oacc_csr_vect_mv.o \ +psb_s_oacc_csr_inner_vect_sv.o \ +psb_s_oacc_csr_scals.o \ +psb_s_oacc_csr_scal.o \ +psb_s_oacc_csr_allocate_mnnz.o \ +psb_s_oacc_csr_reallocate_nz.o \ +psb_s_oacc_csr_cp_from_coo.o \ +psb_s_oacc_csr_cp_from_fmt.o \ +psb_s_oacc_csr_mv_from_coo.o \ +psb_s_oacc_csr_mv_from_fmt.o \ +psb_s_oacc_csr_mold.o \ +psb_s_oacc_mlt_v_2.o \ +psb_s_oacc_mlt_v.o \ +psb_d_oacc_csr_vect_mv.o \ +psb_d_oacc_csr_inner_vect_sv.o \ +psb_d_oacc_csr_scals.o \ +psb_d_oacc_csr_scal.o \ +psb_d_oacc_csr_allocate_mnnz.o \ +psb_d_oacc_csr_reallocate_nz.o \ +psb_d_oacc_csr_cp_from_coo.o \ +psb_d_oacc_csr_cp_from_fmt.o \ +psb_d_oacc_csr_mv_from_coo.o \ +psb_d_oacc_csr_mv_from_fmt.o \ +psb_d_oacc_csr_mold.o \ +psb_d_oacc_mlt_v_2.o \ +psb_d_oacc_mlt_v.o \ +psb_c_oacc_csr_vect_mv.o \ +psb_c_oacc_csr_inner_vect_sv.o \ +psb_c_oacc_csr_scals.o \ +psb_c_oacc_csr_scal.o \ +psb_c_oacc_csr_allocate_mnnz.o \ +psb_c_oacc_csr_reallocate_nz.o \ +psb_c_oacc_csr_cp_from_coo.o \ +psb_c_oacc_csr_cp_from_fmt.o \ +psb_c_oacc_csr_mv_from_coo.o \ +psb_c_oacc_csr_mv_from_fmt.o \ +psb_c_oacc_csr_mold.o \ +psb_c_oacc_mlt_v_2.o \ +psb_c_oacc_mlt_v.o \ +psb_z_oacc_csr_vect_mv.o \ +psb_z_oacc_csr_inner_vect_sv.o \ +psb_z_oacc_csr_scals.o \ +psb_z_oacc_csr_scal.o \ +psb_z_oacc_csr_allocate_mnnz.o \ +psb_z_oacc_csr_reallocate_nz.o \ +psb_z_oacc_csr_cp_from_coo.o \ +psb_z_oacc_csr_cp_from_fmt.o \ +psb_z_oacc_csr_mv_from_coo.o \ +psb_z_oacc_csr_mv_from_fmt.o \ +psb_z_oacc_csr_mold.o \ +psb_z_oacc_mlt_v_2.o \ +psb_z_oacc_mlt_v.o \ +psb_s_oacc_ell_vect_mv.o \ +psb_s_oacc_ell_inner_vect_sv.o \ +psb_s_oacc_ell_scals.o \ +psb_s_oacc_ell_scal.o \ +psb_s_oacc_ell_reallocate_nz.o \ +psb_s_oacc_ell_allocate_mnnz.o \ +psb_s_oacc_ell_cp_from_coo.o \ +psb_s_oacc_ell_cp_from_fmt.o \ +psb_s_oacc_ell_mv_from_coo.o \ +psb_s_oacc_ell_mv_from_fmt.o \ +psb_s_oacc_ell_mold.o \ +psb_s_oacc_hll_mold.o \ +psb_s_oacc_hll_mv_from_fmt.o \ +psb_s_oacc_hll_mv_from_coo.o \ 
+psb_s_oacc_hll_cp_from_fmt.o \ +psb_s_oacc_hll_cp_from_coo.o \ +psb_s_oacc_hll_allocate_mnnz.o \ +psb_s_oacc_hll_reallocate_nz.o \ +psb_s_oacc_hll_scal.o \ +psb_s_oacc_hll_scals.o \ +psb_s_oacc_hll_inner_vect_sv.o \ +psb_s_oacc_hll_vect_mv.o \ +psb_d_oacc_ell_vect_mv.o \ +psb_d_oacc_ell_inner_vect_sv.o \ +psb_d_oacc_ell_scals.o \ +psb_d_oacc_ell_scal.o \ +psb_d_oacc_ell_reallocate_nz.o \ +psb_d_oacc_ell_allocate_mnnz.o \ +psb_d_oacc_ell_cp_from_coo.o \ +psb_d_oacc_ell_cp_from_fmt.o \ +psb_d_oacc_ell_mv_from_coo.o \ +psb_d_oacc_ell_mv_from_fmt.o \ +psb_d_oacc_ell_mold.o \ +psb_d_oacc_hll_mold.o \ +psb_d_oacc_hll_mv_from_fmt.o \ +psb_d_oacc_hll_mv_from_coo.o \ +psb_d_oacc_hll_cp_from_fmt.o \ +psb_d_oacc_hll_cp_from_coo.o \ +psb_d_oacc_hll_allocate_mnnz.o \ +psb_d_oacc_hll_reallocate_nz.o \ +psb_d_oacc_hll_scal.o \ +psb_d_oacc_hll_scals.o \ +psb_d_oacc_hll_inner_vect_sv.o \ +psb_d_oacc_hll_vect_mv.o \ +psb_c_oacc_ell_vect_mv.o \ +psb_c_oacc_ell_inner_vect_sv.o \ +psb_c_oacc_ell_scals.o \ +psb_c_oacc_ell_scal.o \ +psb_c_oacc_ell_reallocate_nz.o \ +psb_c_oacc_ell_allocate_mnnz.o \ +psb_c_oacc_ell_cp_from_coo.o \ +psb_c_oacc_ell_cp_from_fmt.o \ +psb_c_oacc_ell_mv_from_coo.o \ +psb_c_oacc_ell_mv_from_fmt.o \ +psb_c_oacc_ell_mold.o \ +psb_c_oacc_hll_mold.o \ +psb_c_oacc_hll_mv_from_fmt.o \ +psb_c_oacc_hll_mv_from_coo.o \ +psb_c_oacc_hll_cp_from_fmt.o \ +psb_c_oacc_hll_cp_from_coo.o \ +psb_c_oacc_hll_allocate_mnnz.o \ +psb_c_oacc_hll_reallocate_nz.o \ +psb_c_oacc_hll_scal.o \ +psb_c_oacc_hll_scals.o \ +psb_c_oacc_hll_inner_vect_sv.o \ +psb_c_oacc_hll_vect_mv.o \ +psb_z_oacc_ell_vect_mv.o \ +psb_z_oacc_ell_inner_vect_sv.o \ +psb_z_oacc_ell_scals.o \ +psb_z_oacc_ell_scal.o \ +psb_z_oacc_ell_reallocate_nz.o \ +psb_z_oacc_ell_allocate_mnnz.o \ +psb_z_oacc_ell_cp_from_coo.o \ +psb_z_oacc_ell_cp_from_fmt.o \ +psb_z_oacc_ell_mv_from_coo.o \ +psb_z_oacc_ell_mv_from_fmt.o \ +psb_z_oacc_ell_mold.o \ +psb_z_oacc_hll_mold.o \ +psb_z_oacc_hll_mv_from_fmt.o \ 
+psb_z_oacc_hll_mv_from_coo.o \ +psb_z_oacc_hll_cp_from_fmt.o \ +psb_z_oacc_hll_cp_from_coo.o \ +psb_z_oacc_hll_allocate_mnnz.o \ +psb_z_oacc_hll_reallocate_nz.o \ +psb_z_oacc_hll_scal.o \ +psb_z_oacc_hll_scals.o \ +psb_z_oacc_hll_inner_vect_sv.o \ +psb_z_oacc_hll_vect_mv.o \ +psb_z_oacc_ell_vect_mv.o \ +psb_z_oacc_ell_inner_vect_sv.o \ +psb_z_oacc_ell_scals.o \ +psb_z_oacc_ell_scal.o \ +psb_z_oacc_ell_reallocate_nz.o \ +psb_z_oacc_ell_allocate_mnnz.o \ +psb_z_oacc_ell_cp_from_coo.o \ +psb_z_oacc_ell_cp_from_fmt.o \ +psb_z_oacc_ell_mv_from_coo.o \ +psb_z_oacc_ell_mv_from_fmt.o \ +psb_z_oacc_ell_mold.o \ +psb_z_oacc_hll_mold.o \ +psb_z_oacc_hll_mv_from_fmt.o \ +psb_z_oacc_hll_mv_from_coo.o \ +psb_z_oacc_hll_cp_from_fmt.o \ +psb_z_oacc_hll_cp_from_coo.o \ +psb_z_oacc_hll_allocate_mnnz.o \ +psb_z_oacc_hll_reallocate_nz.o \ +psb_z_oacc_hll_scal.o \ +psb_z_oacc_hll_scals.o \ +psb_z_oacc_hll_inner_vect_sv.o \ +psb_z_oacc_hll_vect_mv.o + + +objs: $(OBJS) + +lib: objs + $(AR) ../$(LIBNAME) $(OBJS) + +psb_s_oacc_csr_vect_mv.o psb_s_oacc_csr_inner_vect_sv.o \ + psb_s_oacc_csr_scals.o \ + psb_s_oacc_csr_scal.o psb_s_oacc_csr_allocate_mnnz.o \ + psb_s_oacc_csr_reallocate_nz.o psb_s_oacc_csr_cp_from_coo.o \ + psb_s_oacc_csr_cp_from_fmt.o psb_s_oacc_csr_mv_from_coo.o \ + psb_s_oacc_csr_mv_from_fmt.o psb_s_oacc_csr_mold.o: $(UP)/psb_s_oacc_csr_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o + +psb_s_oacc_ell_vect_mv.o psb_s_oacc_ell_inner_vect_sv.o \ + psb_s_oacc_ell_scals.o \ + psb_s_oacc_ell_scal.o psb_s_oacc_ell_allocate_mnnz.o \ + psb_s_oacc_ell_reallocate_nz.o psb_s_oacc_ell_cp_from_coo.o \ + psb_s_oacc_ell_cp_from_fmt.o psb_s_oacc_ell_mv_from_coo.o \ + psb_s_oacc_ell_mv_from_fmt.o psb_s_oacc_ell_mold.o: $(UP)/psb_s_oacc_ell_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o + +psb_s_oacc_hll_vect_mv.o psb_s_oacc_hll_inner_vect_sv.o \ + psb_s_oacc_hll_scals.o \ + psb_s_oacc_hll_scal.o psb_s_oacc_hll_allocate_mnnz.o \ + psb_s_oacc_hll_reallocate_nz.o psb_s_oacc_hll_cp_from_coo.o \ + 
psb_s_oacc_hll_cp_from_fmt.o psb_s_oacc_hll_mv_from_coo.o \ + psb_s_oacc_hll_mv_from_fmt.o psb_s_oacc_hll_mold.o: $(UP)/psb_s_oacc_hll_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o + + +psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \ + psb_d_oacc_csr_scals.o \ + psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \ + psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \ + psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \ + psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o + +psb_d_oacc_ell_vect_mv.o psb_d_oacc_ell_inner_vect_sv.o \ + psb_d_oacc_ell_scals.o \ + psb_d_oacc_ell_scal.o psb_d_oacc_ell_allocate_mnnz.o \ + psb_d_oacc_ell_reallocate_nz.o psb_d_oacc_ell_cp_from_coo.o \ + psb_d_oacc_ell_cp_from_fmt.o psb_d_oacc_ell_mv_from_coo.o \ + psb_d_oacc_ell_mv_from_fmt.o psb_d_oacc_ell_mold.o: $(UP)/psb_d_oacc_ell_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o + +psb_d_oacc_hll_vect_mv.o psb_d_oacc_hll_inner_vect_sv.o \ + psb_d_oacc_hll_scals.o \ + psb_d_oacc_hll_scal.o psb_d_oacc_hll_allocate_mnnz.o \ + psb_d_oacc_hll_reallocate_nz.o psb_d_oacc_hll_cp_from_coo.o \ + psb_d_oacc_hll_cp_from_fmt.o psb_d_oacc_hll_mv_from_coo.o \ + psb_d_oacc_hll_mv_from_fmt.o psb_d_oacc_hll_mold.o: $(UP)/psb_d_oacc_hll_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o + +psb_c_oacc_csr_vect_mv.o psb_c_oacc_csr_inner_vect_sv.o \ + psb_c_oacc_csr_scals.o \ + psb_c_oacc_csr_scal.o psb_c_oacc_csr_allocate_mnnz.o \ + psb_c_oacc_csr_reallocate_nz.o psb_c_oacc_csr_cp_from_coo.o \ + psb_c_oacc_csr_cp_from_fmt.o psb_c_oacc_csr_mv_from_coo.o \ + psb_c_oacc_csr_mv_from_fmt.o psb_c_oacc_csr_mold.o: $(UP)/psb_c_oacc_csr_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o + +psb_c_oacc_ell_vect_mv.o psb_c_oacc_ell_inner_vect_sv.o \ + psb_c_oacc_ell_scals.o \ + psb_c_oacc_ell_scal.o psb_c_oacc_ell_allocate_mnnz.o \ + psb_c_oacc_ell_reallocate_nz.o psb_c_oacc_ell_cp_from_coo.o \ + psb_c_oacc_ell_cp_from_fmt.o psb_c_oacc_ell_mv_from_coo.o \ + 
psb_c_oacc_ell_mv_from_fmt.o psb_c_oacc_ell_mold.o: $(UP)/psb_c_oacc_ell_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o + +psb_c_oacc_hll_vect_mv.o psb_c_oacc_hll_inner_vect_sv.o \ + psb_c_oacc_hll_scals.o \ + psb_c_oacc_hll_scal.o psb_c_oacc_hll_allocate_mnnz.o \ + psb_c_oacc_hll_reallocate_nz.o psb_c_oacc_hll_cp_from_coo.o \ + psb_c_oacc_hll_cp_from_fmt.o psb_c_oacc_hll_mv_from_coo.o \ + psb_c_oacc_hll_mv_from_fmt.o psb_c_oacc_hll_mold.o: $(UP)/psb_c_oacc_hll_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o + + +psb_z_oacc_csr_vect_mv.o psb_z_oacc_csr_inner_vect_sv.o \ + psb_z_oacc_csr_scals.o \ + psb_z_oacc_csr_scal.o psb_z_oacc_csr_allocate_mnnz.o \ + psb_z_oacc_csr_reallocate_nz.o psb_z_oacc_csr_cp_from_coo.o \ + psb_z_oacc_csr_cp_from_fmt.o psb_z_oacc_csr_mv_from_coo.o \ + psb_z_oacc_csr_mv_from_fmt.o psb_z_oacc_csr_mold.o: $(UP)/psb_z_oacc_csr_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o + +psb_z_oacc_ell_vect_mv.o psb_z_oacc_ell_inner_vect_sv.o \ + psb_z_oacc_ell_scals.o \ + psb_z_oacc_ell_scal.o psb_z_oacc_ell_allocate_mnnz.o \ + psb_z_oacc_ell_reallocate_nz.o psb_z_oacc_ell_cp_from_coo.o \ + psb_z_oacc_ell_cp_from_fmt.o psb_z_oacc_ell_mv_from_coo.o \ + psb_z_oacc_ell_mv_from_fmt.o psb_z_oacc_ell_mold.o: $(UP)/psb_z_oacc_ell_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o + +psb_z_oacc_hll_vect_mv.o psb_z_oacc_hll_inner_vect_sv.o \ + psb_z_oacc_hll_scals.o \ + psb_z_oacc_hll_scal.o psb_z_oacc_hll_allocate_mnnz.o \ + psb_z_oacc_hll_reallocate_nz.o psb_z_oacc_hll_cp_from_coo.o \ + psb_z_oacc_hll_cp_from_fmt.o psb_z_oacc_hll_mv_from_coo.o \ + psb_z_oacc_hll_mv_from_fmt.o psb_z_oacc_hll_mold.o: $(UP)/psb_z_oacc_hll_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o + +psb_s_oacc_mlt_v_2.o psb_s_oacc_mlt_v.o: $(UP)/psb_s_oacc_vect_mod.o +psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o +psb_c_oacc_mlt_v_2.o psb_c_oacc_mlt_v.o: $(UP)/psb_c_oacc_vect_mod.o +psb_z_oacc_mlt_v_2.o psb_z_oacc_mlt_v.o: $(UP)/psb_z_oacc_vect_mod.o + + +clean: + /bin/rm -f $(OBJS) *.smod + +.c.o: + $(CC) $(CCOPT) 
$(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@ +.f90.o: + $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@ +.F90.o: + $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@ +.cpp.o: + $(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
diff --git a/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90
new file mode 100644
index 00000000..a6fe9b9c
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90
@@ -0,0 +1,30 @@
+! Allocation entry point for the single-precision complex OpenACC CSR matrix.
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_allocate_mnnz_impl
+  use psb_base_mod
+contains
+  ! Allocate a through the parent CSR type, then call a%set_host() and
+  ! a%sync_dev_space() on the result.
+  !   m, n : dimensions, forwarded to the parent allocate
+  !   a    : matrix being allocated
+  !   nz   : optional, forwarded unchanged to the parent allocate
+  ! None of the three calls returns a status, so the former
+  ! "if (info /= 0) goto 9999" test could never fire; it has been removed
+  ! together with the locals that only served it.
+  module subroutine psb_c_oacc_csr_allocate_mnnz(m, n, a, nz)
+    implicit none
+    integer(psb_ipk_), intent(in) :: m, n
+    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in), optional :: nz
+    integer(psb_ipk_) :: err_act
+
+    call psb_erractionsave(err_act)
+
+    call a%psb_c_csr_sparse_mat%allocate(m, n, nz)
+    call a%set_host()
+    call a%sync_dev_space()
+
+    call psb_erractionrestore(err_act)
+    return
+
+  end subroutine psb_c_oacc_csr_allocate_mnnz
+end submodule psb_c_oacc_csr_allocate_mnnz_impl
diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 new file mode 100644 index 00000000..ab5f514b --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_cp_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_c_csr_sparse_mat%cp_from_coo(b, info)
+ if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_csr_cp_from_coo +end submodule psb_c_oacc_csr_cp_from_coo_impl
diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
new file mode 100644
index 00000000..c6287cbb
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
@@ -0,0 +1,34 @@
+! Copy-conversion from an arbitrary format into the OpenACC CSR matrix.
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_fmt_impl
+  use psb_base_mod
+contains
+  ! Copy b into a.
+  !   a    : destination matrix
+  !   b    : source matrix in any storage format (intent(in))
+  !   info : psb_success_, or the code left by the conversion that failed
+  module subroutine psb_c_oacc_csr_cp_from_fmt(a, b, info)
+    implicit none
+
+    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+    class(psb_c_base_sparse_mat), intent(in) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    select type(b)
+    type is (psb_c_coo_sparse_mat)
+      ! COO input is handed to the dedicated cp_from_coo binding.
+      call a%cp_from_coo(b, info)
+    class default
+      ! Other formats: convert through the parent CSR type, bracketed by
+      ! free_dev_space before and sync_dev_space/set_host/sync after.
+      call a%free_dev_space()
+      call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info)
+      if (info /= 0) return
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    end select
+
+  end subroutine psb_c_oacc_csr_cp_from_fmt
+end submodule psb_c_oacc_csr_cp_from_fmt_impl
diff --git a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
new file mode 100644
index 00000000..833140f4
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
@@ -0,0 +1,62 @@
+! Solve kernel on encapsulated vectors for the OpenACC CSR matrix.
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_inner_vect_sv_impl
+  use psb_base_mod
+contains
+  ! Delegates to the parent CSR type's inner_spsm for every combination of
+  ! trans, beta and vector type.
+  !
+  ! The former device branch (trans = 'N', beta == 0, both vectors of type
+  ! psb_c_vect_oacc) ran, for i = 1 .. size(a%val),
+  !     yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i)
+  ! It never read a%irp and indexed y by stored-entry position, so it was an
+  ! entry-wise product rather than a solve and ran past the end of yy%v as
+  ! soon as a held more stored entries than y has elements.
+  !
+  !   alpha, beta : scalars, forwarded to the parent routine
+  !   a           : matrix; a%is_asb() must be true
+  !   x, y        : vectors, forwarded to the parent routine
+  !   info        : psb_success_, psb_err_invalid_mat_state_ when a%is_asb()
+  !                 is false, psb_err_from_subroutine_ when the parent fails
+  !   trans       : optional, forwarded to the parent routine
+  module subroutine psb_c_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+    implicit none
+    class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+    complex(psb_spk_), intent(in) :: alpha, beta
+    class(psb_c_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    integer(psb_ipk_) :: err_act
+    ! len=32: with len=20 the 24-character name was stored truncated.
+    character(len=32) :: name = 'c_oacc_csr_inner_vect_sv'
+
+    call psb_get_erraction(err_act)
+    info = psb_success_
+
+    if (.not.a%is_asb()) then
+      info = psb_err_invalid_mat_state_
+      call psb_errpush(info, name)
+      goto 9999
+    endif
+
+    ! NOTE(review): a is not synchronised here, and was not before either;
+    ! confirm its host copy is current whenever this routine is reached.
+    call x%sync()
+    call y%sync()
+    call a%psb_c_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+    call y%set_host()
+
+    if (info /= psb_success_) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info, name, a_err = 'csrg_vect_sv')
+      goto 9999
+    endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_c_oacc_csr_inner_vect_sv
+end submodule psb_c_oacc_csr_inner_vect_sv_impl
+
diff --git a/openacc/impl/psb_c_oacc_csr_mold.F90 b/openacc/impl/psb_c_oacc_csr_mold.F90 new file mode 100644 index 00000000..cf32ea94 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_mold.F90 @@ -0,0 +1,35 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mold_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_mold(a, b, info) + implicit
none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='csr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_c_oacc_csr_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_c_oacc_csr_mold +end submodule psb_c_oacc_csr_mold_impl +
diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
new file mode 100644
index 00000000..73f02b84
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
@@ -0,0 +1,31 @@
+! Move-conversion from COO into the OpenACC CSR matrix.
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_coo_impl
+  use psb_base_mod
+contains
+  ! Hand b to the parent CSR mv_from_coo and rebuild the device side of a.
+  !   a    : destination matrix
+  !   b    : COO source, passed to the parent as intent(inout)
+  !   info : psb_success_, or psb_err_alloc_dealloc_ for any parent failure
+  module subroutine psb_c_oacc_csr_mv_from_coo(a, b, info)
+    implicit none
+
+    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+    class(psb_c_coo_sparse_mat), intent(inout) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    call a%free_dev_space()
+    call a%psb_c_csr_sparse_mat%mv_from_coo(b, info)
+
+    if (info == 0) then
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    else
+      ! Every non-zero status from the parent is reported as this one code.
+      info = psb_err_alloc_dealloc_
+    end if
+
+  end subroutine psb_c_oacc_csr_mv_from_coo
+end submodule psb_c_oacc_csr_mv_from_coo_impl
diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
new file mode 100644
index 00000000..853a3269
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
@@ -0,0 +1,32 @@
+! Move-conversion from an arbitrary format into the OpenACC CSR matrix.
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_fmt_impl
+  use psb_base_mod
+contains
+  ! Move b into a.
+  !   a    : destination matrix
+  !   b    : source matrix in any storage format, passed on as intent(inout)
+  !   info : psb_success_, or the code left by the conversion that failed
+  module subroutine psb_c_oacc_csr_mv_from_fmt(a, b, info)
+    implicit none
+
+    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+    class(psb_c_base_sparse_mat), intent(inout) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    select type(b)
+    type is (psb_c_coo_sparse_mat)
+      call a%mv_from_coo(b, info)
+    class default
+      call a%free_dev_space()
+      call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info)
+      if (info == 0) then
+        call a%sync_dev_space()
+        call a%set_host()
+        call a%sync()
+      end if
+    end select
+
+  end subroutine psb_c_oacc_csr_mv_from_fmt
+end submodule psb_c_oacc_csr_mv_from_fmt_impl
diff --git a/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 new file mode 100644 index 00000000..e7dc970a --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_oacc_csr_reallocate_nz' + logical, parameter :: debug=.false.
+ + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_csr_sparse_mat%reallocate(nz) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_csr_reallocate_nz +end submodule psb_c_oacc_csr_reallocate_nz_impl
diff --git a/openacc/impl/psb_c_oacc_csr_scal.F90 b/openacc/impl/psb_c_oacc_csr_scal.F90
new file mode 100644
index 00000000..5dece48b
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_scal.F90
@@ -0,0 +1,62 @@
+! Diagonal scaling for the single-precision complex OpenACC CSR matrix.
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scal_impl
+  use psb_base_mod
+contains
+  ! Multiply the stored entries of a by elements of d in OpenACC loops.
+  !   d    : scaling factors, indexed by row ('L') or by column ('R')
+  !   a    : matrix, updated in place and flagged with set_dev
+  !   info : always psb_success_ on return
+  !   side : optional, compared case-insensitively; 'L' when absent.
+  !          Values other than 'L' and 'R' leave the entries untouched.
+  module subroutine psb_c_oacc_csr_scal(d, a, info, side)
+    implicit none
+    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+    complex(psb_spk_), intent(in) :: d(:)
+    integer(psb_ipk_), intent(out) :: info
+    character, intent(in), optional :: side
+
+    integer(psb_ipk_) :: err_act
+    integer(psb_ipk_) :: i, j, m
+    character :: side_
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    ! A missing side means row scaling. The previous fallback multiplied
+    ! val(k) by d(k) over all stored entries, reading past the end of d as
+    ! soon as a holds more entries than d has elements.
+    side_ = 'L'
+    if (present(side)) side_ = psb_toupper(side)
+
+    if (a%is_host()) call a%sync()
+
+    ! irp is walked row by row in both branches, so the loop bound is the
+    ! row count. The 'R' branch used get_ncols(), which only coincides with
+    ! it for square matrices.
+    m = a%get_nrows()
+
+    ! copyin(d) supplies d when it is not yet on the device; present(d)
+    ! is a run-time error in that situation.
+    if (side_ == 'L') then
+      !$acc parallel loop present(a) copyin(d)
+      do i = 1, m
+        do j = a%irp(i), a%irp(i+1) - 1
+          a%val(j) = a%val(j) * d(i)
+        end do
+      end do
+    else if (side_ == 'R') then
+      !$acc parallel loop present(a) copyin(d)
+      do i = 1, m
+        do j = a%irp(i), a%irp(i+1) - 1
+          a%val(j) = a%val(j) * d(a%ja(j))
+        end do
+      end do
+    end if
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+    return
+
+  end subroutine psb_c_oacc_csr_scal
+end submodule psb_c_oacc_csr_scal_impl
diff --git a/openacc/impl/psb_c_oacc_csr_scals.F90 b/openacc/impl/psb_c_oacc_csr_scals.F90 new file mode 100644 index 00000000..aba22d43 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_scals.F90 @@ -0,0 +1,34 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scals_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_scals(d, a, info) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_csr_scals +end submodule psb_c_oacc_csr_scals_impl
diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
new file mode 100644
index 00000000..c1030094
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
@@ -0,0 +1,101 @@
+! Matrix-vector product on encapsulated vectors for the OpenACC CSR matrix.
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_vect_mv_impl
+  use psb_base_mod
+contains
+  ! Non-transposed product with both x and y of class psb_c_vect_oacc:
+  ! y = alpha*a*x + beta*y is evaluated by the OpenACC kernel inner_spmv and
+  ! y is flagged with set_dev. Every other case goes through the parent
+  ! CSR spmm on x%v and y%v, after which y is flagged with set_host.
+  !   alpha, beta : scalars of the update
+  !   a           : matrix
+  !   x, y        : vectors; y is updated
+  !   info        : psb_success_, or psb_err_invalid_mat_state_ when x%v or
+  !                 y%v is shorter than the matrix dimensions require
+  !   trans       : optional, 'N' when absent; 'T' and 'C' take the parent path
+  module subroutine psb_c_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+    implicit none
+
+    complex(psb_spk_), intent(in) :: alpha, beta
+    class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+    class(psb_c_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    integer(psb_ipk_) :: m, n
+    character :: trans_
+    logical :: device_done, tra
+
+    info = psb_success_
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((n > size(x%v)) .or. (m > size(y%v))) then
+      write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
+      info = psb_err_invalid_mat_state_
+      return
+    end if
+    device_done = .false.
+    if (present(trans)) then
+      trans_ = trans
+    else
+      trans_ = 'N'
+    end if
+    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+    if (.not.tra) then
+      select type(xx => x)
+      class is (psb_c_vect_oacc)
+        select type (yy => y)
+        class is (psb_c_vect_oacc)
+          if (a%is_host()) call a%sync()
+          if (xx%is_host()) call xx%sync()
+          if (yy%is_host()) call yy%sync()
+          call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+          call y%set_dev()
+          device_done = .true.
+        end select
+      end select
+    end if
+
+    if (.not.device_done) then
+      if (x%is_dev()) call x%sync()
+      if (y%is_dev()) call y%sync()
+      call a%psb_c_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+      call y%set_host()
+    end if
+  contains
+
+    ! CSR kernel: y(i) = alpha * sum_j val(j)*x(ja(j)) + beta*y(i) for the
+    ! m rows, taken in chunks of vsz rows. n is accepted but not used.
+    subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
+      implicit none
+      integer(psb_ipk_), intent(in) :: m, n
+      complex(psb_spk_), intent(in) :: alpha, beta
+      complex(psb_spk_), intent(in) :: val(:), x(:)
+      complex(psb_spk_), intent(inout) :: y(:)
+      integer(psb_ipk_), intent(in) :: ja(:), irp(:)
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_ipk_) :: i, j, ii, isz
+      complex(psb_spk_) :: tmp
+      integer(psb_ipk_), parameter :: vsz = 256
+
+      info = 0
+
+      !$acc parallel loop vector_length(vsz) private(isz)
+      do ii = 1, m, vsz
+        isz = min(vsz, m - ii + 1)
+        !$acc loop independent private(tmp)
+        do i = ii, ii + isz - 1
+          ! Zero of the accumulator's own type; this was a double-precision
+          ! real literal, out of place in the single-precision complex kernel.
+          tmp = (0.0_psb_spk_, 0.0_psb_spk_)
+          !$acc loop seq
+          do j = irp(i), irp(i + 1) - 1
+            tmp = tmp + val(j) * x(ja(j))
+          end do
+          y(i) = alpha * tmp + beta * y(i)
+        end do
+      end do
+    end subroutine inner_spmv
+
+  end subroutine psb_c_oacc_csr_vect_mv
+end submodule psb_c_oacc_csr_vect_mv_impl
diff --git a/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 new file mode 100644 index 00000000..5a363434 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 @@ -0,0 +1,35 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false.
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_c_ell_sparse_mat%allocate(m, n, nz_) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_allocate_mnnz +end submodule psb_c_oacc_ell_allocate_mnnz_impl
diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90
new file mode 100644
index 00000000..47118e48
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90
@@ -0,0 +1,31 @@
+! Copy-conversion from COO into the OpenACC ELL matrix.
+submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_cp_from_coo_impl
+  use psb_base_mod
+contains
+  ! Copy b into a through the parent ELL cp_from_coo.
+  !   a    : destination matrix
+  !   b    : COO source (intent(in))
+  !   info : psb_success_, or psb_err_alloc_dealloc_ for any parent failure
+  module subroutine psb_c_oacc_ell_cp_from_coo(a, b, info)
+    implicit none
+
+    class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+    class(psb_c_coo_sparse_mat), intent(in) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    call a%free_dev_space()
+    call a%psb_c_ell_sparse_mat%cp_from_coo(b, info)
+
+    if (info == 0) then
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    else
+      ! Every non-zero status from the parent is reported as this one code.
+      info = psb_err_alloc_dealloc_
+    end if
+
+  end subroutine psb_c_oacc_ell_cp_from_coo
+end submodule psb_c_oacc_ell_cp_from_coo_impl
diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 new file mode 100644 index 00000000..559cb65c --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_cp_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b, info) +
class default + call a%free_dev_space() + call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_c_oacc_ell_cp_from_fmt +end submodule psb_c_oacc_ell_cp_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 new file mode 100644 index 00000000..cfabcb78 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 @@ -0,0 +1,85 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_ell_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nzt + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. 
(beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_oacc) + select type(yy => y) + type is (psb_c_vect_oacc) + ! NOTE: an OpenACC loop used to sit here. For every stored entry of + ! row i it overwrote yy%v(i), so only the last entry survived and no + ! substitution was performed: that is not a triangular solve. + ! Until a real device kernel exists, solve on the host with the base + ! ELL implementation, exactly as the branch above does. + ! x and y are synced first when their device copy is the current + ! one, and y is flagged as host-resident afterwards. + ! TODO: reinstate a device path with a proper kernel. + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call yy%set_host() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_c_oacc_ell_inner_vect_sv +end submodule psb_c_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_c_oacc_ell_mold.F90 b/openacc/impl/psb_c_oacc_ell_mold.F90 new file mode 100644 index 00000000..88331d1d --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_mold(a, b, info) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_c_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_c_oacc_ell_mold +end submodule psb_c_oacc_ell_mold_impl diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..183be9a5 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_c_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_ell_mv_from_coo +end submodule psb_c_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..d1bd6330 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call 
a%mv_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_c_oacc_ell_mv_from_fmt +end submodule psb_c_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..24d153f6 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_ell_sparse_mat%reallocate(nz) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_reallocate_nz +end submodule psb_c_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_c_oacc_ell_scal.F90 b/openacc/impl/psb_c_oacc_ell_scal.F90 new file mode 100644 index 00000000..b3ea90fb --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. 
+ integer(psb_ipk_) :: i, j, m, nzt + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = size(a%val, 2) ! slot count of val(:,:); ell_vect_mv bounds the same loop with size(a%ja,2) + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else ! side absent: scale rows, same as side == 'L' + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_scal +end submodule psb_c_oacc_ell_scal_impl diff --git a/openacc/impl/psb_c_oacc_ell_scals.F90 b/openacc/impl/psb_c_oacc_ell_scals.F90 new file mode 100644 index 00000000..f067f253 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_scals(d, a, info) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. 
+ integer(psb_ipk_) :: i, j, nzt, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = size(a%val, 2) ! slot count of val(:,:); ell_vect_mv bounds the same loop with size(a%ja,2) + + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_scals +end submodule psb_c_oacc_ell_scals_impl diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..7a39c031 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 @@ -0,0 +1,90 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt, nc + character :: trans_ + logical :: device_done, tra + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + nc = size(a%ja,2) + if ((n > size(x%v)) .or. (m > size(y%v))) then + write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. 
(psb_toupper(trans_) == 'C') + + if (.not.tra) then + select type(xx => x) + class is (psb_c_vect_oacc) + select type (yy => y) + class is (psb_c_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_c_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + + contains + + subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nc + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + complex(psb_spk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nc + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_c_oacc_ell_vect_mv +end submodule psb_c_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..c90fc652 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,36 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + 
integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_c_hll_sparse_mat%allocate(m, n, nz_) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_allocate_mnnz +end submodule psb_c_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..0eaebf9d --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_c_hll_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_hll_cp_from_coo +end submodule psb_c_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..29b18470 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + 
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_c_oacc_hll_cp_from_fmt +end submodule psb_c_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..4b45f518 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. 
(beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_oacc) + select type(yy => y) + type is (psb_c_vect_oacc) + ! NOTE: an OpenACC loop used to sit here. It walked the entries of + ! each hack from hkoffs(i) to hkoffs(i+1)-1, although hll_vect_mv + ! addresses them as hkoffs(i)+1 .. hkoffs(i+1), and it overwrote + ! yy%v(irn(j)) once per entry without any substitution: that is + ! not a triangular solve. + ! Until a real device kernel exists, solve on the host with the base + ! HLL implementation, exactly as the branch above does. x and y are + ! synced first when their device copy is the current one. + ! TODO: reinstate a device path with a proper kernel. + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call yy%set_host() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_c_oacc_hll_inner_vect_sv +end submodule psb_c_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_c_oacc_hll_mold.F90 b/openacc/impl/psb_c_oacc_hll_mold.F90 new file mode 100644 index 00000000..f480f3ab --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_mold(a, b, info) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_c_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_c_oacc_hll_mold +end submodule psb_c_oacc_hll_mold_impl diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..193f9753 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_c_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_hll_mv_from_coo +end submodule psb_c_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..1c928067 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,25 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call 
a%mv_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + end subroutine psb_c_oacc_hll_mv_from_fmt +end submodule psb_c_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..9290c381 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_hll_sparse_mat%reallocate(nz) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_reallocate_nz +end submodule psb_c_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_c_oacc_hll_scal.F90 b/openacc/impl/psb_c_oacc_hll_scal.F90 new file mode 100644 index 00000000..527a0ec1 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_scal.F90 @@ -0,0 +1,62 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + 
integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i) + 1, a%hkoffs(i + 1) + k = mod(j - a%hkoffs(i) - 1, hksz) + (i - 1) * hksz + 1 ! row owning entry j (layout as in hll_vect_mv) + if (k <= size(d)) a%val(j) = a%val(j) * d(k) ! padding rows of the last hack have no d(k) + end do + end do + else if (side == 'R') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i) + 1, a%hkoffs(i + 1) + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else ! side absent: scale rows, same as side == 'L' + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i) + 1, a%hkoffs(i + 1) + k = mod(j - a%hkoffs(i) - 1, hksz) + (i - 1) * hksz + 1; if (k <= size(d)) a%val(j) = a%val(j) * d(k) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_scal +end submodule psb_c_oacc_hll_scal_impl diff --git a/openacc/impl/psb_c_oacc_hll_scals.F90 b/openacc/impl/psb_c_oacc_hll_scals.F90 new file mode 100644 index 00000000..00f24721 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_scals.F90 @@ -0,0 +1,40 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_scals(d, a, info) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + ! 
$ a parallel loop collapse(2) present(a) + !$acc parallel loop present(a) + do i = 1, nhacks + do j = a%hkoffs(i) + 1, a%hkoffs(i + 1) ! hkoffs holds 0-based offsets, as in hll_vect_mv + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_scals +end submodule psb_c_oacc_hll_scals_impl diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..494ed149 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 @@ -0,0 +1,90 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + character :: trans_ + logical :: device_done, tra + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n > size(x%v)) .or. (m > size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (.not.tra) then + select type(xx => x) + class is (psb_c_vect_oacc) + select type (yy => y) + class is (psb_c_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. 
+ end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_c_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii + complex(psb_spk_) :: tmp + + info = 0 + !$acc parallel loop private(nlc, isz,ir,nr) + do i = 1, nhacks + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = czero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) + end do + end do + end subroutine inner_spmv + end subroutine psb_c_oacc_hll_vect_mv +end submodule psb_c_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_c_oacc_mlt_v.f90 b/openacc/impl/psb_c_oacc_mlt_v.f90 new file mode 100644 index 00000000..e5f215be --- /dev/null +++ b/openacc/impl/psb_c_oacc_mlt_v.f90 @@ -0,0 +1,46 @@ + +subroutine psb_c_oacc_mlt_v(x, y, info) + use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v + + implicit none + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, n + + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_c_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + call c_inner_oacc_mlt_v(n,xx%v, y%v) +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * 
xx%v(i) +!!$ end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select +contains + subroutine c_inner_oacc_mlt_v(n,x, y) + implicit none + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_), intent(inout) :: x(:), y(:) + + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = (x(i)) * (y(i)) + end do + end subroutine c_inner_oacc_mlt_v +end subroutine psb_c_oacc_mlt_v diff --git a/openacc/impl/psb_c_oacc_mlt_v_2.f90 b/openacc/impl/psb_c_oacc_mlt_v_2.f90 new file mode 100644 index 00000000..ed0fc88e --- /dev/null +++ b/openacc/impl/psb_c_oacc_mlt_v_2.f90 @@ -0,0 +1,91 @@ +subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v_2 + use psb_string_mod + implicit none + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_, device_done + + conjgx_ = .false. + conjgy_ = .false. + device_done = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + info = 0 + select type(xx => x) + class is (psb_c_vect_oacc) + select type (yy => y) + class is (psb_c_vect_oacc) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= czero) .and. (z%is_host())) call z%sync() + call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) + call z%set_dev() + device_done = .true. 
+ end select + end select + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= czero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end if + +contains + subroutine c_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) + implicit none + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_), intent(inout) :: x(:), y(:), z(:) + integer(psb_ipk_), intent(out) :: info + logical, intent(in) :: conjgx, conjgy + + integer(psb_ipk_) :: i + if (conjgx.and.conjgy) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * conjg(x(i)) * conjg(y(i)) + beta * z(i) + end do + else if (conjgx.and.(.not.conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * conjg(x(i)) * (y(i)) + beta * z(i) + end do + else if ((.not.conjgx).and.(conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * conjg(y(i)) + beta * z(i) + end do + else + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + end if + end subroutine c_inner_oacc_mlt_v_2 +end subroutine psb_c_oacc_mlt_v_2 + diff --git a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 new file mode 100644 index 00000000..1f210a09 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 @@ -0,0 +1,29 @@ +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_allocate_mnnz_impl + use 
psb_base_mod +contains + module subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%allocate(m, n, nz) + call a%set_host() + call a%sync_dev_space() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_allocate_mnnz +end submodule psb_d_oacc_csr_allocate_mnnz_impl diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 new file mode 100644 index 00000000..92770e0f --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_cp_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_d_csr_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_csr_cp_from_coo +end submodule psb_d_oacc_csr_cp_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 new file mode 100644 index 00000000..9da6d861 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_fmt_impl + use psb_base_mod +contains + 
module subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info)
+    implicit none
+
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+    class(psb_d_base_sparse_mat), intent(in) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    select type(b)
+    type is (psb_d_coo_sparse_mat)
+      call a%cp_from_coo(b, info)
+    class default
+      call a%free_dev_space()
+      call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info)
+      if (info /= 0) return
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    end select
+
+  end subroutine psb_d_oacc_csr_cp_from_fmt
+end submodule psb_d_oacc_csr_cp_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
new file mode 100644
index 00000000..c9a875bc
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
@@ -0,0 +1,63 @@
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_inner_vect_sv_impl
+  use psb_base_mod
+contains
+  !
+  ! Sparse solve ("sv") entry point operating on vector objects.
+  !
+  ! Arguments:
+  !   alpha, beta  scalar coefficients forwarded to the host kernel
+  !   a            matrix; must be in the assembled state
+  !   x, y         vector objects; sync() is called on both before the
+  !                solve and y%set_host() is called afterwards
+  !   info         psb_success_ on success, an error code otherwise
+  !   trans        optional transposition flag, forwarded unchanged
+  !
+  ! The work is always delegated to the inner_spsm kernel of the parent
+  ! CSR type.  An earlier device loop ran over size(a%val) and wrote
+  ! y%v(i) with i a position in a%val: it ignored a%irp, was not a
+  ! solve, and could write past the end of y.
+  ! NOTE(review): a genuine device solve needs a dependency-aware
+  ! (e.g. level-scheduled) kernel; add one here when available.
+  !
+  module subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+    real(psb_dpk_), intent(in) :: alpha, beta
+    class(psb_d_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    integer(psb_ipk_) :: err_act
+    ! Long enough for the full routine name (len=20 truncated it).
+    character(len=24) :: name = 'd_oacc_csr_inner_vect_sv'
+    logical, parameter :: debug = .false.
+
+    call psb_get_erraction(err_act)
+    info = psb_success_
+
+    if (.not.a%is_asb()) then
+      info = psb_err_invalid_mat_state_
+      call psb_errpush(info, name)
+      goto 9999
+    endif
+
+    ! Bring x and y up to date, then run the host solve.
+    call x%sync()
+    call y%sync()
+    call a%psb_d_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+    call y%set_host()
+
+    if (info /= psb_success_) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info, name, a_err = 'csrg_vect_sv')
+      goto 9999
+    endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_d_oacc_csr_inner_vect_sv
+end submodule psb_d_oacc_csr_inner_vect_sv_impl
+
diff --git a/openacc/impl/psb_d_oacc_csr_mold.F90 b/openacc/impl/psb_d_oacc_csr_mold.F90
new file mode 100644
index 00000000..dc9ff711
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_mold.F90
@@ -0,0 +1,35 @@
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mold_impl
+  use psb_base_mod
+contains
+  module subroutine psb_d_oacc_csr_mold(a, b, info)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+    class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='csr_mold'
+    logical, parameter :: debug=.false.
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_d_oacc_csr_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_d_oacc_csr_mold +end submodule psb_d_oacc_csr_mold_impl + diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 new file mode 100644 index 00000000..0f020e06 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_mv_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_d_csr_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_csr_mv_from_coo +end submodule psb_d_oacc_csr_mv_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 new file mode 100644 index 00000000..cfd7d7dd --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call 
a%mv_from_coo(b, info)
+    class default
+      call a%free_dev_space()
+      call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info)
+      if (info /= 0) return
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    end select
+
+  end subroutine psb_d_oacc_csr_mv_from_fmt
+end submodule psb_d_oacc_csr_mv_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
new file mode 100644
index 00000000..8f746105
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
@@ -0,0 +1,28 @@
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_reallocate_nz_impl
+  use psb_base_mod
+contains
+  module subroutine psb_d_oacc_csr_reallocate_nz(nz, a)
+    implicit none
+    integer(psb_ipk_), intent(in) :: nz
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='d_oacc_csr_reallocate_nz'
+    logical, parameter :: debug=.false.
+
+    call psb_erractionsave(err_act)
+    info = psb_success_
+
+    call a%psb_d_csr_sparse_mat%reallocate(nz)
+    call a%sync_dev_space()
+    call a%set_host()
+    if (info /= 0) goto 9999
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_d_oacc_csr_reallocate_nz
+end submodule psb_d_oacc_csr_reallocate_nz_impl
diff --git a/openacc/impl/psb_d_oacc_csr_scal.F90 b/openacc/impl/psb_d_oacc_csr_scal.F90
new file mode 100644
index 00000000..cc693fa2
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_scal.F90
@@ -0,0 +1,65 @@
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scal_impl
+  use psb_base_mod
+contains
+  !
+  ! Scale the stored coefficients of a by the entries of d on the device.
+  !
+  ! Arguments:
+  !   d     scaling factors, indexed by row for side 'L' and by column
+  !         index (a%ja) for side 'R'; copied in for the kernel
+  !   a     matrix whose a%val entries are scaled in place
+  !   info  set to psb_success_
+  !   side  optional, 'L' or 'R' (case-insensitive), default 'L';
+  !         any other value leaves the coefficients untouched
+  !
+  ! NOTE(review): the 'L' default is meant to mirror the host CSR scal
+  ! routine - confirm against the base implementation.
+  !
+  module subroutine psb_d_oacc_csr_scal(d, a, info, side)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+    real(psb_dpk_), intent(in) :: d(:)
+    integer(psb_ipk_), intent(out) :: info
+    character, intent(in), optional :: side
+
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='scal'
+    logical, parameter :: debug=.false.
+    integer(psb_ipk_) :: i, j, m
+    character :: side_
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    side_ = 'L'
+    if (present(side)) side_ = psb_toupper(side)
+
+    if (a%is_host()) call a%sync()
+
+    ! a%irp is indexed by row, so both sweeps run over the rows.
+    m = a%get_nrows()
+
+    ! d comes from the caller and may live only on the host: copy it in.
+    if (side_ == 'L') then
+      !$acc parallel loop present(a) copyin(d)
+      do i = 1, m
+        do j = a%irp(i), a%irp(i+1) - 1
+          a%val(j) = a%val(j) * d(i)
+        end do
+      end do
+    else if (side_ == 'R') then
+      !$acc parallel loop present(a) copyin(d)
+      do i = 1, m
+        do j = a%irp(i), a%irp(i+1) - 1
+          a%val(j) = a%val(j) * d(a%ja(j))
+        end do
+      end do
+    end if
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+    return
+
+  end subroutine psb_d_oacc_csr_scal
+end submodule psb_d_oacc_csr_scal_impl
diff --git a/openacc/impl/psb_d_oacc_csr_scals.F90 b/openacc/impl/psb_d_oacc_csr_scals.F90
new file mode 100644
index 00000000..157355d8
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_scals.F90
@@ -0,0 +1,34 @@
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scals_impl
+  use psb_base_mod
+contains
+  module subroutine psb_d_oacc_csr_scals(d, a, info)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+    real(psb_dpk_), intent(in) :: d
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='scal'
+    logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_scals +end submodule psb_d_oacc_csr_scals_impl diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 new file mode 100644 index 00000000..a2efdc3e --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 @@ -0,0 +1,86 @@ +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n + character :: trans_ + logical :: device_done, tra + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + + if ((n > size(x%v)) .or. (m > size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (.not.tra) then + select type(xx => x) + class is (psb_d_vect_oacc) + select type (yy => y) + class is (psb_d_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. 
+ end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_d_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + contains + + subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), irp(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = irp(i), irp(i + 1) - 1 + tmp = tmp + val(j) * x(ja(j)) + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_d_oacc_csr_vect_mv +end submodule psb_d_oacc_csr_vect_mv_impl diff --git a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 new file mode 100644 index 00000000..ca0a0f84 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 @@ -0,0 +1,35 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_d_ell_sparse_mat%allocate(m, n, nz_) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_allocate_mnnz +end submodule psb_d_oacc_ell_allocate_mnnz_impl diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 new file mode 100644 index 00000000..384701ce --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_cp_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%free_dev_space() + call a%psb_d_ell_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_ell_cp_from_coo +end submodule psb_d_oacc_ell_cp_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 new file mode 100644 index 00000000..6622a642 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_cp_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b, info) + 
class default
+      call a%free_dev_space()
+      call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info)
+      if (info /= 0) return
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    end select
+
+  end subroutine psb_d_oacc_ell_cp_from_fmt
+end submodule psb_d_oacc_ell_cp_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90
new file mode 100644
index 00000000..466594fb
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90
@@ -0,0 +1,62 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_inner_vect_sv_impl
+  use psb_base_mod
+contains
+  !
+  ! Sparse solve ("sv") entry point operating on vector objects.
+  !
+  ! Arguments:
+  !   alpha, beta  scalar coefficients forwarded to the host kernel
+  !   a            matrix; must be in the assembled state
+  !   x, y         vector objects; sync() is called on both before the
+  !                solve and y%set_host() is called afterwards
+  !   info         psb_success_ on success, an error code otherwise
+  !   trans        optional transposition flag, forwarded unchanged
+  !
+  ! The work is always delegated to the inner_spsm kernel of the parent
+  ! ELL type.  An earlier device loop overwrote y%v(i) once per slot j,
+  ! with j running up to a%nzt instead of the column extent of a%val:
+  ! it was not a solve and could index past the end of a%val.
+  ! NOTE(review): a genuine device solve needs a dependency-aware
+  ! (e.g. level-scheduled) kernel; add one here when available.
+  !
+  module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+    implicit none
+    class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+    real(psb_dpk_), intent(in) :: alpha, beta
+    class(psb_d_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    integer(psb_ipk_) :: err_act
+    ! Long enough for the full routine name (len=20 truncated it).
+    character(len=24) :: name = 'd_oacc_ell_inner_vect_sv'
+    logical, parameter :: debug = .false.
+
+    call psb_get_erraction(err_act)
+    info = psb_success_
+
+    if (.not.a%is_asb()) then
+      info = psb_err_invalid_mat_state_
+      call psb_errpush(info, name)
+      goto 9999
+    endif
+
+    ! Bring x and y up to date, then run the host solve.
+    call x%sync()
+    call y%sync()
+    call a%psb_d_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+    call y%set_host()
+
+    if (info /= psb_success_) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info, name, a_err = 'ell_vect_sv')
+      goto 9999
+    endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_d_oacc_ell_inner_vect_sv
+end submodule psb_d_oacc_ell_inner_vect_sv_impl
diff --git a/openacc/impl/psb_d_oacc_ell_mold.F90 b/openacc/impl/psb_d_oacc_ell_mold.F90
new file mode 100644
index 00000000..767e7f13
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mold_impl
+  use psb_base_mod
+contains
+  module subroutine psb_d_oacc_ell_mold(a, b, info)
+    implicit none
+    class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+    class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name = 'ell_mold'
+    logical, parameter :: debug = .false.
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_d_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_d_oacc_ell_mold +end submodule psb_d_oacc_ell_mold_impl diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..7bddced9 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_d_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_ell_mv_from_coo +end submodule psb_d_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..53e45b98 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call 
a%mv_from_coo(b, info)
+    class default
+      call a%free_dev_space()
+      call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info)
+      if (info /= 0) return
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    end select
+
+  end subroutine psb_d_oacc_ell_mv_from_fmt
+end submodule psb_d_oacc_ell_mv_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90
new file mode 100644
index 00000000..130a931e
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90
@@ -0,0 +1,28 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_reallocate_nz_impl
+  use psb_base_mod
+contains
+  module subroutine psb_d_oacc_ell_reallocate_nz(nz, a)
+    implicit none
+    integer(psb_ipk_), intent(in) :: nz
+    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='d_oacc_ell_reallocate_nz'
+    logical, parameter :: debug=.false.
+
+    call psb_erractionsave(err_act)
+    info = psb_success_
+
+    call a%psb_d_ell_sparse_mat%reallocate(nz)
+    call a%sync_dev_space()
+    call a%set_host()
+    if (info /= 0) goto 9999
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_d_oacc_ell_reallocate_nz
+end submodule psb_d_oacc_ell_reallocate_nz_impl
diff --git a/openacc/impl/psb_d_oacc_ell_scal.F90 b/openacc/impl/psb_d_oacc_ell_scal.F90
new file mode 100644
index 00000000..39948d5f
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_scal.F90
@@ -0,0 +1,70 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scal_impl
+  use psb_base_mod
+contains
+  !
+  ! Scale the stored coefficients of a by the entries of d on the device.
+  !
+  ! Arguments:
+  !   d     scaling factors, indexed by row for side 'L' and by column
+  !         index (a%ja) for side 'R'; copied in for the kernel
+  !   a     matrix whose a%val entries are scaled in place
+  !   info  set to psb_success_
+  !   side  optional, 'L' or 'R' (case-insensitive), default 'L';
+  !         any other value leaves the coefficients untouched
+  !
+  ! The sweep covers every column of a%val, i.e. size(a%val, 2); the
+  ! ELL spmv routine likewise sweeps size(a%ja, 2), not a%nzt.
+  ! NOTE(review): the 'L' default is meant to mirror the host ELL scal
+  ! routine - confirm against the base implementation.
+  !
+  module subroutine psb_d_oacc_ell_scal(d, a, info, side)
+    implicit none
+    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+    real(psb_dpk_), intent(in) :: d(:)
+    integer(psb_ipk_), intent(out) :: info
+    character, intent(in), optional :: side
+
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='scal'
+    logical, parameter :: debug=.false.
+    integer(psb_ipk_) :: i, j, m, nc
+    character :: side_
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    side_ = 'L'
+    if (present(side)) side_ = psb_toupper(side)
+
+    if (a%is_host()) call a%sync()
+
+    m  = a%get_nrows()
+    nc = size(a%val, 2)
+
+    ! d comes from the caller and may live only on the host: copy it in.
+    if (side_ == 'L') then
+      !$acc parallel loop collapse(2) present(a) copyin(d)
+      do i = 1, m
+        do j = 1, nc
+          a%val(i, j) = a%val(i, j) * d(i)
+        end do
+      end do
+    else if (side_ == 'R') then
+      !$acc parallel loop collapse(2) present(a) copyin(d)
+      do i = 1, m
+        do j = 1, nc
+          ! Skip slots without a positive column index, as the spmv kernel does.
+          if (a%ja(i, j) > 0) then
+            a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
+          end if
+        end do
+      end do
+    end if
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+    return
+
+  end subroutine psb_d_oacc_ell_scal
+end submodule psb_d_oacc_ell_scal_impl
diff --git a/openacc/impl/psb_d_oacc_ell_scals.F90 b/openacc/impl/psb_d_oacc_ell_scals.F90
new file mode 100644
index 00000000..a6292b72
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_scals.F90
@@ -0,0 +1,46 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scals_impl
+  use psb_base_mod
+contains
+  !
+  ! Multiply every stored coefficient of a by the scalar d on the device.
+  !
+  ! Arguments:
+  !   d     scalar factor
+  !   a     matrix whose a%val entries are scaled in place
+  !   info  set to psb_success_
+  !
+  module subroutine psb_d_oacc_ell_scals(d, a, info)
+    implicit none
+    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+    real(psb_dpk_), intent(in) :: d
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='scal'
+    logical, parameter :: debug=.false.
+    integer(psb_ipk_) :: i, j, nc, m
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    if (a%is_host()) call a%sync()
+
+    m  = a%get_nrows()
+    ! Column extent of the coefficient array.  a%nzt is not used as the
+    ! bound: the ELL spmv routine sweeps size(a%ja, 2) instead.
+    nc = size(a%val, 2)
+
+    !$acc parallel loop collapse(2) present(a)
+    do i = 1, m
+      do j = 1, nc
+        a%val(i, j) = a%val(i, j) * d
+      end do
+    end do
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+    return
+
+  end subroutine psb_d_oacc_ell_scals
+end submodule psb_d_oacc_ell_scals_impl
diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
new file mode 100644
index 00000000..b233669d
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
@@ -0,0 +1,90 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_vect_mv_impl
+  use psb_base_mod
+contains
+  module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+    implicit none
+
+    real(psb_dpk_), intent(in) :: alpha, beta
+    class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+    class(psb_d_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    integer(psb_ipk_) :: m, n, nzt, nc
+    character :: trans_
+    logical :: device_done, tra
+
+    info = psb_success_
+    m = a%get_nrows()
+    n = a%get_ncols()
+    nzt = a%nzt
+    nc = size(a%ja,2)
+    if ((n > size(x%v)) .or. (m > size(y%v))) then
+      write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
+      info = psb_err_invalid_mat_state_
+      return
+    end if
+    device_done = .false.
+    if (present(trans)) then
+      trans_ = trans
+    else
+      trans_ = 'N'
+    end if
+    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+    if (.not.tra) then
+      select type(xx => x)
+      class is (psb_d_vect_oacc)
+        select type (yy => y)
+        class is (psb_d_vect_oacc)
+          if (a%is_host()) call a%sync()
+          if (xx%is_host()) call xx%sync()
+          if (yy%is_host()) call yy%sync()
+          call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
+          call y%set_dev()
+          device_done = .true.
+ end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_d_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + + contains + + subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nc + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nc + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_d_oacc_ell_vect_mv +end submodule psb_d_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..a30a0b00 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,36 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. 
+ integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_d_hll_sparse_mat%allocate(m, n, nz_) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_allocate_mnnz +end submodule psb_d_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..34a31b0b --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_d_hll_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_hll_cp_from_coo +end submodule psb_d_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..7d14f65c --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is 
(psb_d_coo_sparse_mat)
+      call a%cp_from_coo(b, info)
+    class default
+      call a%free_dev_space()
+      call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info)
+      if (info /= 0) return
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    end select
+
+  end subroutine psb_d_oacc_hll_cp_from_fmt
+end submodule psb_d_oacc_hll_cp_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90
new file mode 100644
index 00000000..ff6a4580
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90
@@ -0,0 +1,62 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_inner_vect_sv_impl
+  use psb_base_mod
+contains
+  !
+  ! Sparse solve ("sv") entry point operating on vector objects.
+  !
+  ! Arguments:
+  !   alpha, beta  scalar coefficients forwarded to the host kernel
+  !   a            matrix; must be in the assembled state
+  !   x, y         vector objects; sync() is called on both before the
+  !                solve and y%set_host() is called afterwards
+  !   info         psb_success_ on success, an error code otherwise
+  !   trans        optional transposition flag, forwarded unchanged
+  !
+  ! The work is always delegated to the inner_spsm kernel of the parent
+  ! HLL type.  An earlier device loop updated y%v(a%irn(j)) one stored
+  ! entry at a time from x alone; it never used already computed
+  ! unknowns, so it was not a solve.
+  ! NOTE(review): a genuine device solve needs a dependency-aware
+  ! (e.g. level-scheduled) kernel; add one here when available.
+  !
+  module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+    real(psb_dpk_), intent(in) :: alpha, beta
+    class(psb_d_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    integer(psb_ipk_) :: err_act
+    ! Long enough for the full routine name (len=20 truncated it).
+    character(len=24) :: name = 'd_oacc_hll_inner_vect_sv'
+    logical, parameter :: debug = .false.
+
+    call psb_get_erraction(err_act)
+    info = psb_success_
+
+    if (.not.a%is_asb()) then
+      info = psb_err_invalid_mat_state_
+      call psb_errpush(info, name)
+      goto 9999
+    endif
+
+    ! Bring x and y up to date, then run the host solve.
+    call x%sync()
+    call y%sync()
+    call a%psb_d_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+    call y%set_host()
+
+    if (info /= psb_success_) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info, name, a_err = 'hll_vect_sv')
+      goto 9999
+    endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_d_oacc_hll_inner_vect_sv
+end submodule psb_d_oacc_hll_inner_vect_sv_impl
diff --git a/openacc/impl/psb_d_oacc_hll_mold.F90 b/openacc/impl/psb_d_oacc_hll_mold.F90
new file mode 100644
index 00000000..89ead65b
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mold_impl
+  use psb_base_mod
+contains
+  module subroutine psb_d_oacc_hll_mold(a, b, info)
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+    class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name = 'hll_mold'
+    logical, parameter :: debug = .false.
+
+    call psb_get_erraction(err_act)
+
+    info = 0
+    if (allocated(b)) then
+      call b%free()
+      deallocate(b, stat=info)
+    end if
+    if (info == 0) allocate(psb_d_oacc_hll_sparse_mat :: b, stat=info)
+
+    if (info /= psb_success_) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info, name)
+      goto 9999
+    end if
+    return
+
+9999 call psb_error_handler(err_act)
+
+    return
+
+  end subroutine psb_d_oacc_hll_mold
+end submodule psb_d_oacc_hll_mold_impl
diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
new file mode 100644
index 00000000..0f0ce6f1
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
@@ -0,0 +1,35 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_coo_impl
+  use psb_base_mod
+contains
+  !
+  ! Move the COO matrix b into a, then run the device refresh sequence.
+  !
+  !   a    : destination; its device storage is released before the host
+  !          conversion
+  !   b    : source COO matrix, handed to the parent type's mv_from_coo
+  !   info : psb_success_, or psb_err_alloc_dealloc_ whenever the host
+  !          conversion reports a non-zero code
+  !
+  module subroutine psb_d_oacc_hll_mv_from_coo(a, b, info)
+    implicit none
+
+    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+    class(psb_d_coo_sparse_mat), intent(inout) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    call a%free_dev_space()
+    call a%psb_d_hll_sparse_mat%mv_from_coo(b, info)
+
+    if (info == 0) then
+      ! Host copy is the current one: flag it and synchronise.
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    else
+      info = psb_err_alloc_dealloc_
+    end if
+
+  end subroutine psb_d_oacc_hll_mv_from_coo
+end submodule psb_d_oacc_hll_mv_from_coo_impl
diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
new file mode 100644
index 00000000..1fcfa4f6
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
@@ -0,0 +1,25 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_fmt_impl
+  use psb_base_mod
+contains
+  module subroutine psb_d_oacc_hll_mv_from_fmt(a, b, info)
+    implicit none
+
+    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+    class(psb_d_base_sparse_mat), intent(inout) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    select type(b)
+    type is (psb_d_coo_sparse_mat)
+      call
a%mv_from_coo(b, info)
+    class default
+      call a%free_dev_space()
+      call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info)
+      if (info /= 0) return
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    end select
+  end subroutine psb_d_oacc_hll_mv_from_fmt
+end submodule psb_d_oacc_hll_mv_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90
new file mode 100644
index 00000000..21f5c48a
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90
@@ -0,0 +1,29 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_reallocate_nz_impl
+  use psb_base_mod
+contains
+  !
+  ! Resize the host storage of a for nz entries through the parent HLL
+  ! type, then call sync_dev_space and flag the host copy as current.
+  !
+  !   nz : requested number of entries
+  !   a  : matrix to resize
+  !
+  ! The parent reallocate reports no status, so there is no failure path
+  ! here; the error action is still saved and restored around the call.
+  !
+  module subroutine psb_d_oacc_hll_reallocate_nz(nz, a)
+    implicit none
+    integer(psb_ipk_), intent(in) :: nz
+    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: err_act
+
+    call psb_erractionsave(err_act)
+
+    call a%psb_d_hll_sparse_mat%reallocate(nz)
+    call a%sync_dev_space()
+    call a%set_host()
+
+    call psb_erractionrestore(err_act)
+
+  end subroutine psb_d_oacc_hll_reallocate_nz
+end submodule psb_d_oacc_hll_reallocate_nz_impl
diff --git a/openacc/impl/psb_d_oacc_hll_scal.F90 b/openacc/impl/psb_d_oacc_hll_scal.F90
new file mode 100644
index 00000000..c0c284ef
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_scal.F90
@@ -0,0 +1,70 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scal_impl
+  use psb_base_mod
+contains
+  !
+  ! Scale the stored coefficients of a by the entries of d, on the device.
+  !
+  !   d    : scaling factors, indexed by row ('L') or by column ('R')
+  !   a    : matrix, updated in place and flagged device-current on exit
+  !   info : always psb_success_
+  !   side : optional 'L' (default) or 'R', case-insensitive; any other
+  !          value leaves the coefficients untouched
+  !
+  ! Storage layout, as used by psb_d_oacc_hll_vect_mv: hkoffs holds
+  ! zero-based offsets, so hack i owns val(hkoffs(i)+1:hkoffs(i+1)), and
+  ! inside a hack consecutive entries belong to consecutive rows (entries
+  ! of one row are hksz apart).
+  !
+  module subroutine psb_d_oacc_hll_scal(d, a, info, side)
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+    real(psb_dpk_), intent(in) :: d(:)
+    integer(psb_ipk_), intent(out) :: info
+    character, intent(in), optional :: side
+
+    integer(psb_ipk_) :: err_act
+    integer(psb_ipk_) :: i, j, k, hksz, nhacks, nr
+    character :: side_
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    if (a%is_host()) call a%sync()
+
+    hksz   = a%hksz
+    nr     = a%get_nrows()
+    nhacks = (nr + hksz - 1) / hksz
+
+    ! NOTE(review): a missing side is treated as 'L', assumed to match the
+    ! host scal of the base formats - confirm.
+    side_ = 'L'
+    if (present(side)) side_ = psb_toupper(side)
+
+    ! NOTE(review): present(d) requires d to be on the device already -
+    ! confirm with the callers, or switch the clause to copyin(d).
+    select case (side_)
+    case ('L')
+      !$acc parallel loop present(a, d)
+      do i = 1, nhacks
+        !$acc loop independent private(k)
+        do j = a%hkoffs(i) + 1, a%hkoffs(i + 1)
+          ! Row owning entry j; padding rows past nr are skipped.
+          k = (i - 1) * hksz + mod(j - a%hkoffs(i) - 1, hksz) + 1
+          if (k <= nr) a%val(j) = a%val(j) * d(k)
+        end do
+      end do
+    case ('R')
+      !$acc parallel loop present(a, d)
+      do i = 1, nhacks
+        do j = a%hkoffs(i) + 1, a%hkoffs(i + 1)
+          a%val(j) = a%val(j) * d(a%ja(j))
+        end do
+      end do
+    end select
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+
+  end subroutine psb_d_oacc_hll_scal
+end submodule psb_d_oacc_hll_scal_impl
diff --git a/openacc/impl/psb_d_oacc_hll_scals.F90 b/openacc/impl/psb_d_oacc_hll_scals.F90
new file mode 100644
index 00000000..1e3457b5
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_scals.F90
@@ -0,0 +1,43 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scals_impl
+  use psb_base_mod
+contains
+  !
+  ! Multiply every stored coefficient of a by the scalar d, on the device.
+  !
+  !   d    : scaling factor
+  !   a    : matrix, updated in place and flagged device-current on exit
+  !   info : always psb_success_
+  !
+  ! hkoffs holds zero-based offsets (see psb_d_oacc_hll_vect_mv), hence
+  ! hack i owns val(hkoffs(i)+1:hkoffs(i+1)).
+  !
+  module subroutine psb_d_oacc_hll_scals(d, a, info)
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+    real(psb_dpk_), intent(in) :: d
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: err_act
+    integer(psb_ipk_) :: i, j, hksz, nhacks
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    if (a%is_host()) call a%sync()
+
+    hksz   = a%hksz
+    nhacks = (a%get_nrows() + hksz - 1) / hksz
+
+    !$acc parallel loop present(a)
+    do i = 1, nhacks
+      do j = a%hkoffs(i) + 1, a%hkoffs(i + 1)
+        a%val(j) = a%val(j) * d
+      end do
+    end do
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+
+  end subroutine psb_d_oacc_hll_scals
+end submodule psb_d_oacc_hll_scals_impl
diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
new file mode 100644
index 00000000..150ade8e
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
@@ -0,0 +1,107 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_vect_mv_impl
+  use psb_base_mod
+contains
+  !
+  ! Sparse matrix-vector product; the device kernel computes
+  ! y = alpha*a*x + beta*y.
+  !
+  !   alpha, beta : scalar coefficients
+  !   a           : matrix
+  !   x, y        : input vector and vector updated in place
+  !   info        : psb_success_, psb_err_invalid_mat_state_ when x or y
+  !                 is shorter than the matrix dimensions, or the code
+  !                 returned by the host product
+  !   trans       : optional 'N' (default), 'T' or 'C'
+  !
+  ! The device kernel is used only for the non-transposed product with both
+  ! vectors of class psb_d_vect_oacc; every other case goes through the
+  ! host spmm of the parent HLL type.
+  !
+  module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+    implicit none
+
+    real(psb_dpk_), intent(in) :: alpha, beta
+    class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+    class(psb_d_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    integer(psb_ipk_) :: m, n, nhacks, hksz
+    character :: trans_
+    logical :: device_done, tra
+
+    info = psb_success_
+    m = a%get_nrows()
+    n = a%get_ncols()
+    nhacks = size(a%hkoffs) - 1
+    hksz = a%hksz
+
+    if ((n > size(x%v)) .or. (m > size(y%v))) then
+      write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+      info = psb_err_invalid_mat_state_
+      return
+    end if
+
+    trans_ = 'N'
+    if (present(trans)) trans_ = trans
+    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+    device_done = .false.
+    if (.not.tra) then
+      select type(xx => x)
+      class is (psb_d_vect_oacc)
+        select type (yy => y)
+        class is (psb_d_vect_oacc)
+          if (a%is_host()) call a%sync()
+          if (xx%is_host()) call xx%sync()
+          if (yy%is_host()) call yy%sync()
+          call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
+          call y%set_dev()
+          device_done = .true.
+        end select
+      end select
+    end if
+
+    if (.not.device_done) then
+      if (x%is_dev()) call x%sync()
+      if (y%is_dev()) call y%sync()
+      call a%psb_d_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+      call y%set_host()
+    end if
+  contains
+
+    ! Device kernel for y = alpha*a*x + beta*y.  Hack i covers rows
+    ! (i-1)*hksz+1 .. min(i*hksz, m); its entries start at hkoffs(i)+1 and
+    ! two consecutive entries of one row are hksz apart.
+    subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
+      implicit none
+      integer(psb_ipk_) :: m, nhacks, hksz
+      real(psb_dpk_), intent(in) :: alpha, beta
+      real(psb_dpk_) :: val(:), x(:), y(:)
+      integer(psb_ipk_) :: ja(:), hkoffs(:)
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_ipk_) :: i, j, ipnt, ir, nr, nlc, isz, ii
+      real(psb_dpk_) :: tmp
+
+      info = 0
+      !$acc parallel loop private(nlc, isz,ir,nr)
+      do i = 1, nhacks
+        isz = hkoffs(i + 1) - hkoffs(i)
+        nlc = isz/hksz
+        ir = (i-1)*hksz
+        nr = min(hksz,m-ir)
+        !$acc loop independent private(tmp,ii,ipnt)
+        do ii = 1, nr
+          ipnt = hkoffs(i) + ii
+          tmp = dzero
+          !$acc loop seq
+          do j = 1, nlc
+            tmp = tmp + val(ipnt) * x(ja(ipnt))
+            ipnt = ipnt + hksz
+          end do
+          y(ii+ir) = alpha * tmp + beta * y(ii+ir)
+        end do
+      end do
+    end subroutine inner_spmv
+  end subroutine psb_d_oacc_hll_vect_mv
+end submodule psb_d_oacc_hll_vect_mv_impl
diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90
new file mode 100644
index 00000000..8b3a05b1
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_mlt_v.f90
@@ -0,0 +1,51 @@
+!
+! Element-wise product y(i) = x(i)*y(i) for i = 1..min(rows(x), rows(y)).
+!
+!   x    : first factor
+!   y    : second factor, overwritten with the result
+!   info : always 0
+!
+! Runs on the device when x is of class psb_d_vect_oacc, on the host
+! otherwise; y is flagged device- or host-current accordingly.
+!
+subroutine psb_d_oacc_mlt_v(x, y, info)
+  use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v
+
+  implicit none
+  class(psb_d_base_vect_type), intent(inout) :: x
+  class(psb_d_vect_oacc), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: i, n
+
+  info = 0
+  n = min(x%get_nrows(), y%get_nrows())
+
+  select type(xx => x)
+  class is (psb_d_vect_oacc)
+    if (y%is_host()) call y%sync()
+    if (xx%is_host()) call xx%sync()
+    call d_inner_oacc_mlt_v(n, xx%v, y%v)
+    call y%set_dev()
+  class default
+    if (xx%is_dev()) call xx%sync()
+    if (y%is_dev()) call y%sync()
+    do i = 1, n
+      y%v(i) = y%v(i) * xx%v(i)
+    end do
+    call y%set_host()
+  end select
+contains
+  ! Device loop; x and y must already be present on the device.
+  subroutine d_inner_oacc_mlt_v(n, x, y)
+    implicit none
+    integer(psb_ipk_), intent(in) :: n
+    real(psb_dpk_), intent(inout) :: x(:), y(:)
+
+    integer(psb_ipk_) :: i
+    !$acc parallel loop present(x,y)
+    do i = 1, n
+      y(i) = (x(i)) * (y(i))
+    end do
+  end subroutine d_inner_oacc_mlt_v
+end subroutine psb_d_oacc_mlt_v
diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90
new file mode 100644
index 00000000..7850329a
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90
@@ -0,0 +1,72 @@
+!
+! Element-wise z(i) = alpha*x(i)*y(i) + beta*z(i) for
+! i = 1..min(rows(x), rows(y), rows(z)).
+!
+!   alpha, beta    : scalar coefficients
+!   x, y           : factors
+!   z              : accumulator, updated in place
+!   info           : always 0
+!   conjgx, conjgy : accepted for interface compatibility; the data are
+!                    real, so conjugation has no effect on the result
+!
+! Runs on the device when both x and y are of class psb_d_vect_oacc, on
+! the host otherwise.
+!
+subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+  use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v_2
+  implicit none
+  real(psb_dpk_), intent(in) :: alpha, beta
+  class(psb_d_base_vect_type), intent(inout) :: x
+  class(psb_d_base_vect_type), intent(inout) :: y
+  class(psb_d_vect_oacc), intent(inout) :: z
+  integer(psb_ipk_), intent(out) :: info
+  character(len=1), intent(in), optional :: conjgx, conjgy
+  integer(psb_ipk_) :: i, n
+  logical :: device_done
+
+  device_done = .false.
+  n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+  info = 0
+
+  select type(xx => x)
+  class is (psb_d_vect_oacc)
+    select type (yy => y)
+    class is (psb_d_vect_oacc)
+      if (xx%is_host()) call xx%sync()
+      if (yy%is_host()) call yy%sync()
+      if ((beta /= dzero) .and. (z%is_host())) call z%sync()
+      call d_inner_oacc_mlt_v_2(n, alpha, xx%v, yy%v, beta, z%v, info)
+      call z%set_dev()
+      device_done = .true.
+    end select
+  end select
+
+  if (.not.device_done) then
+    if (x%is_dev()) call x%sync()
+    if (y%is_dev()) call y%sync()
+    if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
+    do i = 1, n
+      z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+    end do
+    call z%set_host()
+  end if
+
+contains
+  ! Device loop; x, y and z must already be present on the device.
+  subroutine d_inner_oacc_mlt_v_2(n, alpha, x, y, beta, z, info)
+    implicit none
+    integer(psb_ipk_), intent(in) :: n
+    real(psb_dpk_), intent(in) :: alpha, beta
+    real(psb_dpk_), intent(inout) :: x(:), y(:), z(:)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: i
+
+    ! intent(out): must be defined here, or the caller's info is undefined
+    info = 0
+    !$acc parallel loop present(x,y,z)
+    do i = 1, n
+      z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+    end do
+  end subroutine d_inner_oacc_mlt_v_2
+end subroutine psb_d_oacc_mlt_v_2
diff --git a/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90
new file mode 100644
index 00000000..0d531129
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90
@@ -0,0 +1,29 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_allocate_mnnz_impl
+  use psb_base_mod
+contains
+  module subroutine
psb_s_oacc_csr_allocate_mnnz(m, n, a, nz)
+    implicit none
+    integer(psb_ipk_), intent(in) :: m, n
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in), optional :: nz
+    integer(psb_ipk_) :: info
+    integer(psb_ipk_) :: err_act, nz_
+    character(len=20) :: name='allocate_mnz'
+    logical, parameter :: debug=.false.
+
+    call psb_erractionsave(err_act)
+    info = psb_success_
+
+    call a%psb_s_csr_sparse_mat%allocate(m, n, nz)
+    call a%set_host()
+    call a%sync_dev_space()
+    if (info /= 0) goto 9999
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_s_oacc_csr_allocate_mnnz
+end submodule psb_s_oacc_csr_allocate_mnnz_impl
diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
new file mode 100644
index 00000000..f740cf18
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
@@ -0,0 +1,35 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_coo_impl
+  use psb_base_mod
+contains
+  !
+  ! Copy the COO matrix b into a, then run the device refresh sequence.
+  !
+  !   a    : destination; its device storage is released before the host
+  !          conversion
+  !   b    : source COO matrix (read only)
+  !   info : psb_success_, or psb_err_alloc_dealloc_ whenever the host
+  !          conversion reports a non-zero code
+  !
+  module subroutine psb_s_oacc_csr_cp_from_coo(a, b, info)
+    implicit none
+
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    class(psb_s_coo_sparse_mat), intent(in) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    call a%free_dev_space()
+    call a%psb_s_csr_sparse_mat%cp_from_coo(b, info)
+
+    if (info == 0) then
+      ! Host copy is the current one: flag it and synchronise.
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    else
+      info = psb_err_alloc_dealloc_
+    end if
+
+  end subroutine psb_s_oacc_csr_cp_from_coo
+end submodule psb_s_oacc_csr_cp_from_coo_impl
diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
new file mode 100644
index 00000000..fc495668
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
@@ -0,0 +1,26 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_fmt_impl
+  use psb_base_mod
+contains
+  module subroutine psb_s_oacc_csr_cp_from_fmt(a,
b, info)
+    implicit none
+
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    class(psb_s_base_sparse_mat), intent(in) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    select type(b)
+    type is (psb_s_coo_sparse_mat)
+      call a%cp_from_coo(b, info)
+    class default
+      call a%free_dev_space()
+      call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info)
+      if (info /= 0) return
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    end select
+
+  end subroutine psb_s_oacc_csr_cp_from_fmt
+end submodule psb_s_oacc_csr_cp_from_fmt_impl
diff --git a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
new file mode 100644
index 00000000..b1785b49
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
@@ -0,0 +1,98 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_inner_vect_sv_impl
+  use psb_base_mod
+contains
+  !
+  ! Solve-type entry point for the OpenACC CSR matrix: updates y from
+  ! alpha, a, x and beta.
+  !
+  !   alpha, beta : scalar coefficients
+  !   a           : matrix; must be assembled, otherwise
+  !                 psb_err_invalid_mat_state_ is raised
+  !   x, y        : input vector and vector updated in place
+  !   info        : psb_success_, or an error code that has also been
+  !                 pushed on the error stack
+  !   trans       : optional 'N' (default), 'T' or 'C', case-insensitive
+  !
+  ! Transposed requests and every call with beta /= 0 are delegated to the
+  ! host implementation of the parent CSR type; vectors that are not
+  ! psb_s_vect_oacc are handled through host copies.
+  !
+  module subroutine psb_s_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+    real(psb_spk_), intent(in) :: alpha, beta
+    class(psb_s_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    real(psb_spk_), allocatable :: rx(:), ry(:)
+    logical :: tra
+    character :: trans_
+    integer(psb_ipk_) :: err_act
+    ! len=* keeps the whole routine name (24 characters) in error reports;
+    ! a fixed len=20 would cut it short.
+    character(len=*), parameter :: name = 's_oacc_csr_inner_vect_sv'
+    integer(psb_ipk_) :: i
+
+    call psb_get_erraction(err_act)
+    info = psb_success_
+
+    trans_ = 'N'
+    if (present(trans)) trans_ = trans
+
+    if (.not.a%is_asb()) then
+      info = psb_err_invalid_mat_state_
+      call psb_errpush(info, name)
+      goto 9999
+    endif
+
+    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+    if (tra .or. (beta /= szero)) then
+      call x%sync()
+      call y%sync()
+      call a%psb_s_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+      call y%set_host()
+    else
+      ! From here on beta == szero, so no beta-dependent refresh of y is
+      ! performed before the kernel.
+      select type (xx => x)
+      type is (psb_s_vect_oacc)
+        select type(yy => y)
+        type is (psb_s_vect_oacc)
+          if (xx%is_host()) call xx%sync()
+          ! NOTE(review): i runs over the stored coefficients yet also
+          ! indexes yy%v, and the loop body does not look like a triangular
+          ! solve; confirm against the host inner_spsm before relying on it.
+          !$acc parallel loop present(a, xx, yy)
+          do i = 1, size(a%val)
+            yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i)
+          end do
+          call yy%set_dev()
+        class default
+          rx = xx%get_vect()
+          ry = y%get_vect()
+          call a%psb_s_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+          call y%bld(ry)
+        end select
+      class default
+        rx = x%get_vect()
+        ry = y%get_vect()
+        call a%psb_s_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+        call y%bld(ry)
+      end select
+    endif
+
+    if (info /= psb_success_) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info, name, a_err = 'csrg_vect_sv')
+      goto 9999
+    endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_s_oacc_csr_inner_vect_sv
+end submodule psb_s_oacc_csr_inner_vect_sv_impl
diff --git a/openacc/impl/psb_s_oacc_csr_mold.F90 b/openacc/impl/psb_s_oacc_csr_mold.F90
new file mode 100644
index 00000000..95bddde8
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_mold.F90
@@ -0,0 +1,35 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mold_impl
+  use psb_base_mod
+contains
+  module subroutine psb_s_oacc_csr_mold(a, b, info)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+    class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='csr_mold'
+    logical, parameter :: debug=.false.
+
+    call psb_get_erraction(err_act)
+
+    info = 0
+    if (allocated(b)) then
+      call b%free()
+      deallocate(b, stat=info)
+    end if
+    if (info == 0) allocate(psb_s_oacc_csr_sparse_mat :: b, stat=info)
+
+    if (info /= psb_success_) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info, name)
+      goto 9999
+    end if
+    return
+
+9999 call psb_error_handler(err_act)
+
+    return
+
+  end subroutine psb_s_oacc_csr_mold
+end submodule psb_s_oacc_csr_mold_impl
+
diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
new file mode 100644
index 00000000..0c92e476
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
@@ -0,0 +1,35 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_coo_impl
+  use psb_base_mod
+contains
+  !
+  ! Move the COO matrix b into a, then run the device refresh sequence.
+  !
+  !   a    : destination; its device storage is released before the host
+  !          conversion
+  !   b    : source COO matrix, handed to the parent type's mv_from_coo
+  !   info : psb_success_, or psb_err_alloc_dealloc_ whenever the host
+  !          conversion reports a non-zero code
+  !
+  module subroutine psb_s_oacc_csr_mv_from_coo(a, b, info)
+    implicit none
+
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    class(psb_s_coo_sparse_mat), intent(inout) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    call a%free_dev_space()
+    call a%psb_s_csr_sparse_mat%mv_from_coo(b, info)
+
+    if (info == 0) then
+      ! Host copy is the current one: flag it and synchronise.
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    else
+      info = psb_err_alloc_dealloc_
+    end if
+
+  end subroutine psb_s_oacc_csr_mv_from_coo
+end submodule psb_s_oacc_csr_mv_from_coo_impl
diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
new file mode 100644
index 00000000..1c61eb3b
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
@@ -0,0 +1,26 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_fmt_impl
+  use psb_base_mod
+contains
+  module subroutine psb_s_oacc_csr_mv_from_fmt(a, b, info)
+    implicit none
+
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    class(psb_s_base_sparse_mat), intent(inout) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    select type(b)
+    type is (psb_s_coo_sparse_mat)
+      call
a%mv_from_coo(b, info)
+    class default
+      call a%free_dev_space()
+      call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info)
+      if (info /= 0) return
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    end select
+
+  end subroutine psb_s_oacc_csr_mv_from_fmt
+end submodule psb_s_oacc_csr_mv_from_fmt_impl
diff --git a/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90
new file mode 100644
index 00000000..e49bf2c8
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90
@@ -0,0 +1,29 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_reallocate_nz_impl
+  use psb_base_mod
+contains
+  !
+  ! Resize the host storage of a for nz entries through the parent CSR
+  ! type, then call sync_dev_space and flag the host copy as current.
+  !
+  !   nz : requested number of entries
+  !   a  : matrix to resize
+  !
+  ! The parent reallocate reports no status, so there is no failure path
+  ! here; the error action is still saved and restored around the call.
+  !
+  module subroutine psb_s_oacc_csr_reallocate_nz(nz, a)
+    implicit none
+    integer(psb_ipk_), intent(in) :: nz
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: err_act
+
+    call psb_erractionsave(err_act)
+
+    call a%psb_s_csr_sparse_mat%reallocate(nz)
+    call a%sync_dev_space()
+    call a%set_host()
+
+    call psb_erractionrestore(err_act)
+
+  end subroutine psb_s_oacc_csr_reallocate_nz
+end submodule psb_s_oacc_csr_reallocate_nz_impl
diff --git a/openacc/impl/psb_s_oacc_csr_scal.F90 b/openacc/impl/psb_s_oacc_csr_scal.F90
new file mode 100644
index 00000000..b9c8a986
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_scal.F90
@@ -0,0 +1,62 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scal_impl
+  use psb_base_mod
+contains
+  !
+  ! Scale the stored coefficients of a by the entries of d, on the device.
+  !
+  !   d    : scaling factors, indexed by row ('L') or by column ('R')
+  !   a    : matrix, updated in place and flagged device-current on exit
+  !   info : always psb_success_
+  !   side : optional 'L' (default) or 'R', case-insensitive; any other
+  !          value leaves the coefficients untouched
+  !
+  module subroutine psb_s_oacc_csr_scal(d, a, info, side)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    real(psb_spk_), intent(in) :: d(:)
+    integer(psb_ipk_), intent(out) :: info
+    character, intent(in), optional :: side
+
+    integer(psb_ipk_) :: err_act
+    integer(psb_ipk_) :: i, j, nr
+    character :: side_
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    if (a%is_host()) call a%sync()
+
+    nr = a%get_nrows()
+
+    ! NOTE(review): a missing side is treated as 'L', assumed to match the
+    ! host scal of the base formats - confirm.
+    side_ = 'L'
+    if (present(side)) side_ = psb_toupper(side)
+
+    ! NOTE(review): present(d) requires d to be on the device already -
+    ! confirm with the callers, or switch the clause to copyin(d).
+    select case (side_)
+    case ('L')
+      !$acc parallel loop present(a, d)
+      do i = 1, nr
+        do j = a%irp(i), a%irp(i+1) - 1
+          a%val(j) = a%val(j) * d(i)
+        end do
+      end do
+    case ('R')
+      ! irp is a row pointer, so the outer loop spans the rows even
+      ! though d is indexed by column.
+      !$acc parallel loop present(a, d)
+      do i = 1, nr
+        do j = a%irp(i), a%irp(i+1) - 1
+          a%val(j) = a%val(j) * d(a%ja(j))
+        end do
+      end do
+    end select
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+
+  end subroutine psb_s_oacc_csr_scal
+end submodule psb_s_oacc_csr_scal_impl
diff --git a/openacc/impl/psb_s_oacc_csr_scals.F90 b/openacc/impl/psb_s_oacc_csr_scals.F90
new file mode 100644
index 00000000..76ad7cf2
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_scals.F90
@@ -0,0 +1,35 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scals_impl
+  use psb_base_mod
+contains
+  !
+  ! Multiply every stored coefficient of a by the scalar d, on the device.
+  !
+  !   d    : scaling factor
+  !   a    : matrix, updated in place and flagged device-current on exit
+  !   info : always psb_success_
+  !
+  module subroutine psb_s_oacc_csr_scals(d, a, info)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    real(psb_spk_), intent(in) :: d
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: err_act
+    integer(psb_ipk_) :: i
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    if (a%is_host()) call a%sync()
+
+    !$acc parallel loop present(a)
+    do i = 1, size(a%val)
+      a%val(i) = a%val(i) * d
+    end do
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+
+  end subroutine psb_s_oacc_csr_scals
+end submodule psb_s_oacc_csr_scals_impl
diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
new file mode 100644
index 00000000..5d3cc30c
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
@@ -0,0 +1,103 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_vect_mv_impl
+  use psb_base_mod
+contains
+  !
+  ! Sparse matrix-vector product; the device kernel computes
+  ! y = alpha*a*x + beta*y.
+  !
+  !   alpha, beta : scalar coefficients
+  !   a           : matrix
+  !   x, y        : input vector and vector updated in place
+  !   info        : psb_success_, psb_err_invalid_mat_state_ when x or y
+  !                 is shorter than the matrix dimensions, or the code
+  !                 returned by the host product
+  !   trans       : optional 'N' (default), 'T' or 'C'
+  !
+  ! The device kernel is used only for the non-transposed product with both
+  ! vectors of class psb_s_vect_oacc; every other case goes through the
+  ! host spmm of the parent CSR type.
+  !
+  module subroutine psb_s_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+    implicit none
+
+    real(psb_spk_), intent(in) :: alpha, beta
+    class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+    class(psb_s_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    integer(psb_ipk_) :: m, n
+    character :: trans_
+    logical :: device_done, tra
+
+    info = psb_success_
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((n > size(x%v)) .or. (m > size(y%v))) then
+      write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
+      info = psb_err_invalid_mat_state_
+      return
+    end if
+
+    trans_ = 'N'
+    if (present(trans)) trans_ = trans
+    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+    device_done = .false.
+    if (.not.tra) then
+      select type(xx => x)
+      class is (psb_s_vect_oacc)
+        select type (yy => y)
+        class is (psb_s_vect_oacc)
+          if (a%is_host()) call a%sync()
+          if (xx%is_host()) call xx%sync()
+          if (yy%is_host()) call yy%sync()
+          call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+          call y%set_dev()
+          device_done = .true.
+        end select
+      end select
+    end if
+
+    if (.not.device_done) then
+      if (x%is_dev()) call x%sync()
+      if (y%is_dev()) call y%sync()
+      call a%psb_s_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+      call y%set_host()
+    end if
+  contains
+
+    ! Device kernel for y = alpha*a*x + beta*y; rows are processed in
+    ! chunks of vsz.
+    subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
+      implicit none
+      integer(psb_ipk_) :: m, n
+      real(psb_spk_), intent(in) :: alpha, beta
+      real(psb_spk_) :: val(:), x(:), y(:)
+      integer(psb_ipk_) :: ja(:), irp(:)
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_ipk_) :: i, j, ii, isz
+      real(psb_spk_) :: tmp
+      integer(psb_ipk_), parameter :: vsz = 256
+
+      info = 0
+
+      !$acc parallel loop vector_length(vsz) private(isz)
+      do ii = 1, m, vsz
+        isz = min(vsz, m - ii + 1)
+        !$acc loop independent private(tmp)
+        do i = ii, ii + isz - 1
+          ! Single-precision zero, matching the kind of tmp.
+          tmp = szero
+          !$acc loop seq
+          do j = irp(i), irp(i + 1) - 1
+            tmp = tmp + val(j) * x(ja(j))
+          end do
+          y(i) = alpha * tmp + beta * y(i)
+        end do
+      end do
+    end subroutine inner_spmv
+
+  end subroutine psb_s_oacc_csr_vect_mv
+end submodule psb_s_oacc_csr_vect_mv_impl
diff --git a/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90
new file mode 100644
index 00000000..b9c25654
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90
@@ -0,0 +1,35 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_allocate_mnnz_impl
+  use psb_base_mod
+contains
+  module subroutine psb_s_oacc_ell_allocate_mnnz(m, n, a, nz)
+    implicit none
+    integer(psb_ipk_), intent(in) :: m, n
+    class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_), intent(in), optional :: nz
+    integer(psb_ipk_) :: info
+    integer(psb_ipk_) :: err_act, nz_
+    character(len=20) :: name='allocate_mnnz'
+    logical, parameter :: debug=.false.
+
+    call psb_erractionsave(err_act)
+    info = psb_success_
+
+    if (present(nz)) then
+      nz_ = nz
+    else
+      nz_ = 10
+    end if
+
+    call a%psb_s_ell_sparse_mat%allocate(m, n, nz_)
+    call a%sync_dev_space()
+    call a%set_host()
+    if (info /= 0) goto 9999
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_s_oacc_ell_allocate_mnnz
+end submodule psb_s_oacc_ell_allocate_mnnz_impl
diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
new file mode 100644
index 00000000..5a607370
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
@@ -0,0 +1,35 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_coo_impl
+  use psb_base_mod
+contains
+  !
+  ! Copy the COO matrix b into a, then run the device refresh sequence.
+  !
+  !   a    : destination; its device storage is released before the host
+  !          conversion
+  !   b    : source COO matrix (read only)
+  !   info : psb_success_, or psb_err_alloc_dealloc_ whenever the host
+  !          conversion reports a non-zero code
+  !
+  module subroutine psb_s_oacc_ell_cp_from_coo(a, b, info)
+    implicit none
+
+    class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+    class(psb_s_coo_sparse_mat), intent(in) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    call a%free_dev_space()
+    call a%psb_s_ell_sparse_mat%cp_from_coo(b, info)
+
+    if (info == 0) then
+      ! Host copy is the current one: flag it and synchronise.
+      call a%sync_dev_space()
+      call a%set_host()
+      call a%sync()
+    else
+      info = psb_err_alloc_dealloc_
+    end if
+
+  end subroutine psb_s_oacc_ell_cp_from_coo
+end submodule psb_s_oacc_ell_cp_from_coo_impl
diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
new file mode 100644
index 00000000..47ce5241
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
@@ -0,0 +1,26 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_fmt_impl
+  use psb_base_mod
+contains
+  module subroutine psb_s_oacc_ell_cp_from_fmt(a, b, info)
+    implicit none
+
+    class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+    class(psb_s_base_sparse_mat), intent(in) :: b
+    integer(psb_ipk_), intent(out) :: info
+
+    info = psb_success_
+
+    select type(b)
+    type is (psb_s_coo_sparse_mat)
+      call a%cp_from_coo(b, info)
+
class default + call a%free_dev_space() + call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_s_oacc_ell_cp_from_fmt +end submodule psb_s_oacc_ell_cp_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 new file mode 100644 index 00000000..5fc672d0 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 @@ -0,0 +1,85 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_ell_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nzt + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. 
(beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_oacc) + select type(yy => y) + type is (psb_s_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nzt = a%nzt + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val, 1) + do j = 1, nzt + yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_s_oacc_ell_inner_vect_sv +end submodule psb_s_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_s_oacc_ell_mold.F90 b/openacc/impl/psb_s_oacc_ell_mold.F90 new file mode 100644 index 00000000..92f18f25 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_mold(a, b, info) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_s_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_s_oacc_ell_mold +end submodule psb_s_oacc_ell_mold_impl diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..736d4253 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_s_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_ell_mv_from_coo +end submodule psb_s_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..d8d13aa2 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call 
a%mv_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_s_oacc_ell_mv_from_fmt +end submodule psb_s_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..34036cdb --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_ell_sparse_mat%reallocate(nz) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_reallocate_nz +end submodule psb_s_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_s_oacc_ell_scal.F90 b/openacc/impl/psb_s_oacc_ell_scal.F90 new file mode 100644 index 00000000..180d8f9a --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. 
+ integer(psb_ipk_) :: i, j, m, nc + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + ! Column bound: the full stored width of val, i.e. the same second-dimension + ! extent psb_s_oacc_ell_vect_mv iterates over. + ! NOTE(review): a%nzt looks like the total nonzero count rather than the row + ! width, so it is no longer used as a loop bound -- confirm. + nc = size(a%val, 2) + + ! d is a caller-supplied host array: copyin transfers it to the device unless + ! it is already present there. + if (present(side)) then + if (side == 'L') then + ! Row scaling: every stored entry of row i is multiplied by d(i) + !$acc parallel loop collapse(2) present(a) copyin(d) + do i = 1, m + do j = 1, nc + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + ! Column scaling: each entry is multiplied by d(column index); slots + ! without a positive column index are skipped, as in the ELL spmv kernel + !$acc parallel loop collapse(2) present(a) copyin(d) + do i = 1, m + do j = 1, nc + if (a%ja(i, j) > 0) a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else + ! No side given: behave like side == 'L', so d is indexed by the row i + ! (it used to be indexed by the column slot j) + !$acc parallel loop collapse(2) present(a) copyin(d) + do i = 1, m + do j = 1, nc + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_scal +end submodule psb_s_oacc_ell_scal_impl diff --git a/openacc/impl/psb_s_oacc_ell_scals.F90 b/openacc/impl/psb_s_oacc_ell_scals.F90 new file mode 100644 index 00000000..c1c305af --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_scals(d, a, info) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. 
+ integer(psb_ipk_) :: i, j, nc, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + ! Column bound: the full stored width of val, i.e. the same second-dimension + ! extent psb_s_oacc_ell_vect_mv iterates over. + ! NOTE(review): a%nzt looks like the total nonzero count rather than the row + ! width, so it is no longer used as a loop bound -- confirm. + nc = size(a%val, 2) + + ! Multiply every stored entry by the scalar d, on the device copy of a + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nc + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_scals +end submodule psb_s_oacc_ell_scals_impl diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..76b1fe5b --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 @@ -0,0 +1,90 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt, nc + character :: trans_ + logical :: device_done, tra + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + nc = size(a%ja,2) + if ((n > size(x%v)) .or. (m > size(y%v))) then + write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (.not.tra) then + select type(xx => x) + class is (psb_s_vect_oacc) + select type (yy => y) + class is (psb_s_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. 
+ end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_s_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + + contains + + subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nc + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_spk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nc + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_s_oacc_ell_vect_mv +end submodule psb_s_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..1db3e55d --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,36 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. 
+ integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_s_hll_sparse_mat%allocate(m, n, nz_) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_allocate_mnnz +end submodule psb_s_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..ca4afeb7 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_s_hll_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_hll_cp_from_coo +end submodule psb_s_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..47a41b8d --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is 
(psb_s_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_s_oacc_hll_cp_from_fmt +end submodule psb_s_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..ea81574b --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. 
(beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_oacc) + select type(yy => y) + type is (psb_s_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + !$acc parallel loop present(a, xx, yy) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i+1) - 1 + yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j)) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_s_oacc_hll_inner_vect_sv +end submodule psb_s_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_s_oacc_hll_mold.F90 b/openacc/impl/psb_s_oacc_hll_mold.F90 new file mode 100644 index 00000000..1e43b65b --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_mold(a, b, info) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. 
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_s_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_s_oacc_hll_mold +end submodule psb_s_oacc_hll_mold_impl diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..c26c5018 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_s_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_hll_mv_from_coo +end submodule psb_s_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..dd04bbab --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,25 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call 
a%mv_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + end subroutine psb_s_oacc_hll_mv_from_fmt +end submodule psb_s_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..242ffe0d --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_hll_sparse_mat%reallocate(nz) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_reallocate_nz +end submodule psb_s_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_s_oacc_hll_scal.F90 b/openacc/impl/psb_s_oacc_hll_scal.F90 new file mode 100644 index 00000000..ae36465e --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_scal.F90 @@ -0,0 +1,62 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + 
integer(psb_ipk_) :: i, j, k, m, hksz, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + m = a%get_nrows() + nhacks = (m + hksz - 1) / hksz + + ! Storage layout, as traversed by inner_spmv in psb_s_oacc_hll_vect_mv: + ! hack i owns val(hkoffs(i)+1 : hkoffs(i+1)), laid out in consecutive chunks + ! of hksz rows, so entry j belongs to local row mod(j - hkoffs(i) - 1, hksz) + 1. + ! d is a caller-supplied host array: copyin transfers it to the device unless + ! it is already present there. + if (present(side)) then + if (side == 'L') then + ! Row scaling: every stored entry of global row k is multiplied by d(k) + !$acc parallel loop present(a) copyin(d) private(k) + do i = 1, nhacks + do j = a%hkoffs(i) + 1, a%hkoffs(i + 1) + k = mod(j - a%hkoffs(i) - 1, hksz) + (i - 1) * hksz + 1 + ! Rows beyond m can only be padding of the last hack + if (k <= m) a%val(j) = a%val(j) * d(k) + end do + end do + else if (side == 'R') then + ! Column scaling: each entry is multiplied by d(column index) + !$acc parallel loop present(a) copyin(d) + do i = 1, nhacks + do j = a%hkoffs(i) + 1, a%hkoffs(i + 1) + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + ! No side given: behave like side == 'L' (d indexed by the global row) + !$acc parallel loop present(a) copyin(d) private(k) + do i = 1, nhacks + do j = a%hkoffs(i) + 1, a%hkoffs(i + 1) + k = mod(j - a%hkoffs(i) - 1, hksz) + (i - 1) * hksz + 1 + if (k <= m) a%val(j) = a%val(j) * d(k) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_scal +end submodule psb_s_oacc_hll_scal_impl diff --git a/openacc/impl/psb_s_oacc_hll_scals.F90 b/openacc/impl/psb_s_oacc_hll_scals.F90 new file mode 100644 index 00000000..360ea84d --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_scals.F90 @@ -0,0 +1,40 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_scals(d, a, info) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + ! 
$ a parallel loop collapse(2) present(a) + ! hkoffs holds 0-based offsets (see inner_spmv in psb_s_oacc_hll_vect_mv): + ! hack i owns val(hkoffs(i)+1 : hkoffs(i+1)), so the loop runs over that range + !$acc parallel loop present(a) + do i = 1, nhacks + do j = a%hkoffs(i) + 1, a%hkoffs(i + 1) + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_scals +end submodule psb_s_oacc_hll_scals_impl diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..e1d42252 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 @@ -0,0 +1,90 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + character :: trans_ + logical :: device_done, tra + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n > size(x%v)) .or. (m > size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (.not.tra) then + select type(xx => x) + class is (psb_s_vect_oacc) + select type (yy => y) + class is (psb_s_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. 
+ end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_s_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii + real(psb_spk_) :: tmp + + info = 0 + !$acc parallel loop private(nlc, isz,ir,nr) + do i = 1, nhacks + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = szero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) + end do + end do + end subroutine inner_spmv + end subroutine psb_s_oacc_hll_vect_mv +end submodule psb_s_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_s_oacc_mlt_v.f90 b/openacc/impl/psb_s_oacc_mlt_v.f90 new file mode 100644 index 00000000..11dcac6c --- /dev/null +++ b/openacc/impl/psb_s_oacc_mlt_v.f90 @@ -0,0 +1,46 @@ + +subroutine psb_s_oacc_mlt_v(x, y, info) + use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v + + implicit none + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, n + + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_s_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + call s_inner_oacc_mlt_v(n,xx%v, y%v) +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) 
+!!$ end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select +contains + subroutine s_inner_oacc_mlt_v(n,x, y) + implicit none + integer(psb_ipk_), intent(in) :: n + real(psb_spk_), intent(inout) :: x(:), y(:) + + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = (x(i)) * (y(i)) + end do + end subroutine s_inner_oacc_mlt_v +end subroutine psb_s_oacc_mlt_v diff --git a/openacc/impl/psb_s_oacc_mlt_v_2.f90 b/openacc/impl/psb_s_oacc_mlt_v_2.f90 new file mode 100644 index 00000000..b97f2f08 --- /dev/null +++ b/openacc/impl/psb_s_oacc_mlt_v_2.f90 @@ -0,0 +1,91 @@ +subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v_2 + use psb_string_mod + implicit none + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_, device_done + + conjgx_ = .false. + conjgy_ = .false. + device_done = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + info = 0 + select type(xx => x) + class is (psb_s_vect_oacc) + select type (yy => y) + class is (psb_s_vect_oacc) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= szero) .and. (z%is_host())) call z%sync() + call s_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) + call z%set_dev() + device_done = .true. 
+ end select + end select + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= szero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end if + +contains + subroutine s_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) + implicit none + integer(psb_ipk_), intent(in) :: n + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(inout) :: x(:), y(:), z(:) + integer(psb_ipk_), intent(out) :: info + logical, intent(in) :: conjgx, conjgy + + integer(psb_ipk_) :: i + if (conjgx.and.conjgy) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + else if (conjgx.and.(.not.conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + else if ((.not.conjgx).and.(conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + else + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + end if + end subroutine s_inner_oacc_mlt_v_2 +end subroutine psb_s_oacc_mlt_v_2 + diff --git a/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 new file mode 100644 index 00000000..b7ba4bad --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 @@ -0,0 +1,29 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine 
psb_z_oacc_csr_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%allocate(m, n, nz) + call a%set_host() + call a%sync_dev_space() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_csr_allocate_mnnz +end submodule psb_z_oacc_csr_allocate_mnnz_impl diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 new file mode 100644 index 00000000..ef1b5b73 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_cp_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_z_csr_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_csr_cp_from_coo +end submodule psb_z_oacc_csr_cp_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 new file mode 100644 index 00000000..7c3470a2 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_cp_from_fmt(a, 
b, info) + implicit none + + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_z_csr_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_z_oacc_csr_cp_from_fmt +end submodule psb_z_oacc_csr_cp_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 new file mode 100644 index 00000000..25715903 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 @@ -0,0 +1,83 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_csr_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. 
(beta /= dzero)) then
+      ! Host path: bring both vectors up to date, let the parent CSR type
+      ! do the work, and record that the result now lives on the host.
+      call x%sync()
+      call y%sync()
+      call a%psb_z_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+      call y%set_host()
+    else
+      select type (xx => x)
+      type is (psb_z_vect_oacc)
+        select type(yy => y)
+        type is (psb_z_vect_oacc)
+          ! A per-nonzero element-wise update (y(i) = alpha*val(i)*x(ja(i)))
+          ! indexes the result vector by nonzero position, so it overruns y
+          ! whenever the matrix has more entries than rows, and it is not
+          ! the operation inner_spsm performs. Until a real device kernel
+          ! exists, use exactly the same host path as the branch above.
+          call x%sync()
+          call y%sync()
+          call a%psb_z_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+          call y%set_host()
+        class default
+          ! y is not an OpenACC vector: work on plain array copies.
+          rx = xx%get_vect()
+          ry = y%get_vect()
+          call a%psb_z_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+          call y%bld(ry)
+        end select
+      class default
+        ! x is not an OpenACC vector: work on plain array copies.
+        rx = x%get_vect()
+        ry = y%get_vect()
+        call a%psb_z_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+        call y%bld(ry)
+      end select
+    endif
+
+    ! Any failure reported by inner_spsm is re-raised as "error from
+    ! subroutine" on the PSBLAS error stack.
+    if (info /= psb_success_) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info, name, a_err = 'csrg_vect_sv')
+      goto 9999
+    endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_z_oacc_csr_inner_vect_sv
+end submodule psb_z_oacc_csr_inner_vect_sv_impl
+
diff --git a/openacc/impl/psb_z_oacc_csr_mold.F90 b/openacc/impl/psb_z_oacc_csr_mold.F90
new file mode 100644
index 00000000..93b6bb17
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_mold.F90
@@ -0,0 +1,35 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_mold_impl
+  use psb_base_mod
+contains
+  module subroutine psb_z_oacc_csr_mold(a, b, info)
+    implicit none
+    class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+    class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='csr_mold'
+    logical, parameter :: debug=.false.
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_z_oacc_csr_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_z_oacc_csr_mold +end submodule psb_z_oacc_csr_mold_impl + diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 new file mode 100644 index 00000000..d2b9ee48 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_mv_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_z_csr_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_csr_mv_from_coo +end submodule psb_z_oacc_csr_mv_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 new file mode 100644 index 00000000..af7fb1da --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_mv_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call 
a%mv_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_z_csr_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_z_oacc_csr_mv_from_fmt +end submodule psb_z_oacc_csr_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 new file mode 100644 index 00000000..4cfea07c --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_oacc_csr_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%reallocate(nz) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_csr_reallocate_nz +end submodule psb_z_oacc_csr_reallocate_nz_impl diff --git a/openacc/impl/psb_z_oacc_csr_scal.F90 b/openacc/impl/psb_z_oacc_csr_scal.F90 new file mode 100644 index 00000000..f09ff595 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_scal.F90 @@ -0,0 +1,53 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_scal_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_scal(d, a, info, side) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. 
+    integer(psb_ipk_) :: i, j, m
+    character :: side_
+
+    ! Scale the CSR coefficients by the diagonal d.
+    !   side = 'L' (default, any case): a%val(j) = a%val(j) * d(i) for every
+    !                                   entry j stored in row i
+    !   side = 'R' (any case)         : a%val(j) = a%val(j) * d(a%ja(j))
+    ! Any other value of side leaves the coefficients untouched.
+    ! info is always returned as psb_success_.
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    ! d holds one factor per row (or per column), so it must never be
+    ! indexed by nonzero position; with no side given, scale rows.
+    side_ = 'L'
+    if (present(side)) side_ = psb_toupper(side)
+
+    if (a%is_host()) call a%sync()
+
+    ! irp delimits rows, so both variants iterate over the row count, even
+    ! when the factor is picked by column index.
+    m = a%get_nrows()
+
+    if (side_ == 'L') then
+      ! d is an ordinary dummy array; copyin() works whether or not the
+      ! caller already placed it on the device, present() would abort.
+      !$acc parallel loop present(a) copyin(d)
+      do i = 1, m
+        do j = a%irp(i), a%irp(i+1) - 1
+          a%val(j) = a%val(j) * d(i)
+        end do
+      end do
+    else if (side_ == 'R') then
+      !$acc parallel loop present(a) copyin(d)
+      do i = 1, m
+        do j = a%irp(i), a%irp(i+1) - 1
+          a%val(j) = a%val(j) * d(a%ja(j))
+        end do
+      end do
+    end if
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_z_oacc_csr_scal
+end submodule psb_z_oacc_csr_scal_impl
diff --git a/openacc/impl/psb_z_oacc_csr_scals.F90 b/openacc/impl/psb_z_oacc_csr_scals.F90
new file mode 100644
index 00000000..1fe64951
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_scals.F90
@@ -0,0 +1,34 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_scals_impl
+  use psb_base_mod
+contains
+  module subroutine psb_z_oacc_csr_scals(d, a, info)
+    implicit none
+    class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+    complex(psb_dpk_), intent(in) :: d
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='scal'
+    logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_csr_scals +end submodule psb_z_oacc_csr_scals_impl diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 new file mode 100644 index 00000000..b312b6b7 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 @@ -0,0 +1,86 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n + character :: trans_ + logical :: device_done, tra + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + + if ((n > size(x%v)) .or. (m > size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (.not.tra) then + select type(xx => x) + class is (psb_z_vect_oacc) + select type (yy => y) + class is (psb_z_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. 
+ end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_z_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + contains + + subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), irp(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + complex(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = irp(i), irp(i + 1) - 1 + tmp = tmp + val(j) * x(ja(j)) + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_z_oacc_csr_vect_mv +end submodule psb_z_oacc_csr_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 new file mode 100644 index 00000000..aa8a03cd --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 @@ -0,0 +1,35 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_z_ell_sparse_mat%allocate(m, n, nz_) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_allocate_mnnz +end submodule psb_z_oacc_ell_allocate_mnnz_impl diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 new file mode 100644 index 00000000..a3129ff6 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_cp_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%free_dev_space() + call a%psb_z_ell_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_ell_cp_from_coo +end submodule psb_z_oacc_ell_cp_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 new file mode 100644 index 00000000..f9a52d62 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_cp_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b, info) + 
class default + call a%free_dev_space() + call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_z_oacc_ell_cp_from_fmt +end submodule psb_z_oacc_ell_cp_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 new file mode 100644 index 00000000..1cbe249c --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 @@ -0,0 +1,85 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_ell_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nzt + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. 
(beta /= dzero)) then
+      ! Host path: bring both vectors up to date, let the parent ELL type
+      ! do the work, and record that the result now lives on the host.
+      call x%sync()
+      call y%sync()
+      call a%psb_z_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+      call y%set_host()
+    else
+      select type (xx => x)
+      type is (psb_z_vect_oacc)
+        select type(yy => y)
+        type is (psb_z_vect_oacc)
+          ! A loop of the form y(i) = alpha*val(i,j)*x(ja(i,j)) over j
+          ! overwrites y(i) on every iteration instead of accumulating, and
+          ! bounding j by a%nzt rather than by the second extent of val can
+          ! run past the array. It is also not the operation inner_spsm
+          ! performs. Until a real device kernel exists, use exactly the
+          ! same host path as the branch above.
+          call x%sync()
+          call y%sync()
+          call a%psb_z_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+          call y%set_host()
+        class default
+          ! y is not an OpenACC vector: work on plain array copies.
+          rx = xx%get_vect()
+          ry = y%get_vect()
+          call a%psb_z_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+          call y%bld(ry)
+        end select
+      class default
+        ! x is not an OpenACC vector: work on plain array copies.
+        rx = x%get_vect()
+        ry = y%get_vect()
+        call a%psb_z_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+        call y%bld(ry)
+      end select
+    endif
+
+    ! Any failure reported by inner_spsm is re-raised as "error from
+    ! subroutine" on the PSBLAS error stack.
+    if (info /= psb_success_) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info, name, a_err = 'ell_vect_sv')
+      goto 9999
+    endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_z_oacc_ell_inner_vect_sv
+end submodule psb_z_oacc_ell_inner_vect_sv_impl
diff --git a/openacc/impl/psb_z_oacc_ell_mold.F90 b/openacc/impl/psb_z_oacc_ell_mold.F90
new file mode 100644
index 00000000..fcc222de
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mold_impl
+  use psb_base_mod
+contains
+  module subroutine psb_z_oacc_ell_mold(a, b, info)
+    implicit none
+    class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+    class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name = 'ell_mold'
+    logical, parameter :: debug = .false.
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_z_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_z_oacc_ell_mold +end submodule psb_z_oacc_ell_mold_impl diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..9abb1a74 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_z_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_ell_mv_from_coo +end submodule psb_z_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..eb49b21a --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call 
a%mv_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_z_oacc_ell_mv_from_fmt +end submodule psb_z_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..c7c88c65 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_ell_sparse_mat%reallocate(nz) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_reallocate_nz +end submodule psb_z_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_z_oacc_ell_scal.F90 b/openacc/impl/psb_z_oacc_ell_scal.F90 new file mode 100644 index 00000000..e3d25ccb --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. 
+    integer(psb_ipk_) :: i, j, m, nc
+    character :: side_
+
+    ! Scale the ELL coefficients by the diagonal d.
+    !   side = 'L' (default, any case): a%val(i,j) = a%val(i,j) * d(i)
+    !   side = 'R' (any case)         : a%val(i,j) = a%val(i,j) * d(a%ja(i,j))
+    ! Any other value of side leaves the coefficients untouched.
+    ! info is always returned as psb_success_.
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    ! d holds one factor per row (or per column); the second index of val is
+    ! only a storage slot, so it must never be used to index d. With no side
+    ! given, scale rows.
+    side_ = 'L'
+    if (present(side)) side_ = psb_toupper(side)
+
+    if (a%is_host()) call a%sync()
+
+    m  = a%get_nrows()
+    ! Take the column extent from the array itself rather than from a%nzt,
+    ! so the loops can never step outside val/ja.
+    ! NOTE(review): a%nzt looks like a nonzero count, not the row width - confirm.
+    nc = size(a%val, 2)
+
+    if (side_ == 'L') then
+      ! d is an ordinary dummy array; copyin() works whether or not the
+      ! caller already placed it on the device, present() would abort.
+      !$acc parallel loop collapse(2) present(a) copyin(d)
+      do i = 1, m
+        do j = 1, nc
+          a%val(i, j) = a%val(i, j) * d(i)
+        end do
+      end do
+    else if (side_ == 'R') then
+      !$acc parallel loop collapse(2) present(a) copyin(d)
+      do i = 1, m
+        do j = 1, nc
+          ! Skip slots without a valid column index, the same guard the
+          ! ELL matrix-vector kernel applies before dereferencing ja.
+          if (a%ja(i, j) > 0) a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
+        end do
+      end do
+    end if
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_z_oacc_ell_scal
+end submodule psb_z_oacc_ell_scal_impl
diff --git a/openacc/impl/psb_z_oacc_ell_scals.F90 b/openacc/impl/psb_z_oacc_ell_scals.F90
new file mode 100644
index 00000000..c382627a
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_scals.F90
@@ -0,0 +1,39 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_scals_impl
+  use psb_base_mod
+contains
+  module subroutine psb_z_oacc_ell_scals(d, a, info)
+    implicit none
+    class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+    complex(psb_dpk_), intent(in) :: d
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name='scal'
+    logical, parameter :: debug=.false.
+    integer(psb_ipk_) :: i, j, m, nc
+
+    ! Multiply every stored ELL coefficient by the scalar d.
+    ! info is always returned as psb_success_.
+    info = psb_success_
+    call psb_erractionsave(err_act)
+
+    if (a%is_host()) call a%sync()
+
+    m  = a%get_nrows()
+    ! Take the column extent from the array itself rather than from a%nzt,
+    ! so the loop can never step outside val.
+    ! NOTE(review): a%nzt looks like a nonzero count, not the row width - confirm.
+    nc = size(a%val, 2)
+
+    !$acc parallel loop collapse(2) present(a)
+    do i = 1, m
+      do j = 1, nc
+        a%val(i, j) = a%val(i, j) * d
+      end do
+    end do
+
+    call a%set_dev()
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_z_oacc_ell_scals
+end submodule psb_z_oacc_ell_scals_impl
diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
new file mode 100644
index 00000000..53283689
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
@@ -0,0 +1,90 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_vect_mv_impl
+  use psb_base_mod
+contains
+  module subroutine psb_z_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+    implicit none
+
+    complex(psb_dpk_), intent(in) :: alpha, beta
+    class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+    class(psb_z_base_vect_type), intent(inout) :: x, y
+    integer(psb_ipk_), intent(out) :: info
+    character, optional, intent(in) :: trans
+
+    integer(psb_ipk_) :: m, n, nzt, nc
+    character :: trans_
+    logical :: device_done, tra
+
+    info = psb_success_
+    m = a%get_nrows()
+    n = a%get_ncols()
+    nzt = a%nzt
+    nc = size(a%ja,2)
+    if ((n > size(x%v)) .or. (m > size(y%v))) then
+      write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
+      info = psb_err_invalid_mat_state_
+      return
+    end if
+    device_done = .false.
+    if (present(trans)) then
+      trans_ = trans
+    else
+      trans_ = 'N'
+    end if
+    tra = (psb_toupper(trans_) == 'T') .or. 
(psb_toupper(trans_) == 'C') + + if (.not.tra) then + select type(xx => x) + class is (psb_z_vect_oacc) + select type (yy => y) + class is (psb_z_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_z_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + + contains + + subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nc + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + complex(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nc + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_z_oacc_ell_vect_mv +end submodule psb_z_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..fb84dfb7 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,36 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + 
integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_z_hll_sparse_mat%allocate(m, n, nz_) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_allocate_mnnz +end submodule psb_z_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..9fb903ca --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_z_hll_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_hll_cp_from_coo +end submodule psb_z_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..3a77865d --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,26 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + 
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + + end subroutine psb_z_oacc_hll_cp_from_fmt +end submodule psb_z_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..a5bd54ad --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. 
(beta /= dzero)) then
+      ! Host path: bring both vectors up to date, let the parent HLL type
+      ! do the work, and record that the result now lives on the host.
+      call x%sync()
+      call y%sync()
+      call a%psb_z_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+      call y%set_host()
+    else
+      select type (xx => x)
+      type is (psb_z_vect_oacc)
+        select type(yy => y)
+        type is (psb_z_vect_oacc)
+          ! A parallel per-entry update of y(a%irn(j)) lets several
+          ! iterations overwrite the same element of y without
+          ! accumulating, and it is not the operation inner_spsm performs.
+          ! Until a real device kernel exists, use exactly the same host
+          ! path as the branch above.
+          call x%sync()
+          call y%sync()
+          call a%psb_z_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+          call y%set_host()
+        class default
+          ! y is not an OpenACC vector: work on plain array copies.
+          rx = xx%get_vect()
+          ry = y%get_vect()
+          call a%psb_z_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+          call y%bld(ry)
+        end select
+      class default
+        ! x is not an OpenACC vector: work on plain array copies.
+        rx = x%get_vect()
+        ry = y%get_vect()
+        call a%psb_z_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+        call y%bld(ry)
+      end select
+    endif
+
+    ! Any failure reported by inner_spsm is re-raised as "error from
+    ! subroutine" on the PSBLAS error stack.
+    if (info /= psb_success_) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info, name, a_err = 'hll_vect_sv')
+      goto 9999
+    endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_z_oacc_hll_inner_vect_sv
+end submodule psb_z_oacc_hll_inner_vect_sv_impl
diff --git a/openacc/impl/psb_z_oacc_hll_mold.F90 b/openacc/impl/psb_z_oacc_hll_mold.F90
new file mode 100644
index 00000000..f614ad89
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mold_impl
+  use psb_base_mod
+contains
+  module subroutine psb_z_oacc_hll_mold(a, b, info)
+    implicit none
+    class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+    class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: err_act
+    character(len=20) :: name = 'hll_mold'
+    logical, parameter :: debug = .false.
+ + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_z_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_z_oacc_hll_mold +end submodule psb_z_oacc_hll_mold_impl diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..46d4d6bd --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,27 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_mv_from_coo(a, b, info) + implicit none + ! Moves the COO matrix b into a via the parent HLL conversion, then refreshes the device side (sync_dev_space, set_host, sync). + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%free_dev_space() + call a%psb_z_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + call a%sync_dev_space() + call a%set_host() + call a%sync() + + return + +9999 continue + ! Hand back the parent's error code unchanged, as psb_z_oacc_hll_mv_from_fmt does. + return + + end subroutine psb_z_oacc_hll_mv_from_coo +end submodule psb_z_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..45e9846a --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,25 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call
a%mv_from_coo(b, info) + class default + call a%free_dev_space() + call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + call a%sync_dev_space() + call a%set_host() + call a%sync() + end select + end subroutine psb_z_oacc_hll_mv_from_fmt +end submodule psb_z_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..7d9589cb --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=24) :: name='z_oacc_hll_reallocate_nz' ! sized to the literal; len=20 truncated it + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + ! Release the device copies while the host arrays still have their old extents, as mv_from_coo does. + call a%free_dev_space() + call a%psb_z_hll_sparse_mat%reallocate(nz) + call a%sync_dev_space() + call a%set_host() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_reallocate_nz +end submodule psb_z_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_z_oacc_hll_scal.F90 b/openacc/impl/psb_z_oacc_hll_scal.F90 new file mode 100644 index 00000000..a2f9aee7 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_scal.F90 @@ -0,0 +1,62 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' +
integer(psb_ipk_) :: i, j, k, m, hksz, nhacks + logical :: left, right + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + m = a%get_nrows() + nhacks = (m+hksz-1)/hksz + + ! An absent side is treated as left (row) scaling - NOTE(review): confirm this matches the base-class default. + ! A side other than 'L'/'R' still leaves the values untouched; the letter is now matched case-insensitively. + left = .true. + right = .false. + if (present(side)) then + left = (psb_toupper(side) == 'L') + right = (psb_toupper(side) == 'R') + end if + + ! Hack i owns val(hkoffs(i)+1:hkoffs(i+1)), column-major with hksz rows per column: the layout that + ! inner_spmv in psb_z_oacc_hll_vect_mv walks. k is the global row of entry j; rows past m are padding + ! and are skipped. d is a host dummy argument, hence copyin rather than present. + if (left .or. right) then + !$acc parallel loop present(a) copyin(d) private(k) + do i = 1, nhacks + do j = a%hkoffs(i)+1, a%hkoffs(i+1) + k = (i-1)*hksz+mod(j-a%hkoffs(i)-1, hksz)+1 + if (k <= m) then + if (left) then + a%val(j) = a%val(j) * d(k) + else + a%val(j) = a%val(j) * d(a%ja(j)) + end if + end if + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_scal +end submodule psb_z_oacc_hll_scal_impl diff --git a/openacc/impl/psb_z_oacc_hll_scals.F90 b/openacc/impl/psb_z_oacc_hll_scals.F90 new file mode 100644 index 00000000..888115cd --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_scals.F90 @@ -0,0 +1,40 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_scals(d, a, info) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + !
$ a parallel loop collapse(2) present(a) + !$acc parallel loop present(a) + do i = 1, nhacks + do j = a%hkoffs(i)+1, a%hkoffs(i+1) + a%val(j) = a%val(j) * d + end do + end do + ! Bounds fixed: hack i owns val(hkoffs(i)+1:hkoffs(i+1)), the range inner_spmv in psb_z_oacc_hll_vect_mv reads. + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_scals +end submodule psb_z_oacc_hll_scals_impl diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..350592bc --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 @@ -0,0 +1,90 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + character :: trans_ + logical :: device_done, tra + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n > size(x%v)) .or. (m > size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (.not.tra) then + select type(xx => x) + class is (psb_z_vect_oacc) + select type (yy => y) + class is (psb_z_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true.
+ end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_z_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii + complex(psb_dpk_) :: tmp + + info = 0 + !$acc parallel loop private(nlc, isz,ir,nr) + do i = 1, nhacks + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = zzero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) + end do + end do + end subroutine inner_spmv + end subroutine psb_z_oacc_hll_vect_mv +end submodule psb_z_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_mlt_v.f90 b/openacc/impl/psb_z_oacc_mlt_v.f90 new file mode 100644 index 00000000..3e8f6030 --- /dev/null +++ b/openacc/impl/psb_z_oacc_mlt_v.f90 @@ -0,0 +1,46 @@ + +subroutine psb_z_oacc_mlt_v(x, y, info) + use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v + + implicit none + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, n + + ! Element-wise y := x*y over the common length. The device kernel runs only when x is an + ! OpenACC vector too; otherwise both vectors are synced back and the product is formed on the host. + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_z_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + call z_inner_oacc_mlt_v(n,xx%v, y%v) +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) *
xx%v(i) +!!$ end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select +contains + subroutine z_inner_oacc_mlt_v(n,x, y) + implicit none + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_), intent(inout) :: x(:), y(:) + + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = (x(i)) * (y(i)) + end do + end subroutine z_inner_oacc_mlt_v +end subroutine psb_z_oacc_mlt_v diff --git a/openacc/impl/psb_z_oacc_mlt_v_2.f90 b/openacc/impl/psb_z_oacc_mlt_v_2.f90 new file mode 100644 index 00000000..c13b348c --- /dev/null +++ b/openacc/impl/psb_z_oacc_mlt_v_2.f90 @@ -0,0 +1,91 @@ +subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v_2 + use psb_string_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_, device_done + ! Element-wise z := alpha*op(x)*op(y)+beta*z over the common length; op is conjg when the matching flag is 'C' (any case). + conjgx_ = .false. + conjgy_ = .false. + device_done = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + info = 0 + select type(xx => x) + class is (psb_z_vect_oacc) + select type (yy => y) + class is (psb_z_vect_oacc) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= zzero) .and. (z%is_host())) call z%sync() + call z_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) + call z%set_dev() + device_done = .true.
+ end select + end select + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= zzero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end if + +contains + subroutine z_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) + implicit none + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(inout) :: x(:), y(:), z(:) + integer(psb_ipk_), intent(out) :: info + logical, intent(in) :: conjgx, conjgy + integer(psb_ipk_) :: i + info = 0 ! intent(out): must be defined here, otherwise the caller's status becomes undefined + if (conjgx.and.conjgy) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * conjg(x(i)) * conjg(y(i)) + beta * z(i) + end do + else if (conjgx.and.(.not.conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * conjg(x(i)) * (y(i)) + beta * z(i) + end do + else if ((.not.conjgx).and.(conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * conjg(y(i)) + beta * z(i) + end do + else + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + end if + end subroutine z_inner_oacc_mlt_v_2 +end subroutine psb_z_oacc_mlt_v_2 + diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90 new file mode 100644 index 00000000..c6461fe3 --- /dev/null +++ b/openacc/psb_c_oacc_csr_mat_mod.F90 @@ -0,0 +1,290 @@ +module psb_c_oacc_csr_mat_mod + + use iso_c_binding + use openacc + use psb_c_mat_mod + use
psb_c_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_csr_sparse_mat) :: psb_c_oacc_csr_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => c_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => c_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_c_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_csr_inner_vect_sv + procedure, pass(a) :: scals => psb_c_oacc_csr_scals + procedure, pass(a) :: scalv => psb_c_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => c_oacc_csr_free + procedure, pass(a) :: mold => psb_c_oacc_csr_mold + procedure, pass(a) :: all => c_oacc_csr_all + procedure, pass(a) :: is_host => c_oacc_csr_is_host + procedure, pass(a) :: is_sync => c_oacc_csr_is_sync + procedure, pass(a) :: is_dev => c_oacc_csr_is_dev + procedure, pass(a) :: set_host => c_oacc_csr_set_host + procedure, pass(a) :: set_sync => c_oacc_csr_set_sync + procedure, pass(a) :: set_dev => c_oacc_csr_set_dev + procedure, pass(a) :: free_dev_space => c_oacc_csr_free_dev_space + procedure, pass(a) :: sync_dev_space => c_oacc_csr_sync_dev_space + procedure, pass(a) :: sync => c_oacc_csr_sync + end type psb_c_oacc_csr_sparse_mat + + interface + module subroutine psb_c_oacc_csr_mold(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine 
psb_c_oacc_csr_mold + end interface + + interface + module subroutine psb_c_oacc_csr_cp_from_fmt(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_cp_from_fmt + end interface + + interface + module subroutine psb_c_oacc_csr_mv_from_coo(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_mv_from_coo + end interface + + interface + module subroutine psb_c_oacc_csr_mv_from_fmt(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_mv_from_fmt + end interface + + interface + module subroutine psb_c_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_csr_vect_mv + end interface + + interface + module subroutine psb_c_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_csr_inner_vect_sv + end interface + + interface + module subroutine psb_c_oacc_csr_scals(d, a, info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_scals + end interface + + interface + module subroutine psb_c_oacc_csr_scal(d,a,info,side) + 
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_c_oacc_csr_scal + end interface + + interface + module subroutine psb_c_oacc_csr_reallocate_nz(nz,a) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_c_oacc_csr_reallocate_nz + end interface + + interface + module subroutine psb_c_oacc_csr_allocate_mnnz(m,n,a,nz) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_oacc_csr_allocate_mnnz + end interface + + interface + module subroutine psb_c_oacc_csr_cp_from_coo(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_cp_from_coo + end interface + +contains + + + subroutine c_oacc_csr_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp) + + return + end subroutine c_oacc_csr_free_dev_space + + subroutine c_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + call a%free_dev_space() + call a%psb_c_csr_sparse_mat%free() + + return + end subroutine c_oacc_csr_free + + function c_oacc_csr_sizeof(a) result(res) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + + end function c_oacc_csr_sizeof + + + function c_oacc_csr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSROA' + end function c_oacc_csr_get_fmt + + subroutine c_oacc_csr_all(m, n, nz, a, info) + implicit none + integer(psb_ipk_), intent(in) :: m, n, nz + class(psb_c_oacc_csr_sparse_mat), intent(out) :: a + integer(psb_ipk_), intent(out) :: info + + info = 0 + call a%free() + + call a%set_nrows(m) + call a%set_ncols(n) + ! Allocate val, ja, irp in turn and stop at the first failure, so a later success cannot overwrite its status. + allocate(a%val(nz),stat=info) + if (info == 0) allocate(a%ja(nz),stat=info) + if (info == 0) allocate(a%irp(m+1),stat=info) + if (info == 0) call a%set_host() + if (info == 0) call a%sync_dev_space() + end subroutine c_oacc_csr_all + + function c_oacc_csr_is_host(a) result(res) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_oacc_csr_is_host + + function c_oacc_csr_is_sync(a) result(res) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_oacc_csr_is_sync + + function c_oacc_csr_is_dev(a) result(res) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res =
(a%devstate == is_dev) + end function c_oacc_csr_is_dev + + subroutine c_oacc_csr_set_host(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_oacc_csr_set_host + + subroutine c_oacc_csr_set_sync(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_oacc_csr_set_sync + + subroutine c_oacc_csr_set_dev(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_oacc_csr_set_dev + + subroutine c_oacc_csr_sync_dev_space(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + + ! Copy each non-empty CSR array (val, ja, irp) to the device with acc_copyin; devstate is not changed here. + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irp)>0) call acc_copyin(a%irp) + end subroutine c_oacc_csr_sync_dev_space + + subroutine c_oacc_csr_sync(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), target, intent(in) :: a + class(psb_c_oacc_csr_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + ! a is intent(in); tmpa aliases it so that set_sync can be called once the arrays have been updated. + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (a%is_dev()) then + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irp)>0) call acc_update_self(a%irp) + else if (a%is_host()) then + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irp)>0) call acc_update_device(a%irp) + end if + call tmpa%set_sync() + end subroutine c_oacc_csr_sync + +end module psb_c_oacc_csr_mat_mod + diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 new file mode 100644 index 00000000..b2168646 --- /dev/null +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -0,0 +1,272 @@ +module psb_c_oacc_ell_mat_mod + use iso_c_binding + use openacc + use psb_c_mat_mod + use psb_c_ell_mat_mod + use psb_c_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_ell_sparse_mat) :: psb_c_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => c_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => c_oacc_ell_sizeof + procedure, pass(a) :: is_host => c_oacc_ell_is_host + procedure, pass(a) :: is_sync => c_oacc_ell_is_sync + procedure, pass(a) :: is_dev => c_oacc_ell_is_dev + procedure, pass(a) :: set_host => c_oacc_ell_set_host + procedure, pass(a) :: set_sync => c_oacc_ell_set_sync + procedure, pass(a) :: set_dev => c_oacc_ell_set_dev + procedure, pass(a) :: sync_dev_space => c_oacc_ell_sync_dev_space + procedure, pass(a) :: sync => c_oacc_ell_sync + procedure, pass(a) :: free_dev_space => c_oacc_ell_free_dev_space + procedure, pass(a) :: free => c_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv + procedure, pass(a) :: scals => psb_c_oacc_ell_scals + procedure, pass(a) :: scalv => psb_c_oacc_ell_scal 
+ procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_c_oacc_ell_mold + + end type psb_c_oacc_ell_sparse_mat + + interface + module subroutine psb_c_oacc_ell_mold(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_mold + end interface + + interface + module subroutine psb_c_oacc_ell_cp_from_fmt(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_c_oacc_ell_mv_from_coo(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_c_oacc_ell_mv_from_fmt(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_vect_mv + end interface + + interface + 
module subroutine psb_c_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_c_oacc_ell_scals(d, a, info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_scals + end interface + + interface + module subroutine psb_c_oacc_ell_scal(d,a,info,side) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_c_oacc_ell_scal + end interface + + interface + module subroutine psb_c_oacc_ell_reallocate_nz(nz,a) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_c_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_c_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_c_oacc_ell_cp_from_coo(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_cp_from_coo + end interface + +contains + + subroutine c_oacc_ell_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! 
function c_oacc_ell_sizeof(a) result(res)
  !
  ! Estimate, in bytes, of the storage taken by the ELL arrays of A.
  !
  ! Arguments:
  !   a    the matrix whose VAL, JA, IRN and IDIAG components are measured
  ! Result:
  !   res  8 bytes of fixed overhead plus the footprint of each component
  !
  implicit none
  class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
  integer(psb_epk_) :: res

  ! Kept from the original implementation: bring the host copy up to date
  ! when the device holds the most recent data.
  if (a%is_dev()) call a%sync()

  res = 8
  ! VAL is complex(psb_spk_), i.e. two single precision reals per entry.
  !
  ! psb_size is the same guard the rest of this module uses before touching
  ! these components; unlike the SIZE intrinsic it is expected to report 0
  ! for an unallocated array (NOTE(review): confirm against psb_realloc_mod).
  res = res + (2*psb_sizeof_sp) * psb_size(a%val)
  res = res + psb_sizeof_ip * psb_size(a%ja)
  res = res + psb_sizeof_ip * psb_size(a%irn)
  res = res + psb_sizeof_ip * psb_size(a%idiag)

end function c_oacc_ell_sizeof
subroutine c_oacc_ell_sync(a)
  !
  ! Make host and device copies of the ELL arrays agree, then mark the
  ! matrix as synchronized.
  !
  !   state is_dev  : the device copy is copied back to the host
  !   state is_host : the host copy is pushed to the device
  !   otherwise     : no data is moved
  !
  ! A is declared intent(in); the state flag is updated through a pointer
  ! alias, exactly as in the original code.
  !
  implicit none
  class(psb_c_oacc_ell_sparse_mat), target, intent(in) :: a
  class(psb_c_oacc_ell_sparse_mat), pointer :: self

  self => a
  if (a%is_dev()) then
    call pull_to_host()
  else if (a%is_host()) then
    call push_to_device()
  end if
  call self%set_sync()

contains

  !
  ! Note: at least on GNU, if an array is allocated
  !       but with size 0, then CREATE,UPDATE and DELETE
  !       will fail; hence the size guards below.
  !
  subroutine pull_to_host()
    ! Device -> host refresh of every non-empty component.
    if (psb_size(a%val) > 0) then
      call acc_update_self(a%val)
    end if
    if (psb_size(a%ja) > 0) then
      call acc_update_self(a%ja)
    end if
    if (psb_size(a%irn) > 0) then
      call acc_update_self(a%irn)
    end if
    if (psb_size(a%idiag) > 0) then
      call acc_update_self(a%idiag)
    end if
  end subroutine pull_to_host

  subroutine push_to_device()
    ! Host -> device refresh of every non-empty component.
    if (psb_size(a%val) > 0) then
      call acc_update_device(a%val)
    end if
    if (psb_size(a%ja) > 0) then
      call acc_update_device(a%ja)
    end if
    if (psb_size(a%irn) > 0) then
      call acc_update_device(a%irn)
    end if
    if (psb_size(a%idiag) > 0) then
      call acc_update_device(a%idiag)
    end if
  end subroutine push_to_device

end subroutine c_oacc_ell_sync
psb_c_oacc_hll_inner_vect_sv + procedure, pass(a) :: scals => psb_c_oacc_hll_scals + procedure, pass(a) :: scalv => psb_c_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_c_oacc_hll_mold + + end type psb_c_oacc_hll_sparse_mat + + interface + module subroutine psb_c_oacc_hll_mold(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_mold + end interface + + interface + module subroutine psb_c_oacc_hll_cp_from_fmt(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_c_oacc_hll_mv_from_coo(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_c_oacc_hll_mv_from_fmt(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), 
intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_c_oacc_hll_scals(d, a, info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_scals + end interface + + interface + module subroutine psb_c_oacc_hll_scal(d,a,info,side) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_c_oacc_hll_scal + end interface + + interface + module subroutine psb_c_oacc_hll_reallocate_nz(nz,a) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_c_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_c_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_c_oacc_hll_cp_from_coo(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_cp_from_coo + end interface + +contains + + subroutine c_oacc_hll_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + 
function c_oacc_hll_sizeof(a) result(res)
  !
  ! Estimate, in bytes, of the storage taken by the HLL arrays of A.
  !
  ! Arguments:
  !   a    the matrix whose VAL, JA, IRN, IDIAG and HKOFFS components
  !        are measured
  ! Result:
  !   res  8 bytes of fixed overhead plus the footprint of each component
  !
  implicit none
  class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
  integer(psb_epk_) :: res

  ! Kept from the original implementation: bring the host copy up to date
  ! when the device holds the most recent data.
  if (a%is_dev()) call a%sync()

  res = 8
  ! VAL is complex(psb_spk_), i.e. two single precision reals per entry.
  !
  ! psb_size is the same guard the rest of this module uses before touching
  ! these components; unlike the SIZE intrinsic it is expected to report 0
  ! for an unallocated array (NOTE(review): confirm against psb_realloc_mod).
  res = res + (2*psb_sizeof_sp) * psb_size(a%val)
  res = res + psb_sizeof_ip * psb_size(a%ja)
  res = res + psb_sizeof_ip * psb_size(a%irn)
  res = res + psb_sizeof_ip * psb_size(a%idiag)
  res = res + psb_sizeof_ip * psb_size(a%hkoffs)
end function c_oacc_hll_sizeof
subroutine c_oacc_hll_sync_dev_space(a)
  !
  ! Copy every non-empty HLL component of A (VAL, JA, IRN, IDIAG, HKOFFS)
  ! to the accelerator with acc_copyin.
  !
  ! Note: at least on GNU, if an array is allocated
  !       but with size 0, then CREATE,UPDATE and DELETE
  !       will fail, so empty components are skipped.
  !
  implicit none
  class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a

  if (psb_size(a%val) > 0) then
    call acc_copyin(a%val)
  end if

  if (psb_size(a%ja) > 0) then
    call acc_copyin(a%ja)
  end if

  if (psb_size(a%irn) > 0) then
    call acc_copyin(a%irn)
  end if

  if (psb_size(a%idiag) > 0) then
    call acc_copyin(a%idiag)
  end if

  if (psb_size(a%hkoffs) > 0) then
    call acc_copyin(a%hkoffs)
  end if
end subroutine c_oacc_hll_sync_dev_space
+ if (a%is_dev()) then + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs) + else if (a%is_host()) then + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine c_oacc_hll_sync + +end module psb_c_oacc_hll_mat_mod diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 new file mode 100644 index 00000000..2aa11db9 --- /dev/null +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -0,0 +1,1013 @@ +module psb_c_oacc_vect_mod + use iso_c_binding + use openacc + use psb_const_mod + use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod + use psb_c_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_vect_type) :: psb_c_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => c_oacc_get_nrows + procedure, nopass :: get_fmt => c_oacc_get_fmt + + procedure, pass(x) :: all => c_oacc_vect_all + procedure, pass(x) :: zero => c_oacc_zero + procedure, pass(x) :: asb_m => c_oacc_asb_m + procedure, pass(x) :: sync => c_oacc_sync + procedure, pass(x) :: sync_dev_space => c_oacc_sync_dev_space + procedure, pass(x) :: bld_x => c_oacc_bld_x + procedure, pass(x) :: bld_mn => c_oacc_bld_mn + procedure, pass(x) :: free => c_oacc_vect_free + procedure, pass(x) :: free_buffer => c_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => 
c_oacc_vect_maybe_free_buffer + procedure, pass(x) :: ins_a => c_oacc_ins_a + procedure, pass(x) :: ins_v => c_oacc_ins_v + procedure, pass(x) :: is_host => c_oacc_is_host + procedure, pass(x) :: is_dev => c_oacc_is_dev + procedure, pass(x) :: is_sync => c_oacc_is_sync + procedure, pass(x) :: set_host => c_oacc_set_host + procedure, pass(x) :: set_dev => c_oacc_set_dev + procedure, pass(x) :: set_sync => c_oacc_set_sync + procedure, pass(x) :: set_scal => c_oacc_set_scal + + procedure, pass(x) :: new_buffer => c_oacc_new_buffer + procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x + procedure, pass(x) :: gthzbuf => c_oacc_gthzbuf + procedure, pass(y) :: sctb => c_oacc_sctb + procedure, pass(y) :: sctb_x => c_oacc_sctb_x + procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf + procedure, nopass :: device_wait => c_oacc_device_wait + + procedure, pass(x) :: get_size => c_oacc_get_size + + procedure, pass(x) :: dot_v => c_oacc_vect_dot + procedure, pass(x) :: dot_a => c_oacc_dot_a + procedure, pass(y) :: axpby_v => c_oacc_axpby_v + procedure, pass(y) :: axpby_a => c_oacc_axpby_a + procedure, pass(z) :: upd_xyz => c_oacc_upd_xyz + procedure, pass(y) :: mlt_a => c_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => psb_c_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_c_oacc_mlt_v_2 + procedure, pass(x) :: scal => c_oacc_scal + procedure, pass(x) :: nrm2 => c_oacc_nrm2 + procedure, pass(x) :: amax => c_oacc_amax + procedure, pass(x) :: asum => c_oacc_asum + procedure, pass(x) :: absval1 => c_oacc_absval1 + procedure, pass(x) :: absval2 => c_oacc_absval2 + final :: c_oacc_final_vect_free + end type psb_c_vect_oacc + + interface + subroutine psb_c_oacc_mlt_v(x, y, info) + import + implicit none + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_mlt_v + end interface + + interface + subroutine psb_c_oacc_mlt_v_2(alpha, x, y, 
subroutine c_oacc_absval2(x, y)
  !
  ! Element-wise absolute value: y(i) = |x(i)| for i = 1 .. min(size(x),size(y)).
  !
  ! Arguments:
  !   x   source vector (OpenACC flavour)
  !   y   destination vector, any vector type
  !
  ! When Y is an OpenACC vector the work is done on the device and Y is
  ! flagged as device-resident afterwards; otherwise both operands are
  ! brought to the host and the base class implementation is used.
  !
  implicit none
  class(psb_c_vect_oacc), intent(inout) :: x
  class(psb_c_base_vect_type), intent(inout) :: y
  integer(psb_ipk_) :: n

  n = min(size(x%v), size(y%v))
  select type (yy => y)
  class is (psb_c_vect_oacc)
    if (x%is_host()) call x%sync()
    if (yy%is_host()) call yy%sync()
    call c_inner_oacc_absval2(n,x%v,yy%v)
    ! The device copy of Y now holds the result; without this flag a later
    ! host access would silently read the stale host copy.
    call yy%set_dev()
  class default
    if (x%is_dev()) call x%sync()
    if (y%is_dev()) call y%sync()
    call x%psb_c_base_vect_type%absval(y)
  end select
contains
  subroutine c_inner_oacc_absval2(n,x,y)
    ! Device kernel; both arrays must already be present on the device.
    implicit none
    complex(psb_spk_), intent(inout) :: x(:),y(:)
    integer(psb_ipk_) :: n
    integer(psb_ipk_) :: i
    !$acc parallel loop present(x,y)
    do i = 1, n
      y(i) = abs(x(i))
    end do
  end subroutine c_inner_oacc_absval2
end subroutine c_oacc_absval2
function c_oacc_nrm2(n, x) result(res)
  !
  ! Euclidean norm of the first N entries of X, evaluated on the device.
  !
  ! The entries are divided by the largest modulus before squaring and the
  ! result is multiplied back by it; an all-zero input yields zero.
  !
  implicit none
  class(psb_c_vect_oacc), intent(inout) :: x
  integer(psb_ipk_), intent(in) :: n
  real(psb_spk_) :: res

  ! The kernels below require the data to be on the device.
  if (x%is_host()) call x%sync()
  res = scaled_norm(n, x%v)

contains

  function scaled_norm(nloc, v) result(nrm)
    integer(psb_ipk_) :: nloc
    complex(psb_spk_) :: v(:)
    real(psb_spk_) :: nrm
    real(psb_spk_) :: ssq, vmax
    integer(psb_ipk_) :: j

    ! Pass 1: largest modulus.
    vmax = szero
    !$acc parallel loop reduction(max:vmax) present(v)
    do j = 1, nloc
      if (abs(v(j)) > vmax) vmax = abs(v(j))
    end do

    ! Nothing to scale by: the norm is zero.
    nrm = vmax
    if (vmax == szero) return

    ! Pass 2: sum of squares of the scaled entries.
    ssq = szero
    !$acc parallel loop reduction(+:ssq) present(v)
    do j = 1, nloc
      ssq = ssq + abs(v(j)/vmax)**2
    end do
    nrm = vmax*sqrt(ssq)
  end function scaled_norm

end function c_oacc_nrm2
subroutine c_oacc_mlt_a(x, y, info)
  !
  ! Element-wise product with a plain host array: y(i) = y(i) * x(i).
  !
  ! Arguments:
  !   x     host array of multipliers
  !   y     OpenACC vector, updated in place
  !   info  set to 0
  !
  ! X is an ordinary host array, so the product is formed on the host:
  ! Y is first synchronized back from the device if needed, and is marked
  ! host-resident afterwards. The previous version wrapped this loop in an
  ! "!$acc parallel loop present(x,y)" region although X is not device
  ! data and the result was flagged as living on the host.
  !
  implicit none
  complex(psb_spk_), intent(in) :: x(:)
  class(psb_c_vect_oacc), intent(inout) :: y
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i, n

  info = 0
  if (y%is_dev()) call y%sync()
  ! Never run past the end of either operand.
  n = min(size(x), size(y%v))
  do i = 1, n
    y%v(i) = y%v(i) * x(i)
  end do
  call y%set_host()
end subroutine c_oacc_mlt_a

subroutine c_oacc_mlt_a_2(alpha, x, y, beta, z, info)
  !
  ! Scaled element-wise product of two host arrays accumulated into Z:
  !   z(i) = alpha * x(i) * y(i) + beta * z(i)
  !
  ! Arguments:
  !   alpha, beta  scalar coefficients
  !   x, y         host arrays
  !   z            OpenACC vector, updated in place
  !   info         set to 0
  !
  ! As for mlt_a, X and Y are host arrays, so the update is performed on
  ! the host after bringing Z back from the device, and Z is left marked
  ! as host-resident.
  !
  implicit none
  complex(psb_spk_), intent(in) :: alpha, beta
  complex(psb_spk_), intent(in) :: x(:)
  complex(psb_spk_), intent(in) :: y(:)
  class(psb_c_vect_oacc), intent(inout) :: z
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i, n

  info = 0
  if (z%is_dev()) call z%sync()
  ! Never run past the end of any operand.
  n = min(size(x), size(y), size(z%v))
  do i = 1, n
    z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
  end do
  call z%set_host()
end subroutine c_oacc_mlt_a_2
subroutine c_oacc_axpby_a(m, alpha, x, beta, y, info)
  !
  ! Host-side update y(1:m) = alpha * x(1:m) + beta * y(1:m), where X is a
  ! plain host array.
  !
  ! Arguments:
  !   m            number of leading entries to update
  !   alpha, beta  scalar coefficients
  !   x            host array, at least M entries
  !   y            OpenACC vector, updated in place and left host-resident
  !   info         psb_success_ on success, psb_err_internal_error_ when
  !                either operand is shorter than M (same check as axpby_v)
  !
  implicit none
  integer(psb_ipk_), intent(in) :: m
  complex(psb_spk_), intent(in) :: x(:)
  class(psb_c_vect_oacc), intent(inout) :: y
  complex(psb_spk_), intent(in) :: alpha, beta
  integer(psb_ipk_), intent(out) :: info
  integer(psb_ipk_) :: i

  ! INFO is intent(out): it must be defined on every path.
  info = psb_success_
  if ((size(x) < m) .or. (size(y%v) < m)) then
    info = psb_err_internal_error_
    return
  end if

  ! Always refresh the host copy when the device one is newer, even if
  ! beta is zero: only the first M entries are overwritten below, and the
  ! vector is then flagged host-resident, so any entry beyond M must
  ! already be current on the host.
  if (y%is_dev()) call y%sync()

  do i = 1, m
    y%v(i) = alpha * x(i) + beta * y%v(i)
  end do
  call y%set_host()
end subroutine c_oacc_axpby_a
subroutine c_oacc_sctb_buf(i, n, idx, beta, y)
  !
  ! Scatter N entries of the communication buffer of Y back into Y:
  !   y%v(idx(i+k-1)) = beta * y%v(idx(i+k-1)) + y%combuf(i+k-1),  k = 1..n
  !
  ! Arguments:
  !   i     first position used in both IDX and the buffer
  !   n     number of entries to scatter
  !   idx   index vector
  !   beta  scaling applied to the existing entries of Y
  !   y     OpenACC vector owning the buffer; updated in place
  !
  ! If IDX is an OpenACC index vector the scatter runs on the device and Y
  ! is flagged device-resident; otherwise it runs on the host and Y is
  ! flagged host-resident.
  !
  use psb_base_mod
  implicit none
  integer(psb_ipk_) :: i
  integer(psb_mpk_) :: n
  class(psb_i_base_vect_type) :: idx
  complex(psb_spk_) :: beta
  class(psb_c_vect_oacc) :: y
  integer(psb_ipk_) :: k
  logical :: acc_done

  if (.not.allocated(y%combuf)) then
    write(0,*) 'allocation error for y%combuf '
    call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
    return
  end if

  acc_done = .false.
  select type(ii => idx)
  class is (psb_i_vect_oacc)
    if (ii%is_host()) call ii%sync()
    if (y%is_host()) call y%sync()
    call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
    call y%set_dev()
    acc_done = .true.
  end select

  if (.not.acc_done) then
    if (idx%is_dev()) call idx%sync()
    if (y%is_dev()) call y%sync()
    do k = 1, n
      ! The buffer is addressed with the same offset as IDX, matching the
      ! combuf(i:i+n-1) section used by the device path above.
      y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k+i-1)
    end do
    ! The host copy has just been modified.
    call y%set_host()
  end if

contains
  subroutine inner_sctb(n,x,beta,y,idx)
    ! Device scatter: X is refreshed on the device first, then
    ! y(idx(k)) = x(k) + beta*y(idx(k)) for k = 1..n.
    integer(psb_mpk_) :: n
    integer(psb_ipk_) :: idx(:)
    complex(psb_spk_) :: beta,x(:), y(:)
    integer(psb_ipk_) :: k
    !$acc update device(x(1:n))
    !$acc parallel loop present(x,y)
    do k = 1, n
      y(idx(k)) = x(k) + beta *y(idx(k))
    end do
    !$acc end parallel loop
  end subroutine inner_sctb

end subroutine c_oacc_sctb_buf
subroutine c_oacc_sctb(n, idx, x, beta, y)
  !
  ! Host-side scatter of a plain array into an OpenACC vector:
  !   y%v(idx(k)) = beta * y%v(idx(k)) + x(k),  k = 1..n
  !
  ! Nothing is done (and the state of Y is untouched) when N is zero.
  ! Otherwise Y is brought back from the device if necessary and is left
  ! flagged as host-resident.
  !
  use psb_base_mod
  implicit none
  integer(psb_mpk_) :: n
  integer(psb_ipk_) :: idx(:)
  complex(psb_spk_) :: beta, x(:)
  class(psb_c_vect_oacc) :: y
  integer(psb_ipk_) :: k

  if (n /= 0) then
    if (y%is_dev()) call y%sync()

    do k = 1, n
      associate (pos => idx(k))
        y%v(pos) = beta * y%v(pos) + x(k)
      end associate
    end do

    call y%set_host()
  end if
end subroutine c_oacc_sctb
+ end select + + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if + + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_spk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! + !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine c_oacc_gthzbuf + + subroutine c_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type):: idx + complex(psb_spk_) :: y(:) + class(psb_c_vect_oacc):: x + integer(psb_ipk_) :: info, k + logical :: acc_done + + info = 0 + acc_done = .false. + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if + end select + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_spk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! 
+      !$acc parallel loop present(x,y)
+      do k = 1, n
+        y(k) = x(idx(k))
+      end do
+      !$acc end parallel loop
+      !$acc update self(y(1:n))
+    end subroutine inner_gth
+  end subroutine c_oacc_gthzv_x
+
+  !
+  ! Insert n values into x: x%v(irl%v(i)) = val%v(i), i = 1..n.
+  !   n    : number of entries to insert
+  !   irl  : index vector giving the destination positions
+  !   val  : vector holding the values
+  !   dupl : duplicate-handling flag; only forwarded on the host fallback
+  !          path, the device loop below always overwrites
+  !   x    : destination vector
+  !   info : 0 on success
+  ! The device loop is used only when both irl and val are OpenACC vectors.
+  !
+  subroutine c_oacc_ins_v(n, irl, val, dupl, x, info)
+    use psi_serial_mod
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(in) :: n, dupl
+    class(psb_i_base_vect_type), intent(inout) :: irl
+    class(psb_c_base_vect_type), intent(inout) :: val
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: i, isz
+    logical :: done_oacc
+
+    info = 0
+    if (psb_errstatus_fatal()) return
+
+    done_oacc = .false.
+    select type(virl => irl)
+    type is (psb_i_vect_oacc)
+      select type(vval => val)
+      type is (psb_c_vect_oacc)
+        ! Sync every operand that is flagged host-side before the
+        ! device loop touches it.
+        if (vval%is_host()) call vval%sync()
+        if (virl%is_host()) call virl%sync()
+        if (x%is_host()) call x%sync()
+        !$acc parallel loop present(x%v,virl%v,vval%v)
+        do i = 1, n
+          x%v(virl%v(i)) = vval%v(i)
+        end do
+        ! x%v was modified on the device.
+        call x%set_dev()
+        done_oacc = .true.
+      end select
+    end select
+
+    ! Host fallback: sync irl/val if they are OpenACC vectors flagged
+    ! device-side, then go through the array-based insert.
+    if (.not.done_oacc) then
+      select type(virl => irl)
+      type is (psb_i_vect_oacc)
+        if (virl%is_dev()) call virl%sync()
+      end select
+      select type(vval => val)
+      type is (psb_c_vect_oacc)
+        if (vval%is_dev()) call vval%sync()
+      end select
+      call x%ins(n, irl%v, val%v, dupl, info)
+    end if
+
+    if (info /= 0) then
+      call psb_errpush(info, 'oacc_vect_ins')
+      return
+    end if
+
+  end subroutine c_oacc_ins_v
+
+  !
+  ! Array-based insert: delegates to the base-class ins on the host copy.
+  ! x is synced first when flagged device-side and flagged host-side on exit.
+  !
+  subroutine c_oacc_ins_a(n, irl, val, dupl, x, info)
+    use psi_serial_mod
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(in) :: n, dupl
+    integer(psb_ipk_), intent(in) :: irl(:)
+    complex(psb_spk_), intent(in) :: val(:)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: i
+
+    info = 0
+    if (x%is_dev()) call x%sync()
+    call x%psb_c_base_vect_type%ins(n, irl, val, dupl, info)
+    call x%set_host()
+
+
+  end subroutine c_oacc_ins_a
+
+  !
+  ! Build x as a vector of n entries: free any previous contents, allocate,
+  ! flag the host copy as current and create the device copy.
+  ! Allocation failures are reported through psb_errpush.
+  !
+  subroutine c_oacc_bld_mn(x, n)
+    use psb_base_mod
+    implicit none
+    integer(psb_mpk_), intent(in) :: n
+    class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_) :: info
+
+    call x%free(info)
+    call x%all(ione*n, info)
+    if (info /= 0) then
+      call psb_errpush(info, 'c_oacc_bld_mn',&
+           & i_err=ione*(/n, n, n, n, n/))
+    end if
+    call x%set_host()
+    call x%sync_dev_space()
+
+  end subroutine c_oacc_bld_mn
+
+
+  !
+  ! Build x as a copy of the host array this(:): free any previous contents,
+  ! (re)allocate x%v to size(this), copy the values, flag the host copy as
+  ! current and create the device copy. Returns early on allocation failure.
+  !
+  subroutine c_oacc_bld_x(x, this)
+    use psb_base_mod
+    implicit none
+    complex(psb_spk_), intent(in) :: this(:)
+    class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_) :: info
+
+    call x%free(info)
+    call psb_realloc(size(this), x%v, info)
+    if (info /= 0) then
+      info = psb_err_alloc_request_
+      call psb_errpush(info, 'c_oacc_bld_x', &
+           i_err=(/size(this)*ione, izero, izero, izero, izero/))
+      return
+    end if
+    x%v(:) = this(:)
+    call x%set_host()
+    call x%sync_dev_space()
+
+  end subroutine c_oacc_bld_x
+
+  subroutine c_oacc_asb_m(n, x, info)
+    use psb_base_mod
+    implicit none
+    integer(psb_mpk_), intent(in) :: n
+
class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_mpk_) :: nd
+
+    info = psb_success_
+
+    ! The base-class assembly is only invoked when x%v is shorter than n.
+    if (x%is_dev()) then
+      nd = size(x%v)
+      if (nd < n) then
+        call x%sync()
+        call x%psb_c_base_vect_type%asb(n, info)
+        if (info == psb_success_) call x%sync()
+        call x%set_host()
+      end if
+    else
+      if (size(x%v) < n) then
+        call x%psb_c_base_vect_type%asb(n, info)
+        if (info == psb_success_) call x%sync()
+        call x%set_host()
+      end if
+    end if
+  end subroutine c_oacc_asb_m
+
+  !
+  ! Set x%v(first:last) = val on the device copy.
+  !   val         : value to store
+  !   first, last : optional bounds, clipped to [1, x%get_nrows()];
+  !                 default is the whole vector
+  ! On exit (non-empty range) x is flagged device-side.
+  !
+  subroutine c_oacc_set_scal(x, val, first, last)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    complex(psb_spk_), intent(in) :: val
+    integer(psb_ipk_), optional :: first, last
+
+    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: i
+
+    first_ = 1
+    last_ = x%get_nrows()
+    if (present(first)) first_ = max(1, first)
+    if (present(last)) last_ = min(last, last_)
+
+    ! Nothing to write for an empty range: leave data and state untouched.
+    if (last_ >= first_) then
+      ! Only part of the vector may be overwritten, and x is flagged
+      ! device-side afterwards: if the host copy is the current one it must
+      ! be uploaded first, otherwise the entries outside first:last are lost.
+      if (x%is_host()) call x%sync()
+
+      !$acc parallel loop present(x%v)
+      do i = first_, last_
+        x%v(i) = val
+      end do
+      !$acc end parallel loop
+
+      call x%set_dev()
+    end if
+  end subroutine c_oacc_set_scal
+
+  !
+  ! Set every entry of x to zero on the device.
+  !
+  subroutine c_oacc_zero(x)
+    use psi_serial_mod
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    ! The whole vector is about to be overwritten: flagging it device-side
+    ! up front means set_scal never sees a host-side state to upload.
+    call x%set_dev()
+    call x%set_scal(czero)
+  end subroutine c_oacc_zero
+
+  !
+  ! Number of entries of x%v; 0 when x%v is not allocated.
+  !
+  function c_oacc_get_nrows(x) result(res)
+    implicit none
+    class(psb_c_vect_oacc), intent(in) :: x
+    integer(psb_ipk_) :: res
+
+    ! Give the result a defined value on every path.
+    res = 0
+    if (allocated(x%v)) res = size(x%v)
+  end function c_oacc_get_nrows
+
+  !
+  ! Format tag of this vector class.
+  !
+  function c_oacc_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = "cOACC"
+
+  end function c_oacc_get_fmt
+
+
+  !
+  ! Dot product of the first n entries of x and y.
+  ! When y is also a psb_c_vect_oacc the sum is computed on the device;
+  ! otherwise x is synced if flagged device-side and y%dot is used.
+  !
+  function c_oacc_vect_dot(n, x, y) result(res)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    class(psb_c_base_vect_type), intent(inout) :: y
+    integer(psb_ipk_), intent(in) :: n
+    complex(psb_spk_) :: res
+    integer(psb_ipk_) :: info
+
+    res = czero
+!!$    write(0,*) 'oacc_dot_v'
+    select type(yy => y)
+    type is (psb_c_vect_oacc)
+      if (x%is_host()) call x%sync()
+      if (yy%is_host()) call yy%sync()
+      res = c_inner_oacc_dot(n, x%v, yy%v)
+    class default
+      if (x%is_dev()) call x%sync()
+      res = y%dot(n, x%v)
+    end select
+  contains
+    ! Device kernel: sum over i = 1..n of x(i)*y(i), as an OpenACC reduction.
+    ! NOTE(review): no conjugation is applied here; confirm this matches the
+    ! convention of the host path (y%dot) for complex vectors.
+    function c_inner_oacc_dot(n, x, y) result(res)
+      implicit none
+      complex(psb_spk_), intent(in) :: x(:)
+      complex(psb_spk_), intent(in) :: y(:)
+      integer(psb_ipk_), intent(in) :: n
+      complex(psb_spk_) :: res
+      integer(psb_ipk_) :: i
+
+      ! The reduction combines the partial sums with the incoming value of
+      ! res, and this function result is distinct from the host's res:
+      ! it must be initialised here.
+      res = czero
+      !$acc parallel loop reduction(+:res) present(x, y)
+      do i = 1, n
+        res = res + x(i) * y(i)
+      end do
+      !$acc end parallel loop
+    end function c_inner_oacc_dot
+  end function c_oacc_vect_dot
+
+  !
+  ! Dot product between x and the host array y(1:n), computed on the host
+  ! after syncing x when it is flagged device-side.
+  ! NOTE(review): reference BLAS provides cdotc/cdotu for complex data, not
+  ! cdot; confirm that a cdot symbol is supplied elsewhere in the build.
+  !
+  function c_oacc_dot_a(n, x, y) result(res)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    complex(psb_spk_), intent(in) :: y(:)
+    integer(psb_ipk_), intent(in) :: n
+    complex(psb_spk_) :: res
+    complex(psb_spk_), external :: cdot
+
+    if (x%is_dev()) call x%sync()
+    res = cdot(n, y, 1, x%v, 1)
+
+  end function c_oacc_dot_a
+
+
+  !
+  ! Make sure the communication buffer x%combuf holds at least n entries.
+  ! When it must grow, the old device copy is deleted, the base class
+  ! reallocates the host buffer, and a new device copy is created.
+  !   info : 0 when nothing had to be done, otherwise the status returned
+  !          by the base-class new_buffer
+  !
+  subroutine c_oacc_new_buffer(n,x,info)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(in) :: n
+    integer(psb_ipk_), intent(out) :: info
+
+    ! info is intent(out): define it also when the buffer is large enough.
+    info = 0
+    !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+    if (n > psb_size(x%combuf)) then
+      !write(0,*) 'oacc new_buffer: reallocating '
+      if (allocated(x%combuf)) then
+        !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+        !$acc exit data delete(x%combuf)
+      end if
+      call x%psb_c_base_vect_type%new_buffer(n,info)
+      !$acc enter data copyin(x%combuf)
+      !
call acc_copyin(x%combuf)
+    end if
+  end subroutine c_oacc_new_buffer
+
+  !
+  ! Create the device copy of x%v (copy-in), skipped for an empty vector.
+  !
+  subroutine c_oacc_sync_dev_space(x)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+!!$    write(0,*) 'oacc sync_dev_space'
+    if (psb_size(x%v)>0) call acc_copyin(x%v)
+  end subroutine c_oacc_sync_dev_space
+
+  !
+  ! Reconcile the host and device copies of x%v according to x%state:
+  !   is_dev  : refresh the host copy from the device
+  !   is_host : make sure a device copy exists, then refresh it from the host
+  ! In every case x is flagged is_sync on exit.
+  ! NOTE(review): acc_is_present is queried without checking that x%v is
+  ! allocated; confirm callers never reach this with an unallocated x%v.
+  !
+  subroutine c_oacc_sync(x)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    if (x%is_dev()) then
+      if (psb_size(x%v)>0) call acc_update_self(x%v)
+    end if
+    if (x%is_host()) then
+      if (.not.acc_is_present(x%v)) call c_oacc_sync_dev_space(x)
+      if (psb_size(x%v)>0) call acc_update_device(x%v)
+    end if
+    call x%set_sync()
+  end subroutine c_oacc_sync
+
+  ! Flag x%state as is_host.
+  subroutine c_oacc_set_host(x)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+
+    x%state = is_host
+  end subroutine c_oacc_set_host
+
+  ! Flag x%state as is_dev.
+  subroutine c_oacc_set_dev(x)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+
+    x%state = is_dev
+  end subroutine c_oacc_set_dev
+
+  ! Flag x%state as is_sync.
+  subroutine c_oacc_set_sync(x)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+
+    x%state = is_sync
+  end subroutine c_oacc_set_sync
+
+  ! True when x%state is is_dev.
+  function c_oacc_is_dev(x) result(res)
+    implicit none
+    class(psb_c_vect_oacc), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_dev)
+  end function c_oacc_is_dev
+
+  ! True when x%state is is_host.
+  function c_oacc_is_host(x) result(res)
+    implicit none
+    class(psb_c_vect_oacc), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_host)
+  end function c_oacc_is_host
+
+  ! True when x%state is is_sync.
+  function c_oacc_is_sync(x) result(res)
+    implicit none
+    class(psb_c_vect_oacc), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_sync)
+  end function c_oacc_is_sync
+
+  !
+  ! Allocate x%v with n entries, flag the host copy as current and create
+  ! the device copy. An allocation failure is reported via psb_errpush.
+  !
+  subroutine c_oacc_vect_all(n, x, info)
+    use psi_serial_mod
+    use psb_realloc_mod
+    implicit none
+    integer(psb_ipk_), intent(in) :: n
+    class(psb_c_vect_oacc), intent(out) :: x
+    integer(psb_ipk_), intent(out) :: info
+
+    call psb_realloc(n, x%v, info)
+    if (info /= 0) then
+      info = psb_err_alloc_request_
+      call psb_errpush(info, 'c_oacc_all', &
+
i_err=(/n, n, n, n, n/))
+    end if
+    call x%set_host()
+    call x%sync_dev_space()
+  end subroutine c_oacc_vect_all
+
+  !
+  ! Release the communication buffer, the device copy of x%v (if present)
+  ! and the host array x%v. Takes a type(...) argument, not class(...).
+  !
+  subroutine c_oacc_final_vect_free(x)
+    implicit none
+    type(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_) :: info
+    info = 0
+!!$    write(0,*) 'oacc final_vect_free'
+    call x%free_buffer(info)
+    if (allocated(x%v)) then
+      if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+      deallocate(x%v, stat=info)
+    end if
+
+  end subroutine c_oacc_final_vect_free
+
+  !
+  ! Release the communication buffer, the device copy of x%v (if present)
+  ! and then everything owned by the base class.
+  !   info : status returned by the base-class free
+  !
+  subroutine c_oacc_vect_free(x, info)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out) :: info
+    info = 0
+!!$    write(0,*) 'oacc vect_free'
+    call x%free_buffer(info)
+    ! free is also called on vectors that were never allocated (the build
+    ! routines call it first), so only query the device for an allocated
+    ! array, as c_oacc_final_vect_free does.
+    if (allocated(x%v)) then
+      if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+    end if
+    call x%psb_c_base_vect_type%free(info)
+  end subroutine c_oacc_vect_free
+
+  !
+  ! Release the communication buffer only when the library-wide setting
+  ! returned by psb_oacc_get_maybe_free_buffer() asks for it.
+  !
+  subroutine c_oacc_vect_maybe_free_buffer(x,info)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out) :: info
+
+    info = 0
+    if (psb_oacc_get_maybe_free_buffer()) then
+      !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+      call x%free_buffer(info)
+    end if
+
+  end subroutine c_oacc_vect_maybe_free_buffer
+
+  subroutine c_oacc_vect_free_buffer(x,info)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out) :: info
+!
write(0,*) 'oacc free_buffer'
+    info = 0
+    ! The buffer may never have been allocated: only query the device
+    ! for an allocated array.
+    if (allocated(x%combuf)) then
+      if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+    end if
+    call x%psb_c_base_vect_type%free_buffer(info)
+
+  end subroutine c_oacc_vect_free_buffer
+
+  !
+  ! Number of entries of x%v; 0 when x%v is not allocated.
+  !
+  function c_oacc_get_size(x) result(res)
+    implicit none
+    class(psb_c_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_) :: res
+
+    ! size() of an unallocated array is undefined: give the same answer
+    ! as c_oacc_get_nrows in that case.
+    res = 0
+    if (allocated(x%v)) res = size(x%v)
+  end function c_oacc_get_size
+
+end module psb_c_oacc_vect_mod
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
new file mode 100644
index 00000000..21907312
--- /dev/null
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -0,0 +1,290 @@
+module psb_d_oacc_csr_mat_mod
+
+  use iso_c_binding
+  use openacc
+  use psb_d_mat_mod
+  use psb_d_oacc_vect_mod
+
+  integer(psb_ipk_), parameter, private :: is_host = -1
+  integer(psb_ipk_), parameter, private :: is_sync = 0
+  integer(psb_ipk_), parameter, private :: is_dev = 1
+
+  type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat
+    integer(psb_ipk_) :: devstate = is_host
+  contains
+    procedure, nopass :: get_fmt => d_oacc_csr_get_fmt
+    procedure, pass(a) :: sizeof => d_oacc_csr_sizeof
+    procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
+    procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv
+    procedure, pass(a) :: scals => psb_d_oacc_csr_scals
+    procedure, pass(a) :: scalv => psb_d_oacc_csr_scal
+    procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz
+    procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo
+    procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt
+    procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo
+    procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt
+    procedure, pass(a) :: free => d_oacc_csr_free
+    procedure, pass(a) :: mold => psb_d_oacc_csr_mold
+    procedure, pass(a) :: all => d_oacc_csr_all
+    procedure, pass(a) :: is_host => d_oacc_csr_is_host
+    procedure, pass(a) :: is_sync =>
d_oacc_csr_is_sync
+    procedure, pass(a) :: is_dev => d_oacc_csr_is_dev
+    procedure, pass(a) :: set_host => d_oacc_csr_set_host
+    procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
+    procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
+    procedure, pass(a) :: free_dev_space => d_oacc_csr_free_dev_space
+    procedure, pass(a) :: sync_dev_space => d_oacc_csr_sync_dev_space
+    procedure, pass(a) :: sync => d_oacc_csr_sync
+  end type psb_d_oacc_csr_sparse_mat
+
+  ! The interfaces below declare separate module procedures: only the
+  ! signatures are given in this module.
+  interface
+    module subroutine psb_d_oacc_csr_mold(a,b,info)
+      class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_csr_mold
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info)
+      class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_csr_cp_from_fmt
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_csr_mv_from_coo(a,b,info)
+      class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_csr_mv_from_coo
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info)
+      class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_csr_mv_from_fmt
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+      class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in) :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x, y
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_d_oacc_csr_vect_mv
+  end interface
+
+  interface
+    module subroutine
psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+      class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in) :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x,y
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_d_oacc_csr_inner_vect_sv
+  end interface
+
+  ! Scalar form: d is a single real value.
+  interface
+    module subroutine psb_d_oacc_csr_scals(d, a, info)
+      class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in) :: d
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_csr_scals
+  end interface
+
+  ! Array form: d is a rank-1 array, with an optional side selector.
+  interface
+    module subroutine psb_d_oacc_csr_scal(d,a,info,side)
+      class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+      real(psb_dpk_), intent(in) :: d(:)
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: side
+    end subroutine psb_d_oacc_csr_scal
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_csr_reallocate_nz(nz,a)
+      class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in) :: nz
+    end subroutine psb_d_oacc_csr_reallocate_nz
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz)
+      class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+      integer(psb_ipk_), intent(in) :: m,n
+      integer(psb_ipk_), intent(in), optional :: nz
+    end subroutine psb_d_oacc_csr_allocate_mnnz
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_csr_cp_from_coo(a,b,info)
+      class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_csr_cp_from_coo
+  end interface
+
+contains
+
+
+  ! Delete the device copies of the CSR arrays val, ja and irp.
+  subroutine d_oacc_csr_free_dev_space(a)
+    use psb_base_mod
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+    !
+    ! Note: at least on GNU, if an array is allocated
+    ! but with size 0, then CREATE,UPDATE and DELETE
+    ! will fail
+    !
+    if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+    if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+    if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp)
+
+    return
+  end subroutine d_oacc_csr_free_dev_space
+
+  !
+  ! Release the device copies first, then everything owned by the parent
+  ! CSR type.
+  !
+  subroutine d_oacc_csr_free(a)
+    use psb_base_mod
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+    call a%free_dev_space()
+    call a%psb_d_csr_sparse_mat%free()
+
+    return
+  end subroutine d_oacc_csr_free
+
+  !
+  ! Size estimate: 8 plus the storage taken by val, irp and ja.
+  !
+  function d_oacc_csr_sizeof(a) result(res)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+
+    res = 8
+    res = res + psb_sizeof_dp * size(a%val)
+    res = res + psb_sizeof_ip * size(a%irp)
+    res = res + psb_sizeof_ip * size(a%ja)
+
+  end function d_oacc_csr_sizeof
+
+
+  ! Format tag of this matrix class.
+  function d_oacc_csr_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'CSROA'
+  end function d_oacc_csr_get_fmt
+
+  !
+  ! Allocate an m x n CSR matrix with room for nz entries
+  ! (val(nz), ja(nz), irp(m+1)) and create the device copies.
+  !   info : 0 on success, otherwise the stat value of the first
+  !          allocation that failed
+  !
+  subroutine d_oacc_csr_all(m, n, nz, a, info)
+    implicit none
+    integer(psb_ipk_), intent(in) :: m, n, nz
+    class(psb_d_oacc_csr_sparse_mat), intent(out) :: a
+    integer(psb_ipk_), intent(out) :: info
+
+    info = 0
+    call a%free()
+
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+
+    ! Stop at the first failure: a later successful allocate must not
+    ! overwrite a non-zero stat from an earlier one.
+    allocate(a%val(nz),stat=info)
+    if (info == 0) allocate(a%ja(nz),stat=info)
+    if (info == 0) allocate(a%irp(m+1),stat=info)
+    if (info == 0) call a%set_host()
+    if (info == 0) call a%sync_dev_space()
+  end subroutine d_oacc_csr_all
+
+  ! True when a%devstate is is_host.
+  function d_oacc_csr_is_host(a) result(res)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_host)
+  end function d_oacc_csr_is_host
+
+  ! True when a%devstate is is_sync.
+  function d_oacc_csr_is_sync(a) result(res)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_sync)
+  end function d_oacc_csr_is_sync
+
+  ! True when a%devstate is is_dev.
+  function d_oacc_csr_is_dev(a) result(res)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res =
(a%devstate == is_dev)
+  end function d_oacc_csr_is_dev
+
+  ! Flag a%devstate as is_host.
+  subroutine d_oacc_csr_set_host(a)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine d_oacc_csr_set_host
+
+  ! Flag a%devstate as is_sync.
+  subroutine d_oacc_csr_set_sync(a)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_sync
+  end subroutine d_oacc_csr_set_sync
+
+  ! Flag a%devstate as is_dev.
+  subroutine d_oacc_csr_set_dev(a)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_dev
+  end subroutine d_oacc_csr_set_dev
+
+  ! Create the device copies (copy-in) of val, ja and irp.
+  subroutine d_oacc_csr_sync_dev_space(a)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+    !
+    ! Note: at least on GNU, if an array is allocated
+    ! but with size 0, then CREATE,UPDATE and DELETE
+    ! will fail
+    !
+    if (psb_size(a%val)>0) call acc_copyin(a%val)
+    if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+    if (psb_size(a%irp)>0) call acc_copyin(a%irp)
+  end subroutine d_oacc_csr_sync_dev_space
+
+  !
+  ! Reconcile the host and device copies of val, ja and irp according to
+  ! a%devstate, then flag the matrix is_sync.
+  ! a is declared intent(in); the state flag is changed through the
+  ! pointer tmpa associated with it.
+  !
+  subroutine d_oacc_csr_sync(a)
+    implicit none
+    class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a
+    class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa
+    integer(psb_ipk_) :: info
+
+    tmpa => a
+    !
+    ! Note: at least on GNU, if an array is allocated
+    ! but with size 0, then CREATE,UPDATE and DELETE
+    ! will fail
+    !
+    ! is_dev: refresh the host copies; is_host: refresh the device copies.
+    if (a%is_dev()) then
+      if (psb_size(a%val)>0) call acc_update_self(a%val)
+      if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+      if (psb_size(a%irp)>0) call acc_update_self(a%irp)
+    else if (a%is_host()) then
+      if (psb_size(a%val)>0) call acc_update_device(a%val)
+      if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+      if (psb_size(a%irp)>0) call acc_update_device(a%irp)
+    end if
+    call tmpa%set_sync()
+  end subroutine d_oacc_csr_sync
+
+end module psb_d_oacc_csr_mat_mod
+
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
new file mode 100644
index 00000000..021face3
--- /dev/null
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -0,0 +1,272 @@
+module psb_d_oacc_ell_mat_mod
+  use iso_c_binding
+  use openacc
+  use psb_d_mat_mod
+  use psb_d_ell_mat_mod
+  use psb_d_oacc_vect_mod
+
+  integer(psb_ipk_), parameter, private :: is_host = -1
+  integer(psb_ipk_), parameter, private :: is_sync = 0
+  integer(psb_ipk_), parameter, private :: is_dev = 1
+
+  type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat
+    integer(psb_ipk_) :: devstate = is_host
+  contains
+    procedure, nopass :: get_fmt => d_oacc_ell_get_fmt
+    procedure, pass(a) :: sizeof => d_oacc_ell_sizeof
+    procedure, pass(a) :: is_host => d_oacc_ell_is_host
+    procedure, pass(a) :: is_sync => d_oacc_ell_is_sync
+    procedure, pass(a) :: is_dev => d_oacc_ell_is_dev
+    procedure, pass(a) :: set_host => d_oacc_ell_set_host
+    procedure, pass(a) :: set_sync => d_oacc_ell_set_sync
+    procedure, pass(a) :: set_dev => d_oacc_ell_set_dev
+    procedure, pass(a) :: sync_dev_space => d_oacc_ell_sync_dev_space
+    procedure, pass(a) :: sync => d_oacc_ell_sync
+    procedure, pass(a) :: free_dev_space => d_oacc_ell_free_dev_space
+    procedure, pass(a) :: free => d_oacc_ell_free
+    procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv
+    procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv
+    procedure, pass(a) :: scals => psb_d_oacc_ell_scals
+    procedure, pass(a) :: scalv => psb_d_oacc_ell_scal
+    procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz
+    procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz
+    procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo
+    procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt
+    procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo
+    procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt
+    procedure, pass(a) :: mold => psb_d_oacc_ell_mold
+
+  end type psb_d_oacc_ell_sparse_mat
+
+  ! The interfaces below declare separate module procedures: only the
+  ! signatures are given in this module.
+  interface
+    module subroutine psb_d_oacc_ell_mold(a,b,info)
+      class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+      class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_ell_mold
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info)
+      class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_ell_cp_from_fmt
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info)
+      class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+      class(psb_d_coo_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_ell_mv_from_coo
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info)
+      class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+      class(psb_d_base_sparse_mat), intent(inout) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_oacc_ell_mv_from_fmt
+  end interface
+
+  interface
+    module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+      class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+      real(psb_dpk_), intent(in) :: alpha, beta
+      class(psb_d_base_vect_type), intent(inout) :: x, y
+      integer(psb_ipk_), intent(out) :: info
+      character, optional, intent(in) :: trans
+    end subroutine psb_d_oacc_ell_vect_mv
+  end interface
+
+  interface
+
module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_ell_scals(d, a, info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_scals + end interface + + interface + module subroutine psb_d_oacc_ell_scal(d,a,info,side) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_ell_scal + end interface + + interface + module subroutine psb_d_oacc_ell_reallocate_nz(nz,a) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_cp_from_coo + end interface + +contains + + subroutine d_oacc_ell_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+    if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+    if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+    if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+    if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
+    return
+  end subroutine d_oacc_ell_free_dev_space
+
+  !
+  ! Release the device copies first, then let the parent ELL type
+  ! release the host storage.
+  !
+  subroutine d_oacc_ell_free(a)
+    use psb_base_mod
+    implicit none
+    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+    call a%free_dev_space()
+    call a%psb_d_ell_sparse_mat%free()
+
+    return
+  end subroutine d_oacc_ell_free
+
+  !
+  ! Memory footprint estimate: 8 bytes of fixed overhead plus the
+  ! storage of the val/ja/irn/idiag components.
+  ! If the device holds the most recent data, the host copy is
+  ! refreshed first.
+  !
+  function d_oacc_ell_sizeof(a) result(res)
+    implicit none
+    class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+
+    !
+    ! psb_size (already used by the other routines of this module)
+    ! replaces the SIZE intrinsic, which must not be applied to an
+    ! allocatable component that is not currently allocated
+    ! (e.g. after free()).
+    !
+    res = 8
+    res = res + psb_sizeof_dp * psb_size(a%val)
+    res = res + psb_sizeof_ip * psb_size(a%ja)
+    res = res + psb_sizeof_ip * psb_size(a%irn)
+    res = res + psb_sizeof_ip * psb_size(a%idiag)
+
+  end function d_oacc_ell_sizeof
+
+  subroutine d_oacc_ell_sync_dev_space(a)
+    implicit none
+    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+
+    !
+    ! Note: at least on GNU, if an array is allocated
+    !       but with size 0, then CREATE,UPDATE and DELETE
+    !       will fail
+    !
+ if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + end subroutine d_oacc_ell_sync_dev_space + + function d_oacc_ell_is_host(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_ell_is_host + + function d_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_ell_is_sync + + function d_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_ell_is_dev + + subroutine d_oacc_ell_set_host(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_ell_set_host + + subroutine d_oacc_ell_set_sync(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_ell_set_sync + + subroutine d_oacc_ell_set_dev(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_ell_set_dev + + function d_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELLOA' + end function d_oacc_ell_get_fmt + + subroutine d_oacc_ell_sync(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (a%is_dev()) then + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + else if (a%is_host()) then + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + end if + call tmpa%set_sync() + end subroutine d_oacc_ell_sync + +end module psb_d_oacc_ell_mat_mod diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..264bbcce --- /dev/null +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -0,0 +1,279 @@ +module psb_d_oacc_hll_mat_mod + use iso_c_binding + use openacc + use psb_d_mat_mod + use psb_d_hll_mat_mod + use psb_d_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => d_oacc_hll_sizeof + procedure, pass(a) :: is_host => d_oacc_hll_is_host + procedure, pass(a) :: is_sync => d_oacc_hll_is_sync + procedure, pass(a) :: is_dev => d_oacc_hll_is_dev + procedure, pass(a) :: set_host => d_oacc_hll_set_host + procedure, pass(a) :: set_sync => d_oacc_hll_set_sync + procedure, pass(a) :: set_dev => d_oacc_hll_set_dev + procedure, pass(a) :: sync_dev_space => d_oacc_hll_sync_dev_space + procedure, pass(a) :: sync => d_oacc_hll_sync + procedure, pass(a) :: free_dev_space => d_oacc_hll_free_dev_space + procedure, pass(a) :: free => d_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => 
psb_d_oacc_hll_inner_vect_sv + procedure, pass(a) :: scals => psb_d_oacc_hll_scals + procedure, pass(a) :: scalv => psb_d_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_hll_mold + + end type psb_d_oacc_hll_sparse_mat + + interface + module subroutine psb_d_oacc_hll_mold(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mold + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), 
intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_hll_scals(d, a, info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_scals + end interface + + interface + module subroutine psb_d_oacc_hll_scal(d,a,info,side) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_hll_scal + end interface + + interface + module subroutine psb_d_oacc_hll_reallocate_nz(nz,a) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_coo + end interface + +contains + + subroutine d_oacc_hll_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) 
:: info
+
+    !
+    ! Note: at least on GNU, if an array is allocated
+    !       but with size 0, then CREATE,UPDATE and DELETE
+    !       will fail
+    !
+    if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+    if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+    if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+    if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
+    if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs)
+    return
+  end subroutine d_oacc_hll_free_dev_space
+
+  !
+  ! Release the device copies first, then let the parent HLL type
+  ! release the host storage.
+  !
+  subroutine d_oacc_hll_free(a)
+    use psb_base_mod
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+    call a%free_dev_space()
+    call a%psb_d_hll_sparse_mat%free()
+
+    return
+  end subroutine d_oacc_hll_free
+
+  !
+  ! Memory footprint estimate: 8 bytes of fixed overhead plus the
+  ! storage of the val/ja/irn/idiag/hkoffs components.
+  ! If the device holds the most recent data, the host copy is
+  ! refreshed first.
+  !
+  function d_oacc_hll_sizeof(a) result(res)
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+
+    !
+    ! psb_size (already used by the other routines of this module)
+    ! replaces the SIZE intrinsic, which must not be applied to an
+    ! allocatable component that is not currently allocated
+    ! (e.g. after free()).
+    !
+    res = 8
+    res = res + psb_sizeof_dp * psb_size(a%val)
+    res = res + psb_sizeof_ip * psb_size(a%ja)
+    res = res + psb_sizeof_ip * psb_size(a%irn)
+    res = res + psb_sizeof_ip * psb_size(a%idiag)
+    res = res + psb_sizeof_ip * psb_size(a%hkoffs)
+  end function d_oacc_hll_sizeof
+
+
+
+  ! True when devstate marks the host copy as the up-to-date one.
+  function d_oacc_hll_is_host(a) result(res)
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_host)
+  end function d_oacc_hll_is_host
+
+  ! True when devstate marks host and device copies as synchronized.
+  function d_oacc_hll_is_sync(a) result(res)
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_sync)
+  end function d_oacc_hll_is_sync
+
+  ! True when devstate marks the device copy as the up-to-date one.
+  function d_oacc_hll_is_dev(a) result(res)
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_dev)
+  end function d_oacc_hll_is_dev
+
+  ! Record that the host copy is now the up-to-date one.
+  subroutine d_oacc_hll_set_host(a)
+    implicit none
+    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine d_oacc_hll_set_host
+
+  subroutine d_oacc_hll_set_sync(a)
+    implicit
none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_hll_set_sync + + subroutine d_oacc_hll_set_dev(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_hll_set_dev + + function d_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLLOA' + end function d_oacc_hll_get_fmt + + subroutine d_oacc_hll_sync_dev_space(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs) + end subroutine d_oacc_hll_sync_dev_space + + + subroutine d_oacc_hll_sync(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (a%is_dev()) then + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs) + else if (a%is_host()) then + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine d_oacc_hll_sync + +end module psb_d_oacc_hll_mat_mod diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 new file mode 100644 index 00000000..1e3f07d7 --- /dev/null +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -0,0 +1,1013 @@ +module psb_d_oacc_vect_mod + use iso_c_binding + use openacc + use psb_const_mod + use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod + use psb_d_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_base_vect_type) :: psb_d_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => d_oacc_get_nrows + procedure, nopass :: get_fmt => d_oacc_get_fmt + + procedure, pass(x) :: all => d_oacc_vect_all + procedure, pass(x) :: zero => d_oacc_zero + procedure, pass(x) :: asb_m => d_oacc_asb_m + procedure, pass(x) :: sync => d_oacc_sync + procedure, pass(x) :: sync_dev_space => d_oacc_sync_dev_space + procedure, pass(x) :: bld_x => d_oacc_bld_x + procedure, pass(x) :: bld_mn => d_oacc_bld_mn + procedure, pass(x) :: free => d_oacc_vect_free + procedure, pass(x) :: free_buffer => d_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => 
d_oacc_vect_maybe_free_buffer + procedure, pass(x) :: ins_a => d_oacc_ins_a + procedure, pass(x) :: ins_v => d_oacc_ins_v + procedure, pass(x) :: is_host => d_oacc_is_host + procedure, pass(x) :: is_dev => d_oacc_is_dev + procedure, pass(x) :: is_sync => d_oacc_is_sync + procedure, pass(x) :: set_host => d_oacc_set_host + procedure, pass(x) :: set_dev => d_oacc_set_dev + procedure, pass(x) :: set_sync => d_oacc_set_sync + procedure, pass(x) :: set_scal => d_oacc_set_scal + + procedure, pass(x) :: new_buffer => d_oacc_new_buffer + procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x + procedure, pass(x) :: gthzbuf => d_oacc_gthzbuf + procedure, pass(y) :: sctb => d_oacc_sctb + procedure, pass(y) :: sctb_x => d_oacc_sctb_x + procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf + procedure, nopass :: device_wait => d_oacc_device_wait + + procedure, pass(x) :: get_size => d_oacc_get_size + + procedure, pass(x) :: dot_v => d_oacc_vect_dot + procedure, pass(x) :: dot_a => d_oacc_dot_a + procedure, pass(y) :: axpby_v => d_oacc_axpby_v + procedure, pass(y) :: axpby_a => d_oacc_axpby_a + procedure, pass(z) :: upd_xyz => d_oacc_upd_xyz + procedure, pass(y) :: mlt_a => d_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2 + procedure, pass(x) :: scal => d_oacc_scal + procedure, pass(x) :: nrm2 => d_oacc_nrm2 + procedure, pass(x) :: amax => d_oacc_amax + procedure, pass(x) :: asum => d_oacc_asum + procedure, pass(x) :: absval1 => d_oacc_absval1 + procedure, pass(x) :: absval2 => d_oacc_absval2 + final :: d_oacc_final_vect_free + end type psb_d_vect_oacc + + interface + subroutine psb_d_oacc_mlt_v(x, y, info) + import + implicit none + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_mlt_v + end interface + + interface + subroutine psb_d_oacc_mlt_v_2(alpha, x, y, 
beta, z, info, conjgx, conjgy) + import + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + end subroutine psb_d_oacc_mlt_v_2 + end interface + +contains + + subroutine d_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine d_oacc_device_wait + + subroutine d_oacc_absval1(x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: n + + if (x%is_host()) call x%sync() + n = size(x%v) + call d_inner_oacc_absval1(n,x%v) + call x%set_dev() + contains + subroutine d_inner_oacc_absval1(n,x) + implicit none + real(psb_dpk_), intent(inout) :: x(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop present(x) + do i = 1, n + x(i) = abs(x(i)) + end do + end subroutine d_inner_oacc_absval1 + end subroutine d_oacc_absval1 + + subroutine d_oacc_absval2(x, y) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + + n = min(size(x%v), size(y%v)) + select type (yy => y) + class is (psb_d_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + call d_inner_oacc_absval2(n,x%v,yy%v) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_d_base_vect_type%absval(y) + end select + contains + subroutine d_inner_oacc_absval2(n,x,y) + implicit none + real(psb_dpk_), intent(inout) :: x(:),y(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = abs(x(i)) + end do + end subroutine d_inner_oacc_absval2 + end subroutine d_oacc_absval2 + + subroutine d_oacc_scal(alpha, x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), 
intent(in) :: alpha + integer(psb_ipk_) :: info + if (x%is_host()) call x%sync() + call d_inner_oacc_scal(alpha, x%v) + call x%set_dev() + contains + subroutine d_inner_oacc_scal(alpha, x) + real(psb_dpk_), intent(in) :: alpha + real(psb_dpk_), intent(inout) :: x(:) + integer(psb_ipk_) :: i + !$acc parallel loop present(x) + do i = 1, size(x) + x(i) = alpha * x(i) + end do + end subroutine d_inner_oacc_scal + end subroutine d_oacc_scal + + function d_oacc_nrm2(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() +!!$ write(0,*)'oacc_nrm2' + res = d_inner_oacc_nrm2(n, x%v) + contains + function d_inner_oacc_nrm2(n, x) result(res) + integer(psb_ipk_) :: n + real(psb_dpk_) :: x(:) + real(psb_dpk_) :: res + real(psb_dpk_) :: sum, mx + integer(psb_ipk_) :: i + mx = dzero + !$acc parallel loop reduction(max:mx) present(x) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + if (mx == dzero) then + res = mx + else + sum = dzero + !$acc parallel loop reduction(+:sum) present(x) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end if + end function d_inner_oacc_nrm2 + end function d_oacc_nrm2 + + function d_oacc_amax(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + res = d_inner_oacc_amax(n, x%v) + contains + function d_inner_oacc_amax(n, x) result(res) + integer(psb_ipk_) :: n + real(psb_dpk_) :: x(:) + real(psb_dpk_) :: res + real(psb_dpk_) :: max_val + integer(psb_ipk_) :: i + max_val = dzero + !$acc parallel loop reduction(max:max_val) present(x) + do i = 1, n + if (abs(x(i)) > max_val) max_val = abs(x(i)) + end do + res = max_val + end function d_inner_oacc_amax + end function d_oacc_amax + + function d_oacc_asum(n, x) result(res) + 
implicit none
+    class(psb_d_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(in) :: n
+    real(psb_dpk_) :: res
+    integer(psb_ipk_) :: info
+    real(psb_dpk_) :: sum
+    integer(psb_ipk_) :: i
+    if (x%is_host()) call x%sync()
+    res = d_inner_oacc_asum(n, x%v)
+  contains
+    function d_inner_oacc_asum(n, x) result(res)
+      integer(psb_ipk_) :: n
+      real(psb_dpk_) :: x(:)
+      real(psb_dpk_) :: res
+      integer(psb_ipk_) :: i
+      res = dzero
+      !$acc parallel loop reduction(+:res) present(x)
+      do i = 1, n
+        res = res + abs(x(i))
+      end do
+    end function d_inner_oacc_asum
+  end function d_oacc_asum
+
+
+  !
+  ! Entry-wise product with a plain array:  y%v(i) = y%v(i) * x(i)
+  !
+  !   x    - host array of multipliers
+  !   y    - vector updated in place
+  !   info - always set to 0
+  !
+  ! x is an ordinary host array with no device copy guaranteed, so the
+  ! operation is carried out on the host: y is refreshed from the
+  ! device if needed, updated, and then flagged as host-resident.
+  ! (The former "!$acc parallel loop present(x,y)" required x to be
+  ! present on the device and modified a device copy that set_host()
+  ! immediately declared out of date.)
+  !
+  subroutine d_oacc_mlt_a(x, y, info)
+    implicit none
+    real(psb_dpk_), intent(in) :: x(:)
+    class(psb_d_vect_oacc), intent(inout) :: y
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: i, n
+
+    info = 0
+    if (y%is_dev()) call y%sync()
+    ! Never run past the end of the shorter operand
+    n = min(size(x), size(y%v))
+    do i = 1, n
+      y%v(i) = y%v(i) * x(i)
+    end do
+    call y%set_host()
+  end subroutine d_oacc_mlt_a
+
+  !
+  ! Scaled entry-wise product with plain arrays:
+  !     z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
+  !
+  !   alpha, beta - scalar coefficients
+  !   x, y        - host arrays
+  !   z           - vector updated in place
+  !   info        - always set to 0
+  !
+  ! As in d_oacc_mlt_a, the inputs are host arrays, so the update is
+  ! done on the host and z is flagged as host-resident afterwards.
+  !
+  subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
+    implicit none
+    real(psb_dpk_), intent(in) :: alpha, beta
+    real(psb_dpk_), intent(in) :: x(:)
+    real(psb_dpk_), intent(in) :: y(:)
+    class(psb_d_vect_oacc), intent(inout) :: z
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: i, n
+
+    info = 0
+    if (z%is_dev()) call z%sync()
+    ! Never run past the end of the shortest operand
+    n = min(size(z%v), size(x), size(y))
+    do i = 1, n
+      z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
+    end do
+    call z%set_host()
+  end subroutine d_oacc_mlt_a_2
+
+  subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
+    !use psi_serial_mod
+    implicit none
+    integer(psb_ipk_), intent(in) :: m
+    class(psb_d_base_vect_type), intent(inout) :: x
+    class(psb_d_vect_oacc), intent(inout) :: y
+    real(psb_dpk_), intent(in) :: alpha, beta
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: nx, ny, i
+
+    info = psb_success_
+
+    select type(xx => x)
+    type is (psb_d_vect_oacc)
+      if ((beta /= dzero) .and.
y%is_host()) call y%sync()
+      if (xx%is_host()) call xx%sync()
+      nx = size(xx%v)
+      ny = size(y%v)
+      if ((nx < m) .or. (ny < m)) then
+        info = psb_err_internal_error_
+      else
+        call d_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info)
+      end if
+      call y%set_dev()
+    class default
+      if ((alpha /= dzero) .and. (x%is_dev())) call x%sync()
+      call y%axpby(m, alpha, x%v, beta, info)
+    end select
+  contains
+    !
+    ! Device kernel for y(1:m) = alpha*x(1:m) + beta*y(1:m);
+    ! both arrays must already be present on the device.
+    !
+    subroutine d_inner_oacc_axpby(m, alpha, x, beta, y, info)
+      !use psi_serial_mod
+      implicit none
+      integer(psb_ipk_), intent(in) :: m
+      real(psb_dpk_), intent(inout) :: x(:)
+      real(psb_dpk_), intent(inout) :: y(:)
+      real(psb_dpk_), intent(in) :: alpha, beta
+      integer(psb_ipk_), intent(out) :: info
+      ! info is intent(out): give it a defined value before returning
+      info = psb_success_
+      !$acc parallel present(x,y)
+      !$acc loop
+      do i = 1, m
+        y(i) = alpha * x(i) + beta * y(i)
+      end do
+      !$acc end parallel
+    end subroutine d_inner_oacc_axpby
+  end subroutine d_oacc_axpby_v
+
+  !
+  ! y%v(1:m) = alpha*x(1:m) + beta*y%v(1:m), with x a plain host array.
+  !
+  !   m           - number of entries to update
+  !   alpha, beta - scalar coefficients
+  !   x           - host array
+  !   y           - vector updated in place on the host
+  !   info        - set to psb_success_
+  !
+  ! The update is done on the host; y is refreshed from the device
+  ! only when its previous contents are needed (beta /= 0), and is
+  ! flagged as host-resident afterwards.
+  !
+  subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
+    !use psi_serial_mod
+    implicit none
+    integer(psb_ipk_), intent(in) :: m
+    real(psb_dpk_), intent(in) :: x(:)
+    class(psb_d_vect_oacc), intent(inout) :: y
+    real(psb_dpk_), intent(in) :: alpha, beta
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: i
+
+    ! info was previously left undefined on return
+    info = psb_success_
+    if ((beta /= dzero) .and. (y%is_dev())) call y%sync()
+
+    do i = 1, m
+      y%v(i) = alpha * x(i) + beta * y%v(i)
+    end do
+    call y%set_host()
+  end subroutine d_oacc_axpby_a
+
+  subroutine d_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info)
+    use psi_serial_mod
+    implicit none
+    integer(psb_ipk_), intent(in) :: m
+    class(psb_d_base_vect_type), intent(inout) :: x
+    class(psb_d_base_vect_type), intent(inout) :: y
+    class(psb_d_vect_oacc), intent(inout) :: z
+    real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_) :: nx, ny, nz, i
+    logical :: gpu_done
+
+    info = psb_success_
+    gpu_done = .false.
+ + select type(xx => x) + class is (psb_d_vect_oacc) + select type(yy => y) + class is (psb_d_vect_oacc) + select type(zz => z) + class is (psb_d_vect_oacc) + if ((beta /= dzero) .and. yy%is_host()) call yy%sync() + if ((delta /= dzero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop present(xx%v,yy%v,zz%v) + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. + end select + end select + end select + + if (.not. gpu_done) then + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) + end if + end subroutine d_oacc_upd_xyz + + subroutine d_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info, k + logical :: acc_done + if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + acc_done = .false. + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. 
+    end select
+
+    !
+    ! Host fallback, used when idx is not an OpenACC index vector.
+    ! It must mirror the device path above, which scatters
+    ! y%combuf(i:i+n-1) through idx%v(i:i+n-1): the buffer is therefore
+    ! read at offset k+i-1 (it was read at k, picking up the wrong
+    ! entries whenever i > 1).
+    !
+    if (.not.acc_done) then
+      if (idx%is_dev()) call idx%sync()
+      if (y%is_dev()) call y%sync()
+      do k = 1, n
+        y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k+i-1)
+      end do
+      ! y%v was modified on the host: flag it, as d_oacc_sctb_x does
+      call y%set_host()
+    end if
+
+  contains
+    !
+    ! Device scatter: y(idx(k)) = x(k) + beta*y(idx(k)), k = 1..n.
+    ! x(1:n) is pushed to the device first; x and y must be present there.
+    !
+    subroutine inner_sctb(n,x,beta,y,idx)
+      integer(psb_mpk_) :: n
+      integer(psb_ipk_) :: idx(:)
+      real(psb_dpk_) :: beta,x(:), y(:)
+      integer(psb_ipk_) :: k
+      !$acc update device(x(1:n))
+      !$acc parallel loop present(x,y)
+      do k = 1, n
+        y(idx(k)) = x(k) + beta *y(idx(k))
+      end do
+      !$acc end parallel loop
+    end subroutine inner_sctb
+
+  end subroutine d_oacc_sctb_buf
+
+  subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
+    use psb_base_mod
+    implicit none
+    integer(psb_ipk_):: i
+    integer(psb_mpk_):: n
+    class(psb_i_base_vect_type) :: idx
+    real(psb_dpk_) :: beta, x(:)
+    class(psb_d_vect_oacc) :: y
+    integer(psb_ipk_) :: info, ni, k
+    logical :: acc_done
+
+    acc_done = .false.
+    select type(ii => idx)
+    class is (psb_i_vect_oacc)
+      if (ii%is_host()) call ii%sync()
+      if (y%is_host()) call y%sync()
+      if (acc_is_present(x)) then
+        call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
+        acc_done = .true.
+ call y%set_dev() + end if + end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if + + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_dpk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + + end subroutine d_oacc_sctb_x + + subroutine d_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync() + + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine d_oacc_sctb + + subroutine d_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + class(psb_d_vect_oacc) :: x + integer(psb_ipk_) :: info,k + logical :: acc_done + + info = 0 + acc_done = .false. + + if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. 
+ end select + + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if + + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_dpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! + !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine d_oacc_gthzbuf + + subroutine d_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type):: idx + real(psb_dpk_) :: y(:) + class(psb_d_vect_oacc):: x + integer(psb_ipk_) :: info, k + logical :: acc_done + + info = 0 + acc_done = .false. + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if + end select + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_dpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! 
+ !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine d_oacc_gthzv_x + + subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_d_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_d_vect_oacc) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + !$acc parallel loop present(x%v,virl%v,vval%v) + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. 
+ end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync() + end select + select type(vval => val) + type is (psb_d_vect_oacc) + if (vval%is_dev()) call vval%sync() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine d_oacc_ins_v + + subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + + + end subroutine d_oacc_ins_a + + subroutine d_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%free(info) + call x%all(ione*n, info) + if (info /= 0) then + call psb_errpush(info, 'd_oacc_bld_mn',& + & i_err=ione*(/n, n, n, n, n/)) + end if + call x%set_host() + call x%sync_dev_space() + + end subroutine d_oacc_bld_mn + + + subroutine d_oacc_bld_x(x, this) + use psb_base_mod + implicit none + real(psb_dpk_), intent(in) :: this(:) + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%free(info) + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'd_oacc_bld_x', & + i_err=(/size(this)*ione, izero, izero, izero, izero/)) + return + end if + x%v(:) = this(:) + call x%set_host() + call x%sync_dev_space() + + end subroutine d_oacc_bld_x + + subroutine d_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_oacc), 
intent(inout) :: x
    integer(psb_ipk_), intent(out) :: info
    integer(psb_mpk_) :: nd

    info = psb_success_

    if (x%is_dev()) then
      nd = size(x%v)
      if (nd < n) then
        call x%sync()
        call x%psb_d_base_vect_type%asb(n, info)
        if (info == psb_success_) call x%sync()
        call x%set_host()
      end if
    else
      if (size(x%v) < n) then
        call x%psb_d_base_vect_type%asb(n, info)
        if (info == psb_success_) call x%sync()
        call x%set_host()
      end if
    end if
  end subroutine d_oacc_asb_m

  !
  ! Store VAL into x%v(first:last) with a device kernel and flag the device
  ! copy as the current one.
  !   val   - value to store
  !   first - optional lower bound (clipped to 1)
  !   last  - optional upper bound (clipped to get_nrows())
  !
  subroutine d_oacc_set_scal(x, val, first, last)
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x
    real(psb_dpk_), intent(in)            :: val
    integer(psb_ipk_), optional           :: first, last

    integer(psb_ipk_) :: first_, last_, nr_, i

    nr_    = x%get_nrows()
    first_ = 1
    last_  = nr_
    if (present(first)) first_ = max(1, first)
    if (present(last))  last_  = min(last, last_)

    ! A partial update of host-current data must refresh the device copy
    ! first: set_dev() below declares the whole device array current, so the
    ! entries outside first_:last_ would otherwise be stale.
    if (allocated(x%v)) then
      if (x%is_host() .and. ((first_ > 1) .or. (last_ < nr_))) call x%sync()
    end if

    !$acc parallel loop present(x%v)
    do i = first_, last_
      x%v(i) = val
    end do
    !$acc end parallel loop

    call x%set_dev()
  end subroutine d_oacc_set_scal

  !
  ! Zero the whole vector on the device.
  !
  subroutine d_oacc_zero(x)
    use psi_serial_mod
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x
    ! Every entry is overwritten, so the state is switched to "device"
    ! before set_scal runs.
    call x%set_dev()
    call x%set_scal(dzero)
  end subroutine d_oacc_zero

  !
  ! Number of entries in x%v; 0 when the vector has not been allocated.
  !
  function d_oacc_get_nrows(x) result(res)
    implicit none
    class(psb_d_vect_oacc), intent(in) :: x
    integer(psb_ipk_) :: res

    ! Give the result a defined value on every path.
    res = 0
    if (allocated(x%v)) res = size(x%v)
  end function d_oacc_get_nrows

  !
  ! Format tag of this vector class.
  !
  function d_oacc_get_fmt() result(res)
    implicit none
    character(len=5) :: res
    res = "dOACC"

  end function d_oacc_get_fmt


  !
  ! Dot product of the first N entries of X and Y.
  ! When Y is also an OpenACC vector both operands are brought up to date on
  ! the device and reduced there; otherwise X is brought up to date on the
  ! host and the computation is delegated to Y's own dot method.
  !
  function d_oacc_vect_dot(n, x, y) result(res)
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x
    class(psb_d_base_vect_type), intent(inout) :: y
    integer(psb_ipk_), intent(in) :: n
    real(psb_dpk_) :: res

    res = dzero
    select type(yy => y)
    type is (psb_d_vect_oacc)
      if (x%is_host()) call x%sync()
      if (yy%is_host()) call yy%sync()
      res = d_inner_oacc_dot(n, x%v, yy%v)
    class default
      if (x%is_dev()) call x%sync()
      res = y%dot(n, x%v)
    end select
  contains
    ! Device-side reduction over x(1:n)*y(1:n); both arrays must be present.
    function d_inner_oacc_dot(n, x, y) result(res)
      implicit none
      real(psb_dpk_), intent(in) :: x(:)
      real(psb_dpk_), intent(in) :: y(:)
      integer(psb_ipk_), intent(in) :: n
      real(psb_dpk_) :: res
      integer(psb_ipk_) :: i

      ! The reduction adds onto the incoming value of res, so it has to be
      ! defined before the loop (it is this function's own result variable,
      ! not the host's).
      res = dzero
      !$acc parallel loop reduction(+:res) present(x, y)
      do i = 1, n
        res = res + x(i) * y(i)
      end do
      !$acc end parallel loop
    end function d_inner_oacc_dot
  end function d_oacc_vect_dot

  !
  ! Dot product of the first N entries of X with a plain host array Y,
  ! computed on the host through BLAS ddot.
  !
  function d_oacc_dot_a(n, x, y) result(res)
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x
    real(psb_dpk_), intent(in) :: y(:)
    integer(psb_ipk_), intent(in) :: n
    real(psb_dpk_) :: res
    real(psb_dpk_), external :: ddot

    if (x%is_dev()) call x%sync()
    res = ddot(n, y, 1, x%v, 1)

  end function d_oacc_dot_a


  !
  ! Make sure x%combuf holds at least N entries, (re)creating its device
  ! mirror when the buffer is reallocated.
  !   info - 0 when nothing had to be done, otherwise the status returned by
  !          the base-class new_buffer
  !
  subroutine d_oacc_new_buffer(n,x,info)
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x
    integer(psb_ipk_), intent(in) :: n
    integer(psb_ipk_), intent(out) :: info

    ! intent(out): must be defined even when the buffer is already big enough.
    info = 0
    if (n > psb_size(x%combuf)) then
      if (allocated(x%combuf)) then
        ! Drop the device mirror of the old buffer before it is reallocated.
        !$acc exit data delete(x%combuf)
      end if
      call x%psb_d_base_vect_type%new_buffer(n,info)
      !$acc enter data copyin(x%combuf)
    end if
  end subroutine d_oacc_new_buffer

  !
  ! Create the device mirror of x%v (copyin) when the vector is non-empty.
  !
  subroutine d_oacc_sync_dev_space(x)
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x
    if (psb_size(x%v)>0) call acc_copyin(x%v)
  end subroutine d_oacc_sync_dev_space

  !
  ! Bring host and device copies of x%v in agreement, copying from whichever
  ! side is flagged as current, then mark the vector as synchronised.
  !
  subroutine d_oacc_sync(x)
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x
    if (x%is_dev()) then
      if (psb_size(x%v)>0) call acc_update_self(x%v)
    end if
    if (x%is_host()) then
      ! Only touch the OpenACC runtime for a non-empty vector, so that an
      ! unallocated x%v is never handed to acc_is_present.
      if (psb_size(x%v)>0) then
        if (.not.acc_is_present(x%v)) call d_oacc_sync_dev_space(x)
        call acc_update_device(x%v)
      end if
    end if
    call x%set_sync()
  end subroutine d_oacc_sync

  ! Flag the host copy as the current one.
  subroutine d_oacc_set_host(x)
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x

    x%state = is_host
  end subroutine d_oacc_set_host

  ! Flag the device copy as the current one.
  subroutine d_oacc_set_dev(x)
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x

    x%state = is_dev
  end subroutine d_oacc_set_dev

  ! Flag host and device copies as identical.
  subroutine d_oacc_set_sync(x)
    implicit none
    class(psb_d_vect_oacc), intent(inout) :: x

    x%state = is_sync
  end subroutine d_oacc_set_sync

  ! True when the device copy is flagged as the current one.
  function d_oacc_is_dev(x) result(res)
    implicit none
    class(psb_d_vect_oacc), intent(in) :: x
    logical :: res

    res = (x%state == is_dev)
  end function d_oacc_is_dev

  ! True when the host copy is flagged as the current one.
  function d_oacc_is_host(x) result(res)
    implicit none
    class(psb_d_vect_oacc), intent(in) :: x
    logical :: res

    res = (x%state == is_host)
  end function d_oacc_is_host

  ! True when host and device copies are flagged as identical.
  function d_oacc_is_sync(x) result(res)
    implicit none
    class(psb_d_vect_oacc), intent(in) :: x
    logical :: res

    res = (x%state == is_sync)
  end function d_oacc_is_sync

  subroutine d_oacc_vect_all(n, x, info)
    use psi_serial_mod
    use psb_realloc_mod
    implicit none
    integer(psb_ipk_), intent(in) :: n
    class(psb_d_vect_oacc), intent(out) :: x
    integer(psb_ipk_), intent(out) :: info

    call psb_realloc(n, x%v, info)
    if (info /= 0) then
      info = psb_err_alloc_request_
      call psb_errpush(info, 'd_oacc_all', &
i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + call x%sync_dev_space() + end subroutine d_oacc_vect_all + + subroutine d_oacc_final_vect_free(x) + implicit none + type(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine d_oacc_final_vect_free + + subroutine d_oacc_vect_free(x, info) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + call x%psb_d_base_vect_type%free(info) + end subroutine d_oacc_vect_free + + subroutine d_oacc_vect_maybe_free_buffer(x,info) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if + + end subroutine d_oacc_vect_maybe_free_buffer + + subroutine d_oacc_vect_free_buffer(x,info) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info +! 
write(0,*) 'oacc free_buffer' + info = 0 + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_d_base_vect_type%free_buffer(info) + + end subroutine d_oacc_vect_free_buffer + + function d_oacc_get_size(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + res = size(x%v) + end function d_oacc_get_size + +end module psb_d_oacc_vect_mod diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 new file mode 100644 index 00000000..344ad931 --- /dev/null +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -0,0 +1,617 @@ +module psb_i_oacc_vect_mod + use iso_c_binding + use openacc + use psb_const_mod + use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod + use psb_i_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => i_oacc_get_nrows + procedure, nopass :: get_fmt => i_oacc_get_fmt + + procedure, pass(x) :: all => i_oacc_vect_all + procedure, pass(x) :: zero => i_oacc_zero + procedure, pass(x) :: asb_m => i_oacc_asb_m + procedure, pass(x) :: sync => i_oacc_sync + procedure, pass(x) :: sync_dev_space => i_oacc_sync_dev_space + procedure, pass(x) :: bld_x => i_oacc_bld_x + procedure, pass(x) :: bld_mn => i_oacc_bld_mn + procedure, pass(x) :: free => i_oacc_vect_free + procedure, pass(x) :: free_buffer => i_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => i_oacc_vect_maybe_free_buffer + procedure, pass(x) :: ins_a => i_oacc_ins_a + procedure, pass(x) :: ins_v => i_oacc_ins_v + procedure, pass(x) :: is_host => i_oacc_is_host + procedure, pass(x) :: is_dev => i_oacc_is_dev + procedure, pass(x) :: is_sync => i_oacc_is_sync + procedure, pass(x) :: set_host => i_oacc_set_host + procedure, pass(x) :: 
set_dev => i_oacc_set_dev
    procedure, pass(x) :: set_sync => i_oacc_set_sync
    procedure, pass(x) :: set_scal => i_oacc_set_scal

    procedure, pass(x) :: new_buffer => i_oacc_new_buffer
    procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x
    procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf
    procedure, pass(y) :: sctb => i_oacc_sctb
    procedure, pass(y) :: sctb_x => i_oacc_sctb_x
    procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf
    procedure, nopass :: device_wait => i_oacc_device_wait

    procedure, pass(x) :: get_size => i_oacc_get_size

    final :: i_oacc_final_vect_free
  end type psb_i_vect_oacc


contains

  !
  ! Block until all outstanding OpenACC device activity has completed.
  !
  subroutine i_oacc_device_wait()
    implicit none
    call acc_wait_all()
  end subroutine i_oacc_device_wait


  !
  ! Scatter-accumulate from the communication buffer into the vector:
  !     y%v(idx(i+k-1)) = beta*y%v(idx(i+k-1)) + y%combuf(i+k-1),  k = 1..n
  ! Runs on the device when IDX is an OpenACC index vector, on the host
  ! otherwise.
  !   i    - first position used in both idx and y%combuf
  !   n    - number of entries to scatter
  !   idx  - index vector
  !   beta - scaling applied to the existing entries of y
  !
  subroutine i_oacc_sctb_buf(i, n, idx, beta, y)
    use psb_base_mod
    implicit none
    integer(psb_ipk_) :: i
    integer(psb_mpk_) :: n
    class(psb_i_base_vect_type) :: idx
    integer(psb_ipk_) :: beta
    class(psb_i_vect_oacc) :: y
    integer(psb_ipk_) :: k
    logical :: acc_done

    if (.not.allocated(y%combuf)) then
      write(0,*) 'allocation error for y%combuf '
      call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
      return
    end if

    acc_done = .false.
    select type(ii => idx)
    class is (psb_i_vect_oacc)
      if (ii%is_host()) call ii%sync()
      if (y%is_host()) call y%sync()
      call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
      call y%set_dev()
      acc_done = .true.
    end select

    if (.not.acc_done) then
      if (idx%is_dev()) call idx%sync()
      if (y%is_dev()) call y%sync()
      do k = 1, n
        ! Buffer entries start at position i, exactly like the slice
        ! combuf(i:i+n-1) handed to the device kernel above.
        y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k+i-1)
      end do
      ! The host copy has just been modified: flag it as the current one,
      ! as the host paths of sctb and sctb_x do.
      call y%set_host()
    end if

  contains
    ! Device kernel: x is the buffer slice, y the full vector, idx the
    ! matching slice of indices.
    subroutine inner_sctb(n,x,beta,y,idx)
      integer(psb_mpk_) :: n
      integer(psb_ipk_) :: idx(:)
      integer(psb_ipk_) :: beta,x(:), y(:)
      integer(psb_ipk_) :: k
      ! Push the host contents of the buffer slice to the device first.
      !$acc update device(x(1:n))
      !$acc parallel loop present(x,y)
      do k = 1, n
        y(idx(k)) = x(k) + beta *y(idx(k))
      end do
      !$acc end parallel loop
    end subroutine inner_sctb

  end subroutine i_oacc_sctb_buf

  subroutine i_oacc_sctb_x(i, n, idx, x, beta, y)
    use psb_base_mod
    implicit none
    integer(psb_ipk_):: i
    integer(psb_mpk_):: n
    class(psb_i_base_vect_type) :: idx
    integer(psb_ipk_) :: beta, x(:)
    class(psb_i_vect_oacc) :: y
    integer(psb_ipk_) :: info, ni, k
    logical :: acc_done

    acc_done = .false.
    select type(ii => idx)
    class is (psb_i_vect_oacc)
      if (ii%is_host()) call ii%sync()
      if (y%is_host()) call y%sync()
      if (acc_is_present(x)) then
        call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
        acc_done = .true.
+ call y%set_dev() + end if + end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if + + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_ipk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + + end subroutine i_oacc_sctb_x + + subroutine i_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync() + + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine i_oacc_sctb + + subroutine i_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + class(psb_i_vect_oacc) :: x + integer(psb_ipk_) :: info,k + logical :: acc_done + + info = 0 + acc_done = .false. + + if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. 
+ end select + + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if + + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_ipk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! + !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine i_oacc_gthzbuf + + subroutine i_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type):: idx + integer(psb_ipk_) :: y(:) + class(psb_i_vect_oacc):: x + integer(psb_ipk_) :: info, k + logical :: acc_done + + info = 0 + acc_done = .false. + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if + end select + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_ipk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! 
+ !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine i_oacc_gthzv_x + + subroutine i_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_i_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_i_vect_oacc) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + !$acc parallel loop present(x%v,virl%v,vval%v) + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. 
+ end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync() + end select + select type(vval => val) + type is (psb_i_vect_oacc) + if (vval%is_dev()) call vval%sync() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine i_oacc_ins_v + + subroutine i_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + + + end subroutine i_oacc_ins_a + + subroutine i_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%free(info) + call x%all(ione*n, info) + if (info /= 0) then + call psb_errpush(info, 'i_oacc_bld_mn',& + & i_err=ione*(/n, n, n, n, n/)) + end if + call x%set_host() + call x%sync_dev_space() + + end subroutine i_oacc_bld_mn + + + subroutine i_oacc_bld_x(x, this) + use psb_base_mod + implicit none + integer(psb_ipk_), intent(in) :: this(:) + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%free(info) + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'i_oacc_bld_x', & + i_err=(/size(this)*ione, izero, izero, izero, izero/)) + return + end if + x%v(:) = this(:) + call x%set_host() + call x%sync_dev_space() + + end subroutine i_oacc_bld_x + + subroutine i_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + 
class(psb_i_vect_oacc), intent(inout) :: x
    integer(psb_ipk_), intent(out) :: info
    integer(psb_mpk_) :: nd

    info = psb_success_

    if (x%is_dev()) then
      nd = size(x%v)
      if (nd < n) then
        call x%sync()
        call x%psb_i_base_vect_type%asb(n, info)
        if (info == psb_success_) call x%sync()
        call x%set_host()
      end if
    else
      if (size(x%v) < n) then
        call x%psb_i_base_vect_type%asb(n, info)
        if (info == psb_success_) call x%sync()
        call x%set_host()
      end if
    end if
  end subroutine i_oacc_asb_m

  !
  ! Store VAL into x%v(first:last) with a device kernel and flag the device
  ! copy as the current one.
  !   val   - value to store
  !   first - optional lower bound (clipped to 1)
  !   last  - optional upper bound (clipped to get_nrows())
  !
  subroutine i_oacc_set_scal(x, val, first, last)
    implicit none
    class(psb_i_vect_oacc), intent(inout) :: x
    integer(psb_ipk_), intent(in)         :: val
    integer(psb_ipk_), optional           :: first, last

    integer(psb_ipk_) :: first_, last_, nr_, i

    nr_    = x%get_nrows()
    first_ = 1
    last_  = nr_
    if (present(first)) first_ = max(1, first)
    if (present(last))  last_  = min(last, last_)

    ! A partial update of host-current data must refresh the device copy
    ! first: set_dev() below declares the whole device array current, so the
    ! entries outside first_:last_ would otherwise be stale.
    if (allocated(x%v)) then
      if (x%is_host() .and. ((first_ > 1) .or. (last_ < nr_))) call x%sync()
    end if

    !$acc parallel loop present(x%v)
    do i = first_, last_
      x%v(i) = val
    end do
    !$acc end parallel loop

    call x%set_dev()
  end subroutine i_oacc_set_scal

  !
  ! Zero the whole vector on the device.
  !
  subroutine i_oacc_zero(x)
    use psi_serial_mod
    implicit none
    class(psb_i_vect_oacc), intent(inout) :: x
    ! Every entry is overwritten, so the state is switched to "device"
    ! before set_scal runs.
    call x%set_dev()
    call x%set_scal(izero)
  end subroutine i_oacc_zero

  !
  ! Number of entries in x%v; 0 when the vector has not been allocated.
  !
  function i_oacc_get_nrows(x) result(res)
    implicit none
    class(psb_i_vect_oacc), intent(in) :: x
    integer(psb_ipk_) :: res

    ! Give the result a defined value on every path.
    res = 0
    if (allocated(x%v)) res = size(x%v)
  end function i_oacc_get_nrows

  !
  ! Format tag of this vector class.
  !
  function i_oacc_get_fmt() result(res)
    implicit none
    character(len=5) :: res
    res = "iOACC"

  end function i_oacc_get_fmt


  !
  ! Make sure x%combuf holds at least N entries, (re)creating its device
  ! mirror when the buffer is reallocated.
  !   info - 0 when nothing had to be done, otherwise the status returned by
  !          the base-class new_buffer
  !
  subroutine i_oacc_new_buffer(n,x,info)
    implicit none
    class(psb_i_vect_oacc), intent(inout) :: x
    integer(psb_ipk_), intent(in) :: n
    integer(psb_ipk_), intent(out) :: info

    ! intent(out): must be defined even when the buffer is already big enough.
    info = 0
    if (n > psb_size(x%combuf)) then
      if (allocated(x%combuf)) then
        ! Drop the device mirror of the old buffer before it is reallocated.
        !$acc exit data delete(x%combuf)
      end if
      call x%psb_i_base_vect_type%new_buffer(n,info)
      !$acc enter data copyin(x%combuf)
    end if
  end subroutine i_oacc_new_buffer

  !
  ! Create the device mirror of x%v (copyin) when the vector is non-empty.
  !
  subroutine i_oacc_sync_dev_space(x)
    implicit none
    class(psb_i_vect_oacc), intent(inout) :: x
    if (psb_size(x%v)>0) call acc_copyin(x%v)
  end subroutine i_oacc_sync_dev_space

  !
  ! Bring host and device copies of x%v in agreement, copying from whichever
  ! side is flagged as current, then mark the vector as synchronised.
  !
  subroutine i_oacc_sync(x)
    implicit none
    class(psb_i_vect_oacc), intent(inout) :: x
    if (x%is_dev()) then
      if (psb_size(x%v)>0) call acc_update_self(x%v)
    end if
    if (x%is_host()) then
      ! Only touch the OpenACC runtime for a non-empty vector, so that an
      ! unallocated x%v is never handed to acc_is_present.
      if (psb_size(x%v)>0) then
        if (.not.acc_is_present(x%v)) call i_oacc_sync_dev_space(x)
        call acc_update_device(x%v)
      end if
    end if
    call x%set_sync()
  end subroutine i_oacc_sync

  ! Flag the host copy as the current one.
  subroutine i_oacc_set_host(x)
    implicit none
    class(psb_i_vect_oacc), intent(inout) :: x

    x%state = is_host
  end subroutine i_oacc_set_host

  ! Flag the device copy as the current one.
  subroutine i_oacc_set_dev(x)
    implicit none
    class(psb_i_vect_oacc), intent(inout) :: x

    x%state = is_dev
  end subroutine i_oacc_set_dev

  ! Flag host and device copies as identical.
  subroutine i_oacc_set_sync(x)
    implicit none
    class(psb_i_vect_oacc), intent(inout) :: x

    x%state = is_sync
  end subroutine i_oacc_set_sync

  ! True when the device copy is flagged as the current one.
  function i_oacc_is_dev(x) result(res)
    implicit none
    class(psb_i_vect_oacc), intent(in) :: x
    logical :: res

    res = (x%state == is_dev)
  end function i_oacc_is_dev

  ! True when the host copy is flagged as the current one.
  function i_oacc_is_host(x) result(res)
    implicit none
    class(psb_i_vect_oacc), intent(in) :: x
    logical :: res

    res = (x%state == is_host)
  end function i_oacc_is_host

  ! True when host and device copies are flagged as identical.
  function i_oacc_is_sync(x) result(res)
    implicit none
    class(psb_i_vect_oacc), intent(in) :: x
    logical :: res

    res = (x%state == is_sync)
  end function i_oacc_is_sync

  subroutine i_oacc_vect_all(n, x, info)
    use psi_serial_mod
    use psb_realloc_mod
    implicit none
    integer(psb_ipk_), intent(in) :: n
    class(psb_i_vect_oacc), intent(out) :: x
    integer(psb_ipk_), intent(out) :: info

    call psb_realloc(n, x%v, info)
    if
(info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'i_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + call x%sync_dev_space() + end subroutine i_oacc_vect_all + + subroutine i_oacc_final_vect_free(x) + implicit none + type(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine i_oacc_final_vect_free + + subroutine i_oacc_vect_free(x, info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + call x%psb_i_base_vect_type%free(info) + end subroutine i_oacc_vect_free + + subroutine i_oacc_vect_maybe_free_buffer(x,info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if + + end subroutine i_oacc_vect_maybe_free_buffer + + subroutine i_oacc_vect_free_buffer(x,info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info +! 
write(0,*) 'oacc free_buffer' + info = 0 + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_i_base_vect_type%free_buffer(info) + + end subroutine i_oacc_vect_free_buffer + + function i_oacc_get_size(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + res = size(x%v) + end function i_oacc_get_size + +end module psb_i_oacc_vect_mod diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 new file mode 100644 index 00000000..85b561a9 --- /dev/null +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -0,0 +1,619 @@ +module psb_l_oacc_vect_mod + use iso_c_binding + use openacc + use psb_const_mod + use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod + use psb_l_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_l_base_vect_type) :: psb_l_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => l_oacc_get_nrows + procedure, nopass :: get_fmt => l_oacc_get_fmt + + procedure, pass(x) :: all => l_oacc_vect_all + procedure, pass(x) :: zero => l_oacc_zero + procedure, pass(x) :: asb_m => l_oacc_asb_m + procedure, pass(x) :: sync => l_oacc_sync + procedure, pass(x) :: sync_dev_space => l_oacc_sync_dev_space + procedure, pass(x) :: bld_x => l_oacc_bld_x + procedure, pass(x) :: bld_mn => l_oacc_bld_mn + procedure, pass(x) :: free => l_oacc_vect_free + procedure, pass(x) :: free_buffer => l_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => l_oacc_vect_maybe_free_buffer + procedure, pass(x) :: ins_a => l_oacc_ins_a + procedure, pass(x) :: ins_v => l_oacc_ins_v + procedure, pass(x) :: is_host => l_oacc_is_host + procedure, pass(x) :: is_dev => l_oacc_is_dev + procedure, pass(x) :: is_sync => l_oacc_is_sync + procedure, pass(x) :: set_host 
=> l_oacc_set_host + procedure, pass(x) :: set_dev => l_oacc_set_dev + procedure, pass(x) :: set_sync => l_oacc_set_sync + procedure, pass(x) :: set_scal => l_oacc_set_scal + + procedure, pass(x) :: new_buffer => l_oacc_new_buffer + procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x + procedure, pass(x) :: gthzbuf => l_oacc_gthzbuf + procedure, pass(y) :: sctb => l_oacc_sctb + procedure, pass(y) :: sctb_x => l_oacc_sctb_x + procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf + procedure, nopass :: device_wait => l_oacc_device_wait + + procedure, pass(x) :: get_size => l_oacc_get_size + + final :: l_oacc_final_vect_free + end type psb_l_vect_oacc + + +contains + + subroutine l_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine l_oacc_device_wait + + + subroutine l_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + integer(psb_lpk_) :: beta + class(psb_l_vect_oacc) :: y + integer(psb_ipk_) :: info, k + logical :: acc_done + if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + acc_done = .false. + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. 
+ end select + + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k) + end do + end if + + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_lpk_) :: beta,x(:), y(:) + integer(psb_ipk_) :: k + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + + end subroutine l_oacc_sctb_buf + + subroutine l_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i + integer(psb_mpk_):: n + class(psb_i_base_vect_type) :: idx + integer(psb_lpk_) :: beta, x(:) + class(psb_l_vect_oacc) :: y + integer(psb_ipk_) :: info, ni, k + logical :: acc_done + + acc_done = .false. + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (acc_is_present(x)) then + call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1)) + acc_done = .true. 
+ call y%set_dev() + end if + end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if + + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_lpk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + + end subroutine l_oacc_sctb_x + + subroutine l_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_lpk_) :: beta, x(:) + class(psb_l_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync() + + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine l_oacc_sctb + + subroutine l_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + class(psb_l_vect_oacc) :: x + integer(psb_ipk_) :: info,k + logical :: acc_done + + info = 0 + acc_done = .false. + + if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. 
+ end select + + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if + + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_lpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! + !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine l_oacc_gthzbuf + + subroutine l_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type):: idx + integer(psb_lpk_) :: y(:) + class(psb_l_vect_oacc):: x + integer(psb_ipk_) :: info, k + logical :: acc_done + + info = 0 + acc_done = .false. + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if + end select + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_lpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! 
+ !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine l_oacc_gthzv_x + + subroutine l_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_l_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_l_vect_oacc) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + !$acc parallel loop present(x%v,virl%v,vval%v) + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. 
+      end select
+    end select
+
+    ! Host fallback: bring the host copies of irl and val up to date (when they
+    ! are OpenACC vectors whose device copy is current) and delegate to the
+    ! array-based insert.
+    if (.not.done_oacc) then
+      select type(virl => irl)
+      type is (psb_i_vect_oacc)
+        if (virl%is_dev()) call virl%sync()
+      end select
+      select type(vval => val)
+      type is (psb_l_vect_oacc)
+        if (vval%is_dev()) call vval%sync()
+      end select
+      call x%ins(n, irl%v, val%v, dupl, info)
+    end if
+
+    if (info /= 0) then
+      call psb_errpush(info, 'oacc_vect_ins')
+      return
+    end if
+
+  end subroutine l_oacc_ins_v
+
+  ! Insert n entries given as plain host arrays (indices irl, values val).
+  ! The work is done by the base-class ins on the host copy: x is pulled back
+  ! from the device first if needed and is flagged host-current afterwards.
+  ! info carries the status returned by the base-class routine.
+  subroutine l_oacc_ins_a(n, irl, val, dupl, x, info)
+    use psi_serial_mod
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(in) :: n, dupl
+    integer(psb_ipk_), intent(in) :: irl(:)
+    integer(psb_lpk_), intent(in) :: val(:)
+    integer(psb_ipk_), intent(out) :: info
+
+    integer(psb_ipk_) :: i
+
+    info = 0
+    if (x%is_dev()) call x%sync()
+    call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info)
+    call x%set_host()
+
+
+  end subroutine l_oacc_ins_a
+
+  ! Build x as a vector of n entries: release whatever x held, allocate via
+  ! x%all, then flag the host copy as current and create the device copy.
+  ! An allocation failure is pushed on the error stack; execution continues.
+  subroutine l_oacc_bld_mn(x, n)
+    use psb_base_mod
+    implicit none
+    integer(psb_mpk_), intent(in) :: n
+    class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_) :: info
+
+    call x%free(info)
+    call x%all(ione*n, info)
+    if (info /= 0) then
+      call psb_errpush(info, 'l_oacc_bld_mn',&
+           & i_err=ione*(/n, n, n, n, n/))
+    end if
+    call x%set_host()
+    call x%sync_dev_space()
+
+  end subroutine l_oacc_bld_mn
+
+
+  ! Build x as a copy of the host array this: release x, (re)allocate x%v to
+  ! size(this), copy the values, flag host-current and create the device copy.
+  ! On allocation failure the error is pushed and the routine returns early.
+  subroutine l_oacc_bld_x(x, this)
+    use psb_base_mod
+    implicit none
+    integer(psb_lpk_), intent(in) :: this(:)
+    class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_) :: info
+
+    call x%free(info)
+    call psb_realloc(size(this), x%v, info)
+    if (info /= 0) then
+      info = psb_err_alloc_request_
+      call psb_errpush(info, 'l_oacc_bld_x', &
+           i_err=(/size(this)*ione, izero, izero, izero, izero/))
+      return
+    end if
+    x%v(:) = this(:)
+    call x%set_host()
+    call x%sync_dev_space()
+
+  end subroutine l_oacc_bld_x
+
+  ! Assembly step: makes sure x holds at least n entries (body follows).
+  subroutine l_oacc_asb_m(n, x, info)
+    use psb_base_mod
+    implicit none
+    integer(psb_mpk_), intent(in) :: n
+
class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_mpk_) :: nd
+
+    info = psb_success_
+
+    ! Only act when the vector is shorter than n; the base-class asb does the
+    ! resizing on the host copy, after which x is flagged host-current.
+    if (x%is_dev()) then
+      nd = size(x%v)
+      if (nd < n) then
+        call x%sync()
+        call x%psb_l_base_vect_type%asb(n, info)
+        if (info == psb_success_) call x%sync()
+        call x%set_host()
+      end if
+    else
+      if (size(x%v) < n) then
+        call x%psb_l_base_vect_type%asb(n, info)
+        if (info == psb_success_) call x%sync()
+        call x%set_host()
+      end if
+    end if
+  end subroutine l_oacc_asb_m
+
+  ! Set x%v(first:last) = val on the device and flag the device copy as
+  ! current.
+  !   first, last : optional bounds; clipped to 1 and x%get_nrows().
+  ! When only part of the vector is overwritten and the host copy is the
+  ! current one, the host data is pushed to the device first; otherwise the
+  ! entries outside first:last would be lost once the state flips to "dev".
+  subroutine l_oacc_set_scal(x, val, first, last)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_lpk_), intent(in) :: val
+    integer(psb_ipk_), optional :: first, last
+
+    integer(psb_ipk_) :: first_, last_, nrows, i
+
+    nrows  = x%get_nrows()
+    first_ = 1
+    last_  = nrows
+    if (present(first)) first_ = max(1, first)
+    if (present(last)) last_ = min(last, last_)
+
+    ! A full-range fill overwrites everything, so no upload is needed then.
+    if (x%is_host() .and. ((first_ > 1) .or. (last_ < nrows))) call x%sync()
+
+    !$acc parallel loop present(x%v)
+    do i = first_, last_
+      x%v(i) = val
+    end do
+    !$acc end parallel loop
+
+    call x%set_dev()
+  end subroutine l_oacc_set_scal
+
+  ! Fill the whole vector with lzero through set_scal.
+  ! The state is switched to "dev" before the fill.
+  ! NOTE(review): presumably because every entry is overwritten, so the host
+  ! contents never need to be uploaded - confirm.
+  subroutine l_oacc_zero(x)
+    use psi_serial_mod
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+    call x%set_dev()
+    call x%set_scal(lzero)
+  end subroutine l_oacc_zero
+
+  ! Number of entries stored in x%v; 0 when x%v is not allocated.
+  function l_oacc_get_nrows(x) result(res)
+    implicit none
+    class(psb_l_vect_oacc), intent(in) :: x
+    integer(psb_ipk_) :: res
+
+    ! Give the result a defined value even for an unallocated vector.
+    res = 0
+    if (allocated(x%v)) res = size(x%v)
+  end function l_oacc_get_nrows
+
+  ! Format tag of this vector class.
+  function l_oacc_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = "lOACC"
+
+  end function l_oacc_get_fmt
+
+
+  ! Make sure the communication buffer x%combuf holds at least n entries and
+  ! has a device copy; nothing is done when the buffer is already big enough.
+  subroutine l_oacc_new_buffer(n,x,info)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(in) :: n
+    integer(psb_ipk_), intent(out) :: info
+
+    !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+    if (n > psb_size(x%combuf)) then
+      !write(0,*) 'oacc new_buffer: reallocating '
+      if (allocated(x%combuf)) then
+        !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+        !$acc exit data delete(x%combuf)
+
end if
+      call x%psb_l_base_vect_type%new_buffer(n,info)
+      !$acc enter data copyin(x%combuf)
+      ! call acc_copyin(x%combuf)
+    end if
+  end subroutine l_oacc_new_buffer
+
+  ! Create the device copy of x%v (copy-in); skipped for an empty vector.
+  subroutine l_oacc_sync_dev_space(x)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+!!$    write(0,*) 'oacc sync_dev_space'
+    if (psb_size(x%v)>0) call acc_copyin(x%v)
+  end subroutine l_oacc_sync_dev_space
+
+  ! Bring host and device copies of x%v into agreement and flag x as "sync":
+  !   state dev  -> download the device data to the host;
+  !   state host -> create the device copy if missing, then upload.
+  ! Every OpenACC runtime call, including the presence query, is skipped for
+  ! an unallocated or zero-length vector: x%v is then not a valid argument
+  ! for acc_is_present and there is nothing to transfer.
+  subroutine l_oacc_sync(x)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+    if (psb_size(x%v)>0) then
+      if (x%is_dev()) then
+        call acc_update_self(x%v)
+      else if (x%is_host()) then
+        if (.not.acc_is_present(x%v)) call l_oacc_sync_dev_space(x)
+        call acc_update_device(x%v)
+      end if
+    end if
+    call x%set_sync()
+  end subroutine l_oacc_sync
+
+  ! Flag the host copy as the current one.
+  subroutine l_oacc_set_host(x)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+
+    x%state = is_host
+  end subroutine l_oacc_set_host
+
+  ! Flag the device copy as the current one.
+  subroutine l_oacc_set_dev(x)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+
+    x%state = is_dev
+  end subroutine l_oacc_set_dev
+
+  ! Flag host and device copies as identical.
+  subroutine l_oacc_set_sync(x)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+
+    x%state = is_sync
+  end subroutine l_oacc_set_sync
+
+  ! True when the state flag equals is_dev.
+  function l_oacc_is_dev(x) result(res)
+    implicit none
+    class(psb_l_vect_oacc), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_dev)
+  end function l_oacc_is_dev
+
+  ! True when the state flag equals is_host.
+  function l_oacc_is_host(x) result(res)
+    implicit none
+    class(psb_l_vect_oacc), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_host)
+  end function l_oacc_is_host
+
+  ! True when the state flag equals is_sync.
+  function l_oacc_is_sync(x) result(res)
+    implicit none
+    class(psb_l_vect_oacc), intent(in) :: x
+    logical :: res
+
+    res = (x%state == is_sync)
+  end function l_oacc_is_sync
+
+  ! Allocate x%v with n entries, flag the host copy as current and create the
+  ! device copy.
+  subroutine l_oacc_vect_all(n, x, info)
+    use psi_serial_mod
+    use psb_realloc_mod
+    implicit none
+    integer(psb_ipk_), intent(in) :: n
+    class(psb_l_vect_oacc), intent(out) :: x
+    integer(psb_ipk_), intent(out) :: info
+
+    call psb_realloc(n, x%v, info)
+    if
(info /= 0) then
+      info = psb_err_alloc_request_
+      call psb_errpush(info, 'l_oacc_all', &
+           i_err=(/n, n, n, n, n/))
+    end if
+    call x%set_host()
+    call x%sync_dev_space()
+  end subroutine l_oacc_vect_all
+
+  ! Release the communication buffer, the device copy of x%v and x%v itself.
+  ! The dummy is a non-polymorphic type(psb_l_vect_oacc).
+  subroutine l_oacc_final_vect_free(x)
+    implicit none
+    type(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_) :: info
+    info = 0
+!!$    write(0,*) 'oacc final_vect_free'
+    call x%free_buffer(info)
+    if (allocated(x%v)) then
+      if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+      deallocate(x%v, stat=info)
+    end if
+
+  end subroutine l_oacc_final_vect_free
+
+  ! Release the communication buffer, the device copy of x%v and, through the
+  ! base class, the host storage. info returns the base-class status.
+  subroutine l_oacc_vect_free(x, info)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out) :: info
+    info = 0
+!!$    write(0,*) 'oacc vect_free'
+    call x%free_buffer(info)
+    ! Same guard as l_oacc_final_vect_free: an unallocated array must not be
+    ! handed to the OpenACC presence query.
+    if (allocated(x%v)) then
+      if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+    end if
+    call x%psb_l_base_vect_type%free(info)
+  end subroutine l_oacc_vect_free
+
+  ! Release the communication buffer only when the module-wide switch
+  ! returned by psb_oacc_get_maybe_free_buffer() is on.
+  subroutine l_oacc_vect_maybe_free_buffer(x,info)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out) :: info
+
+    info = 0
+    if (psb_oacc_get_maybe_free_buffer()) then
+      !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+      call x%free_buffer(info)
+    end if
+
+  end subroutine l_oacc_vect_maybe_free_buffer
+
+  ! Release the device copy of x%combuf and then, through the base class,
+  ! the host buffer. info returns the base-class status.
+  subroutine l_oacc_vect_free_buffer(x,info)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out) :: info
+!    write(0,*) 'oacc free_buffer'
+    info = 0
+    ! The buffer may never have been created: only query and delete the
+    ! device copy when the host array exists.
+    if (allocated(x%combuf)) then
+      if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+    end if
+    call x%psb_l_base_vect_type%free_buffer(info)
+
+  end subroutine l_oacc_vect_free_buffer
+
+  ! Number of entries in x%v; 0 when x%v is not allocated.
+  function l_oacc_get_size(x) result(res)
+    implicit none
+    class(psb_l_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_) :: res
+
+    if (allocated(x%v)) then
+      res = size(x%v)
+    else
+      res = 0
+    end if
+  end function l_oacc_get_size
+
+end module psb_l_oacc_vect_mod
diff --git a/openacc/psb_oacc_env_mod.F90 b/openacc/psb_oacc_env_mod.F90
new file mode 100644
index 00000000..6d810f74
--- /dev/null
+++ b/openacc/psb_oacc_env_mod.F90
@@ -0,0 +1,29 @@
+module psb_oacc_env_mod
+  use psb_penv_mod
+  use psb_const_mod
+  use psb_error_mod
+  logical, private :: oacc_do_maybe_free_buffer = .false.
+
+contains
+  function psb_oacc_get_maybe_free_buffer() result(res)
+    logical :: res
+    res = oacc_do_maybe_free_buffer
+  end function psb_oacc_get_maybe_free_buffer
+
+  subroutine psb_oacc_set_maybe_free_buffer(val)
+    logical, intent(in) :: val
+    oacc_do_maybe_free_buffer = val
+  end subroutine psb_oacc_set_maybe_free_buffer
+
+  subroutine psb_oacc_init(ctxt, dev)
+    type(psb_ctxt_type), intent(in) :: ctxt
+    integer, intent(in), optional :: dev
+    oacc_do_maybe_free_buffer = .false.
+ end subroutine psb_oacc_init + + subroutine psb_oacc_exit() + integer :: res + + end subroutine psb_oacc_exit + +end module psb_oacc_env_mod diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90 new file mode 100644 index 00000000..7d3f9406 --- /dev/null +++ b/openacc/psb_oacc_mod.F90 @@ -0,0 +1,26 @@ +module psb_oacc_mod + use psb_const_mod + + use psb_oacc_env_mod + + use psb_i_oacc_vect_mod + use psb_l_oacc_vect_mod + use psb_s_oacc_vect_mod + use psb_d_oacc_vect_mod + use psb_c_oacc_vect_mod + use psb_z_oacc_vect_mod + + use psb_s_oacc_csr_mat_mod + use psb_d_oacc_csr_mat_mod + use psb_c_oacc_csr_mat_mod + use psb_z_oacc_csr_mat_mod + use psb_s_oacc_ell_mat_mod + use psb_d_oacc_ell_mat_mod + use psb_c_oacc_ell_mat_mod + use psb_z_oacc_ell_mat_mod + use psb_s_oacc_hll_mat_mod + use psb_d_oacc_hll_mat_mod + use psb_c_oacc_hll_mat_mod + use psb_z_oacc_hll_mat_mod + +end module psb_oacc_mod diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90 new file mode 100644 index 00000000..d66dca3b --- /dev/null +++ b/openacc/psb_s_oacc_csr_mat_mod.F90 @@ -0,0 +1,290 @@ +module psb_s_oacc_csr_mat_mod + + use iso_c_binding + use openacc + use psb_s_mat_mod + use psb_s_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_csr_sparse_mat) :: psb_s_oacc_csr_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => s_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => s_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_s_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_oacc_csr_inner_vect_sv + procedure, pass(a) :: scals => psb_s_oacc_csr_scals + procedure, pass(a) :: scalv => psb_s_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_csr_allocate_mnnz 
+ procedure, pass(a) :: cp_from_coo => psb_s_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => s_oacc_csr_free + procedure, pass(a) :: mold => psb_s_oacc_csr_mold + procedure, pass(a) :: all => s_oacc_csr_all + procedure, pass(a) :: is_host => s_oacc_csr_is_host + procedure, pass(a) :: is_sync => s_oacc_csr_is_sync + procedure, pass(a) :: is_dev => s_oacc_csr_is_dev + procedure, pass(a) :: set_host => s_oacc_csr_set_host + procedure, pass(a) :: set_sync => s_oacc_csr_set_sync + procedure, pass(a) :: set_dev => s_oacc_csr_set_dev + procedure, pass(a) :: free_dev_space => s_oacc_csr_free_dev_space + procedure, pass(a) :: sync_dev_space => s_oacc_csr_sync_dev_space + procedure, pass(a) :: sync => s_oacc_csr_sync + end type psb_s_oacc_csr_sparse_mat + + interface + module subroutine psb_s_oacc_csr_mold(a,b,info) + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_mold + end interface + + interface + module subroutine psb_s_oacc_csr_cp_from_fmt(a,b,info) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_cp_from_fmt + end interface + + interface + module subroutine psb_s_oacc_csr_mv_from_coo(a,b,info) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_mv_from_coo + end interface + + interface + module subroutine psb_s_oacc_csr_mv_from_fmt(a,b,info) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: 
info + end subroutine psb_s_oacc_csr_mv_from_fmt + end interface + + interface + module subroutine psb_s_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_csr_vect_mv + end interface + + interface + module subroutine psb_s_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_csr_inner_vect_sv + end interface + + interface + module subroutine psb_s_oacc_csr_scals(d, a, info) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_scals + end interface + + interface + module subroutine psb_s_oacc_csr_scal(d,a,info,side) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_s_oacc_csr_scal + end interface + + interface + module subroutine psb_s_oacc_csr_reallocate_nz(nz,a) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_s_oacc_csr_reallocate_nz + end interface + + interface + module subroutine psb_s_oacc_csr_allocate_mnnz(m,n,a,nz) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_oacc_csr_allocate_mnnz + end interface + + interface + module subroutine psb_s_oacc_csr_cp_from_coo(a,b,info) + class(psb_s_oacc_csr_sparse_mat), 
intent(inout) :: a
+      class(psb_s_coo_sparse_mat), intent(in) :: b
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_oacc_csr_cp_from_coo
+  end interface
+
+contains
+
+
+  ! Delete the device copies of the three CSR arrays (val, ja, irp).
+  subroutine s_oacc_csr_free_dev_space(a)
+    use psb_base_mod
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+    !
+    ! Note: at least on GNU, if an array is allocated
+    !       but with size 0, then CREATE,UPDATE and DELETE
+    !       will fail
+    !
+    if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+    if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+    if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp)
+
+    return
+  end subroutine s_oacc_csr_free_dev_space
+
+  ! Release the device copies first, then the host storage via the parent
+  ! CSR class.
+  subroutine s_oacc_csr_free(a)
+    use psb_base_mod
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+    integer(psb_ipk_) :: info
+
+    call a%free_dev_space()
+    call a%psb_s_csr_sparse_mat%free()
+
+    return
+  end subroutine s_oacc_csr_free
+
+  ! Memory footprint in bytes: a fixed 8 plus the storage of val, irp and ja.
+  ! psb_size is used instead of size so that components that are not
+  ! allocated count as zero rather than being an invalid SIZE argument.
+  function s_oacc_csr_sizeof(a) result(res)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    if (a%is_dev()) call a%sync()
+
+    res = 8
+    res = res + psb_sizeof_sp * psb_size(a%val)
+    res = res + psb_sizeof_ip * psb_size(a%irp)
+    res = res + psb_sizeof_ip * psb_size(a%ja)
+
+  end function s_oacc_csr_sizeof
+
+
+  ! Format tag of this matrix class.
+  function s_oacc_csr_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'CSROA'
+  end function s_oacc_csr_get_fmt
+
+  ! Allocate an m x n CSR matrix with room for nz entries, flag the host copy
+  ! as current and create the device copies.
+  !   info : 0 on success, otherwise the status of the first failing ALLOCATE.
+  ! The allocations are chained so that an early failure is not overwritten
+  ! by a later success; on failure the matrix state flag is left unchanged
+  ! and no device space is created.
+  ! NOTE(review): a is intent(out), so its allocatable components are
+  ! presumably already deallocated when a%free() runs - confirm that no
+  ! device copy can be left behind by a previous use of the actual argument.
+  subroutine s_oacc_csr_all(m, n, nz, a, info)
+    implicit none
+    integer(psb_ipk_), intent(in) :: m, n, nz
+    class(psb_s_oacc_csr_sparse_mat), intent(out) :: a
+    integer(psb_ipk_), intent(out) :: info
+
+    info = 0
+    call a%free()
+
+    call a%set_nrows(m)
+    call a%set_ncols(n)
+
+    allocate(a%val(nz),stat=info)
+    if (info == 0) allocate(a%ja(nz),stat=info)
+    if (info == 0) allocate(a%irp(m+1),stat=info)
+    if (info == 0) then
+      call a%set_host()
+      call a%sync_dev_space()
+    end if
+  end subroutine s_oacc_csr_all
+
+  function s_oacc_csr_is_host(a) result(res)
+    implicit none
+
class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_host)
+  end function s_oacc_csr_is_host
+
+  ! True when the state flag equals is_sync.
+  function s_oacc_csr_is_sync(a) result(res)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_sync)
+  end function s_oacc_csr_is_sync
+
+  ! True when the state flag equals is_dev.
+  function s_oacc_csr_is_dev(a) result(res)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+    logical :: res
+
+    res = (a%devstate == is_dev)
+  end function s_oacc_csr_is_dev
+
+  ! Flag the host copy as the current one.
+  subroutine s_oacc_csr_set_host(a)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_host
+  end subroutine s_oacc_csr_set_host
+
+  ! Flag host and device copies as identical.
+  subroutine s_oacc_csr_set_sync(a)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_sync
+  end subroutine s_oacc_csr_set_sync
+
+  ! Flag the device copy as the current one.
+  subroutine s_oacc_csr_set_dev(a)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+
+    a%devstate = is_dev
+  end subroutine s_oacc_csr_set_dev
+
+  ! Create the device copies (copy-in) of val, ja and irp; empty arrays are
+  ! skipped.
+  subroutine s_oacc_csr_sync_dev_space(a)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+
+    !
+    ! Note: at least on GNU, if an array is allocated
+    !       but with size 0, then CREATE,UPDATE and DELETE
+    !       will fail
+    !
+    if (psb_size(a%val)>0) call acc_copyin(a%val)
+    if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+    if (psb_size(a%irp)>0) call acc_copyin(a%irp)
+  end subroutine s_oacc_csr_sync_dev_space
+
+  ! Bring host and device copies of val, ja and irp into agreement:
+  ! download when the device copy is current, upload when the host copy is
+  ! current, then flag the matrix as "sync".
+  ! a is declared intent(in); the state flag is updated through the pointer
+  ! tmpa, which is associated with a.
+  subroutine s_oacc_csr_sync(a)
+    implicit none
+    class(psb_s_oacc_csr_sparse_mat), target, intent(in) :: a
+    class(psb_s_oacc_csr_sparse_mat), pointer :: tmpa
+    integer(psb_ipk_) :: info
+
+    tmpa => a
+    !
+    ! Note: at least on GNU, if an array is allocated
+    !       but with size 0, then CREATE,UPDATE and DELETE
+    !       will fail
+    !
+    if (a%is_dev()) then
+      if (psb_size(a%val)>0) call acc_update_self(a%val)
+      if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+      if (psb_size(a%irp)>0) call acc_update_self(a%irp)
+    else if (a%is_host()) then
+      if (psb_size(a%val)>0) call acc_update_device(a%val)
+      if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+      if (psb_size(a%irp)>0) call acc_update_device(a%irp)
+    end if
+    call tmpa%set_sync()
+  end subroutine s_oacc_csr_sync
+
+end module psb_s_oacc_csr_mat_mod
+
diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90
new file mode 100644
index 00000000..600a08a7
--- /dev/null
+++ b/openacc/psb_s_oacc_ell_mat_mod.F90
@@ -0,0 +1,272 @@
+module psb_s_oacc_ell_mat_mod
+  use iso_c_binding
+  use openacc
+  use psb_s_mat_mod
+  use psb_s_ell_mat_mod
+  use psb_s_oacc_vect_mod
+
+  integer(psb_ipk_), parameter, private :: is_host = -1
+  integer(psb_ipk_), parameter, private :: is_sync = 0
+  integer(psb_ipk_), parameter, private :: is_dev = 1
+
+  type, extends(psb_s_ell_sparse_mat) :: psb_s_oacc_ell_sparse_mat
+    integer(psb_ipk_) :: devstate = is_host
+  contains
+    procedure, nopass :: get_fmt => s_oacc_ell_get_fmt
+    procedure, pass(a) :: sizeof => s_oacc_ell_sizeof
+    procedure, pass(a) :: is_host => s_oacc_ell_is_host
+    procedure, pass(a) :: is_sync => s_oacc_ell_is_sync
+    procedure, pass(a) :: is_dev => s_oacc_ell_is_dev
+    procedure, pass(a) :: set_host => s_oacc_ell_set_host
+    procedure, pass(a) :: set_sync => s_oacc_ell_set_sync
+    procedure, pass(a) :: set_dev => s_oacc_ell_set_dev
+    procedure, pass(a) :: sync_dev_space => s_oacc_ell_sync_dev_space
+    procedure, pass(a) :: sync => s_oacc_ell_sync
+    procedure, pass(a) :: free_dev_space => s_oacc_ell_free_dev_space
+    procedure, pass(a) :: free => s_oacc_ell_free
+    procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv
+    procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv
+    procedure, pass(a) :: scals => psb_s_oacc_ell_scals
+    procedure, pass(a) :: scalv => psb_s_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_s_oacc_ell_mold + + end type psb_s_oacc_ell_sparse_mat + + interface + module subroutine psb_s_oacc_ell_mold(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_mold + end interface + + interface + module subroutine psb_s_oacc_ell_cp_from_fmt(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_s_oacc_ell_mv_from_coo(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_s_oacc_ell_mv_from_fmt(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_s_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_vect_mv + end interface + + interface + 
module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_s_oacc_ell_scals(d, a, info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_scals + end interface + + interface + module subroutine psb_s_oacc_ell_scal(d,a,info,side) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_s_oacc_ell_scal + end interface + + interface + module subroutine psb_s_oacc_ell_reallocate_nz(nz,a) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_s_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_s_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_s_oacc_ell_cp_from_coo(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_cp_from_coo + end interface + +contains + + subroutine s_oacc_ell_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) + return + end subroutine s_oacc_ell_free_dev_space + + subroutine s_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + call a%free_dev_space() + call a%psb_s_ell_sparse_mat%free() + + return + end subroutine s_oacc_ell_free + + function s_oacc_ell_sizeof(a) result(res) ! storage estimate: fixed overhead plus the val/ja/irn/idiag arrays + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_sp * size(a%val) ! single-precision format: coefficients use the sp size, not dp + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function s_oacc_ell_sizeof + + subroutine s_oacc_ell_sync_dev_space(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + !
+ if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + end subroutine s_oacc_ell_sync_dev_space + + function s_oacc_ell_is_host(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_oacc_ell_is_host + + function s_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_oacc_ell_is_sync + + function s_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_oacc_ell_is_dev + + subroutine s_oacc_ell_set_host(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_oacc_ell_set_host + + subroutine s_oacc_ell_set_sync(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_oacc_ell_set_sync + + subroutine s_oacc_ell_set_dev(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_oacc_ell_set_dev + + function s_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELLOA' + end function s_oacc_ell_get_fmt + + subroutine s_oacc_ell_sync(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_s_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (a%is_dev()) then + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + else if (a%is_host()) then + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + end if + call tmpa%set_sync() + end subroutine s_oacc_ell_sync + +end module psb_s_oacc_ell_mat_mod diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..33033248 --- /dev/null +++ b/openacc/psb_s_oacc_hll_mat_mod.F90 @@ -0,0 +1,279 @@ +module psb_s_oacc_hll_mat_mod + use iso_c_binding + use openacc + use psb_s_mat_mod + use psb_s_hll_mat_mod + use psb_s_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_hll_sparse_mat) :: psb_s_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => s_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => s_oacc_hll_sizeof + procedure, pass(a) :: is_host => s_oacc_hll_is_host + procedure, pass(a) :: is_sync => s_oacc_hll_is_sync + procedure, pass(a) :: is_dev => s_oacc_hll_is_dev + procedure, pass(a) :: set_host => s_oacc_hll_set_host + procedure, pass(a) :: set_sync => s_oacc_hll_set_sync + procedure, pass(a) :: set_dev => s_oacc_hll_set_dev + procedure, pass(a) :: sync_dev_space => s_oacc_hll_sync_dev_space + procedure, pass(a) :: sync => s_oacc_hll_sync + procedure, pass(a) :: free_dev_space => s_oacc_hll_free_dev_space + procedure, pass(a) :: free => s_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => 
psb_s_oacc_hll_inner_vect_sv + procedure, pass(a) :: scals => psb_s_oacc_hll_scals + procedure, pass(a) :: scalv => psb_s_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_s_oacc_hll_mold + + end type psb_s_oacc_hll_sparse_mat + + interface + module subroutine psb_s_oacc_hll_mold(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_mold + end interface + + interface + module subroutine psb_s_oacc_hll_cp_from_fmt(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_s_oacc_hll_mv_from_coo(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_s_oacc_hll_mv_from_fmt(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_s_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), 
intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_s_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_s_oacc_hll_scals(d, a, info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_scals + end interface + + interface + module subroutine psb_s_oacc_hll_scal(d,a,info,side) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_s_oacc_hll_scal + end interface + + interface + module subroutine psb_s_oacc_hll_reallocate_nz(nz,a) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_s_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_s_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_s_oacc_hll_cp_from_coo(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_cp_from_coo + end interface + +contains + + subroutine s_oacc_hll_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) 
:: info + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs) + return + end subroutine s_oacc_hll_free_dev_space + + subroutine s_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + call a%free_dev_space() + call a%psb_s_hll_sparse_mat%free() + + return + end subroutine s_oacc_hll_free + + function s_oacc_hll_sizeof(a) result(res) ! storage estimate: fixed overhead plus the val/ja/irn/idiag/hkoffs arrays + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_sp * size(a%val) ! single-precision format: coefficients use the sp size, not dp + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function s_oacc_hll_sizeof + + + + function s_oacc_hll_is_host(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_oacc_hll_is_host + + function s_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_oacc_hll_is_sync + + function s_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_oacc_hll_is_dev + + subroutine s_oacc_hll_set_host(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_oacc_hll_set_host + + subroutine s_oacc_hll_set_sync(a) + implicit
none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_oacc_hll_set_sync + + subroutine s_oacc_hll_set_dev(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_oacc_hll_set_dev + + function s_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLLOA' + end function s_oacc_hll_get_fmt + + subroutine s_oacc_hll_sync_dev_space(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs) + end subroutine s_oacc_hll_sync_dev_space + + + subroutine s_oacc_hll_sync(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_s_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (a%is_dev()) then + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs) + else if (a%is_host()) then + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine s_oacc_hll_sync + +end module psb_s_oacc_hll_mat_mod diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 new file mode 100644 index 00000000..b8d9700d --- /dev/null +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -0,0 +1,1013 @@ +module psb_s_oacc_vect_mod + use iso_c_binding + use openacc + use psb_const_mod + use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod + use psb_s_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_vect_type) :: psb_s_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => s_oacc_get_nrows + procedure, nopass :: get_fmt => s_oacc_get_fmt + + procedure, pass(x) :: all => s_oacc_vect_all + procedure, pass(x) :: zero => s_oacc_zero + procedure, pass(x) :: asb_m => s_oacc_asb_m + procedure, pass(x) :: sync => s_oacc_sync + procedure, pass(x) :: sync_dev_space => s_oacc_sync_dev_space + procedure, pass(x) :: bld_x => s_oacc_bld_x + procedure, pass(x) :: bld_mn => s_oacc_bld_mn + procedure, pass(x) :: free => s_oacc_vect_free + procedure, pass(x) :: free_buffer => s_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => 
s_oacc_vect_maybe_free_buffer + procedure, pass(x) :: ins_a => s_oacc_ins_a + procedure, pass(x) :: ins_v => s_oacc_ins_v + procedure, pass(x) :: is_host => s_oacc_is_host + procedure, pass(x) :: is_dev => s_oacc_is_dev + procedure, pass(x) :: is_sync => s_oacc_is_sync + procedure, pass(x) :: set_host => s_oacc_set_host + procedure, pass(x) :: set_dev => s_oacc_set_dev + procedure, pass(x) :: set_sync => s_oacc_set_sync + procedure, pass(x) :: set_scal => s_oacc_set_scal + + procedure, pass(x) :: new_buffer => s_oacc_new_buffer + procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x + procedure, pass(x) :: gthzbuf => s_oacc_gthzbuf + procedure, pass(y) :: sctb => s_oacc_sctb + procedure, pass(y) :: sctb_x => s_oacc_sctb_x + procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf + procedure, nopass :: device_wait => s_oacc_device_wait + + procedure, pass(x) :: get_size => s_oacc_get_size + + procedure, pass(x) :: dot_v => s_oacc_vect_dot + procedure, pass(x) :: dot_a => s_oacc_dot_a + procedure, pass(y) :: axpby_v => s_oacc_axpby_v + procedure, pass(y) :: axpby_a => s_oacc_axpby_a + procedure, pass(z) :: upd_xyz => s_oacc_upd_xyz + procedure, pass(y) :: mlt_a => s_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => psb_s_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_s_oacc_mlt_v_2 + procedure, pass(x) :: scal => s_oacc_scal + procedure, pass(x) :: nrm2 => s_oacc_nrm2 + procedure, pass(x) :: amax => s_oacc_amax + procedure, pass(x) :: asum => s_oacc_asum + procedure, pass(x) :: absval1 => s_oacc_absval1 + procedure, pass(x) :: absval2 => s_oacc_absval2 + final :: s_oacc_final_vect_free + end type psb_s_vect_oacc + + interface + subroutine psb_s_oacc_mlt_v(x, y, info) + import + implicit none + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_mlt_v + end interface + + interface + subroutine psb_s_oacc_mlt_v_2(alpha, x, y, 
beta, z, info, conjgx, conjgy) + import + implicit none + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + end subroutine psb_s_oacc_mlt_v_2 + end interface + +contains + + subroutine s_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine s_oacc_device_wait + + subroutine s_oacc_absval1(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: n + + if (x%is_host()) call x%sync() + n = size(x%v) + call s_inner_oacc_absval1(n,x%v) + call x%set_dev() + contains + subroutine s_inner_oacc_absval1(n,x) + implicit none + real(psb_spk_), intent(inout) :: x(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop present(x) + do i = 1, n + x(i) = abs(x(i)) + end do + end subroutine s_inner_oacc_absval1 + end subroutine s_oacc_absval1 + + subroutine s_oacc_absval2(x, y) ! y(i) = abs(x(i)); device kernel when y is an OpenACC vector, base-class host path otherwise + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + + n = min(size(x%v), size(y%v)) + select type (yy => y) + class is (psb_s_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + call s_inner_oacc_absval2(n,x%v,yy%v) + call yy%set_dev() ! the kernel wrote yy%v on the device: mark it so the host copy is refreshed on next use + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_s_base_vect_type%absval(y) + end select + contains + subroutine s_inner_oacc_absval2(n,x,y) + implicit none + real(psb_spk_), intent(inout) :: x(:),y(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = abs(x(i)) + end do + end subroutine s_inner_oacc_absval2 + end subroutine s_oacc_absval2 + + subroutine s_oacc_scal(alpha, x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + real(psb_spk_),
intent(in) :: alpha + integer(psb_ipk_) :: info + if (x%is_host()) call x%sync() + call s_inner_oacc_scal(alpha, x%v) + call x%set_dev() + contains + subroutine s_inner_oacc_scal(alpha, x) + real(psb_spk_), intent(in) :: alpha + real(psb_spk_), intent(inout) :: x(:) + integer(psb_ipk_) :: i + !$acc parallel loop present(x) + do i = 1, size(x) + x(i) = alpha * x(i) + end do + end subroutine s_inner_oacc_scal + end subroutine s_oacc_scal + + function s_oacc_nrm2(n, x) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() +!!$ write(0,*)'oacc_nrm2' + res = s_inner_oacc_nrm2(n, x%v) + contains + function s_inner_oacc_nrm2(n, x) result(res) + integer(psb_ipk_) :: n + real(psb_spk_) :: x(:) + real(psb_spk_) :: res + real(psb_spk_) :: sum, mx + integer(psb_ipk_) :: i + mx = szero + !$acc parallel loop reduction(max:mx) present(x) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + if (mx == szero) then + res = mx + else + sum = szero + !$acc parallel loop reduction(+:sum) present(x) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end if + end function s_inner_oacc_nrm2 + end function s_oacc_nrm2 + + function s_oacc_amax(n, x) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + res = s_inner_oacc_amax(n, x%v) + contains + function s_inner_oacc_amax(n, x) result(res) + integer(psb_ipk_) :: n + real(psb_spk_) :: x(:) + real(psb_spk_) :: res + real(psb_spk_) :: max_val + integer(psb_ipk_) :: i + max_val = szero + !$acc parallel loop reduction(max:max_val) present(x) + do i = 1, n + if (abs(x(i)) > max_val) max_val = abs(x(i)) + end do + res = max_val + end function s_inner_oacc_amax + end function s_oacc_amax + + function s_oacc_asum(n, x) result(res) + 
implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + real(psb_spk_) :: sum + integer(psb_ipk_) :: i + if (x%is_host()) call x%sync() + res = s_inner_oacc_asum(n, x%v) + contains + function s_inner_oacc_asum(n, x) result(res) + integer(psb_ipk_) :: n + real(psb_spk_) :: x(:) + real(psb_spk_) :: res + integer(psb_ipk_) :: i + res = szero + !$acc parallel loop reduction(+:res) present(x) + do i = 1, n + res = res + abs(x(i)) + end do + end function s_inner_oacc_asum + end function s_oacc_asum + + + subroutine s_oacc_mlt_a(x, y, info) ! y%v(i) = y%v(i) * x(i) with a plain host array x + implicit none + real(psb_spk_), intent(in) :: x(:) + class(psb_s_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + ! Host-side loop: x is a host array and y was just brought up to date on the host, so no device kernel is launched + do i = 1, size(x) + y%v(i) = y%v(i) * x(i) + end do + call y%set_host() + end subroutine s_oacc_mlt_a + + subroutine s_oacc_mlt_a_2(alpha, x, y, beta, z, info) ! z%v(i) = alpha*x(i)*y(i) + beta*z%v(i) with plain host arrays x, y + implicit none + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(in) :: y(:) + class(psb_s_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + ! Host-side loop: x and y are host arrays and z was just brought up to date on the host, so no device kernel is launched + do i = 1, size(x) + z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) + end do + call z%set_host() + end subroutine s_oacc_mlt_a_2 + + subroutine s_oacc_axpby_v(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_oacc), intent(inout) :: y + real(psb_spk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, i + + info = psb_success_ + + select type(xx => x) + type is (psb_s_vect_oacc) + if ((beta /= szero) .and.
y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(y%v) + if ((nx < m) .or. (ny < m)) then + info = psb_err_internal_error_ + else + call s_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info) + end if + call y%set_dev() + class default + if ((alpha /= szero) .and. (x%is_dev())) call x%sync() + call y%axpby(m, alpha, x%v, beta, info) + end select + contains + subroutine s_inner_oacc_axpby(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent(inout) :: x(:) + real(psb_spk_), intent(inout) :: y(:) + real(psb_spk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + !$acc parallel present(x,y) + !$acc loop + do i = 1, m + y(i) = alpha * x(i) + beta * y(i) + end do + !$acc end parallel + end subroutine s_inner_oacc_axpby + end subroutine s_oacc_axpby_v + + subroutine s_oacc_axpby_a(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent(in) :: x(:) + class(psb_s_vect_oacc), intent(inout) :: y + real(psb_spk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i + + if ((beta /= szero) .and. (y%is_dev())) call y%sync() + + do i = 1, m + y%v(i) = alpha * x(i) + beta * y%v(i) + end do + call y%set_host() + end subroutine s_oacc_axpby_a + + subroutine s_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_oacc), intent(inout) :: z + real(psb_spk_), intent(in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, i + logical :: gpu_done + + info = psb_success_ + gpu_done = .false. 
+ + select type(xx => x) + class is (psb_s_vect_oacc) + select type(yy => y) + class is (psb_s_vect_oacc) + select type(zz => z) + class is (psb_s_vect_oacc) + if ((beta /= szero) .and. yy%is_host()) call yy%sync() + if ((delta /= szero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop present(xx%v,yy%v,zz%v) + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. + end select + end select + end select + + if (.not. gpu_done) then + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) + end if + end subroutine s_oacc_upd_xyz + + subroutine s_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta + class(psb_s_vect_oacc) :: y + integer(psb_ipk_) :: info, k + logical :: acc_done + if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + acc_done = .false. + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. 
+ end select + + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k+i-1) ! same combuf(i:i+n-1) window as the device path + end do + call y%set_host() ! y%v was updated on the host only + end if + + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_spk_) :: beta,x(:), y(:) + integer(psb_ipk_) :: k + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + + end subroutine s_oacc_sctb_buf + + subroutine s_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i + integer(psb_mpk_):: n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_oacc) :: y + integer(psb_ipk_) :: info, ni, k + logical :: acc_done + + acc_done = .false. + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (acc_is_present(x)) then + call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1)) + acc_done = .true.
+ call y%set_dev() + end if + end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if + + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_spk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + + end subroutine s_oacc_sctb_x + + subroutine s_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync() + + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine s_oacc_sctb + + subroutine s_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + class(psb_s_vect_oacc) :: x + integer(psb_ipk_) :: info,k + logical :: acc_done + + info = 0 + acc_done = .false. + + if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. 
+ end select + + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if + + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_spk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! + !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine s_oacc_gthzbuf + + subroutine s_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type):: idx + real(psb_spk_) :: y(:) + class(psb_s_vect_oacc):: x + integer(psb_ipk_) :: info, k + logical :: acc_done + + info = 0 + acc_done = .false. + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if + end select + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_spk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! 
+ !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine s_oacc_gthzv_x + + subroutine s_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_s_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_s_vect_oacc) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + !$acc parallel loop present(x%v,virl%v,vval%v) + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. 
+ end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync() + end select + select type(vval => val) + type is (psb_s_vect_oacc) + if (vval%is_dev()) call vval%sync() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine s_oacc_ins_v + + subroutine s_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_s_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + + + end subroutine s_oacc_ins_a + + subroutine s_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%free(info) + call x%all(ione*n, info) + if (info /= 0) then + call psb_errpush(info, 's_oacc_bld_mn',& + & i_err=ione*(/n, n, n, n, n/)) + end if + call x%set_host() + call x%sync_dev_space() + + end subroutine s_oacc_bld_mn + + + subroutine s_oacc_bld_x(x, this) + use psb_base_mod + implicit none + real(psb_spk_), intent(in) :: this(:) + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%free(info) + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 's_oacc_bld_x', & + i_err=(/size(this)*ione, izero, izero, izero, izero/)) + return + end if + x%v(:) = this(:) + call x%set_host() + call x%sync_dev_space() + + end subroutine s_oacc_bld_x + + subroutine s_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_oacc), 
intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_s_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync() + call x%set_host() + end if + else + if (size(x%v) < n) then + call x%psb_s_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync() + call x%set_host() + end if + end if + end subroutine s_oacc_asb_m + + ! Set x%v(first:last) to val with a device loop and mark the device copy as current. + ! first/last default to the whole vector and are clipped to 1..get_nrows(). + subroutine s_oacc_set_scal(x, val, first, last) + class(psb_s_vect_oacc), intent(inout) :: x + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_, i + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + ! A partial update keeps the remaining entries of the device copy, + ! so refresh that copy first when the host holds the latest data. + if ((present(first) .or. present(last)) .and. x%is_host()) call x%sync() + + !$acc parallel loop present(x%v) + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine s_oacc_set_scal + + subroutine s_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(szero) + end subroutine s_oacc_zero + + ! Number of entries of x%v; 0 when the vector is not allocated. + function s_oacc_get_nrows(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + ! Define the result on every path: an unallocated vector has no rows. + res = 0 + if (allocated(x%v)) res = size(x%v) + end function s_oacc_get_nrows + + function s_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "sOACC" + + end function s_oacc_get_fmt + + + function s_oacc_vect_dot(n, x, y) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + res = szero +!!$ write(0,*) 'oacc_dot_v' + select type(yy => y) + type is (psb_s_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + res = s_inner_oacc_dot(n, x%v, yy%v) + class default + if 
(x%is_dev()) call x%sync() + res = y%dot(n, x%v) + end select + contains + ! Dot product of the first n entries of x and y, computed by a device + ! reduction; both arrays must already be present on the device. + function s_inner_oacc_dot(n, x, y) result(res) + implicit none + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: i + + ! The reduction combines its partial sums with the incoming value of res, + ! so the function result must be defined before the loop. + res = szero + !$acc parallel loop reduction(+:res) present(x, y) + do i = 1, n + res = res + x(i) * y(i) + end do + !$acc end parallel loop + end function s_inner_oacc_dot + end function s_oacc_vect_dot + + function s_oacc_dot_a(n, x, y) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + real(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: sdot + + if (x%is_dev()) call x%sync() + res = sdot(n, y, 1, x%v, 1) + + end function s_oacc_dot_a + + + subroutine s_oacc_new_buffer(n,x,info) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + ! info is intent(out): define it even when the buffer is already large enough. + info = 0 + !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf) + if (n > psb_size(x%combuf)) then + !write(0,*) 'oacc new_buffer: reallocating ' + if (allocated(x%combuf)) then + !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + !$acc exit data delete(x%combuf) + end if + call x%psb_s_base_vect_type%new_buffer(n,info) + !$acc enter data copyin(x%combuf) + ! 
call acc_copyin(x%combuf) + end if + end subroutine s_oacc_new_buffer + + subroutine s_oacc_sync_dev_space(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) + end subroutine s_oacc_sync_dev_space + + subroutine s_oacc_sync(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + if (psb_size(x%v)>0) call acc_update_self(x%v) + end if + if (x%is_host()) then + if (.not.acc_is_present(x%v)) call s_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) + end if + call x%set_sync() + end subroutine s_oacc_sync + + subroutine s_oacc_set_host(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine s_oacc_set_host + + subroutine s_oacc_set_dev(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine s_oacc_set_dev + + subroutine s_oacc_set_sync(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine s_oacc_set_sync + + function s_oacc_is_dev(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function s_oacc_is_dev + + function s_oacc_is_host(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function s_oacc_is_host + + function s_oacc_is_sync(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function s_oacc_is_sync + + subroutine s_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_s_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 's_oacc_all', & + 
i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + call x%sync_dev_space() + end subroutine s_oacc_vect_all + + subroutine s_oacc_final_vect_free(x) + implicit none + type(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 +!!$ write(0,*) 'oacc final_vect_free' + call x%free_buffer(info) + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine s_oacc_final_vect_free + + ! Release the communication buffer, the device copy of x%v (if any) + ! and then the host storage through the parent type. + subroutine s_oacc_vect_free(x, info) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 +!!$ write(0,*) 'oacc vect_free' + call x%free_buffer(info) + ! x%v may never have been allocated: query the device only for an + ! allocated array, as s_oacc_final_vect_free does. + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + end if + call x%psb_s_base_vect_type%free(info) + end subroutine s_oacc_vect_free + + subroutine s_oacc_vect_maybe_free_buffer(x,info) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_oacc_get_maybe_free_buffer()) then + !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer() + call x%free_buffer(info) + end if + + end subroutine s_oacc_vect_maybe_free_buffer + + subroutine s_oacc_vect_free_buffer(x,info) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info +! 
write(0,*) 'oacc free_buffer' + info = 0 + ! The communication buffer exists only after new_buffer has been called: + ! query the device only for an allocated array. + if (allocated(x%combuf)) then + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + end if + call x%psb_s_base_vect_type%free_buffer(info) + + end subroutine s_oacc_vect_free_buffer + + function s_oacc_get_size(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + res = size(x%v) + end function s_oacc_get_size + +end module psb_s_oacc_vect_mod diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90 new file mode 100644 index 00000000..3b66787a --- /dev/null +++ b/openacc/psb_z_oacc_csr_mat_mod.F90 @@ -0,0 +1,290 @@ +module psb_z_oacc_csr_mat_mod + + use iso_c_binding + use openacc + use psb_z_mat_mod + use psb_z_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_csr_sparse_mat) :: psb_z_oacc_csr_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => z_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => z_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_z_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_csr_inner_vect_sv + procedure, pass(a) :: scals => psb_z_oacc_csr_scals + procedure, pass(a) :: scalv => psb_z_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => z_oacc_csr_free + procedure, pass(a) :: mold => psb_z_oacc_csr_mold + procedure, pass(a) :: all => z_oacc_csr_all + procedure, pass(a) :: is_host => z_oacc_csr_is_host + procedure, pass(a) :: is_sync => 
z_oacc_csr_is_sync + procedure, pass(a) :: is_dev => z_oacc_csr_is_dev + procedure, pass(a) :: set_host => z_oacc_csr_set_host + procedure, pass(a) :: set_sync => z_oacc_csr_set_sync + procedure, pass(a) :: set_dev => z_oacc_csr_set_dev + procedure, pass(a) :: free_dev_space => z_oacc_csr_free_dev_space + procedure, pass(a) :: sync_dev_space => z_oacc_csr_sync_dev_space + procedure, pass(a) :: sync => z_oacc_csr_sync + end type psb_z_oacc_csr_sparse_mat + + interface + module subroutine psb_z_oacc_csr_mold(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_mold + end interface + + interface + module subroutine psb_z_oacc_csr_cp_from_fmt(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_cp_from_fmt + end interface + + interface + module subroutine psb_z_oacc_csr_mv_from_coo(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_mv_from_coo + end interface + + interface + module subroutine psb_z_oacc_csr_mv_from_fmt(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_mv_from_fmt + end interface + + interface + module subroutine psb_z_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_csr_vect_mv + end interface + + interface + module subroutine 
psb_z_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_csr_inner_vect_sv + end interface + + interface + module subroutine psb_z_oacc_csr_scals(d, a, info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_scals + end interface + + interface + module subroutine psb_z_oacc_csr_scal(d,a,info,side) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_z_oacc_csr_scal + end interface + + interface + module subroutine psb_z_oacc_csr_reallocate_nz(nz,a) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_z_oacc_csr_reallocate_nz + end interface + + interface + module subroutine psb_z_oacc_csr_allocate_mnnz(m,n,a,nz) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_oacc_csr_allocate_mnnz + end interface + + interface + module subroutine psb_z_oacc_csr_cp_from_coo(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_cp_from_coo + end interface + +contains + + + subroutine z_oacc_csr_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp) + + return + end subroutine z_oacc_csr_free_dev_space + + subroutine z_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + call a%free_dev_space() + call a%psb_z_csr_sparse_mat%free() + + return + end subroutine z_oacc_csr_free + + function z_oacc_csr_sizeof(a) result(res) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + + end function z_oacc_csr_sizeof + + + function z_oacc_csr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSROA' + end function z_oacc_csr_get_fmt + + ! Allocate an m x n CSR matrix with room for nz entries on the host, + ! then create the device copies. info is the status of the first + ! allocation that fails, or 0 on success. + subroutine z_oacc_csr_all(m, n, nz, a, info) + implicit none + integer(psb_ipk_), intent(in) :: m, n, nz + class(psb_z_oacc_csr_sparse_mat), intent(out) :: a + integer(psb_ipk_), intent(out) :: info + + info = 0 + call a%free() + + call a%set_nrows(m) + call a%set_ncols(n) + + ! Stop at the first failure so that a later successful allocation + ! cannot overwrite a non-zero status. + allocate(a%val(nz),stat=info) + if (info == 0) allocate(a%ja(nz),stat=info) + if (info == 0) allocate(a%irp(m+1),stat=info) + if (info == 0) call a%set_host() + if (info == 0) call a%sync_dev_space() + end subroutine z_oacc_csr_all + + function z_oacc_csr_is_host(a) result(res) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_oacc_csr_is_host + + function z_oacc_csr_is_sync(a) result(res) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_oacc_csr_is_sync + + function z_oacc_csr_is_dev(a) result(res) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = 
(a%devstate == is_dev) + end function z_oacc_csr_is_dev + + subroutine z_oacc_csr_set_host(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_oacc_csr_set_host + + subroutine z_oacc_csr_set_sync(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_oacc_csr_set_sync + + subroutine z_oacc_csr_set_dev(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_oacc_csr_set_dev + + subroutine z_oacc_csr_sync_dev_space(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irp)>0) call acc_copyin(a%irp) + end subroutine z_oacc_csr_sync_dev_space + + subroutine z_oacc_csr_sync(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), target, intent(in) :: a + class(psb_z_oacc_csr_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (a%is_dev()) then + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irp)>0) call acc_update_self(a%irp) + else if (a%is_host()) then + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irp)>0) call acc_update_device(a%irp) + end if + call tmpa%set_sync() + end subroutine z_oacc_csr_sync + +end module psb_z_oacc_csr_mat_mod + diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 new file mode 100644 index 00000000..abfb11e3 --- /dev/null +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -0,0 +1,272 @@ +module psb_z_oacc_ell_mat_mod + use iso_c_binding + use openacc + use psb_z_mat_mod + use psb_z_ell_mat_mod + use psb_z_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_ell_sparse_mat) :: psb_z_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => z_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => z_oacc_ell_sizeof + procedure, pass(a) :: is_host => z_oacc_ell_is_host + procedure, pass(a) :: is_sync => z_oacc_ell_is_sync + procedure, pass(a) :: is_dev => z_oacc_ell_is_dev + procedure, pass(a) :: set_host => z_oacc_ell_set_host + procedure, pass(a) :: set_sync => z_oacc_ell_set_sync + procedure, pass(a) :: set_dev => z_oacc_ell_set_dev + procedure, pass(a) :: sync_dev_space => z_oacc_ell_sync_dev_space + procedure, pass(a) :: sync => z_oacc_ell_sync + procedure, pass(a) :: free_dev_space => z_oacc_ell_free_dev_space + procedure, pass(a) :: free => z_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv + procedure, pass(a) :: scals => psb_z_oacc_ell_scals + procedure, pass(a) :: scalv => psb_z_oacc_ell_scal 
+ procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_z_oacc_ell_mold + + end type psb_z_oacc_ell_sparse_mat + + interface + module subroutine psb_z_oacc_ell_mold(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_mold + end interface + + interface + module subroutine psb_z_oacc_ell_cp_from_fmt(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_z_oacc_ell_mv_from_coo(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_z_oacc_ell_mv_from_fmt(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_z_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_vect_mv + end interface + + interface + 
module subroutine psb_z_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_z_oacc_ell_scals(d, a, info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_scals + end interface + + interface + module subroutine psb_z_oacc_ell_scal(d,a,info,side) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_z_oacc_ell_scal + end interface + + interface + module subroutine psb_z_oacc_ell_reallocate_nz(nz,a) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_z_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_z_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_z_oacc_ell_cp_from_coo(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_cp_from_coo + end interface + +contains + + subroutine z_oacc_ell_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! 
will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) + return + end subroutine z_oacc_ell_free_dev_space + + subroutine z_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + call a%free_dev_space() + call a%psb_z_ell_sparse_mat%free() + + return + end subroutine z_oacc_ell_free + + ! + ! Storage footprint of the matrix: a fixed overhead plus the + ! sizes of the VAL, JA, IRN and IDIAG arrays. + ! VAL holds complex(psb_dpk_) entries, i.e. two reals each, hence + ! the 2*psb_sizeof_dp factor. psb_size is used (as in the device + ! transfer routines of this module) because the SIZE intrinsic + ! must not be applied to an unallocated array. + ! + function z_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + (2*psb_sizeof_dp) * psb_size(a%val) + res = res + psb_sizeof_ip * psb_size(a%ja) + res = res + psb_sizeof_ip * psb_size(a%irn) + res = res + psb_sizeof_ip * psb_size(a%idiag) + + end function z_oacc_ell_sizeof + + subroutine z_oacc_ell_sync_dev_space(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + end subroutine z_oacc_ell_sync_dev_space + + function z_oacc_ell_is_host(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_oacc_ell_is_host + + function z_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_oacc_ell_is_sync + + function z_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_oacc_ell_is_dev + + subroutine z_oacc_ell_set_host(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_oacc_ell_set_host + + subroutine z_oacc_ell_set_sync(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_oacc_ell_set_sync + + subroutine z_oacc_ell_set_dev(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_oacc_ell_set_dev + + function z_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELLOA' + end function z_oacc_ell_get_fmt + + subroutine z_oacc_ell_sync(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_z_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (a%is_dev()) then + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + else if (a%is_host()) then + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + end if + call tmpa%set_sync() + end subroutine z_oacc_ell_sync + +end module psb_z_oacc_ell_mat_mod diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..4c9f1b11 --- /dev/null +++ b/openacc/psb_z_oacc_hll_mat_mod.F90 @@ -0,0 +1,279 @@ +module psb_z_oacc_hll_mat_mod + use iso_c_binding + use openacc + use psb_z_mat_mod + use psb_z_hll_mat_mod + use psb_z_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_hll_sparse_mat) :: psb_z_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => z_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => z_oacc_hll_sizeof + procedure, pass(a) :: is_host => z_oacc_hll_is_host + procedure, pass(a) :: is_sync => z_oacc_hll_is_sync + procedure, pass(a) :: is_dev => z_oacc_hll_is_dev + procedure, pass(a) :: set_host => z_oacc_hll_set_host + procedure, pass(a) :: set_sync => z_oacc_hll_set_sync + procedure, pass(a) :: set_dev => z_oacc_hll_set_dev + procedure, pass(a) :: sync_dev_space => z_oacc_hll_sync_dev_space + procedure, pass(a) :: sync => z_oacc_hll_sync + procedure, pass(a) :: free_dev_space => z_oacc_hll_free_dev_space + procedure, pass(a) :: free => z_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => 
psb_z_oacc_hll_inner_vect_sv + procedure, pass(a) :: scals => psb_z_oacc_hll_scals + procedure, pass(a) :: scalv => psb_z_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_z_oacc_hll_mold + + end type psb_z_oacc_hll_sparse_mat + + interface + module subroutine psb_z_oacc_hll_mold(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_mold + end interface + + interface + module subroutine psb_z_oacc_hll_cp_from_fmt(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_z_oacc_hll_mv_from_coo(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_z_oacc_hll_mv_from_fmt(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_z_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), 
intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_z_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_z_oacc_hll_scals(d, a, info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_scals + end interface + + interface + module subroutine psb_z_oacc_hll_scal(d,a,info,side) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_z_oacc_hll_scal + end interface + + interface + module subroutine psb_z_oacc_hll_reallocate_nz(nz,a) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_z_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_z_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_z_oacc_hll_cp_from_coo(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_cp_from_coo + end interface + +contains + + subroutine z_oacc_hll_free_dev_space(a) + use psb_base_mod + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + 
integer(psb_ipk_) :: info + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs) + return + end subroutine z_oacc_hll_free_dev_space + + subroutine z_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + call a%free_dev_space() + call a%psb_z_hll_sparse_mat%free() + + return + end subroutine z_oacc_hll_free + + ! + ! Storage footprint of the matrix: a fixed overhead plus the + ! sizes of the VAL, JA, IRN, IDIAG and HKOFFS arrays. + ! VAL holds complex(psb_dpk_) entries, i.e. two reals each, hence + ! the 2*psb_sizeof_dp factor. psb_size is used (as in the device + ! transfer routines of this module) because the SIZE intrinsic + ! must not be applied to an unallocated array. + ! + function z_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + (2*psb_sizeof_dp) * psb_size(a%val) + res = res + psb_sizeof_ip * psb_size(a%ja) + res = res + psb_sizeof_ip * psb_size(a%irn) + res = res + psb_sizeof_ip * psb_size(a%idiag) + res = res + psb_sizeof_ip * psb_size(a%hkoffs) + end function z_oacc_hll_sizeof + + + + function z_oacc_hll_is_host(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_oacc_hll_is_host + + function z_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_oacc_hll_is_sync + + function z_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_oacc_hll_is_dev + + subroutine z_oacc_hll_set_host(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_oacc_hll_set_host + + subroutine 
z_oacc_hll_set_sync(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_oacc_hll_set_sync + + subroutine z_oacc_hll_set_dev(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_oacc_hll_set_dev + + function z_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLLOA' + end function z_oacc_hll_get_fmt + + subroutine z_oacc_hll_sync_dev_space(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_copyin(a%val) + if (psb_size(a%ja)>0) call acc_copyin(a%ja) + if (psb_size(a%irn)>0) call acc_copyin(a%irn) + if (psb_size(a%idiag)>0) call acc_copyin(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs) + end subroutine z_oacc_hll_sync_dev_space + + + subroutine z_oacc_hll_sync(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_z_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! 
+ if (a%is_dev()) then + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs) + else if (a%is_host()) then + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine z_oacc_hll_sync + +end module psb_z_oacc_hll_mat_mod diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 new file mode 100644 index 00000000..a119303d --- /dev/null +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -0,0 +1,1013 @@ +module psb_z_oacc_vect_mod + use iso_c_binding + use openacc + use psb_const_mod + use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod + use psb_z_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_vect_type) :: psb_z_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => z_oacc_get_nrows + procedure, nopass :: get_fmt => z_oacc_get_fmt + + procedure, pass(x) :: all => z_oacc_vect_all + procedure, pass(x) :: zero => z_oacc_zero + procedure, pass(x) :: asb_m => z_oacc_asb_m + procedure, pass(x) :: sync => z_oacc_sync + procedure, pass(x) :: sync_dev_space => z_oacc_sync_dev_space + procedure, pass(x) :: bld_x => z_oacc_bld_x + procedure, pass(x) :: bld_mn => z_oacc_bld_mn + procedure, pass(x) :: free => z_oacc_vect_free + procedure, pass(x) :: free_buffer => z_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => 
z_oacc_vect_maybe_free_buffer + procedure, pass(x) :: ins_a => z_oacc_ins_a + procedure, pass(x) :: ins_v => z_oacc_ins_v + procedure, pass(x) :: is_host => z_oacc_is_host + procedure, pass(x) :: is_dev => z_oacc_is_dev + procedure, pass(x) :: is_sync => z_oacc_is_sync + procedure, pass(x) :: set_host => z_oacc_set_host + procedure, pass(x) :: set_dev => z_oacc_set_dev + procedure, pass(x) :: set_sync => z_oacc_set_sync + procedure, pass(x) :: set_scal => z_oacc_set_scal + + procedure, pass(x) :: new_buffer => z_oacc_new_buffer + procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x + procedure, pass(x) :: gthzbuf => z_oacc_gthzbuf + procedure, pass(y) :: sctb => z_oacc_sctb + procedure, pass(y) :: sctb_x => z_oacc_sctb_x + procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf + procedure, nopass :: device_wait => z_oacc_device_wait + + procedure, pass(x) :: get_size => z_oacc_get_size + + procedure, pass(x) :: dot_v => z_oacc_vect_dot + procedure, pass(x) :: dot_a => z_oacc_dot_a + procedure, pass(y) :: axpby_v => z_oacc_axpby_v + procedure, pass(y) :: axpby_a => z_oacc_axpby_a + procedure, pass(z) :: upd_xyz => z_oacc_upd_xyz + procedure, pass(y) :: mlt_a => z_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => psb_z_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_z_oacc_mlt_v_2 + procedure, pass(x) :: scal => z_oacc_scal + procedure, pass(x) :: nrm2 => z_oacc_nrm2 + procedure, pass(x) :: amax => z_oacc_amax + procedure, pass(x) :: asum => z_oacc_asum + procedure, pass(x) :: absval1 => z_oacc_absval1 + procedure, pass(x) :: absval2 => z_oacc_absval2 + final :: z_oacc_final_vect_free + end type psb_z_vect_oacc + + interface + subroutine psb_z_oacc_mlt_v(x, y, info) + import + implicit none + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_mlt_v + end interface + + interface + subroutine psb_z_oacc_mlt_v_2(alpha, x, y, 
beta, z, info, conjgx, conjgy) + import + implicit none + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + end subroutine psb_z_oacc_mlt_v_2 + end interface + +contains + + subroutine z_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine z_oacc_device_wait + + subroutine z_oacc_absval1(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: n + + if (x%is_host()) call x%sync() + n = size(x%v) + call z_inner_oacc_absval1(n,x%v) + call x%set_dev() + contains + subroutine z_inner_oacc_absval1(n,x) + implicit none + complex(psb_dpk_), intent(inout) :: x(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop present(x) + do i = 1, n + x(i) = abs(x(i)) + end do + end subroutine z_inner_oacc_absval1 + end subroutine z_oacc_absval1 + + ! + ! y(i) = abs(x(i)) for i = 1..min(size(x%v),size(y%v)). + ! When y is an OpenACC vector the work is done on the device, + ! otherwise both operands are brought to the host and the base + ! class implementation is used. + ! + subroutine z_oacc_absval2(x, y) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + + n = min(size(x%v), size(y%v)) + select type (yy => y) + class is (psb_z_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + call z_inner_oacc_absval2(n,x%v,yy%v) + ! The result now lives on the device only: record it, as + ! absval1 does, so that the host copy is refreshed before use. + call yy%set_dev() + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_z_base_vect_type%absval(y) + end select + contains + subroutine z_inner_oacc_absval2(n,x,y) + implicit none + complex(psb_dpk_), intent(inout) :: x(:),y(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = abs(x(i)) + end do + end subroutine z_inner_oacc_absval2 + end subroutine z_oacc_absval2 + + subroutine z_oacc_scal(alpha, x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + 
complex(psb_dpk_), intent(in) :: alpha + integer(psb_ipk_) :: info + if (x%is_host()) call x%sync() + call z_inner_oacc_scal(alpha, x%v) + call x%set_dev() + contains + subroutine z_inner_oacc_scal(alpha, x) + complex(psb_dpk_), intent(in) :: alpha + complex(psb_dpk_), intent(inout) :: x(:) + integer(psb_ipk_) :: i + !$acc parallel loop present(x) + do i = 1, size(x) + x(i) = alpha * x(i) + end do + end subroutine z_inner_oacc_scal + end subroutine z_oacc_scal + + function z_oacc_nrm2(n, x) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() +!!$ write(0,*)'oacc_nrm2' + res = z_inner_oacc_nrm2(n, x%v) + contains + function z_inner_oacc_nrm2(n, x) result(res) + integer(psb_ipk_) :: n + complex(psb_dpk_) :: x(:) + real(psb_dpk_) :: res + real(psb_dpk_) :: sum, mx + integer(psb_ipk_) :: i + mx = dzero + !$acc parallel loop reduction(max:mx) present(x) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + if (mx == dzero) then + res = mx + else + sum = dzero + !$acc parallel loop reduction(+:sum) present(x) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end if + end function z_inner_oacc_nrm2 + end function z_oacc_nrm2 + + function z_oacc_amax(n, x) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + res = z_inner_oacc_amax(n, x%v) + contains + function z_inner_oacc_amax(n, x) result(res) + integer(psb_ipk_) :: n + complex(psb_dpk_) :: x(:) + real(psb_dpk_) :: res + real(psb_dpk_) :: max_val + integer(psb_ipk_) :: i + max_val = dzero + !$acc parallel loop reduction(max:max_val) present(x) + do i = 1, n + if (abs(x(i)) > max_val) max_val = abs(x(i)) + end do + res = max_val + end function z_inner_oacc_amax + end function z_oacc_amax + + function 
z_oacc_asum(n, x) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + complex(psb_dpk_) :: sum + integer(psb_ipk_) :: i + if (x%is_host()) call x%sync() + res = z_inner_oacc_asum(n, x%v) + contains + function z_inner_oacc_asum(n, x) result(res) + integer(psb_ipk_) :: n + complex(psb_dpk_) :: x(:) + real(psb_dpk_) :: res + integer(psb_ipk_) :: i + res = dzero + !$acc parallel loop reduction(+:res) present(x) + do i = 1, n + res = res + abs(x(i)) + end do + end function z_inner_oacc_asum + end function z_oacc_asum + + + ! + ! y%v(i) = y%v(i) * x(i), with x a plain array argument. + ! The product is formed on the host: y is brought back from the + ! device if needed and flagged as host-resident afterwards. + ! No OpenACC kernel is used because x is not guaranteed to be + ! present on the device, and anything written there would be + ! discarded by the set_host() below. + ! + subroutine z_oacc_mlt_a(x, y, info) + implicit none + complex(psb_dpk_), intent(in) :: x(:) + class(psb_z_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + ! Never run past the end of the shorter operand + n = min(size(x), size(y%v)) + do i = 1, n + y%v(i) = y%v(i) * x(i) + end do + call y%set_host() + end subroutine z_oacc_mlt_a + + ! + ! z%v(i) = alpha * x(i) * y(i) + beta * z%v(i), with x and y + ! plain array arguments. Computed on the host for the same + ! reasons as in z_oacc_mlt_a; z is flagged as host-resident. + ! + subroutine z_oacc_mlt_a_2(alpha, x, y, beta, z, info) + implicit none + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(in) :: y(:) + class(psb_z_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + ! Never run past the end of the shortest operand + n = min(size(x), size(y), size(z%v)) + do i = 1, n + z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) + end do + call z%set_host() + end subroutine z_oacc_mlt_a_2 + + subroutine z_oacc_axpby_v(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_oacc), intent(inout) :: y + complex(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, i + + info = psb_success_ + + select type(xx => x) + type is (psb_z_vect_oacc) + if ((beta /= 
zzero) .and. y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(y%v) + if ((nx < m) .or. (ny < m)) then + info = psb_err_internal_error_ + else + call z_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info) + end if + call y%set_dev() + class default + if ((alpha /= zzero) .and. (x%is_dev())) call x%sync() + call y%axpby(m, alpha, x%v, beta, info) + end select + contains + ! + ! Device kernel: y(1:m) = alpha*x(1:m) + beta*y(1:m). + ! Both arrays must already be present on the device. + ! info is intent(out), so it has to be assigned here: otherwise + ! the caller's status becomes undefined on return. + ! + subroutine z_inner_oacc_axpby(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent(inout) :: x(:) + complex(psb_dpk_), intent(inout) :: y(:) + complex(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + ! Own loop index, instead of the host-associated one + integer(psb_ipk_) :: i + + info = psb_success_ + !$acc parallel present(x,y) + !$acc loop + do i = 1, m + y(i) = alpha * x(i) + beta * y(i) + end do + !$acc end parallel + end subroutine z_inner_oacc_axpby + end subroutine z_oacc_axpby_v + + ! + ! y%v(1:m) = alpha*x(1:m) + beta*y%v(1:m), with x a plain array + ! argument. Computed on the host; y is flagged as host-resident + ! on exit. + ! + subroutine z_oacc_axpby_a(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent(in) :: x(:) + class(psb_z_vect_oacc), intent(inout) :: y + complex(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i + + ! info is intent(out): give it a defined value + info = psb_success_ + if ((beta /= zzero) .and. (y%is_dev())) call y%sync() + + do i = 1, m + y%v(i) = alpha * x(i) + beta * y%v(i) + end do + call y%set_host() + end subroutine z_oacc_axpby_a + + subroutine z_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_oacc), intent(inout) :: z + complex(psb_dpk_), intent(in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, i + logical :: gpu_done + + info = psb_success_ + gpu_done = .false. 
+ + select type(xx => x) + class is (psb_z_vect_oacc) + select type(yy => y) + class is (psb_z_vect_oacc) + select type(zz => z) + class is (psb_z_vect_oacc) + if ((beta /= zzero) .and. yy%is_host()) call yy%sync() + if ((delta /= zzero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop present(xx%v,yy%v,zz%v) + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. + end select + end select + end select + + if (.not. gpu_done) then + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) + end if + end subroutine z_oacc_upd_xyz + + subroutine z_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta + class(psb_z_vect_oacc) :: y + integer(psb_ipk_) :: info, k + logical :: acc_done + if (.not.allocated(y%combuf)) then + write(0,*) 'allocation error for y%combuf ' + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + acc_done = .false. + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. 
+ end select + + if (.not.acc_done) then + ! Host fallback. The part of combuf to scatter starts at + ! position i, exactly as in the device branch, which passes + ! y%combuf(i:i+n-1). y is updated on the host, so its state + ! is flagged accordingly (as z_oacc_sctb_x does). + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k+i-1) + end do + call y%set_host() + end if + + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_dpk_) :: beta,x(:), y(:) + integer(psb_ipk_) :: k + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + + end subroutine z_oacc_sctb_buf + + subroutine z_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i + integer(psb_mpk_):: n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_oacc) :: y + integer(psb_ipk_) :: info, ni, k + logical :: acc_done + + acc_done = .false. + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (acc_is_present(x)) then + call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1)) + acc_done = .true. 
+ call y%set_dev() + end if + end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if + + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_dpk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc update device(x(1:n)) + !$acc parallel loop present(x,y) + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + + end subroutine z_oacc_sctb_x + + subroutine z_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync() + + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine z_oacc_sctb + + subroutine z_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type) :: idx + class(psb_z_vect_oacc) :: x + integer(psb_ipk_) :: info,k + logical :: acc_done + + info = 0 + acc_done = .false. + + if (.not.allocated(x%combuf)) then + write(0,*) 'oacc allocation error combuf gthzbuf ' + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. 
+ end select + + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if + + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_dpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! + !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine z_oacc_gthzbuf + + subroutine z_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i + integer(psb_mpk_) :: n + class(psb_i_base_vect_type):: idx + complex(psb_dpk_) :: y(:) + class(psb_z_vect_oacc):: x + integer(psb_ipk_) :: info, k + logical :: acc_done + + info = 0 + acc_done = .false. + select type (ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if + end select + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_mpk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_dpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + ! 
+ !$acc parallel loop present(x,y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y(1:n)) + end subroutine inner_gth + end subroutine z_oacc_gthzv_x + + subroutine z_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_z_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_z_vect_oacc) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + !$acc parallel loop present(x%v,virl%v,vval%v) + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. 
+ end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync() + end select + select type(vval => val) + type is (psb_z_vect_oacc) + if (vval%is_dev()) call vval%sync() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine z_oacc_ins_v + + subroutine z_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_z_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + + + end subroutine z_oacc_ins_a + + subroutine z_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%free(info) + call x%all(ione*n, info) + if (info /= 0) then + call psb_errpush(info, 'z_oacc_bld_mn',& + & i_err=ione*(/n, n, n, n, n/)) + end if + call x%set_host() + call x%sync_dev_space() + + end subroutine z_oacc_bld_mn + + + subroutine z_oacc_bld_x(x, this) + use psb_base_mod + implicit none + complex(psb_dpk_), intent(in) :: this(:) + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%free(info) + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'z_oacc_bld_x', & + i_err=(/size(this)*ione, izero, izero, izero, izero/)) + return + end if + x%v(:) = this(:) + call x%set_host() + call x%sync_dev_space() + + end subroutine z_oacc_bld_x + + subroutine z_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + 
class(psb_z_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out)        :: info
+    integer(psb_mpk_) :: nd
+
+    info = psb_success_
+
+    if (x%is_dev()) then
+      nd = size(x%v)
+      if (nd < n) then
+        call x%sync()
+        call x%psb_z_base_vect_type%asb(n, info)
+        if (info == psb_success_) call x%sync()
+        call x%set_host()
+      end if
+    else
+      if (size(x%v) < n) then
+        call x%psb_z_base_vect_type%asb(n, info)
+        if (info == psb_success_) call x%sync()
+        call x%set_host()
+      end if
+    end if
+  end subroutine z_oacc_asb_m
+
+  !
+  ! Set x%v(first:last) to VAL with an OpenACC kernel and mark the device
+  ! copy as the valid one.
+  !   first  optional lower bound, clamped to 1           (default 1)
+  !   last   optional upper bound, clamped to get_nrows() (default get_nrows())
+  !
+  subroutine z_oacc_set_scal(x, val, first, last)
+    implicit none
+    class(psb_z_vect_oacc), intent(inout) :: x
+    complex(psb_dpk_), intent(in)         :: val
+    integer(psb_ipk_), optional           :: first, last
+
+    ! The loop index is declared explicitly: it was previously left to
+    ! implicit typing, which does not follow the psb_ipk_ kind.
+    integer(psb_ipk_) :: first_, last_, i
+
+    first_ = 1
+    last_  = x%get_nrows()
+    if (present(first)) first_ = max(1, first)
+    if (present(last))  last_  = min(last, last_)
+
+    ! Entries outside first_:last_ keep their current values.  If the newest
+    ! copy lives on the host it has to reach the device before the device
+    ! copy is declared valid below, otherwise those entries would be stale.
+    if (x%is_host()) call x%sync()
+
+    !$acc parallel loop present(x%v)
+    do i = first_, last_
+      x%v(i) = val
+    end do
+    !$acc end parallel loop
+
+    call x%set_dev()
+  end subroutine z_oacc_set_scal
+
+  !
+  ! Zero the whole vector on the device.
+  ! set_dev() is called before set_scal(): every entry is overwritten, so
+  ! no host-to-device transfer is needed beforehand.
+  !
+  subroutine z_oacc_zero(x)
+    use psi_serial_mod
+    implicit none
+    class(psb_z_vect_oacc), intent(inout) :: x
+    call x%set_dev()
+    call x%set_scal(zzero)
+  end subroutine z_oacc_zero
+
+  !
+  ! Number of entries currently stored in x%v; 0 when x%v is not allocated.
+  !
+  function z_oacc_get_nrows(x) result(res)
+    implicit none
+    class(psb_z_vect_oacc), intent(in) :: x
+    integer(psb_ipk_) :: res
+
+    ! Give the result a defined value on every path: it used to be left
+    ! undefined for an unallocated vector.
+    res = 0
+    if (allocated(x%v)) res = size(x%v)
+  end function z_oacc_get_nrows
+
+  !
+  ! Format tag identifying this vector implementation.
+  !
+  function z_oacc_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = "zOACC"
+
+  end function z_oacc_get_fmt
+
+
+  function z_oacc_vect_dot(n, x, y) result(res)
+    implicit none
+    class(psb_z_vect_oacc), intent(inout) :: x
+    class(psb_z_base_vect_type), intent(inout) :: y
+    integer(psb_ipk_), intent(in) :: n
+    complex(psb_dpk_) :: res
+    integer(psb_ipk_) :: info
+
+    res = zzero
+!!$    write(0,*) 'oacc_dot_v'
+    select type(yy => y)
+    type is (psb_z_vect_oacc)
+      if (x%is_host()) call x%sync()
+      if (yy%is_host()) call yy%sync()
+      res = z_inner_oacc_dot(n, x%v, yy%v)
+    class default
+      if (x%is_dev()) call x%sync()
+      res = y%dot(n, x%v)
+    end select
+  contains
+    !
+    ! Device kernel: sum of x(i)*y(i) for i = 1..n.
+    ! Both arrays must already be present on the device.
+    ! NOTE(review): no conjugation is applied here; confirm this agrees
+    ! with the convention used by the host-side dot of the base class.
+    !
+    function z_inner_oacc_dot(n, x, y) result(res)
+      implicit none
+      complex(psb_dpk_), intent(in) :: x(:)
+      complex(psb_dpk_), intent(in) :: y(:)
+      integer(psb_ipk_), intent(in) :: n
+      complex(psb_dpk_)  :: res
+      integer(psb_ipk_) :: i
+
+      ! The reduction combines the partial sums with the value the variable
+      ! holds on entry, so it has to start from a defined zero (it was
+      ! previously left uninitialised; the result is also undefined for n<=0).
+      res = zzero
+      !$acc parallel loop reduction(+:res) present(x, y)
+      do i = 1, n
+        res = res + x(i) * y(i)
+      end do
+      !$acc end parallel loop
+    end function z_inner_oacc_dot
+  end function z_oacc_vect_dot
+
+  !
+  ! Dot product between the vector and a plain host array: the vector is
+  ! brought back to the host if its newest copy is on the device, then the
+  ! external routine is called on the host data.
+  ! NOTE(review): the reference BLAS provides ZDOTC and ZDOTU but no ZDOT;
+  ! confirm which symbol is intended here before relying on this routine.
+  !
+  function z_oacc_dot_a(n, x, y) result(res)
+    implicit none
+    class(psb_z_vect_oacc), intent(inout) :: x
+    complex(psb_dpk_), intent(in)         :: y(:)
+    integer(psb_ipk_), intent(in)         :: n
+    complex(psb_dpk_)                     :: res
+    complex(psb_dpk_), external           :: zdot
+
+    if (x%is_dev()) call x%sync()
+    res = zdot(n, y, 1, x%v, 1)
+
+  end function z_oacc_dot_a
+
+
+  subroutine z_oacc_new_buffer(n,x,info)
+    implicit none
+    class(psb_z_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(in)         :: n
+    integer(psb_ipk_), intent(out)        :: info
+
+    !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+    if (n > psb_size(x%combuf)) then
+      !write(0,*) 'oacc new_buffer: reallocating '
+      if (allocated(x%combuf)) then
+        !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+        !$acc exit data delete(x%combuf)
+      end if
+      call x%psb_z_base_vect_type%new_buffer(n,info)
+      !$acc enter data copyin(x%combuf)
+      !
call acc_copyin(x%combuf) + end if + end subroutine z_oacc_new_buffer + + subroutine z_oacc_sync_dev_space(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x +!!$ write(0,*) 'oacc sync_dev_space' + if (psb_size(x%v)>0) call acc_copyin(x%v) + end subroutine z_oacc_sync_dev_space + + subroutine z_oacc_sync(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + if (psb_size(x%v)>0) call acc_update_self(x%v) + end if + if (x%is_host()) then + if (.not.acc_is_present(x%v)) call z_oacc_sync_dev_space(x) + if (psb_size(x%v)>0) call acc_update_device(x%v) + end if + call x%set_sync() + end subroutine z_oacc_sync + + subroutine z_oacc_set_host(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine z_oacc_set_host + + subroutine z_oacc_set_dev(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine z_oacc_set_dev + + subroutine z_oacc_set_sync(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine z_oacc_set_sync + + function z_oacc_is_dev(x) result(res) + implicit none + class(psb_z_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_oacc_is_dev + + function z_oacc_is_host(x) result(res) + implicit none + class(psb_z_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_oacc_is_host + + function z_oacc_is_sync(x) result(res) + implicit none + class(psb_z_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_oacc_is_sync + + subroutine z_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_z_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'z_oacc_all', & + 
i_err=(/n, n, n, n, n/))
+    end if
+    call x%set_host()
+    call x%sync_dev_space()
+  end subroutine z_oacc_vect_all
+
+  !
+  ! Finaliser: release the communication buffer, drop the device copy of
+  ! x%v if there is one, then deallocate the host array.
+  !
+  subroutine z_oacc_final_vect_free(x)
+    implicit none
+    type(psb_z_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_) :: info
+    info = 0
+!!$    write(0,*) 'oacc final_vect_free'
+    call x%free_buffer(info)
+    if (allocated(x%v)) then
+      if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+      deallocate(x%v, stat=info)
+    end if
+
+  end subroutine z_oacc_final_vect_free
+
+  !
+  ! Release the communication buffer and the device copy of x%v, then let
+  ! the base class free the host storage.
+  !   info  set to 0 on entry, then overwritten by the calls below.
+  !
+  subroutine z_oacc_vect_free(x, info)
+    implicit none
+    class(psb_z_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out)        :: info
+    info = 0
+!!$    write(0,*) 'oacc vect_free'
+    call x%free_buffer(info)
+    ! The builders in this module call free() on vectors that may not have
+    ! been allocated yet; only query the OpenACC runtime when x%v exists,
+    ! as the finaliser above already does.
+    if (allocated(x%v)) then
+      if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+    end if
+    call x%psb_z_base_vect_type%free(info)
+  end subroutine z_oacc_vect_free
+
+  !
+  ! Release the communication buffer only when
+  ! psb_oacc_get_maybe_free_buffer() returns true; otherwise do nothing.
+  !
+  subroutine z_oacc_vect_maybe_free_buffer(x,info)
+    implicit none
+    class(psb_z_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out)        :: info
+
+    info = 0
+    if (psb_oacc_get_maybe_free_buffer())  then
+      !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+      call x%free_buffer(info)
+    end if
+
+  end subroutine z_oacc_vect_maybe_free_buffer
+
+  subroutine z_oacc_vect_free_buffer(x,info)
+    implicit none
+    class(psb_z_vect_oacc), intent(inout) :: x
+    integer(psb_ipk_), intent(out)        :: info
+!
write(0,*) 'oacc free_buffer' + info = 0 + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_z_base_vect_type%free_buffer(info) + + end subroutine z_oacc_vect_free_buffer + + function z_oacc_get_size(x) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + res = size(x%v) + end function z_oacc_get_size + +end module psb_z_oacc_vect_mod diff --git a/prec/CMakeLists.txt b/prec/CMakeLists.txt new file mode 100644 index 00000000..83bf7c82 --- /dev/null +++ b/prec/CMakeLists.txt @@ -0,0 +1,141 @@ + +set(PSB_prec_source_files + psb_c_diagprec.f90 + psb_prec_mod.f90 + psb_c_ainv_tools_mod.f90 + psb_s_ilu_fact_mod.f90 + psb_z_ainv_tools_mod.f90 + psb_s_ainv_fact_mod.f90 + psb_d_invt_fact_mod.f90 + psb_z_base_prec_mod.f90 + psb_s_invt_fact_mod.f90 + impl/psb_d_nullprec_impl.f90 + impl/psb_cprecinit.f90 + impl/psb_crwclip.f90 + impl/psb_z_sparsify.f90 + impl/psb_z_nullprec_impl.f90 + impl/psb_c_ilut_fact.f90 + impl/psb_d_sp_drop.f90 + impl/psb_c_ilu0_fact.f90 + impl/psb_d_prec_type_impl.f90 + impl/psb_s_invt_fact.f90 + impl/psb_z_ilut_fact.f90 + impl/psb_s_diagprec_impl.f90 + impl/psb_z_prec_type_impl.f90 + impl/psb_z_diagprec_impl.f90 + impl/psb_z_invk_fact.f90 + impl/psb_z_iluk_fact.f90 + impl/psb_s_ilut_fact.f90 + impl/psb_d_ilut_fact.f90 + impl/psb_dprecinit.f90 + impl/psb_s_sp_drop.f90 + impl/psb_s_iluk_fact.f90 + impl/psb_d_ainv_bld.f90 + impl/psb_d_sparsify.f90 + impl/psb_zprecinit.f90 + impl/psb_z_invt_fact.f90 + impl/psb_d_iluk_fact.f90 + impl/psb_c_sp_drop.f90 + impl/psb_z_bjacprec_impl.f90 + impl/psb_c_invk_fact.f90 + impl/psb_sprecinit.f90 + impl/psb_z_ilu0_fact.f90 + impl/psb_zprecbld.f90 + impl/psb_c_bjacprec_impl.f90 + impl/psb_s_ilu0_fact.f90 + impl/psb_s_prec_type_impl.f90 + impl/psb_s_nullprec_impl.f90 + impl/psb_s_bjacprec_impl.f90 + impl/psb_c_prec_type_impl.f90 + impl/psb_z_ainv_bld.f90 + impl/psb_s_invk_fact.f90 + impl/psb_z_sp_drop.f90 + impl/psb_c_diagprec_impl.f90 + 
impl/psb_d_invk_fact.f90 + impl/psb_c_invt_fact.f90 + impl/psb_dprecbld.f90 + impl/psb_c_iluk_fact.f90 + impl/psb_s_ainv_bld.f90 + impl/psb_c_nullprec_impl.f90 + impl/psb_c_sparsify.f90 + impl/psb_c_ainv_bld.f90 + impl/psb_s_sparsify.f90 + impl/psb_d_diagprec_impl.f90 + impl/psb_d_invt_fact.f90 + impl/psb_d_ilu0_fact.f90 + impl/psb_cprecbld.f90 + impl/psb_srwclip.f90 + impl/psb_d_bjacprec_impl.f90 + impl/psb_drwclip.f90 + impl/psb_zrwclip.f90 + impl/psb_sprecbld.f90 + psb_d_ainv_fact_mod.f90 + psb_z_prec_mod.f90 + psb_s_nullprec.f90 + psb_prec_type.f90 + psb_s_bjacprec.f90 + psb_z_diagprec.f90 + psb_z_bjacprec.f90 + psb_d_diagprec.f90 + psb_c_prec_mod.f90 + psb_d_nullprec.f90 + psb_s_diagprec.f90 + psb_c_prec_type.f90 + psb_c_ilu_fact_mod.f90 + psb_s_base_prec_mod.f90 + psb_c_nullprec.f90 + psb_c_invk_fact_mod.f90 + psb_z_invk_fact_mod.f90 + psb_z_prec_type.f90 + psb_c_invt_fact_mod.f90 + psb_s_prec_mod.f90 + psb_s_prec_type.f90 + psb_ainv_tools_mod.f90 + psb_d_invk_fact_mod.f90 + psb_c_bjacprec.f90 + psb_d_prec_type.f90 + psb_prec_const_mod.f90 + psb_s_ainv_tools_mod.f90 + psb_z_nullprec.f90 + psb_d_ainv_tools_mod.f90 + psb_d_bjacprec.f90 + psb_d_prec_mod.f90 + psb_c_base_prec_mod.f90 + psb_z_ainv_fact_mod.f90 + psb_d_base_prec_mod.f90 + psb_c_ainv_fact_mod.f90 + psb_s_invk_fact_mod.f90 + psb_d_ilu_fact_mod.f90 + psb_z_ilu_fact_mod.f90 + psb_z_invt_fact_mod.f90 + psb_d_biconjg_mod.F90 + impl/psb_zsparse_biconjg_llk_noth.F90 + impl/psb_ssparse_biconjg_s_llk.F90 + impl/psb_ssparse_biconjg_llk_noth.F90 + impl/psb_csparse_biconjg_s_llk.F90 + impl/psb_dsparse_biconjg_mlk.F90 + impl/psb_ssparse_biconjg_llk.F90 + impl/psb_csparse_biconjg_s_ft_llk.F90 + impl/psb_dsparse_biconjg_s_ft_llk.F90 + impl/psb_csparse_biconjg_llk_noth.F90 + impl/psb_dsparse_biconjg_s_llk.F90 + impl/psb_zsparse_biconjg_llk.F90 + impl/psb_csparse_biconjg_mlk.F90 + impl/psb_zsparse_biconjg_s_ft_llk.F90 + impl/psb_zsparse_biconjg_mlk.F90 + impl/psb_ssparse_biconjg_s_ft_llk.F90 + 
impl/psb_zsparse_biconjg_s_llk.F90 + impl/psb_dsparse_biconjg_llk.F90 + impl/psb_dsparse_biconjg_llk_noth.F90 + impl/psb_ssparse_biconjg_mlk.F90 + impl/psb_csparse_biconjg_llk.F90 + psb_biconjg_mod.F90 + psb_z_biconjg_mod.F90 + psb_s_biconjg_mod.F90 + psb_c_biconjg_mod.F90 + + ) + +foreach(file IN LISTS PSB_prec_source_files) + list(APPEND prec_source_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() diff --git a/prec/impl/Makefile b/prec/impl/Makefile index bc5ef2e1..2b6b1dc5 100644 --- a/prec/impl/Makefile +++ b/prec/impl/Makefile @@ -7,16 +7,16 @@ HERE=.. OBJS=psb_s_prec_type_impl.o psb_d_prec_type_impl.o \ psb_c_prec_type_impl.o psb_z_prec_type_impl.o \ psb_d_diagprec_impl.o psb_d_bjacprec_impl.o psb_d_nullprec_impl.o \ - psb_dilu_fct.o psb_d_ilu0_fact.o psb_d_iluk_fact.o psb_d_ilut_fact.o \ + psb_d_ilu0_fact.o psb_d_iluk_fact.o psb_d_ilut_fact.o \ psb_dprecbld.o psb_dprecinit.o \ psb_s_diagprec_impl.o psb_s_bjacprec_impl.o psb_s_nullprec_impl.o \ - psb_silu_fct.o psb_s_ilu0_fact.o psb_s_iluk_fact.o psb_s_ilut_fact.o \ + psb_s_ilu0_fact.o psb_s_iluk_fact.o psb_s_ilut_fact.o \ psb_sprecbld.o psb_sprecinit.o \ psb_c_diagprec_impl.o psb_c_bjacprec_impl.o psb_c_nullprec_impl.o \ - psb_cilu_fct.o psb_c_ilu0_fact.o psb_c_iluk_fact.o psb_c_ilut_fact.o \ + psb_c_ilu0_fact.o psb_c_iluk_fact.o psb_c_ilut_fact.o \ psb_cprecbld.o psb_cprecinit.o \ psb_z_diagprec_impl.o psb_z_bjacprec_impl.o psb_z_nullprec_impl.o \ - psb_zilu_fct.o psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \ + psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \ psb_zprecbld.o psb_zprecinit.o \ psb_c_sparsify.o psb_d_sparsify.o psb_s_sparsify.o psb_z_sparsify.o \ psb_crwclip.o psb_drwclip.o psb_srwclip.o psb_zrwclip.o \ diff --git a/prec/impl/psb_c_ilu0_fact.f90 b/prec/impl/psb_c_ilu0_fact.f90 index 1a3e1046..c016359f 100644 --- a/prec/impl/psb_c_ilu0_fact.f90 +++ b/prec/impl/psb_c_ilu0_fact.f90 @@ -130,7 +130,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! 
(see psb_fact_bld), then blck is empty. ! -subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd) +subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd,shft) use psb_base_mod use psb_c_ilu_fact_mod, psb_protect_name => psb_cilu0_fact @@ -145,11 +145,13 @@ subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd) integer(psb_ipk_), intent(out) :: info type(psb_cspmat_type),intent(in), optional, target :: blck character, intent(in), optional :: upd + complex(psb_spk_), intent(in), optional :: shft ! Local variables integer(psb_ipk_) :: l1, l2, m, err_act type(psb_cspmat_type), pointer :: blck_ type(psb_c_csr_sparse_mat) :: ll, uu + complex(psb_spk_) :: shft_ character :: upd_ character(len=20) :: name, ch_err @@ -177,7 +179,12 @@ subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd) else upd_ = 'F' end if - + if (present(shft)) then + shft_ = shft + else + shft_ = czero + end if + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& & (m > size(d)) ) then @@ -193,7 +200,7 @@ subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd) ! Compute the ILU(0) or the MILU(0) factorization, depending on ialg ! call psb_cilu0_factint(ialg,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,upd_,info) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,upd_,shft_,info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_cilu0_factint' @@ -314,7 +321,7 @@ contains ! Error code. ! subroutine psb_cilu0_factint(ialg,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,shft,info) implicit none @@ -325,6 +332,7 @@ contains integer(psb_ipk_), intent(inout) :: lja(:),lirp(:),uja(:),uirp(:) complex(psb_spk_), intent(inout) :: lval(:),uval(:),d(:) character, intent(in) :: upd + complex(psb_spk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, ktrw,err_act, m @@ -382,14 +390,14 @@ contains ! into lval/d(i)/uval ! 
call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,& - & d(i),l2,uja,uval,ktrw,trw,upd) + & d(i),l2,uja,uval,ktrw,trw,upd,shft_) else ! ! Copy the i-th local row of the matrix, stored in b ! (as (i-ma)-th row), into lval/d(i)/uval ! call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,& - & d(i),l2,uja,uval,ktrw,trw,upd) + & d(i),l2,uja,uval,ktrw,trw,upd,shft_) endif lirp(i+1) = l1 + 1 @@ -583,7 +591,7 @@ contains ! every nrb calls to copyin. If A is in CSR format it is unused. ! subroutine ilu_copyin(i,m,a,jd,jmin,jmax,l1,lja,lval,& - & dia,l2,uja,uval,ktrw,trw,upd) + & dia,l2,uja,uval,ktrw,trw,upd,shft) use psb_base_mod @@ -597,6 +605,7 @@ contains integer(psb_ipk_), intent(inout) :: lja(:), uja(:) complex(psb_spk_), intent(inout) :: lval(:), uval(:), dia character, intent(in) :: upd + complex(psb_spk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: k,j,info,irb, nz integer(psb_ipk_), parameter :: nrb=40 @@ -625,7 +634,7 @@ contains lval(l1) = aa%val(j) lja(l1) = k else if (k == jd) then - dia = aa%val(j) + dia = aa%val(j) + shft else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = aa%val(j) @@ -665,7 +674,7 @@ contains lval(l1) = trw%val(ktrw) lja(l1) = k else if (k == jd) then - dia = trw%val(ktrw) + dia = trw%val(ktrw) + shft else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = trw%val(ktrw) diff --git a/prec/impl/psb_c_iluk_fact.f90 b/prec/impl/psb_c_iluk_fact.f90 index c4ebc678..6c6d8a5f 100644 --- a/prec/impl/psb_c_iluk_fact.f90 +++ b/prec/impl/psb_c_iluk_fact.f90 @@ -127,7 +127,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. ! 
-subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) +subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck,shft) use psb_base_mod use psb_c_ilu_fact_mod, psb_protect_name => psb_ciluk_fact @@ -141,9 +141,11 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) type(psb_cspmat_type),intent(inout) :: l,u type(psb_cspmat_type),intent(in), optional, target :: blck complex(psb_spk_), intent(inout) :: d(:) + complex(psb_spk_), intent(in), optional :: shft ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act + complex(psb_spk_) :: shft_ type(psb_cspmat_type), pointer :: blck_ type(psb_c_csr_sparse_mat) :: ll, uu character(len=20) :: name, ch_err @@ -167,6 +169,11 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) goto 9999 end if endif + if (present(shft)) then + shft_ = shft + else + shft_ = czero + end if m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& @@ -184,7 +191,7 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) ! Compute the ILU(k) or the MILU(k) factorization, depending on ialg ! call psb_ciluk_factint(fill_in,ialg,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,shft_) if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='psb_ciluk_factint' @@ -298,7 +305,7 @@ contains ! Error code. ! subroutine psb_ciluk_factint(fill_in,ialg,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,shft) use psb_base_mod @@ -311,6 +318,7 @@ contains integer(psb_ipk_), allocatable, intent(inout) :: lja(:),lirp(:),uja(:),uirp(:) complex(psb_spk_), allocatable, intent(inout) :: lval(:),uval(:) complex(psb_spk_), intent(inout) :: d(:) + complex(psb_spk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: ma,mb,i, ktrw,err_act,nidx, m @@ -400,13 +408,13 @@ contains ! ! Copy into trw the i-th local row of the matrix, stored in a ! 
- call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info) + call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info,shft) else ! ! Copy into trw the i-th local row of the matrix, stored in b ! (as (i-ma)-th row) ! - call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info) + call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info,shft) endif ! Do an elimination step on the current row. It turns out we only @@ -516,7 +524,7 @@ contains ! until we empty the buffer. Thus we will make a call to psb_sp_getblk ! every nrb calls to copyin. If A is in CSR format it is unused. ! - subroutine iluk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info) + subroutine iluk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,shft) use psb_base_mod @@ -530,6 +538,8 @@ contains integer(psb_ipk_), intent(inout) :: rowlevs(:) complex(psb_spk_), intent(inout) :: row(:) type(psb_i_heap), intent(inout) :: heap + complex(psb_spk_), intent(in) :: shft + ! Local variables integer(psb_ipk_) :: k,j,irb,err_act,nz @@ -554,6 +564,7 @@ contains k = aa%ja(j) if ((jmin<=k).and.(k<=jmax)) then row(k) = aa%val(j) + if (k==i) row(k) = row(k) + shft rowlevs(k) = 0 call heap%insert(k,info) end if @@ -587,6 +598,7 @@ contains k = trw%ja(ktrw) if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw) + if (k==i) row(k) = row(k) + shft rowlevs(k) = 0 call heap%insert(k,info) end if @@ -670,7 +682,8 @@ contains ! Note: this argument is intent(inout) and not only intent(out) ! to retain its allocation, done by this routine. ! - subroutine iluk_fact(fill_in,i,row,rowlevs,heap,d,uja,uirp,uval,uplevs,nidx,idxs,info) + subroutine iluk_fact(fill_in,i,row,rowlevs,heap,d,& + & uja,uirp,uval,uplevs,nidx,idxs,info) use psb_base_mod diff --git a/prec/impl/psb_c_ilut_fact.f90 b/prec/impl/psb_c_ilut_fact.f90 index 633899de..8421ee1c 100644 --- a/prec/impl/psb_c_ilut_fact.f90 +++ b/prec/impl/psb_c_ilut_fact.f90 @@ -123,7 +123,7 @@ ! greater than 0. 
If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. ! -subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) +subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale,shft) use psb_base_mod use psb_c_ilu_fact_mod, psb_protect_name => psb_cilut_fact @@ -139,9 +139,11 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) complex(psb_spk_), intent(inout) :: d(:) type(psb_cspmat_type),intent(in), optional, target :: blck integer(psb_ipk_), intent(in), optional :: iscale + complex(psb_spk_), intent(in), optional :: shft ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act, iscale_ + complex(psb_spk_) :: shft_ type(psb_cspmat_type), pointer :: blck_ type(psb_c_csr_sparse_mat) :: ll, uu real(psb_spk_) :: scale @@ -177,6 +179,11 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) else iscale_ = psb_ilu_scale_none_ end if + if (present(shft)) then + shft_ = shft + else + shft_ = czero + end if select case(iscale_) case(psb_ilu_scale_none_) @@ -206,7 +213,7 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) ! Compute the ILU(k,t) factorization ! call psb_cilut_factint(fill_in,thres,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,scale) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,scale,shft_) if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='psb_cilut_factint' @@ -316,7 +323,7 @@ contains ! Error code. ! subroutine psb_cilut_factint(fill_in,thres,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,scale) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,scale,shft) use psb_base_mod @@ -331,6 +338,7 @@ contains complex(psb_spk_), allocatable, intent(inout) :: lval(:),uval(:) complex(psb_spk_), intent(inout) :: d(:) real(psb_spk_), intent(in), optional :: scale + complex(psb_spk_), intent(in) :: shft ! 
Local Variables integer(psb_ipk_) :: i, ktrw,err_act,nidx,nlw,nup,jmaxup, ma, mb, m @@ -401,10 +409,10 @@ contains d(i) = czero if (i<=ma) then call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& - & row,heap,ktrw,trw,info) + & row,heap,ktrw,trw,info,shft) else call ilut_copyin(i-ma,mb,b,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& - & row,heap,ktrw,trw,info) + & row,heap,ktrw,trw,info,shft) endif ! @@ -540,7 +548,7 @@ contains ! every nrb calls to copyin. If A is in CSR format it is unused. ! subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,& - & nrmi,weight,row,heap,ktrw,trw,info) + & nrmi,weight,row,heap,ktrw,trw,info,shft) use psb_base_mod implicit none type(psb_cspmat_type), intent(in) :: a @@ -551,6 +559,7 @@ contains complex(psb_spk_), intent(inout) :: row(:) real(psb_spk_), intent(in) :: weight type(psb_i_heap), intent(inout) :: heap + complex(psb_spk_), intent(in) :: shft integer(psb_ipk_) :: k,j,irb,kin,nz integer(psb_ipk_), parameter :: nrb=40 @@ -597,6 +606,7 @@ contains call heap%insert(k,info) if (info /= psb_success_) exit if (kjd) then nup = nup + 1 if (abs(row(k))>dmaxup) then @@ -648,6 +658,7 @@ contains call heap%insert(k,info) if (info /= psb_success_) exit if (kjd) then nup = nup + 1 if (abs(row(k))>dmaxup) then diff --git a/prec/impl/psb_cilu_fct.f90 b/prec/impl/psb_cilu_fct.f90 deleted file mode 100644 index d54769bf..00000000 --- a/prec/impl/psb_cilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! 
documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_cilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_cspmat_type),intent(in) :: a - type(psb_c_csr_sparse_mat),intent(inout) :: l,u - type(psb_cspmat_type),intent(in), optional, target :: blck - complex(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_cspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! 
- - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_cilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_cilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_cilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_cspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_c_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_cilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call 
psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = czero - - ! - ! - select type(aa => a%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! 
write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = czero - - select type(aa => b%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! 
write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! 
dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_cilu_fctint -end subroutine psb_cilu_fct diff --git a/prec/impl/psb_d_ilu0_fact.f90 b/prec/impl/psb_d_ilu0_fact.f90 index 478eedfa..dde22249 100644 --- a/prec/impl/psb_d_ilu0_fact.f90 +++ b/prec/impl/psb_d_ilu0_fact.f90 @@ -130,7 +130,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck is empty. ! -subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd) +subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd,shft) use psb_base_mod use psb_d_ilu_fact_mod, psb_protect_name => psb_dilu0_fact @@ -145,11 +145,13 @@ subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd) integer(psb_ipk_), intent(out) :: info type(psb_dspmat_type),intent(in), optional, target :: blck character, intent(in), optional :: upd + real(psb_dpk_), intent(in), optional :: shft ! 
Local variables integer(psb_ipk_) :: l1, l2, m, err_act type(psb_dspmat_type), pointer :: blck_ type(psb_d_csr_sparse_mat) :: ll, uu + real(psb_dpk_) :: shft_ character :: upd_ character(len=20) :: name, ch_err @@ -177,7 +179,12 @@ subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd) else upd_ = 'F' end if - + if (present(shft)) then + shft_ = shft + else + shft_ = dzero + end if + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& & (m > size(d)) ) then @@ -193,7 +200,7 @@ subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd) ! Compute the ILU(0) or the MILU(0) factorization, depending on ialg ! call psb_dilu0_factint(ialg,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,upd_,info) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,upd_,shft_,info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_dilu0_factint' @@ -314,7 +321,7 @@ contains ! Error code. ! subroutine psb_dilu0_factint(ialg,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,shft,info) implicit none @@ -325,6 +332,7 @@ contains integer(psb_ipk_), intent(inout) :: lja(:),lirp(:),uja(:),uirp(:) real(psb_dpk_), intent(inout) :: lval(:),uval(:),d(:) character, intent(in) :: upd + real(psb_dpk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, ktrw,err_act, m @@ -382,14 +390,14 @@ contains ! into lval/d(i)/uval ! call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,& - & d(i),l2,uja,uval,ktrw,trw,upd) + & d(i),l2,uja,uval,ktrw,trw,upd,shft_) else ! ! Copy the i-th local row of the matrix, stored in b ! (as (i-ma)-th row), into lval/d(i)/uval ! call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,& - & d(i),l2,uja,uval,ktrw,trw,upd) + & d(i),l2,uja,uval,ktrw,trw,upd,shft_) endif lirp(i+1) = l1 + 1 @@ -583,7 +591,7 @@ contains ! every nrb calls to copyin. If A is in CSR format it is unused. ! 
subroutine ilu_copyin(i,m,a,jd,jmin,jmax,l1,lja,lval,& - & dia,l2,uja,uval,ktrw,trw,upd) + & dia,l2,uja,uval,ktrw,trw,upd,shft) use psb_base_mod @@ -597,6 +605,7 @@ contains integer(psb_ipk_), intent(inout) :: lja(:), uja(:) real(psb_dpk_), intent(inout) :: lval(:), uval(:), dia character, intent(in) :: upd + real(psb_dpk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: k,j,info,irb, nz integer(psb_ipk_), parameter :: nrb=40 @@ -625,7 +634,7 @@ contains lval(l1) = aa%val(j) lja(l1) = k else if (k == jd) then - dia = aa%val(j) + dia = aa%val(j) + shft else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = aa%val(j) @@ -665,7 +674,7 @@ contains lval(l1) = trw%val(ktrw) lja(l1) = k else if (k == jd) then - dia = trw%val(ktrw) + dia = trw%val(ktrw) + shft else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = trw%val(ktrw) diff --git a/prec/impl/psb_d_iluk_fact.f90 b/prec/impl/psb_d_iluk_fact.f90 index 544ec987..dc837ba9 100644 --- a/prec/impl/psb_d_iluk_fact.f90 +++ b/prec/impl/psb_d_iluk_fact.f90 @@ -127,7 +127,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. ! -subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) +subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck,shft) use psb_base_mod use psb_d_ilu_fact_mod, psb_protect_name => psb_diluk_fact @@ -141,9 +141,11 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) type(psb_dspmat_type),intent(inout) :: l,u type(psb_dspmat_type),intent(in), optional, target :: blck real(psb_dpk_), intent(inout) :: d(:) + real(psb_dpk_), intent(in), optional :: shft ! 
Local Variables integer(psb_ipk_) :: l1, l2, m, err_act + real(psb_dpk_) :: shft_ type(psb_dspmat_type), pointer :: blck_ type(psb_d_csr_sparse_mat) :: ll, uu character(len=20) :: name, ch_err @@ -167,6 +169,11 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) goto 9999 end if endif + if (present(shft)) then + shft_ = shft + else + shft_ = dzero + end if m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& @@ -184,7 +191,7 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) ! Compute the ILU(k) or the MILU(k) factorization, depending on ialg ! call psb_diluk_factint(fill_in,ialg,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,shft_) if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='psb_diluk_factint' @@ -298,7 +305,7 @@ contains ! Error code. ! subroutine psb_diluk_factint(fill_in,ialg,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,shft) use psb_base_mod @@ -311,6 +318,7 @@ contains integer(psb_ipk_), allocatable, intent(inout) :: lja(:),lirp(:),uja(:),uirp(:) real(psb_dpk_), allocatable, intent(inout) :: lval(:),uval(:) real(psb_dpk_), intent(inout) :: d(:) + real(psb_dpk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: ma,mb,i, ktrw,err_act,nidx, m @@ -400,13 +408,13 @@ contains ! ! Copy into trw the i-th local row of the matrix, stored in a ! - call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info) + call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info,shft) else ! ! Copy into trw the i-th local row of the matrix, stored in b ! (as (i-ma)-th row) ! - call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info) + call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info,shft) endif ! Do an elimination step on the current row. It turns out we only @@ -516,7 +524,7 @@ contains ! until we empty the buffer. 
Thus we will make a call to psb_sp_getblk ! every nrb calls to copyin. If A is in CSR format it is unused. ! - subroutine iluk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info) + subroutine iluk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,shft) use psb_base_mod @@ -530,6 +538,8 @@ contains integer(psb_ipk_), intent(inout) :: rowlevs(:) real(psb_dpk_), intent(inout) :: row(:) type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), intent(in) :: shft + ! Local variables integer(psb_ipk_) :: k,j,irb,err_act,nz @@ -554,6 +564,7 @@ contains k = aa%ja(j) if ((jmin<=k).and.(k<=jmax)) then row(k) = aa%val(j) + if (k==i) row(k) = row(k) + shft rowlevs(k) = 0 call heap%insert(k,info) end if @@ -587,6 +598,7 @@ contains k = trw%ja(ktrw) if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw) + if (k==i) row(k) = row(k) + shft rowlevs(k) = 0 call heap%insert(k,info) end if @@ -670,7 +682,8 @@ contains ! Note: this argument is intent(inout) and not only intent(out) ! to retain its allocation, done by this routine. ! - subroutine iluk_fact(fill_in,i,row,rowlevs,heap,d,uja,uirp,uval,uplevs,nidx,idxs,info) + subroutine iluk_fact(fill_in,i,row,rowlevs,heap,d,& + & uja,uirp,uval,uplevs,nidx,idxs,info) use psb_base_mod diff --git a/prec/impl/psb_d_ilut_fact.f90 b/prec/impl/psb_d_ilut_fact.f90 index 6c2dc698..cd185e80 100644 --- a/prec/impl/psb_d_ilut_fact.f90 +++ b/prec/impl/psb_d_ilut_fact.f90 @@ -123,7 +123,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. ! 
-subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) +subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale,shft) use psb_base_mod use psb_d_ilu_fact_mod, psb_protect_name => psb_dilut_fact @@ -139,9 +139,11 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) real(psb_dpk_), intent(inout) :: d(:) type(psb_dspmat_type),intent(in), optional, target :: blck integer(psb_ipk_), intent(in), optional :: iscale + real(psb_dpk_), intent(in), optional :: shft ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act, iscale_ + real(psb_dpk_) :: shft_ type(psb_dspmat_type), pointer :: blck_ type(psb_d_csr_sparse_mat) :: ll, uu real(psb_dpk_) :: scale @@ -177,6 +179,11 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) else iscale_ = psb_ilu_scale_none_ end if + if (present(shft)) then + shft_ = shft + else + shft_ = dzero + end if select case(iscale_) case(psb_ilu_scale_none_) @@ -206,7 +213,7 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) ! Compute the ILU(k,t) factorization ! call psb_dilut_factint(fill_in,thres,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,scale) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,scale,shft_) if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='psb_dilut_factint' @@ -316,7 +323,7 @@ contains ! Error code. ! subroutine psb_dilut_factint(fill_in,thres,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,scale) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,scale,shft) use psb_base_mod @@ -331,6 +338,7 @@ contains real(psb_dpk_), allocatable, intent(inout) :: lval(:),uval(:) real(psb_dpk_), intent(inout) :: d(:) real(psb_dpk_), intent(in), optional :: scale + real(psb_dpk_), intent(in) :: shft ! 
Local Variables integer(psb_ipk_) :: i, ktrw,err_act,nidx,nlw,nup,jmaxup, ma, mb, m @@ -401,10 +409,10 @@ contains d(i) = czero if (i<=ma) then call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& - & row,heap,ktrw,trw,info) + & row,heap,ktrw,trw,info,shft) else call ilut_copyin(i-ma,mb,b,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& - & row,heap,ktrw,trw,info) + & row,heap,ktrw,trw,info,shft) endif ! @@ -540,7 +548,7 @@ contains ! every nrb calls to copyin. If A is in CSR format it is unused. ! subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,& - & nrmi,weight,row,heap,ktrw,trw,info) + & nrmi,weight,row,heap,ktrw,trw,info,shft) use psb_base_mod implicit none type(psb_dspmat_type), intent(in) :: a @@ -551,6 +559,7 @@ contains real(psb_dpk_), intent(inout) :: row(:) real(psb_dpk_), intent(in) :: weight type(psb_i_heap), intent(inout) :: heap + real(psb_dpk_), intent(in) :: shft integer(psb_ipk_) :: k,j,irb,kin,nz integer(psb_ipk_), parameter :: nrb=40 @@ -597,6 +606,7 @@ contains call heap%insert(k,info) if (info /= psb_success_) exit if (kjd) then nup = nup + 1 if (abs(row(k))>dmaxup) then @@ -648,6 +658,7 @@ contains call heap%insert(k,info) if (info /= psb_success_) exit if (kjd) then nup = nup + 1 if (abs(row(k))>dmaxup) then diff --git a/prec/impl/psb_dilu_fct.f90 b/prec/impl/psb_dilu_fct.f90 deleted file mode 100644 index b97b88ec..00000000 --- a/prec/impl/psb_dilu_fct.f90 +++ /dev/null @@ -1,441 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! 
documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_dilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_dspmat_type),intent(in) :: a - type(psb_d_csr_sparse_mat),intent(inout) :: l,u - type(psb_dspmat_type),intent(in), optional, target :: blck - real(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_dspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! 
- - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_dilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_dilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_dilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_dspmat_type), target :: a - type(psb_dspmat_type), target :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_d_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - - name='psb_dilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - 
info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = dzero - ! - ! - select type(aa => a%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! 
write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = dzero - - select type(aa => b%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! 
write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! 
dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_dilu_fctint -end subroutine psb_dilu_fct diff --git a/prec/impl/psb_s_ilu0_fact.f90 b/prec/impl/psb_s_ilu0_fact.f90 index b6f442e9..d9ce1298 100644 --- a/prec/impl/psb_s_ilu0_fact.f90 +++ b/prec/impl/psb_s_ilu0_fact.f90 @@ -130,7 +130,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck is empty. ! -subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd) +subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd,shft) use psb_base_mod use psb_s_ilu_fact_mod, psb_protect_name => psb_silu0_fact @@ -145,11 +145,13 @@ subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd) integer(psb_ipk_), intent(out) :: info type(psb_sspmat_type),intent(in), optional, target :: blck character, intent(in), optional :: upd + real(psb_spk_), intent(in), optional :: shft ! 
Local variables integer(psb_ipk_) :: l1, l2, m, err_act type(psb_sspmat_type), pointer :: blck_ type(psb_s_csr_sparse_mat) :: ll, uu + real(psb_spk_) :: shft_ character :: upd_ character(len=20) :: name, ch_err @@ -177,7 +179,12 @@ subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd) else upd_ = 'F' end if - + if (present(shft)) then + shft_ = shft + else + shft_ = szero + end if + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& & (m > size(d)) ) then @@ -193,7 +200,7 @@ subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd) ! Compute the ILU(0) or the MILU(0) factorization, depending on ialg ! call psb_silu0_factint(ialg,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,upd_,info) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,upd_,shft_,info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_silu0_factint' @@ -314,7 +321,7 @@ contains ! Error code. ! subroutine psb_silu0_factint(ialg,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,shft,info) implicit none @@ -325,6 +332,7 @@ contains integer(psb_ipk_), intent(inout) :: lja(:),lirp(:),uja(:),uirp(:) real(psb_spk_), intent(inout) :: lval(:),uval(:),d(:) character, intent(in) :: upd + real(psb_spk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, ktrw,err_act, m @@ -382,14 +390,14 @@ contains ! into lval/d(i)/uval ! call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,& - & d(i),l2,uja,uval,ktrw,trw,upd) + & d(i),l2,uja,uval,ktrw,trw,upd,shft_) else ! ! Copy the i-th local row of the matrix, stored in b ! (as (i-ma)-th row), into lval/d(i)/uval ! call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,& - & d(i),l2,uja,uval,ktrw,trw,upd) + & d(i),l2,uja,uval,ktrw,trw,upd,shft_) endif lirp(i+1) = l1 + 1 @@ -583,7 +591,7 @@ contains ! every nrb calls to copyin. If A is in CSR format it is unused. ! 
subroutine ilu_copyin(i,m,a,jd,jmin,jmax,l1,lja,lval,& - & dia,l2,uja,uval,ktrw,trw,upd) + & dia,l2,uja,uval,ktrw,trw,upd,shft) use psb_base_mod @@ -597,6 +605,7 @@ contains integer(psb_ipk_), intent(inout) :: lja(:), uja(:) real(psb_spk_), intent(inout) :: lval(:), uval(:), dia character, intent(in) :: upd + real(psb_spk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: k,j,info,irb, nz integer(psb_ipk_), parameter :: nrb=40 @@ -625,7 +634,7 @@ contains lval(l1) = aa%val(j) lja(l1) = k else if (k == jd) then - dia = aa%val(j) + dia = aa%val(j) + shft else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = aa%val(j) @@ -665,7 +674,7 @@ contains lval(l1) = trw%val(ktrw) lja(l1) = k else if (k == jd) then - dia = trw%val(ktrw) + dia = trw%val(ktrw) + shft else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = trw%val(ktrw) diff --git a/prec/impl/psb_s_iluk_fact.f90 b/prec/impl/psb_s_iluk_fact.f90 index 6129663b..67fb8ada 100644 --- a/prec/impl/psb_s_iluk_fact.f90 +++ b/prec/impl/psb_s_iluk_fact.f90 @@ -127,7 +127,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. ! -subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) +subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck,shft) use psb_base_mod use psb_s_ilu_fact_mod, psb_protect_name => psb_siluk_fact @@ -141,9 +141,11 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) type(psb_sspmat_type),intent(inout) :: l,u type(psb_sspmat_type),intent(in), optional, target :: blck real(psb_spk_), intent(inout) :: d(:) + real(psb_spk_), intent(in), optional :: shft ! 
Local Variables integer(psb_ipk_) :: l1, l2, m, err_act + real(psb_spk_) :: shft_ type(psb_sspmat_type), pointer :: blck_ type(psb_s_csr_sparse_mat) :: ll, uu character(len=20) :: name, ch_err @@ -167,6 +169,11 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) goto 9999 end if endif + if (present(shft)) then + shft_ = shft + else + shft_ = szero + end if m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& @@ -184,7 +191,7 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) ! Compute the ILU(k) or the MILU(k) factorization, depending on ialg ! call psb_siluk_factint(fill_in,ialg,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,shft_) if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='psb_siluk_factint' @@ -298,7 +305,7 @@ contains ! Error code. ! subroutine psb_siluk_factint(fill_in,ialg,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,shft) use psb_base_mod @@ -311,6 +318,7 @@ contains integer(psb_ipk_), allocatable, intent(inout) :: lja(:),lirp(:),uja(:),uirp(:) real(psb_spk_), allocatable, intent(inout) :: lval(:),uval(:) real(psb_spk_), intent(inout) :: d(:) + real(psb_spk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: ma,mb,i, ktrw,err_act,nidx, m @@ -400,13 +408,13 @@ contains ! ! Copy into trw the i-th local row of the matrix, stored in a ! - call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info) + call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info,shft) else ! ! Copy into trw the i-th local row of the matrix, stored in b ! (as (i-ma)-th row) ! - call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info) + call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info,shft) endif ! Do an elimination step on the current row. It turns out we only @@ -516,7 +524,7 @@ contains ! until we empty the buffer. 
Thus we will make a call to psb_sp_getblk ! every nrb calls to copyin. If A is in CSR format it is unused. ! - subroutine iluk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info) + subroutine iluk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,shft) use psb_base_mod @@ -530,6 +538,8 @@ contains integer(psb_ipk_), intent(inout) :: rowlevs(:) real(psb_spk_), intent(inout) :: row(:) type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), intent(in) :: shft + ! Local variables integer(psb_ipk_) :: k,j,irb,err_act,nz @@ -554,6 +564,7 @@ contains k = aa%ja(j) if ((jmin<=k).and.(k<=jmax)) then row(k) = aa%val(j) + if (k==i) row(k) = row(k) + shft rowlevs(k) = 0 call heap%insert(k,info) end if @@ -587,6 +598,7 @@ contains k = trw%ja(ktrw) if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw) + if (k==i) row(k) = row(k) + shft rowlevs(k) = 0 call heap%insert(k,info) end if @@ -670,7 +682,8 @@ contains ! Note: this argument is intent(inout) and not only intent(out) ! to retain its allocation, done by this routine. ! - subroutine iluk_fact(fill_in,i,row,rowlevs,heap,d,uja,uirp,uval,uplevs,nidx,idxs,info) + subroutine iluk_fact(fill_in,i,row,rowlevs,heap,d,& + & uja,uirp,uval,uplevs,nidx,idxs,info) use psb_base_mod diff --git a/prec/impl/psb_s_ilut_fact.f90 b/prec/impl/psb_s_ilut_fact.f90 index 43cacf41..3d111103 100644 --- a/prec/impl/psb_s_ilut_fact.f90 +++ b/prec/impl/psb_s_ilut_fact.f90 @@ -123,7 +123,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. ! 
-subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) +subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale,shft) use psb_base_mod use psb_s_ilu_fact_mod, psb_protect_name => psb_silut_fact @@ -139,9 +139,11 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) real(psb_spk_), intent(inout) :: d(:) type(psb_sspmat_type),intent(in), optional, target :: blck integer(psb_ipk_), intent(in), optional :: iscale + real(psb_spk_), intent(in), optional :: shft ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act, iscale_ + real(psb_spk_) :: shft_ type(psb_sspmat_type), pointer :: blck_ type(psb_s_csr_sparse_mat) :: ll, uu real(psb_spk_) :: scale @@ -177,6 +179,11 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) else iscale_ = psb_ilu_scale_none_ end if + if (present(shft)) then + shft_ = shft + else + shft_ = szero + end if select case(iscale_) case(psb_ilu_scale_none_) @@ -206,7 +213,7 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) ! Compute the ILU(k,t) factorization ! call psb_silut_factint(fill_in,thres,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,scale) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,scale,shft_) if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='psb_silut_factint' @@ -316,7 +323,7 @@ contains ! Error code. ! subroutine psb_silut_factint(fill_in,thres,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,scale) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,scale,shft) use psb_base_mod @@ -331,6 +338,7 @@ contains real(psb_spk_), allocatable, intent(inout) :: lval(:),uval(:) real(psb_spk_), intent(inout) :: d(:) real(psb_spk_), intent(in), optional :: scale + real(psb_spk_), intent(in) :: shft ! 
Local Variables integer(psb_ipk_) :: i, ktrw,err_act,nidx,nlw,nup,jmaxup, ma, mb, m @@ -401,10 +409,10 @@ contains d(i) = czero if (i<=ma) then call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& - & row,heap,ktrw,trw,info) + & row,heap,ktrw,trw,info,shft) else call ilut_copyin(i-ma,mb,b,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& - & row,heap,ktrw,trw,info) + & row,heap,ktrw,trw,info,shft) endif ! @@ -540,7 +548,7 @@ contains ! every nrb calls to copyin. If A is in CSR format it is unused. ! subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,& - & nrmi,weight,row,heap,ktrw,trw,info) + & nrmi,weight,row,heap,ktrw,trw,info,shft) use psb_base_mod implicit none type(psb_sspmat_type), intent(in) :: a @@ -551,6 +559,7 @@ contains real(psb_spk_), intent(inout) :: row(:) real(psb_spk_), intent(in) :: weight type(psb_i_heap), intent(inout) :: heap + real(psb_spk_), intent(in) :: shft integer(psb_ipk_) :: k,j,irb,kin,nz integer(psb_ipk_), parameter :: nrb=40 @@ -597,6 +606,7 @@ contains call heap%insert(k,info) if (info /= psb_success_) exit if (kjd) then nup = nup + 1 if (abs(row(k))>dmaxup) then @@ -648,6 +658,7 @@ contains call heap%insert(k,info) if (info /= psb_success_) exit if (kjd) then nup = nup + 1 if (abs(row(k))>dmaxup) then diff --git a/prec/impl/psb_silu_fct.f90 b/prec/impl/psb_silu_fct.f90 deleted file mode 100644 index 85b58bad..00000000 --- a/prec/impl/psb_silu_fct.f90 +++ /dev/null @@ -1,440 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! 
documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_silu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_sspmat_type),intent(in) :: a - type(psb_s_csr_sparse_mat),intent(inout) :: l,u - type(psb_sspmat_type),intent(in), optional, target :: blck - real(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_sspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! 
- - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_silu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_silu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_silu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_sspmat_type) :: a - type(psb_sspmat_type) :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_s_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_silu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - 
info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = szero - ! - ! - select type(aa => a%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! 
write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = szero - - select type(aa => b%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! 
write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! 
dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_silu_fctint -end subroutine psb_silu_fct diff --git a/prec/impl/psb_z_ilu0_fact.f90 b/prec/impl/psb_z_ilu0_fact.f90 index 26322e95..997a5e05 100644 --- a/prec/impl/psb_z_ilu0_fact.f90 +++ b/prec/impl/psb_z_ilu0_fact.f90 @@ -130,7 +130,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck is empty. ! -subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd) +subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd,shft) use psb_base_mod use psb_z_ilu_fact_mod, psb_protect_name => psb_zilu0_fact @@ -145,11 +145,13 @@ subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd) integer(psb_ipk_), intent(out) :: info type(psb_zspmat_type),intent(in), optional, target :: blck character, intent(in), optional :: upd + complex(psb_dpk_), intent(in), optional :: shft ! 
Local variables integer(psb_ipk_) :: l1, l2, m, err_act type(psb_zspmat_type), pointer :: blck_ type(psb_z_csr_sparse_mat) :: ll, uu + complex(psb_dpk_) :: shft_ character :: upd_ character(len=20) :: name, ch_err @@ -177,7 +179,12 @@ subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd) else upd_ = 'F' end if - + if (present(shft)) then + shft_ = shft + else + shft_ = zzero + end if + m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& & (m > size(d)) ) then @@ -193,7 +200,7 @@ subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd) ! Compute the ILU(0) or the MILU(0) factorization, depending on ialg ! call psb_zilu0_factint(ialg,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,upd_,info) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,upd_,shft_,info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_zilu0_factint' @@ -314,7 +321,7 @@ contains ! Error code. ! subroutine psb_zilu0_factint(ialg,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,shft,info) implicit none @@ -325,6 +332,7 @@ contains integer(psb_ipk_), intent(inout) :: lja(:),lirp(:),uja(:),uirp(:) complex(psb_dpk_), intent(inout) :: lval(:),uval(:),d(:) character, intent(in) :: upd + complex(psb_dpk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, ktrw,err_act, m @@ -382,14 +390,14 @@ contains ! into lval/d(i)/uval ! call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,& - & d(i),l2,uja,uval,ktrw,trw,upd) + & d(i),l2,uja,uval,ktrw,trw,upd,shft_) else ! ! Copy the i-th local row of the matrix, stored in b ! (as (i-ma)-th row), into lval/d(i)/uval ! call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,& - & d(i),l2,uja,uval,ktrw,trw,upd) + & d(i),l2,uja,uval,ktrw,trw,upd,shft_) endif lirp(i+1) = l1 + 1 @@ -583,7 +591,7 @@ contains ! every nrb calls to copyin. If A is in CSR format it is unused. ! 
subroutine ilu_copyin(i,m,a,jd,jmin,jmax,l1,lja,lval,& - & dia,l2,uja,uval,ktrw,trw,upd) + & dia,l2,uja,uval,ktrw,trw,upd,shft) use psb_base_mod @@ -597,6 +605,7 @@ contains integer(psb_ipk_), intent(inout) :: lja(:), uja(:) complex(psb_dpk_), intent(inout) :: lval(:), uval(:), dia character, intent(in) :: upd + complex(psb_dpk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: k,j,info,irb, nz integer(psb_ipk_), parameter :: nrb=40 @@ -625,7 +634,7 @@ contains lval(l1) = aa%val(j) lja(l1) = k else if (k == jd) then - dia = aa%val(j) + dia = aa%val(j) + shft else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = aa%val(j) @@ -665,7 +674,7 @@ contains lval(l1) = trw%val(ktrw) lja(l1) = k else if (k == jd) then - dia = trw%val(ktrw) + dia = trw%val(ktrw) + shft else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = trw%val(ktrw) diff --git a/prec/impl/psb_z_iluk_fact.f90 b/prec/impl/psb_z_iluk_fact.f90 index 1a398cda..a5540880 100644 --- a/prec/impl/psb_z_iluk_fact.f90 +++ b/prec/impl/psb_z_iluk_fact.f90 @@ -127,7 +127,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. ! -subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) +subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck,shft) use psb_base_mod use psb_z_ilu_fact_mod, psb_protect_name => psb_ziluk_fact @@ -141,9 +141,11 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) type(psb_zspmat_type),intent(inout) :: l,u type(psb_zspmat_type),intent(in), optional, target :: blck complex(psb_dpk_), intent(inout) :: d(:) + complex(psb_dpk_), intent(in), optional :: shft ! 
Local Variables integer(psb_ipk_) :: l1, l2, m, err_act + complex(psb_dpk_) :: shft_ type(psb_zspmat_type), pointer :: blck_ type(psb_z_csr_sparse_mat) :: ll, uu character(len=20) :: name, ch_err @@ -167,6 +169,11 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) goto 9999 end if endif + if (present(shft)) then + shft_ = shft + else + shft_ = zzero + end if m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& @@ -184,7 +191,7 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) ! Compute the ILU(k) or the MILU(k) factorization, depending on ialg ! call psb_ziluk_factint(fill_in,ialg,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,shft_) if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='psb_ziluk_factint' @@ -298,7 +305,7 @@ contains ! Error code. ! subroutine psb_ziluk_factint(fill_in,ialg,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,shft) use psb_base_mod @@ -311,6 +318,7 @@ contains integer(psb_ipk_), allocatable, intent(inout) :: lja(:),lirp(:),uja(:),uirp(:) complex(psb_dpk_), allocatable, intent(inout) :: lval(:),uval(:) complex(psb_dpk_), intent(inout) :: d(:) + complex(psb_dpk_), intent(in) :: shft ! Local variables integer(psb_ipk_) :: ma,mb,i, ktrw,err_act,nidx, m @@ -400,13 +408,13 @@ contains ! ! Copy into trw the i-th local row of the matrix, stored in a ! - call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info) + call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info,shft) else ! ! Copy into trw the i-th local row of the matrix, stored in b ! (as (i-ma)-th row) ! - call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info) + call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info,shft) endif ! Do an elimination step on the current row. It turns out we only @@ -516,7 +524,7 @@ contains ! until we empty the buffer. 
Thus we will make a call to psb_sp_getblk ! every nrb calls to copyin. If A is in CSR format it is unused. ! - subroutine iluk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info) + subroutine iluk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,shft) use psb_base_mod @@ -530,6 +538,8 @@ contains integer(psb_ipk_), intent(inout) :: rowlevs(:) complex(psb_dpk_), intent(inout) :: row(:) type(psb_i_heap), intent(inout) :: heap + complex(psb_dpk_), intent(in) :: shft + ! Local variables integer(psb_ipk_) :: k,j,irb,err_act,nz @@ -554,6 +564,7 @@ contains k = aa%ja(j) if ((jmin<=k).and.(k<=jmax)) then row(k) = aa%val(j) + if (k==i) row(k) = row(k) + shft rowlevs(k) = 0 call heap%insert(k,info) end if @@ -587,6 +598,7 @@ contains k = trw%ja(ktrw) if ((jmin<=k).and.(k<=jmax)) then row(k) = trw%val(ktrw) + if (k==i) row(k) = row(k) + shft rowlevs(k) = 0 call heap%insert(k,info) end if @@ -670,7 +682,8 @@ contains ! Note: this argument is intent(inout) and not only intent(out) ! to retain its allocation, done by this routine. ! - subroutine iluk_fact(fill_in,i,row,rowlevs,heap,d,uja,uirp,uval,uplevs,nidx,idxs,info) + subroutine iluk_fact(fill_in,i,row,rowlevs,heap,d,& + & uja,uirp,uval,uplevs,nidx,idxs,info) use psb_base_mod diff --git a/prec/impl/psb_z_ilut_fact.f90 b/prec/impl/psb_z_ilut_fact.f90 index 291dc778..0c278515 100644 --- a/prec/impl/psb_z_ilut_fact.f90 +++ b/prec/impl/psb_z_ilut_fact.f90 @@ -123,7 +123,7 @@ ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see psb_fact_bld), then blck does not contain any row. ! 
-subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) +subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale,shft) use psb_base_mod use psb_z_ilu_fact_mod, psb_protect_name => psb_zilut_fact @@ -139,9 +139,11 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) complex(psb_dpk_), intent(inout) :: d(:) type(psb_zspmat_type),intent(in), optional, target :: blck integer(psb_ipk_), intent(in), optional :: iscale + complex(psb_dpk_), intent(in), optional :: shft ! Local Variables integer(psb_ipk_) :: l1, l2, m, err_act, iscale_ + complex(psb_dpk_) :: shft_ type(psb_zspmat_type), pointer :: blck_ type(psb_z_csr_sparse_mat) :: ll, uu real(psb_dpk_) :: scale @@ -177,6 +179,11 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) else iscale_ = psb_ilu_scale_none_ end if + if (present(shft)) then + shft_ = shft + else + shft_ = zzero + end if select case(iscale_) case(psb_ilu_scale_none_) @@ -206,7 +213,7 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) ! Compute the ILU(k,t) factorization ! call psb_zilut_factint(fill_in,thres,a,blck_,& - & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,scale) + & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,info,scale,shft_) if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='psb_zilut_factint' @@ -316,7 +323,7 @@ contains ! Error code. ! subroutine psb_zilut_factint(fill_in,thres,a,b,& - & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,scale) + & d,lval,lja,lirp,uval,uja,uirp,l1,l2,info,scale,shft) use psb_base_mod @@ -331,6 +338,7 @@ contains complex(psb_dpk_), allocatable, intent(inout) :: lval(:),uval(:) complex(psb_dpk_), intent(inout) :: d(:) real(psb_dpk_), intent(in), optional :: scale + complex(psb_dpk_), intent(in) :: shft ! 
Local Variables integer(psb_ipk_) :: i, ktrw,err_act,nidx,nlw,nup,jmaxup, ma, mb, m @@ -401,10 +409,10 @@ contains d(i) = czero if (i<=ma) then call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& - & row,heap,ktrw,trw,info) + & row,heap,ktrw,trw,info,shft) else call ilut_copyin(i-ma,mb,b,i,ione,m,nlw,nup,jmaxup,nrmi,weight,& - & row,heap,ktrw,trw,info) + & row,heap,ktrw,trw,info,shft) endif ! @@ -540,7 +548,7 @@ contains ! every nrb calls to copyin. If A is in CSR format it is unused. ! subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,& - & nrmi,weight,row,heap,ktrw,trw,info) + & nrmi,weight,row,heap,ktrw,trw,info,shft) use psb_base_mod implicit none type(psb_zspmat_type), intent(in) :: a @@ -551,6 +559,7 @@ contains complex(psb_dpk_), intent(inout) :: row(:) real(psb_dpk_), intent(in) :: weight type(psb_i_heap), intent(inout) :: heap + complex(psb_dpk_), intent(in) :: shft integer(psb_ipk_) :: k,j,irb,kin,nz integer(psb_ipk_), parameter :: nrb=40 @@ -597,6 +606,7 @@ contains call heap%insert(k,info) if (info /= psb_success_) exit if (kjd) then nup = nup + 1 if (abs(row(k))>dmaxup) then @@ -648,6 +658,7 @@ contains call heap%insert(k,info) if (info /= psb_success_) exit if (kjd) then nup = nup + 1 if (abs(row(k))>dmaxup) then diff --git a/prec/impl/psb_zilu_fct.f90 b/prec/impl/psb_zilu_fct.f90 deleted file mode 100644 index e5ea4b0d..00000000 --- a/prec/impl/psb_zilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! 
documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_zilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_zspmat_type),intent(in) :: a - type(psb_z_csr_sparse_mat),intent(inout) :: l,u - type(psb_zspmat_type),intent(in), optional, target :: blck - complex(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_zspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! 
- - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_zilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_zilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_zilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_zspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_z_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_zilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call 
psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = zzero - - ! - ! - select type(aa => a%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! 
write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = zzero - - select type(aa => b%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! 
write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! 
dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_zilu_fctint -end subroutine psb_zilu_fct diff --git a/prec/psb_c_biconjg_mod.F90 b/prec/psb_c_biconjg_mod.F90 index 6af30b4f..e6fdaa95 100644 --- a/prec/psb_c_biconjg_mod.F90 +++ b/prec/psb_c_biconjg_mod.F90 @@ -128,10 +128,10 @@ module psb_c_biconjg_mod & psb_csparse_biconjg_s_llk, psb_csparse_biconjg_s_ft_llk,& & psb_csparse_biconjg_llk_noth, psb_csparse_biconjg_mlk -#if defined(HAVE_TUMA_SAINV) - procedure(psb_csparse_biconjg_variant) :: psb_csparse_tuma_sainv,& - & psb_csparse_tuma_lainv -#endif +!#if defined(PSB_HAVE_TUMA_SAINV) +! procedure(psb_csparse_biconjg_variant) :: psb_csparse_tuma_sainv,& +! & psb_csparse_tuma_lainv +!#endif contains @@ -179,7 +179,7 @@ contains call psb_csparse_biconjg_s_ft_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) case (psb_ainv_llk_noth_) call psb_csparse_biconjg_llk_noth(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) -!#if defined(HAVE_TUMA_SAINV) +!#if defined(PSB_HAVE_TUMA_SAINV) ! case (psb_ainv_s_tuma_) ! call psb_csparse_tuma_sainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) ! 
case (psb_ainv_l_tuma_) diff --git a/prec/psb_c_ilu_fact_mod.f90 b/prec/psb_c_ilu_fact_mod.f90 index 45d06211..0fae1fc5 100644 --- a/prec/psb_c_ilu_fact_mod.f90 +++ b/prec/psb_c_ilu_fact_mod.f90 @@ -80,7 +80,7 @@ module psb_c_ilu_fact_mod use psb_base_mod use psb_prec_const_mod interface psb_ilu0_fact - subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck,upd) + subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck,upd,shft) import psb_cspmat_type, psb_spk_, psb_ipk_ integer(psb_ipk_), intent(in) :: ialg integer(psb_ipk_), intent(out) :: info @@ -89,11 +89,12 @@ module psb_c_ilu_fact_mod type(psb_cspmat_type),intent(in), optional, target :: blck character, intent(in), optional :: upd complex(psb_spk_), intent(inout) :: d(:) + complex(psb_spk_), intent(in), optional :: shft end subroutine psb_cilu0_fact end interface interface psb_iluk_fact - subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck) + subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck,shft) import psb_cspmat_type, psb_spk_, psb_ipk_ integer(psb_ipk_), intent(in) :: fill_in,ialg integer(psb_ipk_), intent(out) :: info @@ -101,11 +102,12 @@ module psb_c_ilu_fact_mod type(psb_cspmat_type),intent(inout) :: l,u type(psb_cspmat_type),intent(in), optional, target :: blck complex(psb_spk_), intent(inout) :: d(:) + complex(psb_spk_), intent(in), optional :: shft end subroutine psb_ciluk_fact end interface interface psb_ilut_fact - subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) + subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale,shft) import psb_cspmat_type, psb_spk_, psb_ipk_ integer(psb_ipk_), intent(in) :: fill_in real(psb_spk_), intent(in) :: thres @@ -115,6 +117,7 @@ module psb_c_ilu_fact_mod complex(psb_spk_), intent(inout) :: d(:) type(psb_cspmat_type),intent(in), optional, target :: blck integer(psb_ipk_), intent(in), optional :: iscale + complex(psb_spk_), intent(in), optional :: shft end subroutine psb_cilut_fact end interface diff --git a/prec/psb_c_prec_type.f90 
b/prec/psb_c_prec_type.f90 index 4a62a134..9c5e1cdc 100644 --- a/prec/psb_c_prec_type.f90 +++ b/prec/psb_c_prec_type.f90 @@ -60,6 +60,7 @@ module psb_c_prec_type generic, public :: set => cseti, csetc, csetr procedure, pass(prec) :: allocate_wrk => psb_c_allocate_wrk procedure, pass(prec) :: free_wrk => psb_c_free_wrk + procedure, pass(prec) :: deallocate_wrk => psb_c_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_c_is_allocated_wrk end type psb_cprec_type diff --git a/prec/psb_d_biconjg_mod.F90 b/prec/psb_d_biconjg_mod.F90 index 09358744..696ecbaa 100644 --- a/prec/psb_d_biconjg_mod.F90 +++ b/prec/psb_d_biconjg_mod.F90 @@ -128,10 +128,10 @@ module psb_d_biconjg_mod & psb_dsparse_biconjg_s_llk, psb_dsparse_biconjg_s_ft_llk,& & psb_dsparse_biconjg_llk_noth, psb_dsparse_biconjg_mlk -#if defined(HAVE_TUMA_SAINV) - procedure(psb_dsparse_biconjg_variant) :: psb_dsparse_tuma_sainv,& - & psb_dsparse_tuma_lainv -#endif +!#if defined(PSB_HAVE_TUMA_SAINV) +! procedure(psb_dsparse_biconjg_variant) :: psb_dsparse_tuma_sainv,& +! & psb_dsparse_tuma_lainv +!#endif contains @@ -179,7 +179,7 @@ contains call psb_dsparse_biconjg_s_ft_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) case (psb_ainv_llk_noth_) call psb_dsparse_biconjg_llk_noth(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) -!#if defined(HAVE_TUMA_SAINV) +!#if defined(PSB_HAVE_TUMA_SAINV) ! case (psb_ainv_s_tuma_) ! call psb_dsparse_tuma_sainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) ! 
case (psb_ainv_l_tuma_) diff --git a/prec/psb_d_ilu_fact_mod.f90 b/prec/psb_d_ilu_fact_mod.f90 index 02753a4c..6354573d 100644 --- a/prec/psb_d_ilu_fact_mod.f90 +++ b/prec/psb_d_ilu_fact_mod.f90 @@ -80,7 +80,7 @@ module psb_d_ilu_fact_mod use psb_base_mod use psb_prec_const_mod interface psb_ilu0_fact - subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck,upd) + subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck,upd,shft) import psb_dspmat_type, psb_dpk_, psb_ipk_ integer(psb_ipk_), intent(in) :: ialg integer(psb_ipk_), intent(out) :: info @@ -89,11 +89,12 @@ module psb_d_ilu_fact_mod type(psb_dspmat_type),intent(in), optional, target :: blck character, intent(in), optional :: upd real(psb_dpk_), intent(inout) :: d(:) + real(psb_dpk_), intent(in), optional :: shft end subroutine psb_dilu0_fact end interface interface psb_iluk_fact - subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck) + subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck,shft) import psb_dspmat_type, psb_dpk_, psb_ipk_ integer(psb_ipk_), intent(in) :: fill_in,ialg integer(psb_ipk_), intent(out) :: info @@ -101,11 +102,12 @@ module psb_d_ilu_fact_mod type(psb_dspmat_type),intent(inout) :: l,u type(psb_dspmat_type),intent(in), optional, target :: blck real(psb_dpk_), intent(inout) :: d(:) + real(psb_dpk_), intent(in), optional :: shft end subroutine psb_diluk_fact end interface interface psb_ilut_fact - subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) + subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale,shft) import psb_dspmat_type, psb_dpk_, psb_ipk_ integer(psb_ipk_), intent(in) :: fill_in real(psb_dpk_), intent(in) :: thres @@ -115,6 +117,7 @@ module psb_d_ilu_fact_mod real(psb_dpk_), intent(inout) :: d(:) type(psb_dspmat_type),intent(in), optional, target :: blck integer(psb_ipk_), intent(in), optional :: iscale + real(psb_dpk_), intent(in), optional :: shft end subroutine psb_dilut_fact end interface diff --git a/prec/psb_d_prec_type.f90 
b/prec/psb_d_prec_type.f90 index a5e3a7ad..708b33f3 100644 --- a/prec/psb_d_prec_type.f90 +++ b/prec/psb_d_prec_type.f90 @@ -60,6 +60,7 @@ module psb_d_prec_type generic, public :: set => cseti, csetc, csetr procedure, pass(prec) :: allocate_wrk => psb_d_allocate_wrk procedure, pass(prec) :: free_wrk => psb_d_free_wrk + procedure, pass(prec) :: deallocate_wrk => psb_d_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_d_is_allocated_wrk end type psb_dprec_type diff --git a/prec/psb_prec_const_mod.f90 b/prec/psb_prec_const_mod.f90 index 73c22e58..d74c5bf0 100644 --- a/prec/psb_prec_const_mod.f90 +++ b/prec/psb_prec_const_mod.f90 @@ -74,15 +74,15 @@ module psb_prec_const_mod integer(psb_ipk_), parameter :: psb_ilu_scale_arcsum_ = 5 ! Numerical parameters relative to Approximate Inverse Preconditioners - integer, parameter :: psb_inv_fillin_ = 3 - integer, parameter :: psb_ainv_alg_ = psb_inv_fillin_ + 1 - integer, parameter :: psb_inv_thresh_ = 3 - integer, parameter :: psb_ainv_llk_ = psb_inv_thresh_ + 1 - integer, parameter :: psb_ainv_s_llk_ = psb_ainv_llk_ + 1 - integer, parameter :: psb_ainv_s_ft_llk_ = psb_ainv_s_llk_ + 1 - integer, parameter :: psb_ainv_llk_noth_ = psb_ainv_s_ft_llk_ + 1 - integer, parameter :: psb_ainv_mlk_ = psb_ainv_llk_noth_ + 1 - integer, parameter :: psb_ainv_lmx_ = psb_ainv_mlk_ + integer(psb_ipk_), parameter :: psb_inv_fillin_ = 3 + integer(psb_ipk_), parameter :: psb_ainv_alg_ = psb_inv_fillin_ + 1 + integer(psb_ipk_), parameter :: psb_inv_thresh_ = 3 + integer(psb_ipk_), parameter :: psb_ainv_llk_ = psb_inv_thresh_ + 1 + integer(psb_ipk_), parameter :: psb_ainv_s_llk_ = psb_ainv_llk_ + 1 + integer(psb_ipk_), parameter :: psb_ainv_s_ft_llk_ = psb_ainv_s_llk_ + 1 + integer(psb_ipk_), parameter :: psb_ainv_llk_noth_ = psb_ainv_s_ft_llk_ + 1 + integer(psb_ipk_), parameter :: psb_ainv_mlk_ = psb_ainv_llk_noth_ + 1 + integer(psb_ipk_), parameter :: psb_ainv_lmx_ = psb_ainv_mlk_ interface psb_check_def diff --git 
a/prec/psb_s_biconjg_mod.F90 b/prec/psb_s_biconjg_mod.F90 index bc2aaefc..46fe3a0a 100644 --- a/prec/psb_s_biconjg_mod.F90 +++ b/prec/psb_s_biconjg_mod.F90 @@ -128,10 +128,10 @@ module psb_s_biconjg_mod & psb_ssparse_biconjg_s_llk, psb_ssparse_biconjg_s_ft_llk,& & psb_ssparse_biconjg_llk_noth, psb_ssparse_biconjg_mlk -#if defined(HAVE_TUMA_SAINV) - procedure(psb_ssparse_biconjg_variant) :: psb_ssparse_tuma_sainv,& - & psb_ssparse_tuma_lainv -#endif +!#if defined(PSB_HAVE_TUMA_SAINV) +! procedure(psb_ssparse_biconjg_variant) :: psb_ssparse_tuma_sainv,& +! & psb_ssparse_tuma_lainv +!#endif contains @@ -179,7 +179,7 @@ contains call psb_ssparse_biconjg_s_ft_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) case (psb_ainv_llk_noth_) call psb_ssparse_biconjg_llk_noth(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) -!#if defined(HAVE_TUMA_SAINV) +!#if defined(PSB_HAVE_TUMA_SAINV) ! case (psb_ainv_s_tuma_) ! call psb_ssparse_tuma_sainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) ! case (psb_ainv_l_tuma_) diff --git a/prec/psb_s_ilu_fact_mod.f90 b/prec/psb_s_ilu_fact_mod.f90 index 6334df15..4021adc9 100644 --- a/prec/psb_s_ilu_fact_mod.f90 +++ b/prec/psb_s_ilu_fact_mod.f90 @@ -80,7 +80,7 @@ module psb_s_ilu_fact_mod use psb_base_mod use psb_prec_const_mod interface psb_ilu0_fact - subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck,upd) + subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck,upd,shft) import psb_sspmat_type, psb_spk_, psb_ipk_ integer(psb_ipk_), intent(in) :: ialg integer(psb_ipk_), intent(out) :: info @@ -89,11 +89,12 @@ module psb_s_ilu_fact_mod type(psb_sspmat_type),intent(in), optional, target :: blck character, intent(in), optional :: upd real(psb_spk_), intent(inout) :: d(:) + real(psb_spk_), intent(in), optional :: shft end subroutine psb_silu0_fact end interface interface psb_iluk_fact - subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck) + subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck,shft) import psb_sspmat_type, psb_spk_, psb_ipk_ 
integer(psb_ipk_), intent(in) :: fill_in,ialg integer(psb_ipk_), intent(out) :: info @@ -101,11 +102,12 @@ module psb_s_ilu_fact_mod type(psb_sspmat_type),intent(inout) :: l,u type(psb_sspmat_type),intent(in), optional, target :: blck real(psb_spk_), intent(inout) :: d(:) + real(psb_spk_), intent(in), optional :: shft end subroutine psb_siluk_fact end interface interface psb_ilut_fact - subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) + subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale,shft) import psb_sspmat_type, psb_spk_, psb_ipk_ integer(psb_ipk_), intent(in) :: fill_in real(psb_spk_), intent(in) :: thres @@ -115,6 +117,7 @@ module psb_s_ilu_fact_mod real(psb_spk_), intent(inout) :: d(:) type(psb_sspmat_type),intent(in), optional, target :: blck integer(psb_ipk_), intent(in), optional :: iscale + real(psb_spk_), intent(in), optional :: shft end subroutine psb_silut_fact end interface diff --git a/prec/psb_s_prec_type.f90 b/prec/psb_s_prec_type.f90 index d438fe9f..9b6ee231 100644 --- a/prec/psb_s_prec_type.f90 +++ b/prec/psb_s_prec_type.f90 @@ -60,6 +60,7 @@ module psb_s_prec_type generic, public :: set => cseti, csetc, csetr procedure, pass(prec) :: allocate_wrk => psb_s_allocate_wrk procedure, pass(prec) :: free_wrk => psb_s_free_wrk + procedure, pass(prec) :: deallocate_wrk => psb_s_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_s_is_allocated_wrk end type psb_sprec_type diff --git a/prec/psb_z_biconjg_mod.F90 b/prec/psb_z_biconjg_mod.F90 index b40485e7..bb193f57 100644 --- a/prec/psb_z_biconjg_mod.F90 +++ b/prec/psb_z_biconjg_mod.F90 @@ -128,10 +128,10 @@ module psb_z_biconjg_mod & psb_zsparse_biconjg_s_llk, psb_zsparse_biconjg_s_ft_llk,& & psb_zsparse_biconjg_llk_noth, psb_zsparse_biconjg_mlk -#if defined(HAVE_TUMA_SAINV) - procedure(psb_zsparse_biconjg_variant) :: psb_zsparse_tuma_sainv,& - & psb_zsparse_tuma_lainv -#endif +!#if defined(PSB_HAVE_TUMA_SAINV) +! 
procedure(psb_zsparse_biconjg_variant) :: psb_zsparse_tuma_sainv,& +! & psb_zsparse_tuma_lainv +!#endif contains @@ -179,7 +179,7 @@ contains call psb_zsparse_biconjg_s_ft_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) case (psb_ainv_llk_noth_) call psb_zsparse_biconjg_llk_noth(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) -!#if defined(HAVE_TUMA_SAINV) +!#if defined(PSB_HAVE_TUMA_SAINV) ! case (psb_ainv_s_tuma_) ! call psb_zsparse_tuma_sainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info) ! case (psb_ainv_l_tuma_) diff --git a/prec/psb_z_ilu_fact_mod.f90 b/prec/psb_z_ilu_fact_mod.f90 index 220d673f..4793b43b 100644 --- a/prec/psb_z_ilu_fact_mod.f90 +++ b/prec/psb_z_ilu_fact_mod.f90 @@ -80,7 +80,7 @@ module psb_z_ilu_fact_mod use psb_base_mod use psb_prec_const_mod interface psb_ilu0_fact - subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck,upd) + subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck,upd,shft) import psb_zspmat_type, psb_dpk_, psb_ipk_ integer(psb_ipk_), intent(in) :: ialg integer(psb_ipk_), intent(out) :: info @@ -89,11 +89,12 @@ module psb_z_ilu_fact_mod type(psb_zspmat_type),intent(in), optional, target :: blck character, intent(in), optional :: upd complex(psb_dpk_), intent(inout) :: d(:) + complex(psb_dpk_), intent(in), optional :: shft end subroutine psb_zilu0_fact end interface interface psb_iluk_fact - subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck) + subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck,shft) import psb_zspmat_type, psb_dpk_, psb_ipk_ integer(psb_ipk_), intent(in) :: fill_in,ialg integer(psb_ipk_), intent(out) :: info @@ -101,11 +102,12 @@ module psb_z_ilu_fact_mod type(psb_zspmat_type),intent(inout) :: l,u type(psb_zspmat_type),intent(in), optional, target :: blck complex(psb_dpk_), intent(inout) :: d(:) + complex(psb_dpk_), intent(in), optional :: shft end subroutine psb_ziluk_fact end interface interface psb_ilut_fact - subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale) + subroutine 
psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale,shft) import psb_zspmat_type, psb_dpk_, psb_ipk_ integer(psb_ipk_), intent(in) :: fill_in real(psb_dpk_), intent(in) :: thres @@ -115,6 +117,7 @@ module psb_z_ilu_fact_mod complex(psb_dpk_), intent(inout) :: d(:) type(psb_zspmat_type),intent(in), optional, target :: blck integer(psb_ipk_), intent(in), optional :: iscale + complex(psb_dpk_), intent(in), optional :: shft end subroutine psb_zilut_fact end interface diff --git a/prec/psb_z_prec_type.f90 b/prec/psb_z_prec_type.f90 index 728e05c1..7fa396ed 100644 --- a/prec/psb_z_prec_type.f90 +++ b/prec/psb_z_prec_type.f90 @@ -60,6 +60,7 @@ module psb_z_prec_type generic, public :: set => cseti, csetc, csetr procedure, pass(prec) :: allocate_wrk => psb_z_allocate_wrk procedure, pass(prec) :: free_wrk => psb_z_free_wrk + procedure, pass(prec) :: deallocate_wrk => psb_z_free_wrk procedure, pass(prec) :: is_allocated_wrk => psb_z_is_allocated_wrk end type psb_zprec_type diff --git a/rsb/Makefile b/rsb/Makefile new file mode 100755 index 00000000..58be5ee2 --- /dev/null +++ b/rsb/Makefile @@ -0,0 +1,53 @@ +include ../Make.inc +# +# Libraries used +# +PSBLIBDIR=$(PSBLASDIR)/lib/ +PSBINCDIR=$(PSBLASDIR)/include +PSBMODDIR=$(PSBLASDIR)/modules +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). 
$(LIBRSB_INCLUDES) $(LIBRSB_DEFINES)
+CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR)
+LIBNAME=libpsb_rsb.a
+
+.PHONY: lib objs iobjs ilib clean cclean iclean verycleanlib
+FOBJS= rsb_mod.o psb_d_rsb_mat_mod.o \
+	psb_rsb_penv_mod.o psb_rsb_mod.o
+
+COBJS= rsb_int.o
+
+OBJS=$(COBJS) $(FOBJS)
+
+lib: objs ilib
+	/bin/cp -p $(LIBNAME) $(LIBDIR)
+
+objs: $(OBJS) iobjs
+	/bin/cp -p *$(.mod) $(MODDIR)
+
+iobjs:
+	$(MAKE) -C impl objs
+ilib: iobjs
+	$(MAKE) -C impl lib LIBNAME=$(LIBNAME)
+
+clean: cclean iclean
+	/bin/rm -f $(FOBJS) *$(.mod) *.a
+
+cclean:
+	/bin/rm -f $(COBJS)
+iclean:
+	cd impl && $(MAKE) clean
+
+verycleanlib:
+	(cd ../.. && $(MAKE) veryclean)
+# $(MAKE) keeps flags and the jobserver of the parent make; && stops if the cd fails.
+# NOTE(review): ../.. is two levels above this directory; confirm veryclean lives there.
+
diff --git a/rsb/impl/Makefile b/rsb/impl/Makefile
new file mode 100755
index 00000000..e78fc325
--- /dev/null
+++ b/rsb/impl/Makefile
@@ -0,0 +1,30 @@
+include ../../Make.inc
+PSBLIBDIR=$(PSBLASDIR)/lib/
+PSBINCDIR=$(PSBLASDIR)/include
+PSBMODDIR=$(PSBLASDIR)/modules
+LIBDIR=../../lib
+INCDIR=../../include
+MODDIR=../../modules
+PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base
+#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
+LDLIBS=$(PSBLDLIBS)
+#
+# Compilers and such
+#
+#CCOPT= -g
+FINCLUDES=$(FMFLAG).. $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_INCLUDES) $(FIFLAG).. $(LIBRSB_DEFINES)
+CINCLUDES=
+LIBNAME=libpsb_rsb.a
+
+OBJS= \
+psb_d_cp_rsb_from_coo.o \
+psb_d_mv_rsb_from_coo.o \
+psb_d_cp_rsb_to_coo.o psb_d_rsb_csmv.o
+
+objs: $(OBJS)
+
+lib: objs
+	$(AR) ../$(LIBNAME) $(OBJS)
+
+clean:
+	/bin/rm -f $(OBJS)
diff --git a/rsb/impl/psb_d_cp_rsb_from_coo.F90 b/rsb/impl/psb_d_cp_rsb_from_coo.F90
new file mode 100644
index 00000000..14592877
--- /dev/null
+++ b/rsb/impl/psb_d_cp_rsb_from_coo.F90
@@ -0,0 +1,78 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1.
Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+! psb_d_cp_rsb_from_coo: build the RSB matrix a from the COO matrix b.
+! b is not modified; a private copy is fixed and handed to Rsb_from_coo.
+subroutine psb_d_cp_rsb_from_coo(a,b,info)
+
+  use psb_base_mod
+  use rsb_mod
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_from_coo
+  implicit none
+
+  class(psb_d_rsb_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(in)    :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  type(psb_d_coo_sparse_mat) :: tmp
+  Integer(Psb_ipk_)   :: nza, nr, i,j,irw, idl,err_act, nc
+  integer(psb_ipk_)   :: nzm, ir, ic, k ,bs
+  integer(psb_ipk_)   :: debug_level, debug_unit
+  character(len=20)   :: name
+
+  info = psb_success_
+#ifdef HAVE_RSB
+  ! b is intent(in): copy it into tmp so that fix() can be applied to the copy
+  call b%cp_to_coo(tmp,info)
+  ! Only fix a copy that was actually built, so a cp_to_coo failure is not masked
+  if (info == psb_success_) call tmp%fix(info)
+  if (info /= psb_success_) return
+
+  nr  = tmp%get_nrows()
+  nc  = tmp%get_ncols()
+  nza = tmp%get_nzeros()
+  ! Take the base-class component from the fixed copy
+  a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat
+
+  bs = 1!RSB_DEFAULT_BLOCKING
+  ! nza, nr and nc describe tmp, so the arrays passed must be those of tmp as well
+  info = Rsb_from_coo(a%rsbMat,tmp%val,tmp%ia,tmp%ja,nza,nr,nc,bs,bs)
+
+  call tmp%free()
+#endif
+
+  return
+
+9999 continue   ! not reached: nothing in this routine jumps to 9999
+  info = psb_err_alloc_dealloc_
+  return
+
+end subroutine psb_d_cp_rsb_from_coo
diff --git a/rsb/impl/psb_d_cp_rsb_to_coo.f90 b/rsb/impl/psb_d_cp_rsb_to_coo.f90
new file mode 100644
index 00000000..3747100f
--- /dev/null
+++ b/rsb/impl/psb_d_cp_rsb_to_coo.f90
@@ -0,0 +1,77 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+! psb_d_cp_rsb_to_coo: copy the RSB matrix a into the COO matrix b.
+! info returns the rsb_mtx_get_coo status when it is non-zero, else the status of b%fix.
+subroutine psb_d_cp_rsb_to_coo(a,b,info)
+
+  use psb_base_mod
+  use rsb
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_to_coo
+  implicit none
+
+  class(psb_d_rsb_sparse_mat), intent(in)    :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+  real(psb_dpk_), pointer :: val_point(:)
+  type(c_ptr) :: t_p,s_p
+
+  !locals
+  integer(psb_ipk_)   :: i, j, k,nr,nza,nc
+
+  info = psb_success_
+
+  nr  = a%get_nrows()
+  nc  = a%get_ncols()
+  nza = a%get_nzeros()
+
+  call b%allocate(nr,nc,nza)
+  b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
+  ! Scratch buffer that receives the coefficients through a C pointer
+  allocate(val_point(nza))
+  ! NOTE(review): val_point(1) does not exist when nza is 0 - confirm callers never hit that
+  t_p = c_loc(val_point(1))
+
+  info = rsb_mtx_get_coo(a%rsbMat, t_p, b%ia, b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE)
+
+  !info = rsb_mtx_switch_to_coo(a%rsbMat,t_p,b%ia,b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE)
+  ! NOTE(review): s_p is never assigned before being passed here - confirm what rsb_perror expects
+  k = rsb_perror(s_p,info)
+
+  do i=1,nza
+    b%val(i)=val_point(i)
+  enddo
+
+  deallocate(val_point)
+
+  call b%set_nzeros(nza)
+  if (info == psb_success_) call b%fix(info)   ! keep a failing librsb status instead of overwriting it
+
+end subroutine psb_d_cp_rsb_to_coo
diff --git a/rsb/impl/psb_d_mv_rsb_from_coo.f90 b/rsb/impl/psb_d_mv_rsb_from_coo.f90
new file mode 100644
index 00000000..9de74ec7
--- /dev/null
+++ b/rsb/impl/psb_d_mv_rsb_from_coo.f90
@@ -0,0 +1,114 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!
notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+! psb_d_mv_rsb_from_coo: move the COO matrix b into the RSB matrix a.
+! b is fixed in place, converted through a%cp_from_coo, and freed on success.
+subroutine psb_d_mv_rsb_from_coo(a,b,info)
+
+  use psb_base_mod
+  use psb_d_rsb_mat_mod, psb_protect_name => psb_d_mv_rsb_from_coo
+  implicit none
+
+  class(psb_d_rsb_sparse_mat), intent(inout) :: a
+  class(psb_d_coo_sparse_mat), intent(inout) :: b
+  integer(psb_ipk_), intent(out)             :: info
+
+  !locals
+  Integer(Psb_ipk_)   :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic
+
+  info = psb_success_
+
+  call b%fix(info)
+  if (info /= psb_success_) return
+
+  nr  = b%get_nrows()    ! nr, nc and nza are only referenced by the disabled code below
+  nc  = b%get_ncols()
+  nza = b%get_nzeros()
+  ! if (b%is_sorted()) then
+  !   ! If it is sorted then we can lessen memory impact
+  !   a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
+  ! NOTE(review): the disabled code uses irn, idiag and rank-2 ja/val - confirm these exist for this type
+  !   ! First compute the number of nonzeros in each row.
+  !   call psb_realloc(nr,a%irn,info)
+  !   if (info /= 0) goto 9999
+  !   a%irn = 0
+  !   do i=1, nza
+  !     a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1
+  !   end do
+  !   nzm = 0
+  !   do i=1, nr
+  !     nzm = max(nzm,a%irn(i))
+  !     a%irn(i) = 0
+  !   end do
+  !   ! Second: copy the column indices.
+  !   call psb_realloc(nr,a%idiag,info)
+  !   if (info == 0) call psb_realloc(nr,nzm,a%ja,info)
+  !   if (info /= 0) goto 9999
+  !   do i=1, nza
+  !     ir = b%ia(i)
+  !     ic = b%ja(i)
+  !     j  = a%irn(ir) + 1
+  !     a%ja(ir,j) = ic
+  !     a%irn(ir)  = j
+  !   end do
+  !   ! Third copy the other stuff
+  !   deallocate(b%ia,b%ja,stat=info)
+  !   if (info == 0) call psb_realloc(nr,a%idiag,info)
+  !   if (info == 0) call psb_realloc(nr,nzm,a%val,info)
+  !   if (info /= 0) goto 9999
+  !   k = 0
+  !   do i=1, nr
+  !     a%idiag(i) = 0
+  !     do j=1, a%irn(i)
+  !       k = k + 1
+  !       a%val(i,j) = b%val(k)
+  !       if (i==a%ja(i,j)) a%idiag(i)=j
+  !     end do
+  !     do j=a%irn(i)+1, nzm
+  !       a%ja(i,j) = i
+  !       a%val(i,j) = dzero
+  !     end do
+  !   end do
+
+  ! else
+  ! The sorted branch above is commented out, so the copy path below always runs.
+  call a%cp_from_coo(b,info)
+  if (info /= 0) goto 9999
+  ! end if
+
+  call b%free()
+
+  return
+
+9999 continue   ! reached only when cp_from_coo returns a non-zero info
+  info = psb_err_alloc_dealloc_   ! the code returned by cp_from_coo is replaced by this one
+  return
+
+end subroutine psb_d_mv_rsb_from_coo
diff --git a/rsb/impl/psb_d_rsb_csmv.F90 b/rsb/impl/psb_d_rsb_csmv.F90
new file mode 100644
index 00000000..f5dbba97
--- /dev/null
+++ b/rsb/impl/psb_d_rsb_csmv.F90
@@ -0,0 +1,138 @@
+!    Parallel Sparse BLAS GPU plugin
+!    (C) Copyright 2013
+!
+!                       Salvatore Filippone
+!                       Alessandro Fanfarillo
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!
documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use rsb_mod + use psb_d_rsb_mat_mod, psb_protect_name => psb_d_rsb_csmv + implicit none + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_rsb_csmv' + logical, parameter :: debug=.false. 
+ + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_RSB + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) d_rsb_get_size + procedure, pass(a) :: get_nzeros => d_rsb_get_nzeros + procedure, nopass :: get_fmt => d_rsb_get_fmt + procedure, pass(a) :: sizeof => d_rsb_sizeof + ! procedure, pass(a) :: csmm => psb_d_rsb_csmm + procedure, pass(a) :: csmv => psb_d_rsb_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_rsb_cssm + ! procedure, pass(a) :: inner_cssv => psb_d_rsb_cssv + ! procedure, pass(a) :: scals => psb_d_rsb_scals + ! procedure, pass(a) :: scalv => psb_d_rsb_scal + ! procedure, pass(a) :: maxval => psb_d_rsb_maxval + ! procedure, pass(a) :: csnmi => psb_d_rsb_csnmi + ! procedure, pass(a) :: csnm1 => psb_d_rsb_csnm1 + ! procedure, pass(a) :: rowsum => psb_d_rsb_rowsum + ! procedure, pass(a) :: arwsum => psb_d_rsb_arwsum + ! procedure, pass(a) :: colsum => psb_d_rsb_colsum + ! procedure, pass(a) :: aclsum => psb_d_rsb_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_d_rsb_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_rsb_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_rsb_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_rsb_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_d_cp_rsb_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_rsb_from_fmt +! procedure, pass(a) :: mv_to_coo => psb_d_mv_rsb_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_rsb_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_rsb_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_rsb_from_fmt + ! procedure, pass(a) :: csput => psb_d_rsb_csput + ! 
procedure, pass(a) :: get_diag => psb_d_rsb_get_diag + ! procedure, pass(a) :: csgetptn => psb_d_rsb_csgetptn + ! procedure, pass(a) :: csgetrow => psb_d_rsb_csgetrow + ! procedure, pass(a) :: get_nz_row => d_rsb_get_nz_row + ! procedure, pass(a) :: reinit => psb_d_rsb_reinit + ! procedure, pass(a) :: trim => psb_d_rsb_trim + ! procedure, pass(a) :: print => psb_d_rsb_print + procedure, pass(a) :: free => d_rsb_free + ! procedure, pass(a) :: mold => psb_d_rsb_mold + + end type psb_d_rsb_sparse_mat + + private :: d_rsb_get_nzeros, d_rsb_free, d_rsb_get_fmt, & + & d_rsb_get_size, d_rsb_sizeof, d_rsb_get_nz_row + + interface + subroutine psb_d_rsb_reallocate_nz(nz,a) + import :: psb_d_rsb_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_rsb_sparse_mat), intent(inout) :: a + end subroutine psb_d_rsb_reallocate_nz + end interface + + interface + subroutine psb_d_rsb_reinit(a,clear) + import :: psb_d_rsb_sparse_mat + class(psb_d_rsb_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_rsb_reinit + end interface + + interface + subroutine psb_d_rsb_trim(a) + import :: psb_d_rsb_sparse_mat + class(psb_d_rsb_sparse_mat), intent(inout) :: a + end subroutine psb_d_rsb_trim + end interface + + interface + subroutine psb_d_rsb_mold(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_rsb_mold + end interface + + interface + subroutine psb_d_rsb_allocate_mnnz(m,n,a,nz) + import :: psb_d_rsb_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_rsb_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_rsb_allocate_mnnz + end interface + + interface + subroutine psb_d_rsb_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_rsb_sparse_mat, psb_ipk_ + 
integer(psb_ipk_), intent(in) :: iout + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_ipk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_rsb_print + end interface + + interface + subroutine psb_d_cp_rsb_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_rsb_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_to_coo + end interface + + interface + subroutine psb_d_cp_rsb_from_coo(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_from_coo + end interface + + interface + subroutine psb_d_cp_rsb_to_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_to_fmt + end interface + + interface + subroutine psb_d_cp_rsb_from_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_from_fmt + end interface + + interface + subroutine psb_d_mv_rsb_to_coo(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_to_coo + end interface + + interface + subroutine psb_d_mv_rsb_from_coo(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + 
class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_from_coo + end interface + + interface + subroutine psb_d_mv_rsb_to_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_to_fmt + end interface + + interface + subroutine psb_d_mv_rsb_from_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_from_fmt + end interface + + interface + subroutine psb_d_rsb_csput(nz,ia,ja,val,a,imin,imax,jmin,jmax,info,gtl) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: gtl(:) + end subroutine psb_d_rsb_csput + end interface + + interface + subroutine psb_d_rsb_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_rsb_csgetptn + end interface + + interface + subroutine 
psb_d_rsb_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_rsb_csgetrow + end interface + + interface + subroutine psb_d_rsb_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_rsb_csgetblk + end interface + + interface + subroutine psb_d_rsb_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_cssv + subroutine psb_d_rsb_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, 
optional, intent(in) :: trans + end subroutine psb_d_rsb_cssm + end interface + + interface + subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_csmv + subroutine psb_d_rsb_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_csmm + end interface + + + interface + function psb_d_rsb_maxval(a) result(res) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_rsb_maxval + end interface + + interface + function psb_d_rsb_csnmi(a) result(res) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_rsb_csnmi + end interface + + interface + function psb_d_rsb_csnm1(a) result(res) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_rsb_csnm1 + end interface + + interface + subroutine psb_d_rsb_rowsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_rowsum + end interface + + interface + subroutine psb_d_rsb_arwsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_arwsum + end interface + + interface + subroutine psb_d_rsb_colsum(d,a) + 
import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_colsum + end interface + + interface + subroutine psb_d_rsb_aclsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_aclsum + end interface + + interface + subroutine psb_d_rsb_get_diag(a,d,info) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_rsb_get_diag + end interface + + interface + subroutine psb_d_rsb_scal(d,a,info,side) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_rsb_scal + end interface + + interface + subroutine psb_d_rsb_scals(d,a,info) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_rsb_scals + end interface + + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! 
== ===================================
+
+
+  ! Memory footprint of the matrix in bytes.
+  ! NOTE(review): the C wrappers bound in rsb_mod expose no byte count, so a
+  ! defined placeholder (0) is returned. The result used to be left
+  ! unassigned, which made the returned value undefined.
+  function d_rsb_sizeof(a) result(res)
+    implicit none
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_epk_) :: res
+
+    res = 0_psb_epk_
+
+  end function d_rsb_sizeof
+
+  ! Short name of this storage format.
+  function d_rsb_get_fmt() result(res)
+    implicit none
+    character(len=5) :: res
+    res = 'RSB'
+  end function d_rsb_get_fmt
+
+  ! Number of nonzeros, as reported by the C wrapper bound to Rsb_get_nzeros.
+  ! Returns 0 when no RSB matrix is attached, so the C side is never handed
+  ! a null handle.
+  function d_rsb_get_nzeros(a) result(res)
+    use rsb_mod
+    use iso_c_binding, only : c_associated
+    implicit none
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    if (c_associated(a%rsbMat)) then
+      res = Rsb_get_nzeros(a%rsbMat)
+    else
+      res = 0
+    end if
+
+  end function d_rsb_get_nzeros
+
+  ! Storage capacity in entries.
+  ! NOTE(review): reported as the current nonzero count, presuming an RSB
+  ! matrix has no spare capacity once built -- confirm. The result used to
+  ! be left unassigned.
+  function d_rsb_get_size(a) result(res)
+    implicit none
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_ipk_) :: res
+
+    res = d_rsb_get_nzeros(a)
+
+  end function d_rsb_get_size
+
+
+  ! Number of nonzeros in row idx.
+  ! Stub: always returns 0 (its type-bound binding is commented out).
+  function d_rsb_get_nz_row(idx,a) result(res)
+
+    implicit none
+
+    class(psb_d_rsb_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(in) :: idx
+    integer(psb_ipk_) :: res
+
+    res = 0
+
+
+  end function d_rsb_get_nz_row
+
+
+
+  ! == ===================================
+  !
+  !
+  !
+  ! Data management
+  !
+  !
+  !
+  !
+  !
+  ! == ===================================
+
+  ! Release the RSB matrix and reset the object to the null state with
+  ! zero rows and columns. The handle is cleared after the release so that
+  ! a second call does not free the same matrix twice.
+  subroutine d_rsb_free(a)
+    use rsb_mod
+    use iso_c_binding, only : c_associated, c_null_ptr
+    implicit none
+
+    class(psb_d_rsb_sparse_mat), intent(inout) :: a
+
+    if (c_associated(a%rsbMat)) call freeRsbMat(a%rsbMat)
+    a%rsbMat = c_null_ptr
+
+    call a%set_null()
+    call a%set_nrows(0)
+    call a%set_ncols(0)
+
+    return
+
+  end subroutine d_rsb_free
+
+
+end module psb_d_rsb_mat_mod
diff --git a/rsb/psb_rsb_mod.F90 b/rsb/psb_rsb_mod.F90
new file mode 100644
index 00000000..73db825a
--- /dev/null
+++ b/rsb/psb_rsb_mod.F90
@@ -0,0 +1,50 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. 
Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_rsb_mod + use psb_const_mod + use rsb_mod + use psb_rsb_penv_mod + ! use psb_d_ell_mat_mod + ! use psb_s_ell_mat_mod + ! use psb_z_ell_mat_mod + ! use psb_c_ell_mat_mod + + ! use psb_s_hll_mat_mod + ! use psb_d_hll_mat_mod + ! use psb_c_hll_mat_mod + ! use psb_z_hll_mat_mod + + ! use psb_d_dia_mat_mod + ! use psb_d_hdia_mat_mod + use psb_d_rsb_mat_mod +end module psb_rsb_mod diff --git a/rsb/psb_rsb_penv_mod.F90 b/rsb/psb_rsb_penv_mod.F90 new file mode 100644 index 00000000..12a56d83 --- /dev/null +++ b/rsb/psb_rsb_penv_mod.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! 
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_rsb_penv_mod + use psb_const_mod + use psb_penv_mod + !use psi_comm_buffers_mod, only : psb_buffer_queue + use iso_c_binding + +! interface psb_rsb_init +! module procedure psb_rsb_init +! 
end interface
+#if defined(HAVE_RSB)
+  ! Bindings to the C entry points defined in rsb_int.c.
+  interface
+    function psb_C_rsb_init() &
+         & result(res) bind(c,name='rsbInit')
+      use iso_c_binding
+      integer(c_int) :: res
+    end function psb_C_rsb_init
+  end interface
+
+  interface
+    function psb_C_rsb_exit() &
+         & result(res) bind(c,name='rsbExit')
+      use iso_c_binding
+      integer(c_int) :: res
+    end function psb_C_rsb_exit
+  end interface
+
+#endif
+
+contains
+  ! !!!!!!!!!!!!!!!!!!!!!!
+  !
+  ! Environment handling
+  !
+  ! !!!!!!!!!!!!!!!!!!!!!!
+
+
+  ! Initialise the RSB backend through the C routine bound as 'rsbInit'.
+  ! A non-zero return code is reported with a message on the default output
+  ! unit. The routine does nothing when HAVE_RSB is not defined.
+  subroutine psb_rsb_init()
+    use psb_penv_mod
+    use psb_const_mod
+    use psb_error_mod
+    implicit none
+    ! type(psb_ctxt_type), intent(in) :: ctxt
+    ! integer, intent(in), optional :: dev
+
+#if defined (HAVE_RSB)
+    ! Declared inside the guard so non-RSB builds have no unused variable.
+    integer :: info
+
+    info = psb_C_rsb_init()
+    if (info/=0) write(*,*) 'error during rsb_init'
+#endif
+  end subroutine psb_rsb_init
+
+  ! Shut the RSB backend down through the C routine bound as 'rsbExit'.
+  ! A non-zero return code is reported with a message on the default output
+  ! unit. The routine does nothing when HAVE_RSB is not defined.
+  subroutine psb_rsb_exit()
+    use psb_penv_mod
+    use psb_const_mod
+    use psb_error_mod
+    implicit none
+    ! type(psb_ctxt_type), intent(in) :: ctxt
+    ! integer, intent(in), optional :: dev
+
+#if defined (HAVE_RSB)
+    ! Declared inside the guard so non-RSB builds have no unused variable.
+    integer :: info
+
+    info = psb_C_rsb_exit()
+    if (info/=0) write(*,*) 'error during rsb_exit'
+#endif
+  end subroutine psb_rsb_exit
+
+end module psb_rsb_penv_mod
diff --git a/rsb/rsb_int.c b/rsb/rsb_int.c
new file mode 100644
index 00000000..dc4a8026
--- /dev/null
+++ b/rsb/rsb_int.c
@@ -0,0 +1,110 @@
+ /* Parallel Sparse BLAS GPU plugin */
+ /* (C) Copyright 2013 */
+
+ /* Salvatore Filippone */
+ /* Alessandro Fanfarillo */
+
+ /* Redistribution and use in source and binary forms, with or without */
+ /* modification, are permitted provided that the following conditions */
+ /* are met: */
+ /* 1. Redistributions of source code must retain the above copyright */
+ /* notice, this list of conditions and the following disclaimer. */
+ /* 2. Redistributions in binary form must reproduce the above copyright */
+ /* notice, this list of conditions, and the following disclaimer in the */
+ /* documentation and/or other materials provided with the distribution. 
*/ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. 
*/
+
+#include <stdio.h>
+#if defined(HAVE_RSB)
+#include "rsb.h"
+#include "rsb_int.h"
+
+/* Initialise librsb with default options.
+   Returns 0 on success, 1 on failure (a message is printed). */
+int rsbInit()
+{
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+
+  if((errval = rsb_lib_init(RSB_NULL_INIT_OPTIONS))!=RSB_ERR_NO_ERROR)
+    {
+      printf("Error initializing the library!\n");
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Finalise librsb.
+   Returns 0 on success, 1 on failure (a message is printed). */
+int rsbExit()
+{
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+
+  /* Use the exit-options macro; the init-options macro was passed before. */
+  if((errval = rsb_lib_exit(RSB_NULL_EXIT_OPTIONS))!=RSB_ERR_NO_ERROR)
+    {
+      printf("Error finalizing the library!\n");
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Build a double precision RSB matrix from COO triplets.
+   rsbMat   : out, receives the matrix handle (set to NULL on failure)
+   va,ia,ja : values, row indices and column indices, nnz entries each
+   nr,nc    : number of rows and columns
+   br,bc    : blocking hints forwarded to librsb unchanged
+   The indices are passed with RSB_FLAG_FORTRAN_INDICES_INTERFACE.
+   Returns 0 on success, 1 on failure (a message is printed). */
+int Rsb_double_from_coo(void **rsbMat, double *va, int *ia,int *ja,int nnz,int nr,
+                        int nc, int br, int bc)
+{
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+
+  if(!rsbMat)
+    {
+      printf("Error while allocating the matrix!\n");
+      return 1;
+    }
+
+  *rsbMat = rsb_mtx_alloc_from_coo_const(va,ia,ja,nnz,RSB_NUMERICAL_TYPE_DOUBLE,nr,nc,br,bc,RSB_FLAG_FORTRAN_INDICES_INTERFACE,&errval);
+
+  if((!*rsbMat) || (errval != RSB_ERR_NO_ERROR))
+    {
+      /* Do not leak, or hand back, a matrix that librsb flagged as bad. */
+      if(*rsbMat)
+        *rsbMat = rsb_mtx_free((struct rsb_mtx_t *)*rsbMat);
+      printf("Error while allocating the matrix!\n");
+      return 1;
+    }
+  return 0;
+}
+
+/* Sparse matrix-vector product with unit strides for x and y.
+   x is the input vector; y is scaled by beta and receives the result.
+   trans : 'N' or 'n' selects the matrix itself. Any other value selects the
+           transpose, as before (for real data the conjugate transpose is
+           the same operation). Lowercase 'n' used to be taken as transpose.
+   Returns 0 on success, 1 on failure (a message is printed). */
+int Rsb_double_spmv(void *rsbMat, double *x, double alfa, double *y, double beta,char trans)
+{
+  rsb_err_t errval = RSB_ERR_NO_ERROR;
+  rsb_trans_t transA;
+
+  switch(trans)
+    {
+    case 'N':
+    case 'n':
+      transA = RSB_TRANSPOSITION_N;
+      break;
+    default:
+      transA = RSB_TRANSPOSITION_T;
+      break;
+    }
+
+  errval = rsb_spmv(transA,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1);
+
+  if(errval != RSB_ERR_NO_ERROR)
+    {
+      printf("Error performing a multiplication!\n");
+      return 1;
+    }
+
+  return 0;
+}
+
+//Should it return a long instead of integer? 
+/* Number of nonzeros stored in the matrix.
+   The value is read into an rsb_nnz_idx_t, the type this query writes, and
+   then converted to int. Returns 0 for a NULL handle or when the query
+   fails. */
+int Rsb_getNZeros(void *rsbMat)
+{
+  rsb_nnz_idx_t nnz = 0;
+
+  if(!rsbMat)
+    return 0;
+
+  if(rsb_mtx_get_info((struct rsb_mtx_t *)rsbMat,RSB_MIF_MATRIX_NNZ__TO__RSB_NNZ_INDEX_T,(void *)&nnz) != RSB_ERR_NO_ERROR)
+    return 0;
+
+  return (int)nnz;
+}
+
+/* Release a matrix created by Rsb_double_from_coo. A NULL handle is
+   ignored. */
+void freeRsbMat(void *rsbMat)
+{
+  if(rsbMat)
+    rsb_mtx_free((struct rsb_mtx_t *)rsbMat);
+}
+
+#endif
diff --git a/rsb/rsb_int.h b/rsb/rsb_int.h
new file mode 100644
index 00000000..c08f114d
--- /dev/null
+++ b/rsb/rsb_int.h
@@ -0,0 +1,2 @@
+int Rsb_double_from_coo(void **rsbMat,double *va, int *ia,int *ja,int nnz,int nr,
+                        int nc, int br, int bc);
diff --git a/rsb/rsb_mod.F90 b/rsb/rsb_mod.F90
new file mode 100644
index 00000000..e5252654
--- /dev/null
+++ b/rsb/rsb_mod.F90
@@ -0,0 +1,235 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! 
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module rsb_mod + use rsb + use iso_c_binding + +#ifdef HAVE_RSB + + interface Rsb_from_coo + function Rsb_double_from_coo(rsbMat,va,ia,ja,nnz,nr,nc,br,bc) & + & result(res) bind(c,name='Rsb_double_from_coo') + use iso_c_binding + integer(c_int) :: res + type(c_ptr) :: rsbMat + real(c_double) :: va(*) + integer(c_int) :: ia(*),ja(*) + integer(c_int),value :: nnz,nr,nc,br,bc + end function Rsb_double_from_coo + end interface Rsb_from_coo + + interface + function Rsb_get_nzeros(rsbMat) & + & result(res) bind(c,name='Rsb_getNZeros') + use iso_c_binding + integer(c_int) :: res + type(c_ptr),value :: rsbMat + end function Rsb_get_nzeros + end interface + + interface Rsb_spmv + function Rsb_double_spmv(rsbMat,x,alfa,y,beta,trans) & + & result(res) bind(c,name='Rsb_double_spmv') + use iso_c_binding + integer(c_int) :: res + type(c_ptr),value :: rsbMat + real(c_double) :: x(*),y(*) + real(c_double),value :: alfa,beta + character(c_char),value :: trans + end function Rsb_double_spmv + end interface Rsb_spmv + + interface + subroutine freeRsbMat(rsbMat) & + & bind(c,name='freeRsbMat') + use iso_c_binding + type(c_ptr), value :: rsbMat + end subroutine freeRsbMat + end interface + + ! interface writeEllDevice + + ! function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceFloat') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_float) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceFloat + + ! function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! 
type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_double) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceDouble + + ! function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceFloatComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_float_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceFloatComplex + + ! function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceDoubleComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_double_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceDoubleComplex + + ! end interface writeEllDevice + + ! interface readEllDevice + + ! function readEllDeviceFloat(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceFloat') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_float) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceFloat + + ! function readEllDeviceDouble(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_double) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceDouble + + ! function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceFloatComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_float_complex) :: val(ldj,*) + ! 
integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceFloatComplex + + ! function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceDoubleComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_double_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceDoubleComplex + + ! end interface readEllDevice + + ! interface + ! subroutine resetEllTimer() bind(c,name='resetEllTimer') + ! use iso_c_binding + ! end subroutine resetEllTimer + ! end interface + ! interface + ! function getEllTimer() & + ! & bind(c,name='getEllTimer') result(res) + ! use iso_c_binding + ! real(c_double) :: res + ! end function getEllTimer + ! end interface + + + ! interface + ! function getEllDevicePitch(deviceMat) & + ! & bind(c,name='getEllDevicePitch') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getEllDevicePitch + ! end interface + + ! interface + ! function getEllDeviceMaxRowSize(deviceMat) & + ! & bind(c,name='getEllDeviceMaxRowSize') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getEllDeviceMaxRowSize + ! end interface + + + ! interface spmvEllDevice + ! function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceFloat') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! real(c_float),value :: alpha, beta + ! end function spmvEllDeviceFloat + ! function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! real(c_double),value :: alpha, beta + ! end function spmvEllDeviceDouble + ! function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + ! 
& result(res) bind(c,name='spmvEllDeviceFloatComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! complex(c_float_complex),value :: alpha, beta + ! end function spmvEllDeviceFloatComplex + ! function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceDoubleComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! complex(c_double_complex),value :: alpha, beta + ! end function spmvEllDeviceDoubleComplex + ! end interface spmvEllDevice + +#endif + + +end module rsb_mod diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile new file mode 100755 index 00000000..e1c3b78d --- /dev/null +++ b/test/cudakern/Makefile @@ -0,0 +1,48 @@ +TOPDIR=../.. +include $(TOPDIR)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOPDIR)/lib/ +PSBLIBDIR=$(TOPDIR)/lib/ +OPTDIR=$(LIBDIR) +PSBINCDIR=$(TOPDIR)/include +PSBMODDIR=$(TOPDIR)/modules +PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) $(LCUDA) -lpsb_ext -lpsb_util -lpsb_base +INCDIR=$(TOPDIR)/include +MODDIR=$(TOPDIR)/modules + +LDLIBS=$(PSBGPULDLIBS) + +FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) + +DPGOBJS=dpdegenmv.o data_input.o +SPGOBJS=spdegenmv.o data_input.o +EXEDIR=./runs + +all: dir pgen +pgen: dpdegenmv spdegenmv +dpdegenmv spdegenmv: dir +dir: + (if test ! 
-d $(EXEDIR); then mkdir $(EXEDIR); fi) +dpdegenmv.o spdegenmv.o: data_input.o +dpdegenmv: $(DPGOBJS) + $(FLINK) $(LOPT) $(DPGOBJS) -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) + /bin/mv dpdegenmv $(EXEDIR) +spdegenmv: $(SPGOBJS) + $(FLINK) $(LOPT) $(SPGOBJS) -o spdegenmv $(PSBLAS_LIB) $(LDLIBS) + /bin/mv spdegenmv $(EXEDIR) + +clean: + /bin/rm -f $(DTOBJS) $(STOBJS) $(DPGOBJS) $(SPGOBJS) $(ZTOBJS) $(CTOBJS) \ + $(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv *mod + +lib: + (cd ../../; make library) +verycleanlib: + (cd ../../; make veryclean) + +%.o: %.F90 + $(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@ +%.o: %.f90 + $(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@ diff --git a/test/cudakern/data_input.f90 b/test/cudakern/data_input.f90 new file mode 100644 index 00000000..274cb7ad --- /dev/null +++ b/test/cudakern/data_input.f90 @@ -0,0 +1,221 @@ +! +! +! MLD2P4 version 2.0 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.0) +! +! (C) Copyright 2008,2009,2010 +! +! Salvatore Filippone +! Alfredo Buttari +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the MLD2P4 group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module data_input + + interface read_data + module procedure read_char, read_int,& + & read_double, read_single, read_logical,& + & string_read_char, string_read_int,& + & string_read_double, string_read_single, & + & string_read_logical + end interface read_data + interface trim_string + module procedure trim_string + end interface + + character(len=4096), private :: charbuf + character, private, parameter :: def_marker="!" 
+
+contains
+
+  !
+  ! The read_* routines below all follow one pattern: read one whole text
+  ! line from unit FILE into CHARBUF and pass it, together with the optional
+  ! MARKER, to READ_DATA, which extracts VAL.
+  ! NOTE(review): CHARBUF and the READ_DATA generic are not declared in the
+  ! visible part of this module; presumably CHARBUF is a module-level buffer
+  ! and READ_DATA resolves to the string_read_* routines below -- confirm
+  ! against the module header.
+  !
+
+  ! Read a logical value from the next line of unit FILE.
+  subroutine read_logical(val,file,marker)
+    logical, intent(out) :: val
+    integer, intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+
+    read(file,'(a)')charbuf
+    call read_data(val,charbuf,marker)
+
+  end subroutine read_logical
+
+  ! Read a character value from the next line of unit FILE.
+  subroutine read_char(val,file,marker)
+    character(len=*), intent(out) :: val
+    integer, intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+
+    read(file,'(a)')charbuf
+    call read_data(val,charbuf,marker)
+
+  end subroutine read_char
+
+
+  ! Read a default integer value from the next line of unit FILE.
+  subroutine read_int(val,file,marker)
+    integer, intent(out) :: val
+    integer, intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+
+    read(file,'(a)')charbuf
+    call read_data(val,charbuf,marker)
+
+  end subroutine read_int
+  ! Read a single precision real value from the next line of unit FILE.
+  subroutine read_single(val,file,marker)
+    use psb_base_mod
+    real(psb_spk_), intent(out) :: val
+    integer, intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+
+    read(file,'(a)')charbuf
+    call read_data(val,charbuf,marker)
+
+  end subroutine read_single
+  ! Read a double precision real value from the next line of unit FILE.
+  subroutine read_double(val,file,marker)
+    use psb_base_mod
+    real(psb_dpk_), intent(out) :: val
+    integer, intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+
+    read(file,'(a)')charbuf
+    call read_data(val,charbuf,marker)
+
+  end subroutine read_double
+
+  !
+  ! The string_read_* routines parse VAL out of the character string FILE,
+  ! which is used as an internal file. The text is copied into a local
+  ! 1024-character buffer and left-adjusted; only the part that precedes the
+  ! first occurrence of the marker character is parsed. When the marker does
+  ! not occur, the whole buffer is parsed. MARKER defaults to DEF_MARKER,
+  ! which is defined outside the visible part of this module.
+  !
+
+  ! Character variant: the text before the marker is copied with an '(a)' edit.
+  subroutine string_read_char(val,file,marker)
+    character(len=*), intent(out) :: val
+    character(len=*), intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+    character(len=1) :: marker_
+    character(len=1024) :: charbuf
+    integer :: idx
+    if (present(marker)) then
+      marker_ = marker
+    else
+      marker_ = def_marker
+    end if
+    read(file,'(a)')charbuf
+    charbuf = adjustl(charbuf)
+    idx=index(charbuf,marker_)
+    ! no marker: use the whole buffer
+    if (idx == 0) idx = len(charbuf)+1
+    read(charbuf(1:idx-1),'(a)') val
+  end subroutine string_read_char
+
+  ! Integer variant: the text before the marker is parsed list-directed.
+  subroutine string_read_int(val,file,marker)
+    integer, intent(out) :: val
+    character(len=*), intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+    character(len=1) :: marker_
+    character(len=1024) :: charbuf
+    integer :: idx
+    if (present(marker)) then
+      marker_ = marker
+    else
+      marker_ = def_marker
+    end if
+    read(file,'(a)')charbuf
+    charbuf = adjustl(charbuf)
+    idx=index(charbuf,marker_)
+    ! no marker: use the whole buffer
+    if (idx == 0) idx = len(charbuf)+1
+    read(charbuf(1:idx-1),*) val
+  end subroutine string_read_int
+
+  ! Single precision variant: the text before the marker is parsed list-directed.
+  subroutine string_read_single(val,file,marker)
+    use psb_base_mod
+    real(psb_spk_), intent(out) :: val
+    character(len=*), intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+    character(len=1) :: marker_
+    character(len=1024) :: charbuf
+    integer :: idx
+    if (present(marker)) then
+      marker_ = marker
+    else
+      marker_ = def_marker
+    end if
+    read(file,'(a)')charbuf
+    charbuf = adjustl(charbuf)
+    idx=index(charbuf,marker_)
+    ! no marker: use the whole buffer
+    if (idx == 0) idx = len(charbuf)+1
+    read(charbuf(1:idx-1),*) val
+  end subroutine string_read_single
+
+  ! Double precision variant: the text before the marker is parsed list-directed.
+  subroutine string_read_double(val,file,marker)
+    use psb_base_mod
+    real(psb_dpk_), intent(out) :: val
+    character(len=*), intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+    character(len=1) :: marker_
+    character(len=1024) :: charbuf
+    integer :: idx
+    if (present(marker)) then
+      marker_ = marker
+    else
+      marker_ = def_marker
+    end if
+    read(file,'(a)')charbuf
+    charbuf = adjustl(charbuf)
+    idx=index(charbuf,marker_)
+    ! no marker: use the whole buffer
+    if (idx == 0) idx = len(charbuf)+1
+    read(charbuf(1:idx-1),*) val
+  end subroutine string_read_double
+
+  ! Logical variant: the text before the marker is parsed list-directed.
+  subroutine string_read_logical(val,file,marker)
+    use psb_base_mod
+    logical, intent(out) :: val
+    character(len=*), intent(in) :: file
+    character(len=1), optional, intent(in) :: marker
+    character(len=1) :: marker_
+    character(len=1024) :: charbuf
+    integer :: idx
+    if (present(marker)) then
+      marker_ = marker
+    else
+      marker_ = def_marker
+    end if
+    read(file,'(a)')charbuf
+    charbuf = adjustl(charbuf)
+    idx=index(charbuf,marker_)
+    ! no marker: use the whole buffer
+    if (idx == 0) idx = len(charbuf)+1
+    read(charbuf(1:idx-1),*) val
+  end subroutine string_read_logical
+
+  !
+  ! Return the part of STRING that starts at the first occurrence of the
+  ! marker character (marker included), left-adjusted and blank-padded to
+  ! the length of STRING. MARKER defaults to DEF_MARKER.
+  ! When the marker does not occur in STRING the whole string is returned
+  ! left-adjusted.
+  !
+  function trim_string(string,marker)
+    character(len=*), intent(in) :: string
+    character(len=1), optional, intent(in) :: marker
+    character(len=len(string)) :: trim_string
+    character(len=1) :: marker_
+    integer :: idx
+    if (present(marker)) then
+      marker_ = marker
+    else
+      marker_ = def_marker
+    end if
+    idx=index(string,marker_)
+    ! index() yields 0 when the marker is absent; string(0:) would be an
+    ! out-of-bounds substring reference, so start from the first character.
+    if (idx == 0) idx = 1
+    trim_string = adjustl(string(idx:))
+  end function trim_string
+end module data_input
+
diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90
new file mode 100644
index 00000000..d97a06aa
--- /dev/null
+++ b/test/cudakern/dpdegenmv.F90
@@ -0,0 +1,1021 @@
+!
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: dpdegenmv.f90
+!
+! Program: pdegenmv
+! This sample program measures the performance of the matrix-vector product.
+! The matrix is generated in the same way as for the pdegen test case of
+! the main PSBLAS library.
+!
+!
+module psb_d_pde3d_mod
+
+
+  use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,&
+       & psb_dspmat_type, psb_d_vect_type, dzero,&
+       & psb_d_base_sparse_mat, psb_d_base_vect_type, &
+       & psb_i_base_vect_type, psb_l_base_vect_type
+
+  ! Signature of the optional forcing term F accepted by psb_d_gen_pde3d.
+  interface
+    function d_func_3d(x,y,z) result(val)
+      import :: psb_dpk_
+      real(psb_dpk_), intent(in) :: x,y,z
+      real(psb_dpk_) :: val
+    end function d_func_3d
+  end interface
+
+  interface psb_gen_pde3d
+    module procedure psb_d_gen_pde3d
+  end interface psb_gen_pde3d
+
+contains
+
+  ! Default forcing term: identically zero.
+  function d_null_func_3d(x,y,z) result(val)
+
+    real(psb_dpk_), intent(in) :: x,y,z
+    real(psb_dpk_) :: val
+
+    val = dzero
+
+  end function d_null_func_3d
+  !
+  ! functions parametrizing the differential equation
+  !
+
+  !
+  ! Note: b1, b2 and b3 are the coefficients of the first
+  ! derivative of the unknown function. In this test program they
+  ! are set to done/sqrt((3*done)), so the generated matrix is
+  ! not symmetric/hermitian.
+  ! To obtain a symmetric/hermitian matrix suitable for
+  ! testing with CG and FCG, change the B1/B2/B3 functions
+  ! to return dzero.
+  !
+  ! b1: coefficient of d(u)/dx
+  function b1(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) :: b1
+    real(psb_dpk_), intent(in) :: x,y,z
+    b1=done/sqrt((3*done))
+  end function b1
+  ! b2: coefficient of d(u)/dy
+  function b2(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) :: b2
+    real(psb_dpk_), intent(in) :: x,y,z
+    b2=done/sqrt((3*done))
+  end function b2
+  ! b3: coefficient of d(u)/dz
+  function b3(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) :: b3
+    real(psb_dpk_), intent(in) :: x,y,z
+    b3=done/sqrt((3*done))
+  end function b3
+  ! c: coefficient of the zero-order term u
+  function c(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) :: c
+    real(psb_dpk_), intent(in) :: x,y,z
+    c=dzero
+  end function c
+  ! a1: coefficient of dd(u)/dxdx
+  function a1(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) :: a1
+    real(psb_dpk_), intent(in) :: x,y,z
+    a1=done/80
+  end function a1
+  ! a2: coefficient of dd(u)/dydy
+  function a2(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) :: a2
+    real(psb_dpk_), intent(in) :: x,y,z
+    a2=done/80
+  end function a2
+  ! a3: coefficient of dd(u)/dzdz
+  function a3(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) :: a3
+    real(psb_dpk_), intent(in) :: x,y,z
+    a3=done/80
+  end function a3
+  ! g: Dirichlet boundary value; done on the face x=1, exp(y**2-z**2) on
+  ! the face x=0, dzero everywhere else.
+  function g(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) :: g
+    real(psb_dpk_), intent(in) :: x,y,z
+    g = dzero
+    if (x == done) then
+      g = done
+    else if (x == dzero) then
+      g = exp(y**2-z**2)
+    end if
+  end function g
+
+
+  !
+  ! subroutine to allocate and fill in the coefficient matrix and
+  ! the rhs.
+  !
+  ! Arguments:
+  !   ctxt      communication context
+  !   idim      number of grid points per dimension (matrix size idim**3)
+  !   a         sparse matrix, allocated, filled and assembled here
+  !   bv        right hand side, built from F and the boundary values G
+  !   xv        initial guess, filled with zeros
+  !   desc_a    communication descriptor, allocated and assembled here
+  !   afmt      storage format name handed to psb_spasb when AMOLD is absent
+  !   info      return code, psb_success_ when everything went fine
+  !   f         optional forcing term, defaults to d_null_func_3d
+  !   amold,vmold,imold  optional molds forwarded to psb_spasb,
+  !             psb_geasb and psb_cdasb respectively
+  !   partition optional data distribution: 1 BLOCK, 2 user defined
+  !             through IV, 3 three-dimensional (default)
+  !   nrl       optional number of local rows, used with partition=1
+  !   iv        owner process of each global row, required with partition=2
+  !   tnd       optional flag forwarded to psb_spasb as BLD_AND
+  !
+  subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
+       & f,amold,vmold,imold,partition,nrl,iv,tnd)
+    use psb_base_mod
+    use psb_util_mod
+    !
+    ! Discretizes the partial differential equation
+    !
+    !   a1 dd(u)   a2 dd(u)   a3 dd(u)   b1 d(u)   b2 d(u)   b3 d(u)
+    ! - -------- - -------- - -------- + ------- + ------- + ------- + c u = f
+    !     dxdx       dydy       dzdz       dx        dy        dz
+    !
+    ! with Dirichlet boundary conditions
+    ! u = g
+    !
+    ! on the unit cube 0<=x,y,z<=1.
+    !
+    !
+    ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation.
+    !
+    implicit none
+    integer(psb_ipk_) :: idim
+    type(psb_dspmat_type) :: a
+    type(psb_d_vect_type) :: xv,bv
+    type(psb_desc_type) :: desc_a
+    type(psb_ctxt_type) :: ctxt
+    integer(psb_ipk_) :: info
+    character(len=*) :: afmt
+    procedure(d_func_3d), optional :: f
+    class(psb_d_base_sparse_mat), optional :: amold
+    class(psb_d_base_vect_type), optional :: vmold
+    class(psb_i_base_vect_type), optional :: imold
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
+    logical, optional :: tnd
+    ! Local variables.
+
+    integer(psb_ipk_), parameter :: nb=20
+    type(psb_d_csc_sparse_mat) :: acsc
+    type(psb_d_coo_sparse_mat) :: acoo
+    type(psb_d_csr_sparse_mat) :: acsr
+    real(psb_dpk_) :: zt(nb),x,y,z
+    integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
+    integer(psb_lpk_) :: m,n,glob_row,nt
+    integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
+    ! For 3D partition
+    ! Note: integer control variables going directly into an MPI call
+    ! must be 4 bytes, i.e. psb_mpk_
+    integer(psb_mpk_) :: npdims(3), npp, minfo
+    integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
+    integer(psb_ipk_) :: icoeff
+    integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
+    real(psb_dpk_), allocatable :: val(:)
+    ! deltah dimension of each grid cell
+    ! deltat discretization time
+    real(psb_dpk_) :: deltah, sqdeltah, deltah2
+    real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero
+    real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb
+    integer(psb_ipk_) :: err_act
+    procedure(d_func_3d), pointer :: f_
+    logical :: tnd_
+    character(len=20) :: name, ch_err,tmpfmt
+
+    info = psb_success_
+    name = 'create_matrix'
+    call psb_erractionsave(err_act)
+
+    call psb_info(ctxt, iam, np)
+
+
+    ! forcing term: user supplied or identically zero
+    if (present(f)) then
+      f_ => f
+    else
+      f_ => d_null_func_3d
+    end if
+
+    deltah = done/(idim+2)
+    sqdeltah = deltah*deltah
+    deltah2 = (2*done)* deltah
+
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+
+    ! initialize array descriptor and sparse matrix storage. provide an
+    ! estimate of the number of non zeroes
+
+    m = (1_psb_lpk_*idim)*idim*idim
+    n = m
+    nnz = ((n*7)/(np))
+    if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
+    t0 = psb_wtime()
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition
+      if (present(nrl)) then
+        nr = nrl
+      else
+        !
+        ! Using a simple BLOCK distribution.
+        !
+        nt = (m+np-1)/np
+        nr = max(0,min(nt,m-(iam*nt)))
+      end if
+
+      ! the local sizes must add up to the global size
+      nt = nr
+      call psb_sum(ctxt,nt)
+      if (nt /= m) then
+        write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+        info = -1
+        call psb_barrier(ctxt)
+        call psb_abort(ctxt)
+        return
+      end if
+
+      !
+      ! First example of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      !
+      call psb_cdall(ctxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A partition defined by the user through IV
+
+      if (present(iv)) then
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ctxt)
+          call psb_abort(ctxt)
+          return
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
+        call psb_barrier(ctxt)
+        call psb_abort(ctxt)
+        return
+      end if
+
+      !
+      ! Second example of use of CDALL: specify for each row the
+      ! process that owns it
+      !
+      call psb_cdall(ctxt,desc_a,info,vg=iv)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 3-dimensional partition
+
+      ! A nifty MPI function will split the process list.
+      ! As per the note in the declarations, the integer arguments of an
+      ! MPI call must be of kind psb_mpk_: go through npp/minfo instead of
+      ! handing over the psb_ipk_ variables np/info.
+      npdims = 0
+      npp = np
+      call mpi_dims_create(npp,3,npdims,minfo)
+      info = minfo
+      npx = npdims(1)
+      npy = npdims(2)
+      npz = npdims(3)
+
+      allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+      ! We can reuse idx2ijk for process indices as well.
+      call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+      ! Now let's split the 3D cube in hexahedra
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+      call dist1Didx(bndz,idim,npz)
+      mynz = bndz(iamz+1)-bndz(iamz)
+
+      ! How many indices do I own?
+      nlr = mynx*myny*mynz
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndy(iamy+1)-1
+          do k=bndz(iamz),bndz(iamz+1)-1
+            nr = nr + 1
+            call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+          end do
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny,mynz
+        info = -1
+        call psb_barrier(ctxt)
+        call psb_abort(ctxt)
+        ! same exit as every other initialization error in this routine
+        return
+      end if
+
+      !
+      ! Third example of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      !
+      call psb_cdall(ctxt,desc_a,info,vl=myidx)
+
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ctxt)
+      call psb_abort(ctxt)
+      return
+    end select
+
+
+    if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,&
+         & dupl=psb_dupl_err_)
+    ! define rhs from boundary conditions; also build initial guess
+    if (info == psb_success_) call psb_geall(xv,desc_a,info)
+    if (info == psb_success_) call psb_geall(bv,desc_a,info)
+
+    call psb_barrier(ctxt)
+    talc = psb_wtime()-t0
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='allocation rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    ! we build an auxiliary matrix consisting of one row at a
+    ! time; just a small matrix. might be extended to generate
+    ! a bunch of rows per call.
+    !
+    allocate(val(20*nb),irow(20*nb),&
+         &icol(20*nb),stat=info)
+    if (info /= psb_success_ ) then
+      info=psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
+
+
+    ! loop over rows belonging to current process in a block
+    ! distribution.
+
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    do ii=1, nlr,nb
+      ib = min(nb,nlr-ii+1)
+      icoeff = 1
+      do k=1,ib
+        i=ii+k-1
+        ! local matrix pointer
+        glob_row=myidx(i)
+        ! compute gridpoint coordinates
+        call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+        ! x, y, z coordinates
+        x = (ix-1)*deltah
+        y = (iy-1)*deltah
+        z = (iz-1)*deltah
+        zt(k) = f_(x,y,z)
+        ! internal point: build discretization
+        ! For each neighbour the coefficient is computed first; on a
+        ! boundary it is moved to the rhs through g, otherwise it is kept
+        ! as a matrix entry.
+        !
+        ! term depending on (x-1,y,z)
+        !
+        val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
+        if (ix == 1) then
+          zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+        ! term depending on (x,y-1,z)
+        val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
+        if (iy == 1) then
+          zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+        ! term depending on (x,y,z-1)
+        val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
+        if (iz == 1) then
+          zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+
+        ! term depending on (x,y,z)
+        val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
+             & + c(x,y,z)
+        call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
+        irow(icoeff) = glob_row
+        icoeff = icoeff+1
+        ! term depending on (x,y,z+1)
+        val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
+        if (iz == idim) then
+          zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+        ! term depending on (x,y+1,z)
+        val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
+        if (iy == idim) then
+          zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+        ! term depending on (x+1,y,z)
+        val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
+        if (ix==idim) then
+          zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+
+      end do
+      ! insert the block of rows, its rhs entries and a zero initial guess
+      call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
+      if(info /= psb_success_) exit
+      call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
+      if(info /= psb_success_) exit
+      zt(:)=dzero
+      call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
+      if(info /= psb_success_) exit
+    end do
+
+    tgen = psb_wtime()-t1
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='insert rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    deallocate(val,irow,icol)
+
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    call psb_cdasb(desc_a,info,mold=imold)
+    tcdasb = psb_wtime()-t1
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    if (info == psb_success_) then
+      if (present(amold)) then
+        call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd)
+      else
+        call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd)
+      end if
+    end if
+    call psb_barrier(ctxt)
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='asb rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+    if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold)
+    if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold)
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='asb rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+    tasb = psb_wtime()-t1
+    call psb_barrier(ctxt)
+    ttot = psb_wtime() - t0
+
+    ! reduce every timer that is reported below with psb_amx
+    call psb_amx(ctxt,talc)
+    call psb_amx(ctxt,tgen)
+    call psb_amx(ctxt,tcdasb)
+    call psb_amx(ctxt,tasb)
+    call psb_amx(ctxt,ttot)
+    if(iam == psb_root_) then
+      tmpfmt = a%get_fmt()
+      write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')&
+           & tmpfmt
+      write(psb_out_unit,'("-allocation time : ",es12.5)') talc
+      write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen
+      write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb
+      write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb
+      write(psb_out_unit,'("-total time : ",es12.5)') ttot
+
+    end if
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+    return
+  end subroutine psb_d_gen_pde3d
+
+
+end module psb_d_pde3d_mod
+
+
+program pdgenmv
+  use psb_base_mod
+  use psb_util_mod
+  use psb_ext_mod
+#ifdef PSB_HAVE_CUDA
+  use psb_cuda_mod
+#endif
+#ifdef HAVE_RSB
+  use psb_rsb_mod
+#endif
+  use psb_d_pde3d_mod
+  implicit none
+
+  !
input parameters
+  character(len=5) :: acfmt, agfmt
+  integer :: idim
+  logical :: tnd
+  ! miscellaneous
+  real(psb_dpk_), parameter :: one = 1.d0
+  real(psb_dpk_) :: t1, t2, tprec, flops, tflops,&
+       & tt1, tt2, gt1, gt2, gflops, bdwdth,&
+       & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
+
+  ! sparse matrix and preconditioner
+  type(psb_dspmat_type) :: a, agpu, aux_a
+  ! descriptor
+  type(psb_desc_type) :: desc_a
+  ! dense matrices
+  type(psb_d_vect_type), target :: xv, bv, xg, bg
+#ifdef PSB_HAVE_CUDA
+  type(psb_d_vect_cuda) :: vmold
+  type(psb_i_vect_cuda) :: imold
+#endif
+  real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:)
+  ! blacs parameters
+  type(psb_ctxt_type) :: ctxt
+  integer :: iam, np
+
+  ! solver parameters
+  integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes
+  real(psb_dpk_) :: err, eps, tnv, tng,tdot, dnrm2,ddot
+  ! ntests: products per timing loop; ngpu: multiplier for the GPU-only
+  ! loop; ncnv: number of timed format conversions
+  integer, parameter :: ntests=8, ngpu=2, ncnv=3
+  type(psb_d_coo_sparse_mat), target :: acoo
+  type(psb_d_csr_sparse_mat), target :: acsr
+  type(psb_d_ell_sparse_mat), target :: aell
+  type(psb_d_hll_sparse_mat), target :: ahll
+  type(psb_d_dia_sparse_mat), target :: adia
+  type(psb_d_hdia_sparse_mat), target :: ahdia
+#ifdef HAVE_RSB
+  type(psb_d_rsb_sparse_mat), target :: arsb
+#endif
+#ifdef PSB_HAVE_CUDA
+  type(psb_d_cuda_elg_sparse_mat), target :: aelg
+  type(psb_d_cuda_csrg_sparse_mat), target :: acsrg
+#if PSB_CUDA_SHORT_VERSION <= 10
+  type(psb_d_cuda_hybg_sparse_mat), target :: ahybg
+#endif
+  type(psb_d_cuda_hlg_sparse_mat), target :: ahlg
+  type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag
+  type(psb_d_cuda_dnsg_sparse_mat), target :: adnsg
+#endif
+  class(psb_d_base_sparse_mat), pointer :: agmold, acmold
+  ! other variables
+  logical, parameter :: dump=.false.
+  integer(psb_ipk_) :: info, i, j, nr, nrg
+  integer(psb_lpk_) :: ig
+  character(len=20) :: name,ch_err
+  character(len=40) :: fname
+
+  info=psb_success_
+
+
+  call psb_init(ctxt)
+  call psb_info(ctxt,iam,np)
+
+#ifdef PSB_HAVE_CUDA
+  call psb_cuda_init(ctxt)
+#endif
+#ifdef HAVE_RSB
+  call psb_rsb_init()
+#endif
+
+  if (iam < 0) then
+    ! This should not happen, but just in case
+    call psb_exit(ctxt)
+    stop
+  endif
+  if(psb_get_errstatus() /= 0) goto 9999
+  name='pdegenmv-cuda'
+  !
+  ! Hello world
+  !
+  if (iam == psb_root_) then
+    write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
+    write(*,*) 'This is the ',trim(name),' sample program'
+  end if
+#ifdef PSB_HAVE_CUDA
+  write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
+  write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName())
+#endif
+  !
+  ! get parameters
+  !
+  call get_parms(ctxt,acfmt,agfmt,idim,tnd)
+  call psb_init_timers()
+  !
+  ! allocate and fill in the coefficient matrix and initial vectors
+  !
+  call psb_barrier(ctxt)
+  t1 = psb_wtime()
+  call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3,tnd=tnd)
+  call psb_barrier(ctxt)
+  t2 = psb_wtime() - t1
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    ch_err='create_matrix'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+  if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2
+  if (iam == psb_root_) write(psb_out_unit,'(" ")')
+
+  if (dump) then
+    write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'pde',idim,'-',iam,'-',np,'.mtx'
+    call a%print(fname,head='PDEGEN test matrix')
+  end if
+
+  ! select the CPU side storage format requested by the user
+  select case(psb_toupper(acfmt))
+  case('ELL')
+    acmold => aell
+  case('HLL')
+    acmold => ahll
+  case('DIA')
+    acmold => adia
+  case('HDIA')
+    acmold => ahdia
+  case('CSR')
+    acmold => acsr
+  case('COO')
+    acmold => acoo
+#ifdef HAVE_RSB
+  case('RSB')
+    acmold => arsb
+#endif
+  case default
+    write(*,*) 'Unknown format defaulting to HLL'
+    acmold => ahll
+  end select
+  call a%cscnv(info,mold=acmold)
+  if ((info /= 0).or.(psb_get_errstatus()/=0)) then
+    write(0,*) 'From cscnv ',info
+    call psb_error()
+    stop
+  end if
+
+#ifdef PSB_HAVE_CUDA
+  ! select the GPU side storage format requested by the user
+  select case(psb_toupper(agfmt))
+  case('ELG')
+    agmold => aelg
+  case('HLG')
+    agmold => ahlg
+  case('HDIAG')
+    agmold => ahdiag
+  case('CSRG')
+    agmold => acsrg
+  case('DNSG')
+    agmold => adnsg
+#if PSB_CUDA_SHORT_VERSION <= 10
+  case('HYBG')
+    agmold => ahybg
+#endif
+  case default
+    write(*,*) 'Unknown format defaulting to HLG'
+    agmold => ahlg
+  end select
+  call a%cscnv(agpu,info,mold=agmold)
+  if ((info /= 0).or.(psb_get_errstatus()/=0)) then
+    write(0,*) 'From cscnv ',info
+    call psb_error()
+    stop
+  end if
+  call desc_a%cnv(mold=imold)
+
+  call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
+  call psb_geasb(xg,desc_a,info,scratch=.true.,mold=vmold)
+#endif
+  nr = desc_a%get_local_rows()
+  nrg = desc_a%get_global_rows()
+  call psb_geall(x0,desc_a,info)
+  do i=1, nr
+    call desc_a%l2g(i,ig,info)
+    ! Evaluate in double precision: the integer expression nrg**2 overflows
+    ! a 4-byte integer as soon as nrg exceeds 46340 rows.
+    x0(i) = done + (done*ig)/((done*nrg)**2)
+  end do
+  ! keep a COO copy so that every timed conversion starts from COO
+  call a%cscnv(aux_a,info,mold=acoo)
+  tcnvcsr = 0
+  tcnvgpu = 0
+  call psb_geall(x1,desc_a,info)
+  do j=1, ncnv
+    call aux_a%cscnv(a,info,mold=acoo)
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    call a%cscnv(info,mold=acmold)
+    t2 = psb_Wtime() -t1
+    call psb_amx(ctxt,t2)
+    tcnvcsr = tcnvcsr + t2
+    if (j==1) tcnvc1 = t2
+    call psb_geasb(x1,desc_a,info)
+    call xv%bld(x0)
+    call psb_geasb(bv,desc_a,info,scratch=.true.)
+
+#ifdef PSB_HAVE_CUDA
+
+    call aux_a%cscnv(agpu,info,mold=acoo)
+    call xg%bld(x0,mold=vmold)
+    call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    call agpu%cscnv(info,mold=agmold)
+    call psb_cuda_DeviceSync()
+    t2 = psb_Wtime() -t1
+    call psb_amx(ctxt,t2)
+    if (j==1) tcnvg1 = t2
+    tcnvgpu = tcnvgpu + t2
+#endif
+  end do
+
+
+  ! CPU products: matrix A, vectors XV and BV
+  call xv%set(x0)
+  call psb_barrier(ctxt)
+  t1 = psb_wtime()
+  do i=1,ntests
+    call psb_spmm(done,a,xv,dzero,bv,desc_a,info)
+  end do
+  call psb_barrier(ctxt)
+  t2 = psb_wtime() - t1
+  call psb_amx(ctxt,t2)
+
+#ifdef PSB_HAVE_CUDA
+  call xg%set(x0)
+
+  ! FIXME: cache flush needed here
+  x1 = bv%get_vect()
+  x2 = bg%get_vect()
+
+  ! "xGPU" products: matrix AGPU, input vector XV, output vector BG
+  call psb_barrier(ctxt)
+  tt1 = psb_wtime()
+  do i=1,ntests
+    call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info)
+    if ((info /= 0).or.(psb_get_errstatus()/=0)) then
+      write(0,*) 'From 1 spmm',info,i,ntests
+      call psb_error()
+      stop
+    end if
+
+  end do
+  call psb_cuda_DeviceSync()
+  call psb_barrier(ctxt)
+  tt2 = psb_wtime() - tt1
+  call psb_amx(ctxt,tt2)
+  x1 = bv%get_vect()
+  x2 = bg%get_vect()
+  nr = desc_a%get_local_rows()
+  eps = maxval(abs(x1(1:nr)-x2(1:nr)))
+  call psb_amx(ctxt,eps)
+  if (iam==0) write(*,*) 'Max diff on xGPU',eps
+
+  ! FIXME: cache flush needed here
+  ! "GPU" products: matrix AGPU, vectors XG and BG
+  call xg%set(x0)
+  call xg%sync()
+  call psb_barrier(ctxt)
+  gt1 = psb_wtime()
+  do i=1,ntests*ngpu
+    call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
+    ! For timing purposes we need to make sure all threads
+    ! in the device are done.
+    if ((info /= 0).or.(psb_get_errstatus()/=0)) then
+      write(0,*) 'From 2 spmm',info,i,ntests
+      call psb_error()
+      stop
+    end if
+
+  end do
+  call psb_cuda_DeviceSync()
+  call psb_barrier(ctxt)
+  gt2 = psb_wtime() - gt1
+  call psb_amx(ctxt,gt2)
+  call bg%sync()
+  x1 = bv%get_vect()
+  x2 = bg%get_vect()
+  tnv = psb_genrm2(bv,desc_a,info)
+  tng = psb_genrm2(bg,desc_a,info)
+  tdot = psb_gedot(bg,bg,desc_a,info)
+  write(0,*) ' bv ',tnv,' bg ',tng, ' dot ',tdot,eps,&
+       & dnrm2(desc_a%get_local_rows(),x2,1),&
+       & ddot(desc_a%get_local_rows(),x1,1,x2,1)
+  call psb_geaxpby(-done,bg,+done,bv,desc_a,info)
+  eps = psb_geamax(bv,desc_a,info)
+
+  call psb_amx(ctxt,t2)
+  eps = maxval(abs(x1(1:nr)-x2(1:nr)))
+  call psb_amx(ctxt,eps)
+  if (iam==0) write(*,*) 'Max diff on GPU',eps
+  if (dump) then
+    write(fname,'(a,i3.3,a,i3.3,a)')'XCPU-out-',iam,'-',np,'.mtx'
+    call mm_array_write(x1(1:nr),'Local part CPU',info,filename=fname)
+    write(fname,'(a,i3.3,a,i3.3,a)')'XGPU-out-',iam,'-',np,'.mtx'
+    call mm_array_write(x2(1:nr),'Local part GPU',info,filename=fname)
+  end if
+
+#endif
+  ! global figures for the report
+  annz = a%get_nzeros()
+  amatsize = a%sizeof()
+  descsize = psb_sizeof(desc_a)
+  call psb_sum(ctxt,nr)
+  call psb_sum(ctxt,annz)
+  call psb_sum(ctxt,amatsize)
+  call psb_sum(ctxt,descsize)
+
+  if (iam == psb_root_) then
+    write(psb_out_unit,&
+         & '("Matrix: ell1 ",i0)') idim
+    write(psb_out_unit,&
+         &'("Test on : ",i20," processors")') np
+    write(psb_out_unit,&
+         &'("Size of matrix : ",i20," ")') nr
+    write(psb_out_unit,&
+         &'("Number of nonzeros : ",i20," ")') annz
+    write(psb_out_unit,&
+         &'("Memory occupation : ",i20," ")') amatsize
+    flops = ntests*(2.d0*annz)
+    tflops = flops
+    gflops = flops * ngpu
+    write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt()
+#ifdef PSB_HAVE_CUDA
+    write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
+    write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
+         & tcnvc1
+    write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
+         & tcnvcsr
+    write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
+         & tcnvcsr/ncnv
+    write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
+         & tcnvg1
+    write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
+         & tcnvgpu
+    write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
+         & tcnvgpu/ncnv
+
+#endif
+    write(psb_out_unit,&
+         & '("Number of flops (",i0," prod) : ",F20.0," ")') &
+         & ntests,flops
+
+    flops = flops / (t2)
+#ifdef PSB_HAVE_CUDA
+    ! tt2 and gt2 are only assigned in CUDA builds; without this guard a
+    ! CPU-only build would divide by undefined values.
+    tflops = tflops / (tt2)
+    gflops = gflops / (gt2)
+#endif
+
+    write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')&
+         & ntests,t2
+    write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')&
+         & t2*1.d3/(1.d0*ntests)
+    write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')&
+         & flops/1.d6
+#ifdef PSB_HAVE_CUDA
+    write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')&
+         & ntests, tt2
+    write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')&
+         & tt2*1.d3/(1.d0*ntests)
+    write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')&
+         & tflops/1.d6
+
+    write(psb_out_unit,'("Time for ",i6," products (s) (GPU.) : ",F20.3)')&
+         & ngpu*ntests,gt2
+    write(psb_out_unit,'("Time per product (ms) (GPU.) : ",F20.3)')&
+         & gt2*1.d3/(1.d0*ntests*ngpu)
+    write(psb_out_unit,'("MFLOPS (GPU.) : ",F20.3)')&
+         & gflops/1.d6
+#endif
+    !
+    ! This computation assumes the data movement associated with CSR:
+    ! it is minimal in terms of coefficients. Other formats may either move
+    ! more data (padding etc.) or less data (if they can save on the indices).
+    !
+    nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+&
+         & annz*(psb_sizeof_dp + psb_sizeof_ip)
+    bdwdth = ntests*nbytes/(t2*1.d6)
+    write(psb_out_unit,*)
+    write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth
+#ifdef PSB_HAVE_CUDA
+    bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
+    write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth
+    bdwdth = psb_cuda_MemoryPeakBandwidth()
+    write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth
+#endif
+    write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
+    write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
+
+  end if
+  call psb_print_timers(ctxt)
+
+  !
+  ! cleanup storage and exit
+  !
+  call psb_gefree(bv,desc_a,info)
+  call psb_gefree(xv,desc_a,info)
+  call psb_spfree(a,desc_a,info)
+  call psb_cdfree(desc_a,info)
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    ch_err='free routine'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+#ifdef PSB_HAVE_CUDA
+  call psb_cuda_exit()
+#endif
+  call psb_exit(ctxt)
+  stop
+
+9999 continue
+  call psb_error(ctxt)
+
+contains
+  !
+  ! get iteration parameters from standard input
+  !
+  ! Process 0 prompts for and reads, in this order, the CPU side format,
+  ! the CUDA side format, the size of the discretization cube and the
+  ! comm/comp overlap flag; the four values are then broadcast with
+  ! psb_bcast and echoed by process 0.
+  !
+  subroutine get_parms(ctxt,acfmt,agfmt,idim,tnd)
+    type(psb_ctxt_type) :: ctxt
+    character(len=*) :: agfmt, acfmt
+    integer :: idim
+    logical :: tnd
+    integer :: np, iam
+    integer :: intbuf(10), ip
+
+    call psb_info(ctxt, iam, np)
+
+    if (iam == 0) then
+      write(*,*) 'CPU side format?'
+      read(psb_inp_unit,*) acfmt
+      write(*,*) 'CUDA side format?'
+      read(psb_inp_unit,*) agfmt
+      write(*,*) 'Size of discretization cube?'
+      read(psb_inp_unit,*) idim
+      write(*,*) 'Try comm/comp overlap?'
+      read(psb_inp_unit,*) tnd
+    endif
+    call psb_bcast(ctxt,acfmt)
+    call psb_bcast(ctxt,agfmt)
+    call psb_bcast(ctxt,idim)
+    call psb_bcast(ctxt,tnd)
+
+    if (iam == 0) then
+      write(psb_out_unit,'("Testing matrix : ell1")')
+      write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim
+      write(psb_out_unit,'("Number of processors : ",i0)')np
+      ! NOTE(review): the main program calls psb_gen_pde3d with
+      ! partition=3, so "BLOCK" looks stale here -- confirm and update.
+      write(psb_out_unit,'("Data distribution : BLOCK")')
+      write(psb_out_unit,'(" ")')
+      ! 3a: one record for the three items; a single "a" descriptor made
+      ! the format revert and emit one record per item.
+      write(psb_out_unit,'("Storage formats ",3a)') acfmt,' ',agfmt
+      write(psb_out_unit,'("Testing overlap ND ",l8)') tnd
+    end if
+    return
+
+  end subroutine get_parms
+
+end program pdgenmv
diff --git a/test/cudakern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90
new file mode 100644
index 00000000..7e828011
--- /dev/null
+++ b/test/cudakern/spdegenmv.F90
@@ -0,0 +1,998 @@
+!
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: spdegenmv.F90
+!
+! Program: pdegenmv
+! This sample program measures the performance of the matrix-vector product.
+! The matrix is generated in the same way as for the pdegen test case of
+! the main PSBLAS library.
+!
+!
+module psb_s_pde3d_mod
+
+
+  use psb_base_mod, only : psb_spk_, psb_ipk_, psb_lpk_, psb_desc_type,&
+       & psb_sspmat_type, psb_s_vect_type, szero,&
+       & psb_s_base_sparse_mat, psb_s_base_vect_type, &
+       & psb_i_base_vect_type, psb_l_base_vect_type
+
+  ! Abstract signature of the optional source-term function f(x,y,z)
+  ! accepted by psb_s_gen_pde3d.
+  interface
+    function s_func_3d(x,y,z) result(val)
+      import :: psb_spk_
+      real(psb_spk_), intent(in) :: x,y,z
+      real(psb_spk_) :: val
+    end function s_func_3d
+  end interface
+
+  interface psb_gen_pde3d
+    module procedure psb_s_gen_pde3d
+  end interface psb_gen_pde3d
+
+contains
+
+  ! Default source term: identically zero. Used when the caller of
+  ! psb_s_gen_pde3d does not supply F.
+  function s_null_func_3d(x,y,z) result(val)
+
+    real(psb_spk_), intent(in) :: x,y,z
+    real(psb_spk_) :: val
+
+    val = szero
+
+  end function s_null_func_3d
+  !
+  ! functions parametrizing the differential equation
+  !
+  ! b1, b2, b3: coefficients of the first-derivative terms
+  function b1(x,y,z)
+    use psb_base_mod, only : psb_spk_, sone, szero
+    implicit none
+    real(psb_spk_) :: b1
+    real(psb_spk_), intent(in) :: x,y,z
+    b1=sone/sqrt((3*sone))
+  end function b1
+  function b2(x,y,z)
+    use psb_base_mod, only : psb_spk_, sone, szero
+    implicit none
+    real(psb_spk_) :: b2
+    real(psb_spk_), intent(in) :: x,y,z
+    b2=sone/sqrt((3*sone))
+  end function b2
+  function b3(x,y,z)
+    use psb_base_mod, only : psb_spk_, sone, szero
+    implicit none
+    real(psb_spk_) :: b3
+    real(psb_spk_), intent(in) :: x,y,z
+    b3=sone/sqrt((3*sone))
+  end function b3
+  ! c: coefficient of the zero-order term
+  function c(x,y,z)
+    use psb_base_mod, only : psb_spk_, sone, szero
+    implicit none
+    real(psb_spk_) :: c
+    real(psb_spk_), intent(in) :: x,y,z
+    c=szero
+  end function c
+  ! a1, a2, a3: coefficients of the second-derivative terms
+  function a1(x,y,z)
+    use psb_base_mod, only : psb_spk_, sone, szero
+    implicit none
+    real(psb_spk_) :: a1
+    real(psb_spk_), intent(in) :: x,y,z
+    a1=sone/80
+  end function a1
+  function a2(x,y,z)
+    use psb_base_mod, only : psb_spk_, sone, szero
+    implicit none
+    real(psb_spk_) :: a2
+    real(psb_spk_), intent(in) :: x,y,z
+    a2=sone/80
+  end function a2
+  function a3(x,y,z)
+    use psb_base_mod, only : psb_spk_, sone, szero
+    implicit none
+    real(psb_spk_) :: a3
+    real(psb_spk_), intent(in) :: x,y,z
+    a3=sone/80
+  end function a3
+  ! g: Dirichlet boundary value; 1 on the face x=1, exp(y**2-z**2) on
+  ! the face x=0, and 0 everywhere else.
+  function g(x,y,z)
+    use psb_base_mod, only : psb_spk_, sone, szero
+    implicit none
+    real(psb_spk_) :: g
+    real(psb_spk_), intent(in) :: x,y,z
+    g = szero
+    if (x == sone) then
+      g = sone
+    else if (x == szero) then
+      g = exp(y**2-z**2)
+    end if
+  end function g
+
+
+  !
+  ! subroutine to allocate and fill in the coefficient matrix and
+  ! the rhs.
+  !
+  ! Arguments:
+  !   ctxt       communication context
+  !   idim       grid points per dimension; the matrix has idim**3 rows
+  !   a          sparse matrix, allocated, filled and assembled here
+  !   bv         right-hand side built from F and the boundary values
+  !   xv         initial guess, filled with zeros
+  !   desc_a     communication descriptor, built here
+  !   afmt       storage format handed to psb_spasb when AMOLD is absent
+  !   info       return code, psb_success_ when everything went fine
+  !   f          optional source term; defaults to s_null_func_3d
+  !   amold, vmold, imold
+  !              optional molds forwarded to psb_spasb / psb_geasb /
+  !              psb_cdasb
+  !   partition  optional data distribution: 1 = BLOCK, 2 = user-defined
+  !              through IV, 3 = 3D; invalid or absent values select 3
+  !   nrl        optional number of local rows (partition 1 only)
+  !   iv         owner process of each global row (required by partition 2)
+  !
+  subroutine psb_s_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
+       & f,amold,vmold,imold,partition,nrl,iv)
+    use psb_base_mod
+    use psb_util_mod
+    !
+    ! Discretizes the partial differential equation
+    !
+    !   a1 dd(u)   a2 dd(u)   a3 dd(u)   b1 d(u)   b2 d(u)   b3 d(u)
+    ! - ------   - ------   - ------   + -----   + ------  + ------  + c u = f
+    !    dxdx       dydy       dzdz        dx        dy        dz
+    !
+    ! with Dirichlet boundary conditions
+    !   u = g
+    !
+    ! on the unit cube 0<=x,y,z<=1.
+    !
+    !
+    ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation.
+    !
+    implicit none
+    integer(psb_ipk_) :: idim
+    type(psb_sspmat_type) :: a
+    type(psb_s_vect_type) :: xv,bv
+    type(psb_desc_type) :: desc_a
+    type(psb_ctxt_type) :: ctxt
+    integer(psb_ipk_) :: info
+    character(len=*) :: afmt
+    procedure(s_func_3d), optional :: f
+    class(psb_s_base_sparse_mat), optional :: amold
+    class(psb_s_base_vect_type), optional :: vmold
+    class(psb_i_base_vect_type), optional :: imold
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
+
+    ! Local variables.
+
+    integer(psb_ipk_), parameter :: nb=20
+    real(psb_spk_) :: zt(nb),x,y,z
+    integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
+    integer(psb_lpk_) :: m,n,glob_row,nt
+    integer(psb_ipk_) :: ix,iy,iz
+    ! For 3D partition
+    ! Note: integer control variables going directly into an MPI call
+    ! must be 4 bytes, i.e. psb_mpk_
+    integer(psb_mpk_) :: npdims(3), npp, minfo
+    integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
+    integer(psb_ipk_) :: icoeff
+    integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
+    real(psb_spk_), allocatable :: val(:)
+    ! deltah dimension of each grid cell
+    ! deltat discretization time
+    real(psb_spk_) :: deltah, sqdeltah, deltah2
+    real(psb_dpk_) :: t0, t1, tasb, talc, ttot, tgen, tcdasb
+    integer(psb_ipk_) :: err_act
+    procedure(s_func_3d), pointer :: f_
+    character(len=20) :: name, ch_err,tmpfmt
+
+    info = psb_success_
+    name = 'create_matrix'
+    call psb_erractionsave(err_act)
+
+    call psb_info(ctxt, iam, np)
+
+
+    if (present(f)) then
+      f_ => f
+    else
+      f_ => s_null_func_3d
+    end if
+
+    deltah = sone/(idim+2)
+    sqdeltah = deltah*deltah
+    deltah2 = (2*sone)* deltah
+
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+
+    ! initialize array descriptor and sparse matrix storage. provide an
+    ! estimate of the number of non zeroes
+
+    m = (1_psb_lpk_*idim)*idim*idim
+    n = m
+    nnz = ((n*7)/(np))
+    if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
+    t0 = psb_wtime()
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition
+      if (present(nrl)) then
+        nr = nrl
+      else
+        !
+        ! Using a simple BLOCK distribution.
+        !
+        nt = (m+np-1)/np
+        nr = max(0,min(nt,m-(iam*nt)))
+      end if
+
+      nt = nr
+      call psb_sum(ctxt,nt)
+      if (nt /= m) then
+        write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+        info = -1
+        call psb_barrier(ctxt)
+        call psb_abort(ctxt)
+        return
+      end if
+
+      !
+      ! First example of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      !
+      call psb_cdall(ctxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A partition defined by the user through IV
+
+      if (present(iv)) then
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ctxt)
+          call psb_abort(ctxt)
+          return
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
+        call psb_barrier(ctxt)
+        call psb_abort(ctxt)
+        return
+      end if
+
+      !
+      ! Second example of use of CDALL: specify for each row the
+      ! process that owns it
+      !
+      call psb_cdall(ctxt,desc_a,info,vg=iv)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 3-dimensional partition
+
+      ! A nifty MPI function will split the process list.
+      ! Every integer argument goes through a psb_mpk_ variable so that
+      ! the call stays correct when psb_ipk_ is not the MPI integer kind.
+      npp = np
+      npdims = 0
+      call mpi_dims_create(npp,3_psb_mpk_,npdims,minfo)
+      info = minfo
+      npx = npdims(1)
+      npy = npdims(2)
+      npz = npdims(3)
+
+      allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+      ! We can reuse idx2ijk for process indices as well.
+      call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+      ! Now let's split the 3D cube in hexahedra
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+      call dist1Didx(bndz,idim,npz)
+      mynz = bndz(iamz+1)-bndz(iamz)
+
+      ! How many indices do I own?
+      nlr = mynx*myny*mynz
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndy(iamy+1)-1
+          do k=bndz(iamz),bndz(iamz+1)-1
+            nr = nr + 1
+            call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+          end do
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny,mynz
+        info = -1
+        call psb_barrier(ctxt)
+        call psb_abort(ctxt)
+        ! same exit path as the other initialization errors
+        return
+      end if
+
+      !
+      ! Third example of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      !
+      call psb_cdall(ctxt,desc_a,info,vl=myidx)
+
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ctxt)
+      call psb_abort(ctxt)
+      return
+    end select
+
+
+    if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,&
+         & dupl=psb_dupl_err_)
+    ! define rhs from boundary conditions; also build initial guess
+    if (info == psb_success_) call psb_geall(xv,desc_a,info)
+    if (info == psb_success_) call psb_geall(bv,desc_a,info)
+
+    call psb_barrier(ctxt)
+    talc = psb_wtime()-t0
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='allocation rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    ! we build an auxiliary matrix consisting of one row at a
+    ! time; just a small matrix. might be extended to generate
+    ! a bunch of rows per call.
+    !
+    allocate(val(20*nb),irow(20*nb),&
+         &icol(20*nb),stat=info)
+    if (info /= psb_success_ ) then
+      info=psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
+
+
+    ! loop over rows belonging to current process in a block
+    ! distribution.
+
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    do ii=1, nlr,nb
+      ib = min(nb,nlr-ii+1)
+      icoeff = 1
+      do k=1,ib
+        i=ii+k-1
+        ! local matrix pointer
+        glob_row=myidx(i)
+        ! compute gridpoint coordinates
+        call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+        ! x, y, z coordinates
+        x = (ix-1)*deltah
+        y = (iy-1)*deltah
+        z = (iz-1)*deltah
+        zt(k) = f_(x,y,z)
+        ! internal point: build discretization
+        !
+        ! Each stencil coefficient is first stored in val(icoeff); when
+        ! the neighbour lies on the boundary the coefficient is folded
+        ! into the rhs and the slot is reused by the next term.
+        !
+        ! term depending on (x-1,y,z)
+        !
+        val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
+        if (ix == 1) then
+          zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+        ! term depending on (x,y-1,z)
+        val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
+        if (iy == 1) then
+          zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+        ! term depending on (x,y,z-1)
+        val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
+        if (iz == 1) then
+          zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+
+        ! term depending on (x,y,z)
+        val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
+             & + c(x,y,z)
+        call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
+        irow(icoeff) = glob_row
+        icoeff = icoeff+1
+        ! term depending on (x,y,z+1)
+        val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
+        if (iz == idim) then
+          zt(k) = g(x,y,sone)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+        ! term depending on (x,y+1,z)
+        val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
+        if (iy == idim) then
+          zt(k) = g(x,sone,z)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+        ! term depending on (x+1,y,z)
+        val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
+        if (ix==idim) then
+          zt(k) = g(sone,y,z)*(-val(icoeff)) + zt(k)
+        else
+          call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff = icoeff+1
+        endif
+
+      end do
+      call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
+      if(info /= psb_success_) exit
+      call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
+      if(info /= psb_success_) exit
+      zt(:)=szero
+      call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
+      if(info /= psb_success_) exit
+    end do
+
+    tgen = psb_wtime()-t1
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='insert rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    deallocate(val,irow,icol)
+
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    call psb_cdasb(desc_a,info,mold=imold)
+    tcdasb = psb_wtime()-t1
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    if (info == psb_success_) then
+      if (present(amold)) then
+        call psb_spasb(a,desc_a,info,mold=amold)
+      else
+        call psb_spasb(a,desc_a,info,afmt=afmt)
+      end if
+    end if
+    call psb_barrier(ctxt)
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='asb rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+    if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold)
+    if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold)
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='asb rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+    tasb = psb_wtime()-t1
+    call psb_barrier(ctxt)
+    ttot = psb_wtime() - t0
+
+    ! Report the slowest process for every phase
+    call psb_amx(ctxt,talc)
+    call psb_amx(ctxt,tgen)
+    call psb_amx(ctxt,tcdasb)
+    call psb_amx(ctxt,tasb)
+    call psb_amx(ctxt,ttot)
+    if(iam == psb_root_) then
+      tmpfmt = a%get_fmt()
+      write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')&
+           & tmpfmt
+      write(psb_out_unit,'("-allocation time : ",es12.5)') talc
+      write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen
+      write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb
+      write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb
+      write(psb_out_unit,'("-total time : ",es12.5)') ttot
+
+    end if
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+    return
+  end subroutine psb_s_gen_pde3d
+
+
+end module psb_s_pde3d_mod
+
+
+program pdgenmv
+  use psb_base_mod
+  use psb_util_mod
+  use psb_ext_mod
+#ifdef PSB_HAVE_CUDA
+  use psb_cuda_mod
+#endif
+  use psb_s_pde3d_mod
+  implicit none
+
+  ! input parameters
+  character(len=5) :: acfmt, agfmt
+  integer :: idim
+
+  !
miscellaneous
+  real(psb_spk_), parameter :: one = 1.e0
+  real(psb_dpk_) :: t1, t2, tprec, flops, tflops,&
+       & tt1, tt2, gt1, gt2, gflops, bdwdth,&
+       & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
+
+  ! sparse matrix and preconditioner
+  type(psb_sspmat_type) :: a, agpu, aux_a
+  ! descriptor
+  type(psb_desc_type) :: desc_a
+  ! dense matrices
+  type(psb_s_vect_type), target :: xv,bv, xg, bg
+#ifdef PSB_HAVE_CUDA
+  type(psb_s_vect_cuda) :: vmold
+  type(psb_i_vect_cuda) :: imold
+#endif
+  real(psb_spk_), allocatable :: x1(:), x2(:), x0(:)
+  ! blacs parameters
+  type(psb_ctxt_type) :: ctxt
+  integer :: iam, np
+
+  ! solver parameters
+  integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes
+  real(psb_spk_) :: err, eps, tnv, tng,tdot, snrm2,sdot
+  ! ntests: timed products on the CPU; ngpu: multiplier for the number
+  ! of timed products on the device; ncnv: timed format conversions
+  integer, parameter :: ntests=8, ngpu=2, ncnv=3
+  type(psb_s_coo_sparse_mat), target :: acoo
+  type(psb_s_csr_sparse_mat), target :: acsr
+  type(psb_s_ell_sparse_mat), target :: aell
+  type(psb_s_hll_sparse_mat), target :: ahll
+  type(psb_s_dia_sparse_mat), target :: adia
+  type(psb_s_hdia_sparse_mat), target :: ahdia
+#ifdef PSB_HAVE_CUDA
+  type(psb_s_cuda_elg_sparse_mat), target :: aelg
+  type(psb_s_cuda_csrg_sparse_mat), target :: acsrg
+#if PSB_CUDA_SHORT_VERSION <= 10
+  type(psb_s_cuda_hybg_sparse_mat), target :: ahybg
+#endif
+  type(psb_s_cuda_hlg_sparse_mat), target :: ahlg
+  type(psb_s_cuda_hdiag_sparse_mat), target :: ahdiag
+  type(psb_s_cuda_dnsg_sparse_mat), target :: adnsg
+#endif
+  class(psb_s_base_sparse_mat), pointer :: agmold, acmold
+  ! other variables
+  logical, parameter :: dump=.false.
+  integer(psb_ipk_) :: info, i, j, nr, nrg
+  integer(psb_lpk_) :: ig
+  character(len=20) :: name,ch_err
+  character(len=40) :: fname
+
+  info=psb_success_
+
+
+  call psb_init(ctxt)
+  call psb_info(ctxt,iam,np)
+
+#ifdef PSB_HAVE_CUDA
+  call psb_cuda_init(ctxt)
+#endif
+#ifdef HAVE_RSB
+  call psb_rsb_init()
+#endif
+
+  if (iam < 0) then
+    ! This should not happen, but just in case
+    call psb_exit(ctxt)
+    stop
+  endif
+  if(psb_get_errstatus() /= 0) goto 9999
+  name='pdegenmv-cuda'
+  !
+  ! Hello world
+  !
+  if (iam == psb_root_) then
+    write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
+    write(*,*) 'This is the ',trim(name),' sample program'
+  end if
+#ifdef PSB_HAVE_CUDA
+  write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
+  write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName())
+#endif
+  !
+  ! get parameters
+  !
+  call get_parms(ctxt,acfmt,agfmt,idim)
+
+  !
+  ! allocate and fill in the coefficient matrix and initial vectors
+  !
+  call psb_barrier(ctxt)
+  t1 = psb_wtime()
+  call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3)
+  call psb_barrier(ctxt)
+  t2 = psb_wtime() - t1
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    ch_err='create_matrix'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+  if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2
+  if (iam == psb_root_) write(psb_out_unit,'(" ")')
+
+  if (dump) then
+    write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'pde',idim,'-',iam,'-',np,'.mtx'
+    call a%print(fname,head='PDEGEN test matrix')
+  end if
+
+  ! Select the CPU-side storage format requested by the user
+  select case(psb_toupper(acfmt))
+  case('ELL')
+    acmold => aell
+  case('HLL')
+    acmold => ahll
+  case('DIA')
+    acmold => adia
+  case('HDIA')
+    acmold => ahdia
+  case('CSR')
+    acmold => acsr
+  case('COO')
+    acmold => acoo
+#ifdef HAVE_RSB
+  ! NOTE(review): arsb is not declared anywhere in this program, so this
+  ! branch cannot compile when HAVE_RSB is defined - confirm the intended
+  ! type and add the declaration.
+  case('RSB')
+    acmold => arsb
+#endif
+  case default
+    write(*,*) 'Unknown format defaulting to HLL'
+    acmold => ahll
+  end select
+  call a%cscnv(info,mold=acmold)
+  if ((info /= 0).or.(psb_get_errstatus()/=0)) then
+    write(0,*) 'From cscnv ',info
+    call psb_error()
+    stop
+  end if
+
+#ifdef PSB_HAVE_CUDA
+  ! Select the device-side storage format requested by the user
+  select case(psb_toupper(agfmt))
+  case('ELG')
+    agmold => aelg
+  case('HLG')
+    agmold => ahlg
+  case('HDIAG')
+    agmold => ahdiag
+  case('CSRG')
+    agmold => acsrg
+  case('DNSG')
+    agmold => adnsg
+#if PSB_CUDA_SHORT_VERSION <= 10
+  case('HYBG')
+    agmold => ahybg
+#endif
+  case default
+    write(*,*) 'Unknown format defaulting to HLG'
+    agmold => ahlg
+  end select
+  call a%cscnv(agpu,info,mold=agmold)
+  if ((info /= 0).or.(psb_get_errstatus()/=0)) then
+    write(0,*) 'From cscnv ',info
+    call psb_error()
+    stop
+  end if
+  call desc_a%cnv(mold=imold)
+
+  call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
+  call psb_geasb(xg,desc_a,info,scratch=.true.,mold=vmold)
+#endif
+  nr = desc_a%get_local_rows()
+  nrg = desc_a%get_global_rows()
+  call psb_geall(x0,desc_a,info)
+  do i=1, nr
+    call desc_a%l2g(i,ig,info)
+    ! The square is taken in floating point: nrg**2 in integer
+    ! arithmetic overflows a 4-byte integer as soon as nrg > 46340.
+    x0(i) = sone + real(ig,psb_spk_)/(real(nrg,psb_spk_)**2)
+  end do
+  call a%cscnv(aux_a,info,mold=acoo)
+  tcnvcsr = 0
+  tcnvgpu = 0
+  call psb_geall(x1,desc_a,info)
+  ! Time the conversion from COO to the chosen formats, ncnv times
+  do j=1, ncnv
+    call aux_a%cscnv(a,info,mold=acoo)
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    call a%cscnv(info,mold=acmold)
+    t2 = psb_Wtime() -t1
+    call psb_amx(ctxt,t2)
+    tcnvcsr = tcnvcsr + t2
+    if (j==1) tcnvc1 = t2
+    call psb_geasb(x1,desc_a,info)
+    call xv%bld(x0)
+    call psb_geasb(bv,desc_a,info,scratch=.true.)
+
+#ifdef PSB_HAVE_CUDA
+
+    call aux_a%cscnv(agpu,info,mold=acoo)
+    call xg%bld(x0,mold=vmold)
+    call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    call agpu%cscnv(info,mold=agmold)
+    call psb_cuda_DeviceSync()
+    t2 = psb_Wtime() -t1
+    call psb_amx(ctxt,t2)
+    if (j==1) tcnvg1 = t2
+    tcnvgpu = tcnvgpu + t2
+#endif
+  end do
+
+
+  ! CPU matrix, CPU vectors
+  call xv%set(x0)
+  call psb_barrier(ctxt)
+  t1 = psb_wtime()
+  do i=1,ntests
+    call psb_spmm(sone,a,xv,szero,bv,desc_a,info)
+  end do
+  call psb_barrier(ctxt)
+  t2 = psb_wtime() - t1
+  call psb_amx(ctxt,t2)
+
+#ifdef PSB_HAVE_CUDA
+  call xg%set(x0)
+
+  ! FIXME: cache flush needed here
+  x1 = bv%get_vect()
+  x2 = bg%get_vect()
+
+  ! Device matrix, input vector xv (the "xGPU" timing)
+  call psb_barrier(ctxt)
+  tt1 = psb_wtime()
+  do i=1,ntests
+    call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info)
+    if ((info /= 0).or.(psb_get_errstatus()/=0)) then
+      write(0,*) 'From 1 spmm',info,i,ntests
+      call psb_error()
+      stop
+    end if
+
+  end do
+  call psb_cuda_DeviceSync()
+  call psb_barrier(ctxt)
+  tt2 = psb_wtime() - tt1
+  call psb_amx(ctxt,tt2)
+  x1 = bv%get_vect()
+  x2 = bg%get_vect()
+  nr = desc_a%get_local_rows()
+  eps = maxval(abs(x1(1:nr)-x2(1:nr)))
+  call psb_amx(ctxt,eps)
+  if (iam==0) write(*,*) 'Max diff on xGPU',eps
+
+
+  ! FIXME: cache flush needed here
+  ! Device matrix, input vector xg (the "GPU" timing)
+  call xg%set(x0)
+  call xg%sync()
+  call psb_barrier(ctxt)
+  gt1 = psb_wtime()
+  do i=1,ntests*ngpu
+    call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info)
+    ! For timing purposes we need to make sure all threads
+    ! in the device are done.
+    if ((info /= 0).or.(psb_get_errstatus()/=0)) then
+      write(0,*) 'From 2 spmm',info,i,ntests
+      call psb_error()
+      stop
+    end if
+
+  end do
+  call psb_cuda_DeviceSync()
+  call psb_barrier(ctxt)
+  gt2 = psb_wtime() - gt1
+  call psb_amx(ctxt,gt2)
+  call bg%sync()
+  x1 = bv%get_vect()
+  x2 = bg%get_vect()
+  tnv = psb_genrm2(bv,desc_a,info)
+  tng = psb_genrm2(bg,desc_a,info)
+  tdot = psb_gedot(bg,bg,desc_a,info)
+  write(0,*) ' bv ',tnv,' bg ',tng, ' dot ',tdot,eps,&
+       & snrm2(desc_a%get_local_rows(),x2,1),&
+       & sdot(desc_a%get_local_rows(),x1,1,x2,1)
+  call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info)
+  eps = psb_geamax(bv,desc_a,info)
+
+  call psb_amx(ctxt,t2)
+  eps = maxval(abs(x1(1:nr)-x2(1:nr)))
+  call psb_amx(ctxt,eps)
+  if (iam==0) write(*,*) 'Max diff on GPU',eps
+  if (dump) then
+    write(fname,'(a,i3.3,a,i3.3,a)')'XCPU-out-',iam,'-',np,'.mtx'
+    call mm_array_write(x1(1:nr),'Local part CPU',info,filename=fname)
+    write(fname,'(a,i3.3,a,i3.3,a)')'XGPU-out-',iam,'-',np,'.mtx'
+    call mm_array_write(x2(1:nr),'Local part GPU',info,filename=fname)
+  end if
+#endif
+  annz = a%get_nzeros()
+  amatsize = a%sizeof()
+  descsize = psb_sizeof(desc_a)
+  ! After the reductions nr holds the sum of the local row counts
+  call psb_sum(ctxt,nr)
+  call psb_sum(ctxt,annz)
+  call psb_sum(ctxt,amatsize)
+  call psb_sum(ctxt,descsize)
+
+  if (iam == psb_root_) then
+    write(psb_out_unit,&
+         & '("Matrix: ell1 ",i0)') idim
+    write(psb_out_unit,&
+         &'("Test on : ",i20," processors")') np
+    write(psb_out_unit,&
+         &'("Size of matrix : ",i20," ")') nr
+    write(psb_out_unit,&
+         &'("Number of nonzeros : ",i20," ")') annz
+    write(psb_out_unit,&
+         &'("Memory occupation : ",i20," ")') amatsize
+    flops = ntests*(2.d0*annz)
+    tflops = flops
+    gflops = flops * ngpu
+    write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt()
+#ifdef PSB_HAVE_CUDA
+    write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
+    write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
+         & tcnvc1
+    write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
+         & tcnvcsr
+    write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
+         & tcnvcsr/ncnv
+    write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
+         & tcnvg1
+    write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
+         & tcnvgpu
+    write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
+         & tcnvgpu/ncnv
+
+#endif
+    write(psb_out_unit,&
+         & '("Number of flops (",i0," prod) : ",F20.0," ")') &
+         & ntests,flops
+
+    flops = flops / (t2)
+#ifdef PSB_HAVE_CUDA
+    ! tt2 and gt2 are only measured in CUDA builds; dividing by them
+    ! otherwise would read uninitialized variables.
+    tflops = tflops / (tt2)
+    gflops = gflops / (gt2)
+#endif
+
+    write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')&
+         & ntests,t2
+    write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')&
+         & t2*1.d3/(1.d0*ntests)
+    write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')&
+         & flops/1.d6
+#ifdef PSB_HAVE_CUDA
+    write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')&
+         & ntests, tt2
+    write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')&
+         & tt2*1.d3/(1.d0*ntests)
+    write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')&
+         & tflops/1.d6
+
+    write(psb_out_unit,'("Time for ",i6," products (s) (GPU.) : ",F20.3)')&
+         & ngpu*ntests,gt2
+    write(psb_out_unit,'("Time per product (ms) (GPU.) : ",F20.3)')&
+         & gt2*1.d3/(1.d0*ntests*ngpu)
+    write(psb_out_unit,'("MFLOPS (GPU.) : ",F20.3)')&
+         & gflops/1.d6
+#endif
+    !
+    ! This computation assumes the data movement associated with CSR:
+    ! it is minimal in terms of coefficients. Other formats may either move
+    ! more data (padding etc.) or less data (if they can save on the indices).
+    !
+    ! nr is widened first so that the product is evaluated in psb_epk_
+    nbytes = int(nr,psb_epk_)*(2*psb_sizeof_sp + psb_sizeof_ip)+&
+         & annz*(psb_sizeof_sp + psb_sizeof_ip)
+    bdwdth = ntests*nbytes/(t2*1.d6)
+    write(psb_out_unit,*)
+    write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth
+#ifdef PSB_HAVE_CUDA
+    bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
+    write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth
+    bdwdth = psb_cuda_MemoryPeakBandwidth()
+    write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth
+#endif
+    write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
+    write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
+
+  end if
+
+  !
+  ! cleanup storage and exit
+  !
+  call psb_gefree(bv,desc_a,info)
+  call psb_gefree(xv,desc_a,info)
+  call psb_spfree(a,desc_a,info)
+  call psb_cdfree(desc_a,info)
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    ch_err='free routine'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+#ifdef PSB_HAVE_CUDA
+  call psb_cuda_exit()
+#endif
+  call psb_exit(ctxt)
+  stop
+
+9999 continue
+  call psb_error(ctxt)
+
+contains
+  !
+  ! get iteration parameters from standard input
+  !
+  ! Process 0 reads the CPU format, the CUDA format and the grid size
+  ! from psb_inp_unit; the three values are then broadcast to all
+  ! processes.
+  !
+  subroutine get_parms(ctxt,acfmt,agfmt,idim)
+    type(psb_ctxt_type) :: ctxt
+    character(len=*) :: agfmt, acfmt
+    integer :: idim
+    integer :: np, iam
+
+    call psb_info(ctxt, iam, np)
+
+    if (iam == 0) then
+      write(*,*) 'CPU side format?'
+      read(psb_inp_unit,*) acfmt
+      write(*,*) 'CUDA side format?'
+      read(psb_inp_unit,*) agfmt
+      write(*,*) 'Size of discretization cube?'
+      read(psb_inp_unit,*) idim
+    endif
+    call psb_bcast(ctxt,acfmt)
+    call psb_bcast(ctxt,agfmt)
+    call psb_bcast(ctxt,idim)
+
+    if (iam == 0) then
+      write(psb_out_unit,'("Testing matrix : ell1")')
+      write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim
+      write(psb_out_unit,'("Number of processors : ",i0)')np
+      ! NOTE(review): the main program requests partition=3 from
+      ! psb_gen_pde3d, so the BLOCK label below looks stale - confirm.
+      write(psb_out_unit,'("Data distribution : BLOCK")')
+      write(psb_out_unit,'(" ")')
+    end if
+    return
+
+  end subroutine get_parms
+
+
+end program pdgenmv
diff --git a/test/fileread/Makefile b/test/fileread/Makefile index 132a2e15..a7afaabc 100644 --- a/test/fileread/Makefile +++ b/test/fileread/Makefile @@ -6,7 +6,7 @@ include $(INCDIR)/Make.inc.psblas # Libraries used # LIBDIR=$(INSTALLDIR)/lib/ -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).
diff --git a/test/fileread/getp.f90 b/test/fileread/getp.f90 index 82bef762..db8207b6 100644 --- a/test/fileread/getp.f90 +++ b/test/fileread/getp.f90 @@ -198,7 +198,7 @@ contains read(inp_unit,*) kmethd read(inp_unit,*) ptype read(inp_unit,*) afmt - read(inp_unit,*) ipart + read(inp_unit,*) part call psb_bcast(ctxt,mtrx_file) diff --git a/test/fileread/psb_cf_sample.f90 b/test/fileread/psb_cf_sample.f90 index 3c0ce8f9..9fd965ae 100644 --- a/test/fileread/psb_cf_sample.f90 +++ b/test/fileread/psb_cf_sample.f90 @@ -32,7 +32,7 @@ program psb_cf_sample use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_util_mod use getp implicit none @@ -185,7 +185,7 @@ program psb_cf_sample endif call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) + call distr_mtpart(ione*psb_root_,ctxt) call getv_mtpart(ivg) call psb_matdist(aux_a, a, ctxt,desc_a,info,fmt=afmt,vg=ivg) @@ -194,7 +194,7 @@ program psb_cf_sample call psb_matdist(aux_a, a, ctxt,desc_a,info,fmt=afmt,parts=part_block) end select - call psb_scatter(b_col_glob,b_col,desc_a,info,root=psb_root_) + call psb_scatter(b_col_glob,b_col,desc_a,info,root=ione*psb_root_) call psb_geall(x_col,desc_a,info) call x_col%zero() call psb_geasb(x_col,desc_a,info) @@ -274,9 +274,9 @@ program psb_cf_sample & desc_a%get_fmt() end if - call psb_gather(x_col_glob,x_col,desc_a,info,root=psb_root_) + call psb_gather(x_col_glob,x_col,desc_a,info,root=ione*psb_root_) if (info == psb_success_) & - & call psb_gather(r_col_glob,r_col,desc_a,info,root=psb_root_) + & call psb_gather(r_col_glob,r_col,desc_a,info,root=ione*psb_root_) if (info /= psb_success_) goto 9999 if (iam == psb_root_) then write(psb_err_unit,'(" ")') diff --git a/test/fileread/psb_df_sample.f90 b/test/fileread/psb_df_sample.f90 index 25a121a4..b07e4ede 100644 --- a/test/fileread/psb_df_sample.f90 +++ b/test/fileread/psb_df_sample.f90 @@ -32,7 +32,7 @@ program psb_df_sample use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use 
psb_linsolve_mod use psb_util_mod use getp implicit none @@ -185,7 +185,7 @@ program psb_df_sample endif call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) + call distr_mtpart(ione*psb_root_,ctxt) call getv_mtpart(ivg) call psb_matdist(aux_a, a, ctxt,desc_a,info,fmt=afmt,vg=ivg) @@ -194,7 +194,7 @@ program psb_df_sample call psb_matdist(aux_a, a, ctxt,desc_a,info,fmt=afmt,parts=part_block) end select - call psb_scatter(b_col_glob,b_col,desc_a,info,root=psb_root_) + call psb_scatter(b_col_glob,b_col,desc_a,info,root=ione*psb_root_) call psb_geall(x_col,desc_a,info) call x_col%zero() call psb_geasb(x_col,desc_a,info) @@ -276,9 +276,9 @@ program psb_df_sample & desc_a%get_fmt() end if - call psb_gather(x_col_glob,x_col,desc_a,info,root=psb_root_) + call psb_gather(x_col_glob,x_col,desc_a,info,root=ione*psb_root_) if (info == psb_success_) & - & call psb_gather(r_col_glob,r_col,desc_a,info,root=psb_root_) + & call psb_gather(r_col_glob,r_col,desc_a,info,root=ione*psb_root_) if (info /= psb_success_) goto 9999 if (iam == psb_root_) then write(psb_err_unit,'(" ")') diff --git a/test/fileread/psb_sf_sample.f90 b/test/fileread/psb_sf_sample.f90 index 8d9ccb0a..289df307 100644 --- a/test/fileread/psb_sf_sample.f90 +++ b/test/fileread/psb_sf_sample.f90 @@ -32,7 +32,7 @@ program psb_sf_sample use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_util_mod use getp implicit none @@ -185,7 +185,7 @@ program psb_sf_sample endif call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) + call distr_mtpart(ione*psb_root_,ctxt) call getv_mtpart(ivg) call psb_matdist(aux_a, a, ctxt,desc_a,info,fmt=afmt,vg=ivg) @@ -194,7 +194,7 @@ program psb_sf_sample call psb_matdist(aux_a, a, ctxt,desc_a,info,fmt=afmt,parts=part_block) end select - call psb_scatter(b_col_glob,b_col,desc_a,info,root=psb_root_) + call psb_scatter(b_col_glob,b_col,desc_a,info,root=ione*psb_root_) call psb_geall(x_col,desc_a,info) call x_col%zero() call 
psb_geasb(x_col,desc_a,info) @@ -276,9 +276,9 @@ program psb_sf_sample & desc_a%get_fmt() end if - call psb_gather(x_col_glob,x_col,desc_a,info,root=psb_root_) + call psb_gather(x_col_glob,x_col,desc_a,info,root=ione*psb_root_) if (info == psb_success_) & - & call psb_gather(r_col_glob,r_col,desc_a,info,root=psb_root_) + & call psb_gather(r_col_glob,r_col,desc_a,info,root=ione*psb_root_) if (info /= psb_success_) goto 9999 if (iam == psb_root_) then write(psb_err_unit,'(" ")') diff --git a/test/fileread/psb_zf_sample.f90 b/test/fileread/psb_zf_sample.f90 index 3c024606..b8385922 100644 --- a/test/fileread/psb_zf_sample.f90 +++ b/test/fileread/psb_zf_sample.f90 @@ -32,7 +32,7 @@ program psb_zf_sample use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_util_mod use getp implicit none @@ -185,7 +185,7 @@ program psb_zf_sample endif call psb_barrier(ctxt) - call distr_mtpart(psb_root_,ctxt) + call distr_mtpart(ione*psb_root_,ctxt) call getv_mtpart(ivg) call psb_matdist(aux_a, a, ctxt,desc_a,info,fmt=afmt,vg=ivg) @@ -194,7 +194,7 @@ program psb_zf_sample call psb_matdist(aux_a, a, ctxt,desc_a,info,fmt=afmt,parts=part_block) end select - call psb_scatter(b_col_glob,b_col,desc_a,info,root=psb_root_) + call psb_scatter(b_col_glob,b_col,desc_a,info,root=ione*psb_root_) call psb_geall(x_col,desc_a,info) call x_col%zero() call psb_geasb(x_col,desc_a,info) @@ -274,9 +274,9 @@ program psb_zf_sample & desc_a%get_fmt() end if - call psb_gather(x_col_glob,x_col,desc_a,info,root=psb_root_) + call psb_gather(x_col_glob,x_col,desc_a,info,root=ione*psb_root_) if (info == psb_success_) & - & call psb_gather(r_col_glob,r_col,desc_a,info,root=psb_root_) + & call psb_gather(r_col_glob,r_col,desc_a,info,root=ione*psb_root_) if (info /= psb_success_) goto 9999 if (iam == psb_root_) then write(psb_err_unit,'(" ")') diff --git a/test/hello/CMakeLists.txt b/test/hello/CMakeLists.txt new file mode 100644 index 00000000..70b0b04d --- /dev/null +++ 
b/test/hello/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required(VERSION 3.10) +project(HelloWorld Fortran) + +# Accept a user-defined library path +set(LIBRARY_DIR "" CACHE PATH "Path to the library directory") + +# Check if the user provided a library directory +if(NOT LIBRARY_DIR) + message(FATAL_ERROR "Library directory not specified! Use -DLIBRARY_DIR=path/to/library") +endif() + +# Include CMakePackageConfigHelpers to work with configuration files +include(CMakePackageConfigHelpers) + +# Find the package +find_package(psblas REQUIRED PATHS ${LIBRARY_DIR}/lib/cmake/psblas NO_DEFAULT_PATH) + +# Check if the package was found +if(NOT psblas_FOUND) + message(FATAL_ERROR "psblas not found!") +endif() + +# Include directories for the library +include_directories(${LIBRARY_DIR}/include) # Path to header files +include_directories(${psblas_DIR}/modules) # Path to module files + +message(STATUS "Library directory: ${psblas_DIR}") + +# Add the executables +add_executable(hello hello.f90) +add_executable(pingpong pingpong.f90) + +# Link the specific library targets +target_link_libraries(hello PRIVATE psblas::base) +target_link_libraries(pingpong PRIVATE psblas::base) + +# Set output directory for executables +set_target_properties(hello pingpong PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/runs) + +# Ensure the module path is set correctly +set_target_properties(hello pingpong PROPERTIES + Fortran_MODULE_DIRECTORY "${psblas_DIR}/modules" +) diff --git a/test/hello/Makefile b/test/hello/Makefile index a6811ea7..dd04768d 100644 --- a/test/hello/Makefile +++ b/test/hello/Makefile @@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas # # Libraries used LIBDIR=$(BASEDIR)/lib -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) # # Compilers and such @@ -16,7 +16,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). 
EXEDIR=./runs -all: runsd hello pingpong +all: runsd hello pingpong runsd: (if test ! -d runs ; then mkdir runs; fi) @@ -30,9 +30,8 @@ pingpong: pingpong.o /bin/mv pingpong $(EXEDIR) - clean: - /bin/rm -f hello.o pingpong.o + /bin/rm -f hello.o pingpong.o tsum.o tsum1.o $(EXEDIR)/hello verycleanlib: (cd ../..; make veryclean) diff --git a/test/kernel/Makefile b/test/kernel/Makefile index 21851beb..7e413092 100644 --- a/test/kernel/Makefile +++ b/test/kernel/Makefile @@ -6,7 +6,7 @@ INCDIR=$(INSTALLDIR)/include/ MODDIR=$(INSTALLDIR)/modules/ include $(INCDIR)/Make.inc.psblas LIBDIR=$(INSTALLDIR)/lib/ -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). diff --git a/test/kernel/pdgenspmv.f90 b/test/kernel/pdgenspmv.F90 similarity index 95% rename from test/kernel/pdgenspmv.f90 rename to test/kernel/pdgenspmv.F90 index d5fd9ba4..736c1eee 100644 --- a/test/kernel/pdgenspmv.f90 +++ b/test/kernel/pdgenspmv.F90 @@ -142,7 +142,7 @@ contains ! the rhs. ! subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& - & f,amold,vmold,imold,partition,nrl,iv) + & f,amold,vmold,imold,partition,nrl,iv,tnd) use psb_base_mod use psb_util_mod ! @@ -173,7 +173,7 @@ contains class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold integer(psb_ipk_), optional :: partition, nrl,iv(:) - + logical, optional :: tnd ! Local variables. integer(psb_ipk_), parameter :: nb=20 @@ -191,7 +191,7 @@ contains integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) ! 
Process grid - integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: np, iam, nth integer(psb_ipk_) :: icoeff integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) real(psb_dpk_), allocatable :: val(:) @@ -202,6 +202,7 @@ contains real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb integer(psb_ipk_) :: err_act procedure(d_func_3d), pointer :: f_ + logical :: tnd_ character(len=20) :: name, ch_err,tmpfmt info = psb_success_ @@ -495,9 +496,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) else - call psb_spasb(a,desc_a,info,afmt=afmt) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) end if end if call psb_barrier(ctxt) @@ -549,13 +550,17 @@ program pdgenspmv use psb_base_mod use psb_util_mod use psb_d_pde3d_mod +#if defined(PSB_OPENMP) + use omp_lib +#endif + implicit none ! input parameters character(len=20) :: kmethd, ptype character(len=5) :: afmt integer(psb_ipk_) :: idim - + logical :: tnd ! miscellaneous real(psb_dpk_), parameter :: one = done real(psb_dpk_) :: t1, t2, tprec, flops, tflops, tt1, tt2, bdwdth @@ -569,7 +574,7 @@ program pdgenspmv real(psb_dpk_), allocatable :: tst(:) ! blacs parameters type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: iam, np + integer(psb_ipk_) :: iam, np, nth ! solver parameters integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, nr, ipart @@ -587,6 +592,15 @@ program pdgenspmv call psb_init(ctxt) call psb_info(ctxt,iam,np) +#if defined(PSB_OPENMP) + !$OMP parallel shared(nth) + !$OMP master + nth = omp_get_num_threads() + !$OMP end master + !$OMP end parallel +#else + nth = 1 +#endif if (iam < 0) then ! This should not happen, but just in case @@ -606,14 +620,14 @@ program pdgenspmv ! ! get parameters ! - call get_parms(ctxt,afmt,idim) - + call get_parms(ctxt,afmt,idim,tnd) + call psb_init_timers() ! ! allocate and fill in the coefficient matrix, rhs and initial guess ! 
call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info) + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,tnd=tnd) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -665,7 +679,9 @@ program pdgenspmv flops = 2.d0*times*annz tflops=flops write(psb_out_unit,'("Matrix: ell1 ",i0)') idim - write(psb_out_unit,'("Test on : ",i20," processors")') np + write(psb_out_unit,'("Test on : ",i20," processes ")') np + write(psb_out_unit,'("Test on : ",i20," threads ")') nth + write(psb_out_unit,'("Total number of tasks : ",i20 )') nth*np write(psb_out_unit,'("Size of matrix : ",i20," ")') nr write(psb_out_unit,'("Number of nonzeros : ",i20," ")') annz write(psb_out_unit,'("Memory occupation : ",i20," ")') amatsize @@ -694,7 +710,7 @@ program pdgenspmv write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize end if - + call psb_print_timers(ctxt) ! ! cleanup storage and exit @@ -721,10 +737,11 @@ contains ! ! get iteration parameters from standard input ! 
- subroutine get_parms(ctxt,afmt,idim) + subroutine get_parms(ctxt,afmt,idim,tnd) type(psb_ctxt_type) :: ctxt character(len=*) :: afmt integer(psb_ipk_) :: idim + logical :: tnd integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: intbuf(10), ip @@ -733,9 +750,11 @@ contains if (iam == 0) then read(psb_inp_unit,*) afmt read(psb_inp_unit,*) idim + read(psb_inp_unit,*) tnd endif call psb_bcast(ctxt,afmt) call psb_bcast(ctxt,idim) + call psb_bcast(ctxt,tnd) if (iam == 0) then write(psb_out_unit,'("Testing matrix : ell1")') @@ -743,6 +762,8 @@ contains write(psb_out_unit,'("Number of processors : ",i0)')np write(psb_out_unit,'("Data distribution : BLOCK")') write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Storage format ",a)') afmt + write(psb_out_unit,'("Testing overlap ND ",l8)') tnd end if return diff --git a/test/omp/Makefile b/test/omp/Makefile index c35431c5..3eca17de 100644 --- a/test/omp/Makefile +++ b/test/omp/Makefile @@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas # # Libraries used LIBDIR=$(INSTALLDIR)/lib -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) # # Compilers and such diff --git a/test/omp/psb_tomp.F90 b/test/omp/psb_tomp.F90 index 79097ca8..45136ad0 100644 --- a/test/omp/psb_tomp.F90 +++ b/test/omp/psb_tomp.F90 @@ -172,7 +172,7 @@ contains & f,amold,vmold,imold,partition,nrl,iv) use psb_base_mod use psb_util_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif ! @@ -332,7 +332,7 @@ contains ! 
A nifty MPI function will split the process list npdims = 0 -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) npdims = 1 #else call mpi_dims_create(np,3,npdims,info) @@ -459,7 +459,7 @@ contains integer(psb_lpk_), allocatable :: irow(:),icol(:) real(psb_dpk_), allocatable :: val(:) real(psb_dpk_) :: x,y,z, zt(nb) -#if defined(OPENMP) +#if defined(PSB_OPENMP) nth = omp_get_num_threads() ith = omp_get_thread_num() #else @@ -562,7 +562,7 @@ contains endif end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) !!$ write(0,*) omp_get_thread_num(),' Check insertion ',& !!$ & irow(1:icoeff-1),':',icol(1:icoeff-1) #endif @@ -658,10 +658,10 @@ end module psb_d_pde3d_mod program psb_d_pde3d use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_util_mod use psb_d_pde3d_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -710,7 +710,7 @@ program psb_d_pde3d call psb_init(ctxt) call psb_info(ctxt,iam,np) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP parallel shared(nth) !$OMP master nth = omp_get_num_threads() @@ -728,7 +728,7 @@ program psb_d_pde3d if(psb_errstatus_fatal()) goto 9999 name='pde3d90' call psb_set_errverbosity(itwo) - call psb_cd_set_large_threshold(125000_psb_ipk_) + call psb_cd_set_hash_threshold(125000_psb_ipk_) ! ! Hello world ! diff --git a/test/openacc/Makefile b/test/openacc/Makefile new file mode 100644 index 00000000..5b69c2d6 --- /dev/null +++ b/test/openacc/Makefile @@ -0,0 +1,58 @@ +TOPDIR=../.. +include $(TOPDIR)/Make.inc + +LIBDIR=$(TOPDIR)/lib/ +PSBLIBDIR=$(TOPDIR)/lib/ +PSBINCDIR=$(TOPDIR)/include +PSBMODDIR=$(TOPDIR)/modules +INCDIR=$(TOPDIR)/include +MODDIR=$(TOPDIR)/modules +EXEDIR=./runs + +PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_ext -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base +LDLIBS=$(PSBGPULDLIBS) + +FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). 
$(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) + +FFLAGS=-O3 -march=native -fopenacc -foffload=nvptx-none="-march=sm_75" +CFLAGS=-O3 -march=native + +VTC=vectoacc.o +DVT=datavect.o +CSRC=timers.c + +OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o) + +all: dir psb_d_oacc_pde3d dpdegenmv + +#$(OBJS) +# $(FC) $(FFLAGS) $(OBJS) -o datavect $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) +# /bin/mv datavect $(EXEDIR) + +dir: + @if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi + +%: %.o timers.o + $(FC) $(FFLAGS) $^ -o $@ $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) + /bin/mv $@ $(EXEDIR) + +%.o: %.F90 + $(FC) $(FFLAGS) $(FINCLUDES) -c $< -o $@ + +%.o: %.c + $(CC) $(CFLAGS) $(FINCLUDES) -c $< -o $@ + +psb_d_oacc_pde3d: + mpifort -fallow-argument-mismatch -frecursive -g -O3 -frecursive -I../../modules/ -I. -DOPENACC -DHAVE_LAPACK -DHAVE_FLUSH_STMT -DLPK8 -DIPK4 -DMPI_MOD -c psb_d_oacc_pde3d.F90 -o psb_d_oacc_pde3d.o + $(FLINK) -fopenacc -DOPENACC psb_d_oacc_pde3d.o -o psb_d_oacc_pde3d $(PSBLAS_LIB) $(LDLIBS) + /bin/mv psb_d_oacc_pde3d $(EXEDIR) + +dpdegenmv: + mpifort -fallow-argument-mismatch -frecursive -g -O3 -frecursive -I../../modules/ -I. 
-DOPENACC -DHAVE_LAPACK -DHAVE_FLUSH_STMT -DLPK8 -DIPK4 -DMPI_MOD -c dpdegenmv.F90 -o dpdegenmv.o + $(FLINK) -fopenacc -DOPENACC dpdegenmv.o -o dpdegenmv $(PSBLAS_LIB) $(LDLIBS) + /bin/mv dpdegenmv $(EXEDIR) + +clean: + /bin/rm -fr *.o *.mod $(EXEDIR)/* + +.PHONY: all dir clean diff --git a/test/openacc/datavect.F90 b/test/openacc/datavect.F90 new file mode 100644 index 00000000..b78a65d9 --- /dev/null +++ b/test/openacc/datavect.F90 @@ -0,0 +1,84 @@ +program datavect + use psb_base_mod + use psb_oacc_mod + implicit none + + type(psb_d_vect_oacc) :: v3, v4, v5 + integer(psb_ipk_) :: info, n, i, old_percentage, percentage + real(psb_dpk_) :: alpha, dot_dev, dot_host, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev + double precision, external :: etime + double precision :: time_start, time_end + integer, parameter :: min_size = 1000, max_size = 100000000, step_size = 1000000 + integer, parameter :: ntests = 80, ngpu = 20 + integer :: size + character(len=20) :: filename + + open(unit=10, file='performance_data.csv', status='unknown') + write(10, '(A, A, A, A, A)') 'Size,Alloc_Host,Alloc_Dev,Calc_Host,Calc_Dev' + + write(*, *) 'Test of the vector operations with OpenACC' + + alpha = 2.0 + old_percentage = 0 + + do size = min_size, max_size, step_size + n = size + percentage = int(real(size - min_size) / real(max_size - min_size) * 100.0) + if (percentage /= old_percentage) then + write(*, '(A,I3,A)', advance='no') 'Progress: ', percentage, '%' + write(*,'(A)', advance='no') char(13) + old_percentage = percentage + end if + + time_start = etime() + call v3%all(n, info) + call v4%all(n, info) + call v5%all(n, info) + time_end = etime() + t_alloc_host = (time_end - time_start) + + do i = 1, n + v3%v(i) = real(i, psb_dpk_) + v4%v(i) = real(n - i, psb_dpk_) + end do + + call v3%scal(alpha) + + call v3%set_host() + call v4%set_host() + + time_start = etime() + do i = 1, ntests + dot_host = sum(v3%v * v4%v) + end do + time_end = etime() + t_calc_host = (time_end - 
time_start) / real(ntests) + + time_start = etime() + call v3%set_dev() + call v4%set_dev() + call v3%sync_space() + call v4%sync_space() + time_end = etime() + t_alloc_dev = (time_end - time_start) + + time_start = etime() + do i = 1, ntests + dot_dev = v3%dot_v(n, v4) + end do + !$acc wait + time_end = etime() + t_calc_dev = (time_end - time_start) / real(ntests) + + write(10, '(I10, 1X, ES12.5, 1X, ES12.5, 1X, ES12.5, 1X, ES12.5)') size, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev + + call v3%free(info) + call v4%free(info) + call v5%free(info) + end do + + close(10) + write(*, *) 'Performance data written to performance_data.csv' + + +end program datavect diff --git a/test/openacc/dpdegenmv.F90 b/test/openacc/dpdegenmv.F90 new file mode 100644 index 00000000..3342c6dc --- /dev/null +++ b/test/openacc/dpdegenmv.F90 @@ -0,0 +1,995 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: dpdegenmv.f90 +! +! Program: pdegenmv +! This sample program measures the performance of the matrix-vector product. +! The matrix is generated in the same way as for the pdegen test case of +! the main PSBLAS library. +! +! +module psb_d_pde3d_mod + + + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_dspmat_type, psb_d_vect_type, dzero,& + & psb_d_base_sparse_mat, psb_d_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function d_func_3d(x,y,z) result(val) + import :: psb_dpk_ + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + end function d_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_d_gen_pde3d + end interface psb_gen_pde3d + +contains + + function d_null_func_3d(x,y,z) result(val) + + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + + val = dzero + + end function d_null_func_3d + ! + ! functions parametrizing the differential equation + ! + + ! + ! Note: b1, b2 and b3 are the coefficients of the first + ! derivative of the unknown function. The default + ! we apply here is to have them zero, so that the resulting + ! matrix is symmetric/hermitian and suitable for + ! testing with CG and FCG. + ! When testing methods for non-hermitian matrices you can + ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) + ! 
+ function b1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b1 + real(psb_dpk_), intent(in) :: x,y,z + b1=done/sqrt((3*done)) + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b2 + real(psb_dpk_), intent(in) :: x,y,z + b2=done/sqrt((3*done)) + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b3 + real(psb_dpk_), intent(in) :: x,y,z + b3=done/sqrt((3*done)) + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: c + real(psb_dpk_), intent(in) :: x,y,z + c=dzero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a1 + real(psb_dpk_), intent(in) :: x,y,z + a1=done/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a2 + real(psb_dpk_), intent(in) :: x,y,z + a2=done/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a3 + real(psb_dpk_), intent(in) :: x,y,z + a3=done/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: g + real(psb_dpk_), intent(in) :: x,y,z + g = dzero + if (x == done) then + g = done + else if (x == dzero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv,tnd) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! 
with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_dspmat_type) :: a + type(psb_d_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(d_func_3d), optional :: f + class(psb_d_base_sparse_mat), optional :: amold + class(psb_d_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + logical, optional :: tnd + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_d_csc_sparse_mat) :: acsc + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat) :: acsr + real(psb_dpk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_dpk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! 
deltat discretization time + real(psb_dpk_) :: deltah, sqdeltah, deltah2 + real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(d_func_3d), pointer :: f_ + logical :: tnd_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => d_null_func_3d + end if + + deltah = done/(idim+2) + sqdeltah = deltah*deltah + deltah2 = (2*done)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! 
A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! 
+ call psb_cdall(ctxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& + & dupl=psb_dupl_err_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ctxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! 
term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) exit + zt(:)=dzero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' 
+ call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) + else + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_d_gen_pde3d + + +end module psb_d_pde3d_mod + + +program pdgenmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef OPENACC + use psb_oacc_mod +#endif + use psb_d_pde3d_mod + implicit none + + ! input parameters + character(len=5) :: acfmt, agfmt + integer :: idim + logical :: tnd + ! 
miscellaneous + real(psb_dpk_), parameter :: one = 1.d0 + real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& + & tt1, tt2, gt1, gt2, gflops, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + + ! sparse matrix and preconditioner + type(psb_dspmat_type) :: a, agpu, aux_a + ! descriptor + type(psb_desc_type) :: desc_a + ! dense matrices + type(psb_d_vect_type), target :: xv, bv, xg, bg +#ifdef OPENACC + type(psb_d_vect_oacc) :: vmold + type(psb_i_vect_oacc) :: imold +#endif + real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:) + ! blacs parameters + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + ! solver parameters + integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: err, eps, tnv, tng,tdot, dnrm2,ddot + integer, parameter :: ntests=8, ngpu=4, ncnv=3 + type(psb_d_coo_sparse_mat), target :: acoo + type(psb_d_csr_sparse_mat), target :: acsr + type(psb_d_ell_sparse_mat), target :: aell + type(psb_d_hll_sparse_mat), target :: ahll + type(psb_d_dia_sparse_mat), target :: adia + type(psb_d_hdia_sparse_mat), target :: ahdia +#ifdef OPENACC + type(psb_d_oacc_ell_sparse_mat), target :: aelg + type(psb_d_oacc_csr_sparse_mat), target :: acsrg + type(psb_d_oacc_hll_sparse_mat), target :: ahlg +#endif + class(psb_d_base_sparse_mat), pointer :: agmold, acmold + ! other variables + logical, parameter :: dump=.false. + integer(psb_ipk_) :: info, i, j, nr, nrg + integer(psb_lpk_) :: ig + character(len=20) :: name,ch_err + character(len=40) :: fname + + info=psb_success_ + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + +#ifdef OPENACC + call psb_oacc_init(ctxt) +#endif + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if(psb_get_errstatus() /= 0) goto 9999 + name='pdegenmv-oacc' + ! + ! Hello world + ! 
+ if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef OPENACC +!!$ write(*,*) 'Process ',iam,' running on device: ', psb_oacc_getDevice(),' out of', psb_oacc_getDeviceCount() +!!$ write(*,*) 'Process ',iam,' device ', psb_oacc_getDevice(),' is a: ', trim(psb_oacc_DeviceName()) +#endif + ! + ! get parameters + ! + call get_parms(ctxt,acfmt,agfmt,idim,tnd) + call psb_init_timers() + ! + ! allocate and fill in the coefficient matrix and initial vectors + ! + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3,tnd=tnd) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='create_matrix' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 + if (iam == psb_root_) write(psb_out_unit,'(" ")') + + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'pde',idim,'-',iam,'-',np,'.mtx' + call a%print(fname,head='PDEGEN test matrix') + end if + + select case(psb_toupper(acfmt)) + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('DIA') + acmold => adia + case('HDIA') + acmold => ahdia + case('CSR') + acmold => acsr + case('COO') + acmold => acoo + case default + write(*,*) 'Unknown format defaulting to HLL' + acmold => ahll + end select + call a%cscnv(info,mold=acmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + +#ifdef OPENACC + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + call a%cscnv(agpu,info,mold=agmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + 
write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + call desc_a%cnv(mold=imold) + + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_geasb(xg,desc_a,info,scratch=.true.,mold=vmold) + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x0,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + x0(i) = 1.0 + (1.0*ig)/(nrg**2) + end do + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + call psb_geall(x1,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + call psb_geasb(x1,desc_a,info) + call xv%bld(x0) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef OPENACC + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(x0,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) +!!$ call psb_oacc_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + + call xv%set(x0) + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,a,xv,dzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef OPENACC + call xg%set(x0) + + ! 
FIXME: cache flush needed here + x1 = bv%get_vect() + x2 = bg%get_vect() + + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do +!!$ call psb_oacc_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + x1 = bv%get_vect() + x2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + ! FIXME: cache flush needed here + call xg%set(x0) + call xg%sync() + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + ! For timing purposes we need to make sure all threads + ! in the device are done. + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do +!!$ call psb_oacc_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + x1 = bv%get_vect() + x2 = bg%get_vect() + tnv = psb_genrm2(bv,desc_a,info) + tng = psb_genrm2(bg,desc_a,info) + tdot = psb_gedot(bg,bg,desc_a,info) + write(0,*) ' bv ',tnv,' bg ',tng, ' dot ',tdot,eps,& + & dnrm2(desc_a%get_local_rows(),x2,1),& + & ddot(desc_a%get_local_rows(),x1,1,x2,1) + call psb_geaxpby(-done,bg,+done,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a)')'XCPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x1(1:nr),'Local part CPU',info,filename=fname) + write(fname,'(a,i3.3,a,i3.3,a)')'XGPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x2(1:nr),'Local part GPU',info,filename=fname) + end if + +#endif + annz = a%get_nzeros() + 
amatsize = a%sizeof() + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,nr) + call psb_sum(ctxt,annz) + call psb_sum(ctxt,amatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,& + & '("Matrix: ell1 ",i0)') idim + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nr + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation : ",i20," ")') amatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef OPENACC + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef OPENACC + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for 
",i6," products (s) (GPU.) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU.) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU.) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth +#ifdef OPENACC + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth +!!$ bdwdth = psb_oacc_MemoryPeakBandwidth() + write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + call psb_print_timers(ctxt) + + ! + ! cleanup storage and exit + ! + call psb_gefree(bv,desc_a,info) + call psb_gefree(xv,desc_a,info) + call psb_spfree(a,desc_a,info) + call psb_cdfree(desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='free routine' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if +#ifdef OPENACC + call psb_oacc_exit() +#endif + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,acfmt,agfmt,idim,tnd) + type(psb_ctxt_type) :: ctxt + character(len=*) :: agfmt, acfmt + integer :: idim + logical :: tnd + integer :: np, iam + integer :: intbuf(10), ip + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + write(*,*) 'CPU side format?' 
+ read(psb_inp_unit,*) acfmt + write(*,*) 'OACC side format?' + read(psb_inp_unit,*) agfmt + write(*,*) 'Size of discretization cube?' + read(psb_inp_unit,*) idim + write(*,*) 'Try comm/comp overlap?' + read(psb_inp_unit,*) tnd + endif + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,idim) + call psb_bcast(ctxt,tnd) + + if (iam == 0) then + write(psb_out_unit,'("Testing matrix : ell1")') + write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim + write(psb_out_unit,'("Number of processors : ",i0)')np + write(psb_out_unit,'("Data distribution : BLOCK")') + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Storage formats ",a)') acfmt,' ',agfmt + write(psb_out_unit,'("Testing overlap ND ",l8)') tnd + end if + return + + end subroutine get_parms + +end program pdgenmv diff --git a/test/openacc/psb_d_oacc_pde3d.F90 b/test/openacc/psb_d_oacc_pde3d.F90 new file mode 100644 index 00000000..841da878 --- /dev/null +++ b/test/openacc/psb_d_oacc_pde3d.F90 @@ -0,0 +1,1081 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! 
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_d_oacc_pde3d.F90
+!
+! Program: psb_d_oacc_pde3d
+! This sample program solves a linear system obtained by discretizing a
+! PDE with Dirichlet BCs.
+!
+!
+! The PDE is a general second order equation in 3d
+!
+!     a1 dd(u)   a2 dd(u)   a3 dd(u)   b1 d(u)   b2 d(u)   b3 d(u)
+!   - -------- - -------- - -------- + ------- + ------- + ------- + c u = f
+!       dxdx       dydy       dzdz       dx        dy        dz
+!
+! with Dirichlet boundary conditions
+!   u = g
+!
+!  on the unit cube  0<=x,y,z<=1.
+!
+!
+! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation.
+!
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A distribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 3D distribution in which the unit cube is partitioned
+!    into subcubes, each one assigned to a process.
+!
+!
+module psb_d_pde3d_mod
+  !
+  ! Support module for the 3D PDE sample programs: it provides the
+  ! coefficient functions of the differential equation and the generic
+  ! psb_gen_pde3d, which builds the distributed matrix, right-hand side
+  ! and initial guess obtained by discretizing the equation on a grid
+  ! of idim x idim x idim points.
+  !
+
+  use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,&
+       &  psb_dspmat_type, psb_d_vect_type, dzero,&
+       &  psb_d_base_sparse_mat, psb_d_base_vect_type, &
+       &  psb_i_base_vect_type, psb_l_base_vect_type
+
+  ! Abstract interface of a scalar function of the three space coordinates;
+  ! used for the optional forcing term F of psb_d_gen_pde3d.
+  interface
+    function d_func_3d(x,y,z) result(val)
+      import :: psb_dpk_
+      real(psb_dpk_), intent(in) :: x,y,z
+      real(psb_dpk_) :: val
+    end function d_func_3d
+  end interface
+
+  interface psb_gen_pde3d
+    module procedure  psb_d_gen_pde3d
+  end interface psb_gen_pde3d
+
+contains
+
+  !
+  ! Identically zero function: default forcing term when F is not supplied.
+  !
+  function d_null_func_3d(x,y,z) result(val)
+
+    real(psb_dpk_), intent(in) :: x,y,z
+    real(psb_dpk_) :: val
+
+    val = dzero
+
+  end function d_null_func_3d
+  !
+  ! functions parametrizing the differential equation
+  !
+
+  !
+  ! Note: b1, b2 and b3 are the coefficients of the first
+  ! derivative of the unknown function. The default
+  ! we apply here is to have them zero, so that the resulting
+  ! matrix is symmetric/hermitian and suitable for
+  ! testing with CG and FCG.
+  ! When testing methods for non-hermitian matrices you can
+  ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done))
+  !
+  ! Coefficient of d(u)/dx.
+  function b1(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) :: b1
+    real(psb_dpk_), intent(in) :: x,y,z
+    b1=dzero
+  end function b1
+  ! Coefficient of d(u)/dy.
+  function b2(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) ::  b2
+    real(psb_dpk_), intent(in) :: x,y,z
+    b2=dzero
+  end function b2
+  ! Coefficient of d(u)/dz.
+  function b3(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) ::  b3
+    real(psb_dpk_), intent(in) :: x,y,z
+    b3=dzero
+  end function b3
+  ! Coefficient of the zero-order term u.
+  function c(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) ::  c
+    real(psb_dpk_), intent(in) :: x,y,z
+    c=dzero
+  end function c
+  ! Coefficient of dd(u)/dxdx.
+  function a1(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) ::  a1
+    real(psb_dpk_), intent(in) :: x,y,z
+    a1=done/80
+  end function a1
+  ! Coefficient of dd(u)/dydy.
+  function a2(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) ::  a2
+    real(psb_dpk_), intent(in) :: x,y,z
+    a2=done/80
+  end function a2
+  ! Coefficient of dd(u)/dzdz.
+  function a3(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) ::  a3
+    real(psb_dpk_), intent(in) :: x,y,z
+    a3=done/80
+  end function a3
+  ! Dirichlet boundary datum: 1 on the face x=1, exp(y**2-z**2) on the
+  ! face x=0, zero elsewhere. The exact comparisons are intentional: the
+  ! generator calls g with the literal boundary values dzero/done.
+  function g(x,y,z)
+    use psb_base_mod, only : psb_dpk_, done, dzero
+    implicit none
+    real(psb_dpk_) ::  g
+    real(psb_dpk_), intent(in) :: x,y,z
+    g = dzero
+    if (x == done) then
+      g = done
+    else if (x == dzero) then
+      g = exp(y**2-z**2)
+    end if
+  end function g
+
+
+  !
+  !  subroutine to allocate and fill in the coefficient matrix and
+  !  the rhs.
+  !
+  !  Arguments:
+  !    ctxt       parallel context
+  !    idim       number of grid points along each direction; the global
+  !               problem size is idim**3
+  !    a          sparse matrix, allocated, filled and assembled here
+  !    bv, xv     right-hand side and (zero) initial guess
+  !    desc_a     communication descriptor, allocated and assembled here
+  !    afmt       storage format name handed to psb_spasb when AMOLD is absent
+  !    info       return code, psb_success_ on success
+  !    f          optional forcing term; defaults to d_null_func_3d
+  !    amold, vmold, imold
+  !               optional molds passed to psb_spasb, psb_geasb and psb_cdasb
+  !    partition  optional data distribution: 1 BLOCK, 2 user defined
+  !               through IV, 3 3D subcubes; absent or invalid means 3
+  !    nrl        optional number of local rows (partition 1 only)
+  !    iv         owner process of each global row (required by partition 2)
+  !
+  subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
+       & f,amold,vmold,imold,partition,nrl,iv)
+    use psb_base_mod
+    use psb_util_mod
+#if defined(PSB_OPENMP)
+    use omp_lib
+#endif
+    !
+    !   Discretizes the partial differential equation
+    !
+    !     a1 dd(u)   a2 dd(u)   a3 dd(u)   b1 d(u)   b2 d(u)   b3 d(u)
+    !   - -------- - -------- - -------- + ------- + ------- + ------- + c u = f
+    !       dxdx       dydy       dzdz       dx        dy        dz
+    !
+    ! with Dirichlet boundary conditions
+    !   u = g
+    !
+    !  on the unit cube  0<=x,y,z<=1.
+    !
+    !
+    ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation.
+    !
+    implicit none
+    integer(psb_ipk_)     :: idim
+    type(psb_dspmat_type) :: a
+    type(psb_d_vect_type) :: xv,bv
+    type(psb_desc_type)   :: desc_a
+    type(psb_ctxt_type)   :: ctxt
+    integer(psb_ipk_)     :: info
+    character(len=*)      :: afmt
+    procedure(d_func_3d), optional :: f
+    class(psb_d_base_sparse_mat), optional :: amold
+    class(psb_d_base_vect_type), optional :: vmold
+    class(psb_i_base_vect_type), optional :: imold
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
+
+    ! Local variables.
+
+    ! Number of matrix rows generated and inserted per batch
+    integer(psb_ipk_), parameter :: nb=20
+    integer(psb_ipk_) :: nnz,nr,nlr,i,j,k, partition_
+    integer(psb_lpk_) :: m,n,nt
+    ! For 3D partition
+    ! Note: integer control variables going directly into an MPI call
+    ! must be 4 bytes, i.e. psb_mpk_
+    integer(psb_mpk_) :: npdims(3), npp, minfo
+    integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
+    integer(psb_lpk_), allocatable :: myidx(:)
+    ! deltah   grid spacing, 1/(idim+1)
+    ! sqdeltah deltah**2, denominator of the second-derivative stencil
+    ! deltah2  2*deltah,  denominator of the centered first-derivative stencil
+    real(psb_dpk_)    :: deltah, sqdeltah, deltah2
+    real(psb_dpk_)    :: t0, t1, tasb, talc, ttot, tgen, tcdasb
+    integer(psb_ipk_) :: err_act
+    procedure(d_func_3d), pointer :: f_
+    character(len=20)  :: name, ch_err,tmpfmt
+
+    info = psb_success_
+    name = 'create_matrix'
+    call psb_erractionsave(err_act)
+
+    call psb_info(ctxt, iam, np)
+
+
+    if (present(f)) then
+      f_ => f
+    else
+      f_ => d_null_func_3d
+    end if
+
+    deltah   = done/(idim+1)
+    sqdeltah = deltah*deltah
+    deltah2  = (2*done)* deltah
+
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+
+    ! initialize array descriptor and sparse matrix storage. provide an
+    ! estimate of the number of non zeroes
+
+    m   = (1_psb_lpk_*idim)*idim*idim
+    n   = m
+    nnz = ((n*7)/(np))
+    if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
+    t0 = psb_wtime()
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition
+      if (present(nrl)) then
+        nr = nrl
+      else
+        !
+        ! Using a simple BLOCK distribution.
+        !
+        nt = (m+np-1)/np
+        ! Keep all MAX/MIN arguments of the same integer kind
+        nr = max(0_psb_lpk_,min(nt,m-(iam*nt)))
+      end if
+
+      nt = nr
+      call psb_sum(ctxt,nt)
+      if (nt /= m) then
+        write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+        info = -1
+        call psb_barrier(ctxt)
+        call psb_abort(ctxt)
+        return
+      end if
+
+      !
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      !
+      call psb_cdall(ctxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+
+      if (present(iv)) then
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ctxt)
+          call psb_abort(ctxt)
+          return
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
+        call psb_barrier(ctxt)
+        call psb_abort(ctxt)
+        return
+      end if
+
+      !
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it
+      !
+      call psb_cdall(ctxt,desc_a,info,vg=iv)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 3-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+#if defined(PSB_SERIAL_MPI)
+      npdims = 1
+#else
+      ! Go through psb_mpk_ temporaries: NP and INFO are psb_ipk_ and may
+      ! not have the integer kind the MPI library expects.
+      npp = np
+      call mpi_dims_create(npp,3,npdims,minfo)
+      info = minfo
+#endif
+      npx = npdims(1)
+      npy = npdims(2)
+      npz = npdims(3)
+
+      allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+      ! We can reuse idx2ijk for process indices as well.
+      call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+      ! Now let's split the 3D cube in hexahedra
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+      call dist1Didx(bndz,idim,npz)
+      mynz = bndz(iamz+1)-bndz(iamz)
+
+      ! How many indices do I own?
+      nlr = mynx*myny*mynz
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndy(iamy+1)-1
+          do k=bndz(iamz),bndz(iamz+1)-1
+            nr = nr + 1
+            call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+          end do
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny,mynz
+        info = -1
+        call psb_barrier(ctxt)
+        call psb_abort(ctxt)
+        return
+      end if
+
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      !
+      call psb_cdall(ctxt,desc_a,info,vl=myidx)
+
+      !
+      ! Specify process topology
+      !
+      block
+        !
+        ! Use adjcncy methods: register the (up to six) face neighbours
+        ! of this process in the npx x npy x npz process grid.
+        !
+        integer(psb_mpk_), allocatable :: neighbours(:)
+        integer(psb_mpk_) :: cnt
+        logical, parameter :: debug_adj=.true.
+        if (debug_adj.and.(np > 1)) then
+          cnt = 0
+          allocate(neighbours(np))
+          if (iamx < npx-1) then
+            cnt = cnt + 1
+            call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=0)
+          end if
+          if (iamy < npy-1) then
+            cnt = cnt + 1
+            call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=0)
+          end if
+          if (iamz < npz-1) then
+            cnt = cnt + 1
+            call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=0)
+          end if
+          if (iamx >0) then
+            cnt = cnt + 1
+            call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=0)
+          end if
+          if (iamy >0) then
+            cnt = cnt + 1
+            call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=0)
+          end if
+          if (iamz >0) then
+            cnt = cnt + 1
+            call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=0)
+          end if
+          call psb_realloc(cnt, neighbours,info)
+          call desc_a%set_p_adjcncy(neighbours)
+          !write(0,*) iam,' Check on neighbours: ',desc_a%get_p_adjcncy()
+        end if
+      end block
+
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ctxt)
+      call psb_abort(ctxt)
+      return
+    end select
+
+
+    if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz, &
+         & bldmode=psb_matbld_remote_,dupl=psb_dupl_add_)
+    ! define  rhs from boundary conditions; also build initial guess
+    if (info == psb_success_) call psb_geall(xv,desc_a,info)
+    if (info == psb_success_) call psb_geall(bv,desc_a,info,&
+         & bldmode=psb_matbld_remote_,dupl=psb_dupl_add_)
+
+    call psb_barrier(ctxt)
+    talc = psb_wtime()-t0
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='allocation rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    !$omp parallel shared(deltah,myidx,a,desc_a)
+    !
+    block
+      ! Work variables are declared inside the BLOCK construct so that
+      ! each thread of the parallel region gets its own copy.
+      integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
+      integer(psb_lpk_) :: glob_row
+      integer(psb_lpk_), allocatable :: irow(:),icol(:)
+      real(psb_dpk_), allocatable :: val(:)
+      real(psb_dpk_)    :: x,y,z, zt(nb)
+#if defined(PSB_OPENMP)
+      nth = omp_get_num_threads()
+      ith = omp_get_thread_num()
+#else
+      nth = 1
+      ith = 0
+#endif
+      allocate(val(20*nb),irow(20*nb),&
+           &icol(20*nb),stat=info)
+      if (info /= psb_success_ ) then
+        ! No jump out of the parallel region: the loop below is skipped
+        ! through the checks on INFO and the error is handled afterwards.
+        info=psb_err_alloc_dealloc_
+        call psb_errpush(info,name)
+      endif
+
+      !$omp  do schedule(dynamic)
+      !
+      do ii=1, nlr, nb
+        if(info /= psb_success_) cycle
+        ib = min(nb,nlr-ii+1)
+        icoeff = 1
+        do k=1,ib
+          i=ii+k-1
+          ! local matrix pointer
+          glob_row=myidx(i)
+          ! compute gridpoint coordinates
+          call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+          ! x, y, z coordinates
+          x = (ix-1)*deltah
+          y = (iy-1)*deltah
+          z = (iz-1)*deltah
+          zt(k) = f_(x,y,z)
+          ! internal point: build discretization
+          !
+          ! For every neighbour the coefficient is first stored in
+          ! val(icoeff); if the neighbour is on the boundary the term is
+          ! moved to the rhs and the slot is reused by the next term.
+          !
+          !  term depending on   (x-1,y,z)
+          !
+          val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
+          if (ix == 1) then
+            zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
+          else
+            call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
+            irow(icoeff) = glob_row
+            icoeff       = icoeff+1
+          endif
+          !  term depending on     (x,y-1,z)
+          val(icoeff)  = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
+          if (iy == 1) then
+            zt(k) = g(x,dzero,z)*(-val(icoeff))   + zt(k)
+          else
+            call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
+            irow(icoeff) = glob_row
+            icoeff       = icoeff+1
+          endif
+          !  term depending on     (x,y,z-1)
+          val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
+          if (iz == 1) then
+            zt(k) = g(x,y,dzero)*(-val(icoeff))   + zt(k)
+          else
+            call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
+            irow(icoeff) = glob_row
+            icoeff       = icoeff+1
+          endif
+
+          !  term depending on     (x,y,z)
+          val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
+               & + c(x,y,z)
+          call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
+          irow(icoeff) = glob_row
+          icoeff       = icoeff+1
+          !  term depending on     (x,y,z+1)
+          val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
+          if (iz == idim) then
+            zt(k) = g(x,y,done)*(-val(icoeff))   + zt(k)
+          else
+            call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
+            irow(icoeff) = glob_row
+            icoeff       = icoeff+1
+          endif
+          !  term depending on     (x,y+1,z)
+          val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
+          if (iy == idim) then
+            zt(k) = g(x,done,z)*(-val(icoeff))   + zt(k)
+          else
+            call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
+            irow(icoeff) = glob_row
+            icoeff       = icoeff+1
+          endif
+          !  term depending on     (x+1,y,z)
+          val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
+          if (ix==idim) then
+            zt(k) = g(done,y,z)*(-val(icoeff))   + zt(k)
+          else
+            call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
+            irow(icoeff) = glob_row
+            icoeff       = icoeff+1
+          endif
+
+        end do
+#if defined(PSB_OPENMP)
+!!$          write(0,*) omp_get_thread_num(),' Check insertion ',&
+!!$               & irow(1:icoeff-1),':',icol(1:icoeff-1)
+#endif
+        call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
+        if(info /= psb_success_) cycle
+        call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
+        if(info /= psb_success_) cycle
+        ! The initial guess is identically zero
+        zt(:)=dzero
+        call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
+        if(info /= psb_success_) cycle
+      end do
+      !$omp end do
+      ! Guarded: after a failed ALLOCATE some arrays may not be allocated
+      if (allocated(val))  deallocate(val)
+      if (allocated(irow)) deallocate(irow)
+      if (allocated(icol)) deallocate(icol)
+    end block
+    !$omp end parallel
+
+    tgen = psb_wtime()-t1
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='insert rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    call psb_cdasb(desc_a,info,mold=imold)
+    tcdasb = psb_wtime()-t1
+
+    call psb_barrier(ctxt)
+    t1 = psb_wtime()
+    if (info == psb_success_) then
+      if (present(amold)) then
+        call psb_spasb(a,desc_a,info,mold=amold)
+      else
+        call psb_spasb(a,desc_a,info,afmt=afmt)
+      end if
+    end if
+    call psb_barrier(ctxt)
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='asb rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+    if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold)
+    if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold)
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='asb rout.'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+    tasb = psb_wtime()-t1
+    call psb_barrier(ctxt)
+    ttot = psb_wtime() - t0
+
+    ! Reduce every reported timer in the same way across processes
+    call psb_amx(ctxt,talc)
+    call psb_amx(ctxt,tgen)
+    call psb_amx(ctxt,tcdasb)
+    call psb_amx(ctxt,tasb)
+    call psb_amx(ctxt,ttot)
+    if(iam == psb_root_) then
+      tmpfmt = a%get_fmt()
+      write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')&
+           &   tmpfmt
+      write(psb_out_unit,'("-allocation time : ",es12.5)') talc
+      write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen
+      write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb
+      write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb
+      write(psb_out_unit,'("-total time : ",es12.5)') ttot
+
+    end if
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+    return
+  end subroutine psb_d_gen_pde3d
+  !
+  ! True when grid point (i,j,k) lies outside the half-open index box
+  ! [bnd(iam), bnd(iam+1)) along at least one of the three directions.
+  !
+  function outside(i,j,k,bndx,bndy,bndz,iamx,iamy,iamz) result(res)
+    logical :: res
+    integer(psb_ipk_), intent(in) :: i,j,k,iamx,iamy,iamz
+    integer(psb_ipk_), intent(in) :: bndx(0:),bndy(0:),bndz(0:)
+
+    res = (i<bndx(iamx)).or.(i>=bndx(iamx+1)) &
+         & .or.(j<bndy(iamy)).or.(j>=bndy(iamy+1)) &
+         & .or.(k<bndz(iamz)).or.(k>=bndz(iamz+1))
+  end function outside
+end module psb_d_pde3d_mod
+
+program psb_d_oacc_pde3d
+  use psb_base_mod
+  use psb_prec_mod
+  use psb_linsolve_mod
+  use psb_util_mod
+  use psb_d_pde3d_mod
+#if defined(OPENACC)
+  use psb_oacc_mod
+#endif
+  implicit none
+
+  ! input parameters
+  character(len=20) :: kmethd, ptype
+  character(len=5)  :: afmt, agfmt
+  integer(psb_ipk_) :: idim
+  integer(psb_epk_) :: system_size
+
+  ! miscellaneous
+  real(psb_dpk_), parameter :: one = done
+  real(psb_dpk_) :: t1, t2, tprec
+
+  !
sparse matrix and preconditioner + type(psb_dspmat_type) :: a, agpu + type(psb_dprec_type) :: prec + ! descriptor + type(psb_desc_type) :: desc_a + ! dense vectors + type(psb_d_vect_type), target :: xxv, bv, xg, bg +#ifdef OPENACC + type(psb_d_vect_oacc) :: vmold + type(psb_i_vect_oacc) :: imold + type(psb_d_oacc_csr_sparse_mat) :: acsro + type(psb_d_oacc_hll_sparse_mat) :: ahllo +#endif + real(psb_dpk_), allocatable :: x0(:) + ! parallel environment + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: iam, np, nth + + ! solver parameters + integer(psb_ipk_) :: iter, itmax, itrace, istopc, irst, ipart + integer(psb_epk_) :: amatsize, precsize, descsize, d2size + real(psb_dpk_) :: err, eps + + ! Parameters for solvers in Block-Jacobi preconditioner + type ainvparms + character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale + integer(psb_ipk_) :: fill, inv_fill + real(psb_dpk_) :: thresh, inv_thresh + end type ainvparms + type(ainvparms) :: parms + + ! other variables + integer(psb_ipk_) :: info, i + character(len=20) :: name, ch_err + character(len=40) :: fname + + info = psb_success_ + + call psb_init(ctxt) + call psb_info(ctxt, iam, np) + +#if defined(OPENACC) + call psb_oacc_init(ctxt) +#endif + + nth = 1 + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if (psb_errstatus_fatal()) goto 9999 + name = 'pde3d90_oacc' + call psb_set_errverbosity(itwo) + + ! Hello world + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ', psb_version_string_ + write(*,*) 'This is the ', trim(name), ' sample program' + end if + + ! get parameters + call get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc,& + & itmax, itrace, irst, ipart, parms) + + ! 
allocate and fill in the coefficient matrix, rhs and initial guess + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt, idim, a, bv, xxv, desc_a, afmt, info, partition = ipart) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err = 'psb_gen_pde3d' + call psb_errpush(info, name, a_err = ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit, '("Overall matrix creation time : ", es12.5)') t2 + if (iam == psb_root_) write(psb_out_unit, '(" ")') + +#ifdef OPENACC + ! Convert matrix to GPU format + call a%cscnv(agpu, info, mold = acsro) + if ((info /= 0) .or. (psb_get_errstatus() /= 0)) then + write(0,*) 'From cscnv ', info + call psb_error() + stop + end if + call desc_a%cnv(mold = imold) + call psb_geasb(bg, desc_a, info, scratch = .true., mold = vmold) + call psb_geasb(xg, desc_a, info, scratch = .true., mold = vmold) +#endif + + ! prepare the preconditioner. + if (iam == psb_root_) write(psb_out_unit, '("Setting preconditioner to : ", a)') ptype + call prec%init(ctxt, ptype, info) + + ! 
Set the options for the BJAC preconditioner + if (psb_toupper(ptype) == "BJAC") then + call prec%set('sub_solve', parms%alg, info) + select case (psb_toupper(parms%alg)) + case ("ILU") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('ilu_alg', parms%ilu_alg, info) + case ("ILUT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("AINV") + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + call prec%set('ainv_alg', parms%orth_alg, info) + case ("INVK") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("INVT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case default + ! Do nothing, use default setting in the init routine + end select + else + ! nothing to set for NONE or DIAG preconditioner + end if + + call psb_barrier(ctxt) + t1 = psb_wtime() + call prec%build(a, desc_a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err = 'psb_precbld' + call psb_errpush(info, name, a_err = ch_err) + goto 9999 + end if + + tprec = psb_wtime() - t1 + + call psb_amx(ctxt, tprec) + + if (iam == psb_root_) write(psb_out_unit, '("Preconditioner time : ", es12.5)') tprec + if (iam == psb_root_) write(psb_out_unit, '(" ")') + call prec%descr(info) + + ! 
iterative method parameters + if (iam == psb_root_) write(psb_out_unit, '("Calling iterative method ", a)') kmethd + call psb_barrier(ctxt) +#ifdef OPENACC + call prec%allocate_wrk(info,vmold) +#endif + t1 = psb_wtime() + eps = 1.d-6 + +#ifdef OPENACC + call psb_krylov(kmethd, agpu, prec, bv, xxv, eps, desc_a, info, & + itmax = itmax, iter = iter, err = err, itrace = itrace, istop = istopc, irst = irst) +#else + call psb_krylov(kmethd, a, prec, bv, xxv, eps, desc_a, info, & + itmax = itmax, iter = iter, err = err, itrace = itrace, istop = istopc, irst = irst) +#endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err = 'solver routine' + call psb_errpush(info, name, a_err = ch_err) + goto 9999 + end if + + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 +#ifdef OPENACC + call prec%deallocate_wrk(info) +#endif + call psb_amx(ctxt, t2) + amatsize = a%sizeof() + descsize = desc_a%sizeof() + precsize = prec%sizeof() + system_size = desc_a%get_global_rows() + call psb_sum(ctxt, amatsize) + call psb_sum(ctxt, descsize) + call psb_sum(ctxt, precsize) + + if (iam == psb_root_) then + write(psb_out_unit, '(" ")') + write(psb_out_unit, '("Number of processes : ", i12)') np + write(psb_out_unit, '("Number of threads : ", i12)') nth + write(psb_out_unit, '("Total number of tasks : ", i12)') nth * np + write(psb_out_unit, '("Linear system size : ", i12)') system_size + write(psb_out_unit, '("Time to solve system : ", es12.5)') t2 + write(psb_out_unit, '("Time per iteration : ", es12.5)') t2 / iter + write(psb_out_unit, '("Number of iterations : ", i12)') iter + write(psb_out_unit, '("Convergence indicator on exit : ", es12.5)') err + write(psb_out_unit, '("Info on exit : ", i12)') info + write(psb_out_unit, '("Total memory occupation for A: ", i12)') amatsize + write(psb_out_unit, '("Total memory occupation for PREC: ", i12)') precsize + write(psb_out_unit, '("Total memory occupation for DESC_A: ", i12)') descsize + write(psb_out_unit, '("Storage 
format for A: ", a)') a%get_fmt() + write(psb_out_unit, '("Storage format for DESC_A: ", a)') desc_a%get_fmt() + end if + + ! cleanup storage and exit + call psb_gefree(bv, desc_a, info) + call psb_gefree(xxv, desc_a, info) + call psb_spfree(a, desc_a, info) + call prec%free(info) + call psb_cdfree(desc_a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err = 'free routine' + call psb_errpush(info, name, a_err = ch_err) + goto 9999 + end if + +#ifdef OPENACC + call psb_oacc_exit() +#endif + call psb_exit(ctxt) + stop + +9999 call psb_error(ctxt) + + stop + +contains + ! read the run parameters on process 0 (file named by the first command argument, else standard input) and broadcast them + subroutine get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc, itmax, itrace, irst, ipart, parms) + type(psb_ctxt_type) :: ctxt + character(len = *) :: kmethd, ptype, afmt, agfmt + integer(psb_ipk_) :: idim, istopc, itmax, itrace, irst, ipart + integer(psb_ipk_) :: np, iam ! filled by psb_info; only the process with iam == 0 reads the input + integer(psb_ipk_) :: ip, inp_unit ! ip: entry count read from the first input line; inp_unit: unit being read + character(len = 1024) :: filename ! first command-line argument, when present + type(ainvparms) :: parms + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + if (command_argument_count() > 0) then + call get_command_argument(1, filename) + inp_unit = 30 + open(inp_unit, file = filename, action = 'read', iostat = info) ! info is not declared in this routine + if (info /= 0) then + write(psb_err_unit, *) 'Could not open file ', filename, ' for input' + call psb_abort(ctxt) + stop + else + write(psb_err_unit, *) 'Opened file ', trim(filename), ' for input' + end if + else + inp_unit = psb_inp_unit + end if + read(inp_unit, *) ip + if (ip >= 3) then + read(inp_unit, *) kmethd + read(inp_unit, *) ptype + read(inp_unit, *) afmt + read(inp_unit, *) agfmt + read(inp_unit, *) idim + if (ip >= 4) then + read(inp_unit, *) ipart + else + ipart = 3 + endif + if (ip >= 5) then + read(inp_unit, *) istopc + else + istopc = 1 + endif + if (ip >= 6) then + read(inp_unit, *) itmax + else + itmax = 500 + endif + if (ip >= 7) then + read(inp_unit, *) itrace + else + itrace = -1 + endif + if (ip >= 8) then + 
read(inp_unit, *) irst + else + irst = 1 + endif + if (ip >= 9) then + read(inp_unit, *) parms%alg + read(inp_unit, *) parms%ilu_alg + read(inp_unit, *) parms%ilut_scale + read(inp_unit, *) parms%fill + read(inp_unit, *) parms%inv_fill + read(inp_unit, *) parms%thresh + read(inp_unit, *) parms%inv_thresh + read(inp_unit, *) parms%orth_alg + else + parms%alg = 'ILU' ! Block Solver ILU, ILUT, INVK, AINVT, AORTH + parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE otherwise ignored + parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM + parms%fill = 0 ! Level of fill for forward factorization + parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK) + parms%thresh = 1E-1_psb_dpk_ ! Threshold for forward factorization + parms%inv_thresh = 1E-1_psb_dpk_ ! Threshold for inverse factorization + parms%orth_alg = 'LLK' ! What orthogonalization algorithm? + endif + + write(psb_out_unit, '("Solving matrix : ell1")') + write(psb_out_unit, & + '("Grid dimensions : ", i4, " x ", i4, " x ", i4)') & + idim, idim, idim + write(psb_out_unit, '("Number of processors : ", i0)') np + select case (ipart) + case (1) + write(psb_out_unit, '("Data distribution : BLOCK")') + case (3) + write(psb_out_unit, '("Data distribution : 3D")') + case default + ipart = 3 + write(psb_out_unit, '("Unknown data distrbution, defaulting to 3D")') + end select + write(psb_out_unit, '("Preconditioner : ", a)') ptype + if (psb_toupper(ptype) == "BJAC") then + write(psb_out_unit, '("Block subsolver : ", a)') parms%alg + select case (psb_toupper(parms%alg)) + case ('ILU') + write(psb_out_unit, '("Fill in : ", i0)') parms%fill + write(psb_out_unit, '("MILU : ", a)') parms%ilu_alg + case ('ILUT') + write(psb_out_unit, '("Fill in : ", i0)') parms%fill + write(psb_out_unit, '("Threshold : ", es12.5)') parms%thresh + write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale + case ('INVK') + write(psb_out_unit, '("Fill in : ", i0)') parms%fill + write(psb_out_unit, '("Invese 
Fill in : ", i0)') parms%inv_fill + write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale + case ('INVT') + write(psb_out_unit, '("Fill in : ", i0)') parms%fill + write(psb_out_unit, '("Threshold : ", es12.5)') parms%thresh + write(psb_out_unit, '("Invese Fill in : ", i0)') parms%inv_fill + write(psb_out_unit, '("Inverse Threshold : ", es12.5)') parms%inv_thresh + write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale + case ('AINV', 'AORTH') + write(psb_out_unit, '("Inverse Threshold : ", es12.5)') parms%inv_thresh + write(psb_out_unit, '("Invese Fill in : ", i0)') parms%inv_fill + write(psb_out_unit, '("Orthogonalization : ", a)') parms%orth_alg + write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale + case default + write(psb_out_unit, '("Unknown diagonal solver")') + end select + end if + write(psb_out_unit, '("Iterative method : ", a)') kmethd + write(psb_out_unit, '(" ")') + else + ! wrong number of parameter, print an error message and exit + call pr_usage(izero) + call psb_abort(ctxt) + stop 1 + endif + if (inp_unit /= psb_inp_unit) then + close(inp_unit) + end if + end if + ! broadcast the values read on process 0 to all processes (ilu_alg included: the main program hands it to prec%set) + call psb_bcast(ctxt, kmethd) + call psb_bcast(ctxt, afmt) + call psb_bcast(ctxt, agfmt) + call psb_bcast(ctxt, ptype) + call psb_bcast(ctxt, idim) + call psb_bcast(ctxt, ipart) + call psb_bcast(ctxt, istopc) + call psb_bcast(ctxt, itmax) + call psb_bcast(ctxt, itrace) + call psb_bcast(ctxt, irst) + call psb_bcast(ctxt, parms%alg) + call psb_bcast(ctxt, parms%ilu_alg) + call psb_bcast(ctxt, parms%fill) + call psb_bcast(ctxt, parms%inv_fill) + call psb_bcast(ctxt, parms%thresh) + call psb_bcast(ctxt, parms%inv_thresh) + call psb_bcast(ctxt, parms%orth_alg) + call psb_bcast(ctxt, parms%ilut_scale) + return + + end subroutine get_parms + + ! 
print the usage text describing the expected parameters, one list-directed line per write statement + subroutine pr_usage(iout) ! NOTE(review): text describes command-line arguments, but get_parms reads an input file or stdin + integer(psb_ipk_) :: iout ! unit number every line below is written to + write(iout, *) 'incorrect parameter(s) found' + write(iout, *) ' usage: pde3d90 methd prec dim &' + write(iout, *) '[istop itmax itrace]' + write(iout, *) ' where:' + write(iout, *) ' methd: cgstab cgs rgmres bicgstabl' + write(iout, *) ' prec : bjac diag none' + write(iout, *) ' dim number of points along each axis' + write(iout, *) ' the size of the resulting linear ' + write(iout, *) ' system is dim**3' + write(iout, *) ' ipart data partition 1 3 ' + write(iout, *) ' istop stopping criterion 1, 2 ' + write(iout, *) ' itmax maximum number of iterations [500] ' + write(iout, *) ' itrace <=0 (no tracing, default) or ' + write(iout, *) ' >= 1 do tracing every itrace' + write(iout, *) ' iterations ' + end subroutine pr_usage + +end program psb_d_oacc_pde3d diff --git a/test/openacc/runs/ppde.inp b/test/openacc/runs/ppde.inp new file mode 100644 index 00000000..f6a0cff3 --- /dev/null +++ b/test/openacc/runs/ppde.inp @@ -0,0 +1,19 @@ +17 Number of entries below this +BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR RICHARDSON +BJAC Preconditioner NONE DIAG BJAC +CSR Storage format for matrix A: CSR COO +HLL Storage format for matrix A: CSR COO +140 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +3 Partition: 1 BLOCK 3 3D +2 Stopping criterion 1 2 +0200 MAXIT +10 ITRACE +002 IRST restart for RGMRES and BiCGSTABL +INVK Block Solver ILU,ILUT,INVK,INVT,AINV +NONE If ILU : MILU or NONE othewise ignored +NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored +0 Level of fill for forward factorization +1 Level of fill for inverse factorization (only INVK,INVT) +1E-1 Threshold for forward factorization +1E-1 Threshold for inverse factorization (Only INVK, INVT) +LLK What orthogonalization algorithm? 
(Only AINV) diff --git a/test/openacc/timers.c b/test/openacc/timers.c new file mode 100644 index 00000000..12fa4f56 --- /dev/null +++ b/test/openacc/timers.c @@ -0,0 +1,97 @@ +#include +#include +#include + +double wtime() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3; + return(temp); +} + +double timef_() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3; + return(temp); +} + +double timef() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3; + return(temp); +} + +double etime() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + +double etime_() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + +double etimef() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + +double etimef_() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? 
\n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + + + diff --git a/test/openacc/vectoacc.F90 b/test/openacc/vectoacc.F90 new file mode 100644 index 00000000..639a2a67 --- /dev/null +++ b/test/openacc/vectoacc.F90 @@ -0,0 +1,104 @@ +program vectoacc + use psb_base_mod + use psb_oacc_mod + implicit none + + type(psb_d_vect_oacc) :: v3, v4, v5 + integer(psb_ipk_) :: info, n, i, nerr ! nerr: number of entries failing the scal check + real(psb_dpk_) :: alpha, beta, result + double precision, external :: etime + + real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev + double precision :: time_start, time_end + integer(psb_ipk_), parameter :: ntests=80, ngpu=20 + + write(*, *) 'Test of the vector operations with OpenACC' + + write(*, *) 'Enter the size of the vectors' + read(*, *) n + alpha = 2.0 + beta = 0.5 + + time_start = etime() + call v3%all(n, info) + call v4%all(n, info) + call v5%all(n, info) + time_end = etime() + t_alloc_host = time_end - time_start + write(*, *) 'Allocation time on host: ', t_alloc_host, ' sec' + + do i = 1, n + v3%v(i) = real(i, psb_dpk_) + v4%v(i) = real(n - i, psb_dpk_) + end do + + call v3%set_dev() + call v4%set_dev() + + call v3%scal(alpha) + call v3%sync() + + ! compare every entry with the expected product; the pass message is printed only when none differs + nerr = 0 + do i = 1, n + if (v3%v(i) /= alpha * real(i, psb_dpk_)) then + write(*, *) 'Scal error : index', i + nerr = nerr + 1 + end if + end do + if (nerr == 0) write(*, *) 'Scal test passed' + + result = v3%dot_v(n, v4) + call v3%sync() + call v4%sync() + if (result /= sum(v3%v * v4%v)) then ! NOTE(review): exact floating-point comparison; confirm bitwise agreement is expected + write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result + else + write(*, *) 'Dot_v test passed' + end if + + result = v3%nrm2(n) + call v3%sync() + if (result /= sqrt(sum(v3%v ** 2))) then + write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result + else + write(*, *) 'nrm2 test passed' + end if + + call v3%set_host() + call v4%set_host() + + time_start = etime() + do i = 1, ntests + dot_host = sum(v3%v * v4%v) + end do + time_end = 
etime() + t_calc_host = (time_end - time_start) / real(ntests) + write(*, *) 'Host calculation time: ', t_calc_host, ' sec' + + call v3%set_dev() + call v4%set_dev() + + time_start = etime() + call v3%sync_space() + call v4%sync_space() + time_end = etime() + t_alloc_dev = time_end - time_start + write(*, *) 'Allocation time on device: ', t_alloc_dev, ' sec' + + time_start = etime() + do i = 1, ntests + dot_dev = v3%dot_v(n, v4) + end do + !$acc wait + time_end = etime() + t_calc_dev = (time_end - time_start) / real(ntests) + write(*, *) 'Device calculation time: ', t_calc_dev, ' sec' + + + call v3%free(info) + call v4%free(info) + call v5%free(info) + +end program vectoacc + \ No newline at end of file diff --git a/test/pargen/runs/mach b/test/pargen/runs/mach deleted file mode 100644 index 60432829..00000000 --- a/test/pargen/runs/mach +++ /dev/null @@ -1,8 +0,0 @@ -localhost -localhost -localhost -localhost -localhost -localhost -localhost -localhost diff --git a/test/pdegen/CMakeLists.txt b/test/pdegen/CMakeLists.txt new file mode 100644 index 00000000..58d586e3 --- /dev/null +++ b/test/pdegen/CMakeLists.txt @@ -0,0 +1,34 @@ +cmake_minimum_required(VERSION 3.10) +project(pargen Fortran) + +# The psblas installation prefix has to be supplied by the caller +if(NOT DEFINED PSBLAS_INSTALL_DIR) + message(FATAL_ERROR "Please specify the path to the psblas installation directory using -DPSBLAS_INSTALL_DIR=<path>") +endif() + +# Set the include and library directories based on the provided path +set(INSTALLDIR "${PSBLAS_INSTALL_DIR}") +set(INCDIR "${INSTALLDIR}/include") +set(MODDIR "${INSTALLDIR}/modules") +set(LIBDIR "${INSTALLDIR}/lib") + +# Find the psblas package; the psblas:: targets linked below are expected to come from it +find_package(psblas REQUIRED PATHS "${INSTALLDIR}") + +# Executables are placed in runs/ next to the sources +set(EXEDIR "${CMAKE_CURRENT_SOURCE_DIR}/runs") + +# Ensure the executable directory exists +file(MAKE_DIRECTORY "${EXEDIR}") + +# Every test program is built from the single source file named after it 
+foreach(target psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d) + add_executable(${target} ${target}.F90) + # Include and Fortran module directories of the installed library, scoped to this target + target_include_directories(${target} PRIVATE "${INCDIR}" "${MODDIR}") + target_link_libraries(${target} PRIVATE psblas::util psblas::linsolve psblas::prec psblas::base) + set_target_properties(${target} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${EXEDIR}" + ) +endforeach() + diff --git a/test/pargen/Makefile b/test/pdegen/Makefile similarity index 85% rename from test/pargen/Makefile rename to test/pdegen/Makefile index 20a95c0b..ecbda5d1 100644 --- a/test/pargen/Makefile +++ b/test/pdegen/Makefile @@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas # # Libraries used LIBDIR=$(INSTALLDIR)/lib -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) # # Compilers and such @@ -25,7 +25,6 @@ psb_d_pde3d: psb_d_pde3d.o $(FLINK) psb_d_pde3d.o -o psb_d_pde3d $(PSBLAS_LIB) $(LDLIBS) /bin/mv psb_d_pde3d $(EXEDIR) - psb_s_pde3d: psb_s_pde3d.o $(FLINK) psb_s_pde3d.o -o psb_s_pde3d $(PSBLAS_LIB) $(LDLIBS) /bin/mv psb_s_pde3d $(EXEDIR) @@ -41,7 +40,7 @@ psb_s_pde2d: psb_s_pde2d.o clean: - /bin/rm -f psb_d_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \ + /bin/rm -f psb_d_pde3d.o psb_d_oacc_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \ $(EXEDIR)/psb_d_pde3d 
$(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d verycleanlib: (cd ../..; make veryclean) diff --git a/test/pargen/psb_d_pde2d.F90 b/test/pdegen/psb_d_pde2d.F90 similarity index 96% rename from test/pargen/psb_d_pde2d.F90 rename to test/pdegen/psb_d_pde2d.F90 index 11777b19..e04ec678 100644 --- a/test/pargen/psb_d_pde2d.F90 +++ b/test/pdegen/psb_d_pde2d.F90 @@ -156,7 +156,7 @@ contains & f,amold,vmold,imold,partition,nrl,iv) use psb_base_mod use psb_util_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif ! @@ -281,6 +281,7 @@ contains ! contiguous rows ! call psb_cdall(ctxt,desc_a,info,nl=nr) + if (info /=0) goto 9999 myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -308,6 +309,7 @@ contains ! process that owns it ! call psb_cdall(ctxt,desc_a,info,vg=iv) + if (info /=0) goto 9999 myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -316,7 +318,7 @@ contains ! A nifty MPI function will split the process list npdims = 0 -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) npdims = 1 #else call mpi_dims_create(np,2,npdims,info) @@ -326,7 +328,7 @@ contains allocate(bndx(0:npx),bndy(0:npy)) ! We can reuse idx2ijk for process indices as well. - call idx2ijk(iamx,iamy,iam,npx,npy,base=0) + call idx2ijk(iamx,iamy,iam,npx,npy,base=mzero) ! Now let's split the 2D square in rectangles call dist1Didx(bndx,idim,npx) mynx = bndx(iamx+1)-bndx(iamx) @@ -357,6 +359,7 @@ contains ! the set of global indices it owns. ! call psb_cdall(ctxt,desc_a,info,vl=myidx) + if (info /=0) goto 9999 ! ! Specify process topology @@ -365,7 +368,7 @@ contains ! ! Use adjcncy methods ! - integer(psb_mpk_), allocatable :: neighbours(:) + integer(psb_ipk_), allocatable :: neighbours(:) integer(psb_mpk_) :: cnt logical, parameter :: debug_adj=.true. 
if (debug_adj.and.(np > 1)) then @@ -373,19 +376,19 @@ contains allocate(neighbours(np)) if (iamx < npx-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx+1,iamy,npx,npy,base=0) + call ijk2idx(neighbours(cnt),iamx+1,iamy,npx,npy,base=mzero) end if if (iamy < npy-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy+1,npx,npy,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy+1,npx,npy,base=mzero) end if if (iamx >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx-1,iamy,npx,npy,base=0) + call ijk2idx(neighbours(cnt),iamx-1,iamy,npx,npy,base=mzero) end if if (iamy >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy-1,npx,npy,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy-1,npx,npy,base=mzero) end if call psb_realloc(cnt, neighbours,info) call desc_a%set_p_adjcncy(neighbours) @@ -422,7 +425,10 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() +#if 0 + !Disable parallel generation for the time being !$omp parallel shared(deltah,myidx,a,desc_a) +#endif ! block integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy, ith,nth @@ -430,7 +436,7 @@ contains integer(psb_lpk_), allocatable :: irow(:),icol(:) real(psb_dpk_), allocatable :: val(:) real(psb_dpk_) :: x,y, zt(nb) -#if defined(OPENMP) +#if defined(PSB_OPENMP) nth = omp_get_num_threads() ith = omp_get_thread_num() #else @@ -445,7 +451,10 @@ contains !goto 9999 endif +#if 0 + !Disable parallel generation for the time being !$omp do schedule(dynamic) +#endif ! 
do ii=1, nlr,nb if(info /= psb_success_) cycle @@ -509,7 +518,7 @@ contains endif end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) !!$ write(0,*) omp_get_thread_num(),' Check insertion ',& !!$ & irow(1:icoeff-1),':',icol(1:icoeff-1) #endif @@ -521,10 +530,16 @@ contains call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) if(info /= psb_success_) cycle end do +#if 0 + !Disable parallel generation for the time being !$omp end do +#endif deallocate(val,irow,icol) end block +#if 0 + !Disable parallel generation for the time being !$omp end parallel +#endif tgen = psb_wtime()-t1 @@ -595,10 +610,10 @@ end module psb_d_pde2d_mod program psb_d_pde2d use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_util_mod use psb_d_pde2d_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -647,7 +662,7 @@ program psb_d_pde2d call psb_init(ctxt) call psb_info(ctxt,iam,np) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP parallel shared(nth) !$OMP master nth = omp_get_num_threads() @@ -757,8 +772,22 @@ program psb_d_pde2d call psb_barrier(ctxt) t1 = psb_wtime() eps = 1.d-6 - call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + select case(psb_toupper(trim(kmethd))) + case('RICHARDSON') + call psb_richardson(a,prec,bv,xxv,eps,& + & desc_a,info,itmax=itmax,iter=iter,& + & err=err,itrace=itrace,& + & istop=istopc) + case('BICGSTAB','BICGSTABL','BICG','CG','CGS','FCG','GCR','RGMRES') + call psb_krylov(kmethd,a,prec,bv,xxv,eps,& + & desc_a,info,itmax=itmax,iter=iter,err=err,itrace=itrace,& + & istop=istopc,irst=irst) + case default + write(psb_err_unit,*) 'Unknown method :"',trim(kmethd),'"' + info=psb_err_invalid_input_ + call psb_errpush(info,name) + goto 9999 + end select if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pdegen/psb_d_pde3d.F90 similarity index 96% rename from 
test/pargen/psb_d_pde3d.F90 rename to test/pdegen/psb_d_pde3d.F90 index 6e895c00..cac1c413 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pdegen/psb_d_pde3d.F90 @@ -172,7 +172,7 @@ contains & f,amold,vmold,imold,partition,nrl,iv) use psb_base_mod use psb_util_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif ! @@ -297,6 +297,7 @@ contains ! contiguous rows ! call psb_cdall(ctxt,desc_a,info,nl=nr) + if (info /=0) goto 9999 myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -324,6 +325,7 @@ contains ! process that owns it ! call psb_cdall(ctxt,desc_a,info,vg=iv) + if (info /=0) goto 9999 myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -332,7 +334,7 @@ contains ! A nifty MPI function will split the process list npdims = 0 -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) npdims = 1 #else call mpi_dims_create(np,3,npdims,info) @@ -343,7 +345,7 @@ contains allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) ! We can reuse idx2ijk for process indices as well. - call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=mzero) ! Now let's split the 3D cube in hexahedra call dist1Didx(bndx,idim,npx) mynx = bndx(iamx+1)-bndx(iamx) @@ -378,6 +380,7 @@ contains ! the set of global indices it owns. ! call psb_cdall(ctxt,desc_a,info,vl=myidx) + if (info /=0) goto 9999 ! ! Specify process topology @@ -386,7 +389,7 @@ contains ! ! Use adjcncy methods ! - integer(psb_mpk_), allocatable :: neighbours(:) + integer(psb_ipk_), allocatable :: neighbours(:) integer(psb_mpk_) :: cnt logical, parameter :: debug_adj=.true. 
if (debug_adj.and.(np > 1)) then @@ -394,27 +397,27 @@ contains allocate(neighbours(np)) if (iamx < npx-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=mzero) end if if (iamy < npy-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=mzero) end if if (iamz < npz-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=mzero) end if if (iamx >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=mzero) end if if (iamy >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=mzero) end if if (iamz >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=mzero) end if call psb_realloc(cnt, neighbours,info) call desc_a%set_p_adjcncy(neighbours) @@ -450,7 +453,10 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() +#if 0 + !Disable parallel generation for the time being !$omp parallel shared(deltah,myidx,a,desc_a) +#endif ! block integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth @@ -458,7 +464,7 @@ contains integer(psb_lpk_), allocatable :: irow(:),icol(:) real(psb_dpk_), allocatable :: val(:) real(psb_dpk_) :: x,y,z, zt(nb) -#if defined(OPENMP) +#if defined(PSB_OPENMP) nth = omp_get_num_threads() ith = omp_get_thread_num() #else @@ -473,7 +479,10 @@ contains !goto 9999 endif +#if 0 + !Disable parallel generation for the time being !$omp do schedule(dynamic) +#endif ! 
do ii=1, nlr, nb if(info /= psb_success_) cycle @@ -557,7 +566,7 @@ contains endif end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) !!$ write(0,*) omp_get_thread_num(),' Check insertion ',& !!$ & irow(1:icoeff-1),':',icol(1:icoeff-1) #endif @@ -569,11 +578,16 @@ contains call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) if(info /= psb_success_) cycle end do +#if 0 + !Disable parallel generation for the time being !$omp end do +#endif deallocate(val,irow,icol) end block +#if 0 + !Disable parallel generation for the time being !$omp end parallel - +#endif tgen = psb_wtime()-t1 if(info /= psb_success_) then info=psb_err_from_subroutine_ @@ -653,10 +667,10 @@ end module psb_d_pde3d_mod program psb_d_pde3d use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_util_mod use psb_d_pde3d_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -705,7 +719,7 @@ program psb_d_pde3d call psb_init(ctxt) call psb_info(ctxt,iam,np) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP parallel shared(nth) !$OMP master nth = omp_get_num_threads() @@ -814,8 +828,22 @@ program psb_d_pde3d call psb_barrier(ctxt) t1 = psb_wtime() eps = 1.d-6 - call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + select case(psb_toupper(trim(kmethd))) + case('RICHARDSON') + call psb_richardson(a,prec,bv,xxv,eps,& + & desc_a,info,itmax=itmax,iter=iter,& + & err=err,itrace=itrace,& + & istop=istopc) + case('BICGSTAB','BICGSTABL','BICG','CG','CGS','FCG','GCR','RGMRES') + call psb_krylov(kmethd,a,prec,bv,xxv,eps,& + & desc_a,info,itmax=itmax,iter=iter,err=err,itrace=itrace,& + & istop=istopc,irst=irst) + case default + write(psb_err_unit,*) 'Unknown method :"',trim(kmethd),'"' + info=psb_err_invalid_input_ + call psb_errpush(info,name) + goto 9999 + end select if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/test/pargen/psb_s_pde2d.F90 
b/test/pdegen/psb_s_pde2d.F90 similarity index 96% rename from test/pargen/psb_s_pde2d.F90 rename to test/pdegen/psb_s_pde2d.F90 index f14d2cb4..121980bf 100644 --- a/test/pargen/psb_s_pde2d.F90 +++ b/test/pdegen/psb_s_pde2d.F90 @@ -156,7 +156,7 @@ contains & f,amold,vmold,imold,partition,nrl,iv) use psb_base_mod use psb_util_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif ! @@ -281,6 +281,7 @@ contains ! contiguous rows ! call psb_cdall(ctxt,desc_a,info,nl=nr) + if (info /=0) goto 9999 myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -308,6 +309,7 @@ contains ! process that owns it ! call psb_cdall(ctxt,desc_a,info,vg=iv) + if (info /=0) goto 9999 myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -316,7 +318,7 @@ contains ! A nifty MPI function will split the process list npdims = 0 -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) npdims = 1 #else call mpi_dims_create(np,2,npdims,info) @@ -326,7 +328,7 @@ contains allocate(bndx(0:npx),bndy(0:npy)) ! We can reuse idx2ijk for process indices as well. - call idx2ijk(iamx,iamy,iam,npx,npy,base=0) + call idx2ijk(iamx,iamy,iam,npx,npy,base=mzero) ! Now let's split the 2D square in rectangles call dist1Didx(bndx,idim,npx) mynx = bndx(iamx+1)-bndx(iamx) @@ -357,6 +359,7 @@ contains ! the set of global indices it owns. ! call psb_cdall(ctxt,desc_a,info,vl=myidx) + if (info /=0) goto 9999 ! ! Specify process topology @@ -365,7 +368,7 @@ contains ! ! Use adjcncy methods ! - integer(psb_mpk_), allocatable :: neighbours(:) + integer(psb_ipk_), allocatable :: neighbours(:) integer(psb_mpk_) :: cnt logical, parameter :: debug_adj=.true. 
if (debug_adj.and.(np > 1)) then @@ -373,19 +376,19 @@ contains allocate(neighbours(np)) if (iamx < npx-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx+1,iamy,npx,npy,base=0) + call ijk2idx(neighbours(cnt),iamx+1,iamy,npx,npy,base=mzero) end if if (iamy < npy-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy+1,npx,npy,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy+1,npx,npy,base=mzero) end if if (iamx >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx-1,iamy,npx,npy,base=0) + call ijk2idx(neighbours(cnt),iamx-1,iamy,npx,npy,base=mzero) end if if (iamy >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy-1,npx,npy,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy-1,npx,npy,base=mzero) end if call psb_realloc(cnt, neighbours,info) call desc_a%set_p_adjcncy(neighbours) @@ -422,7 +425,10 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() +#if 0 + !Disable parallel generation for the time being !$omp parallel shared(deltah,myidx,a,desc_a) +#endif ! block integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy, ith,nth @@ -430,7 +436,7 @@ contains integer(psb_lpk_), allocatable :: irow(:),icol(:) real(psb_spk_), allocatable :: val(:) real(psb_spk_) :: x,y, zt(nb) -#if defined(OPENMP) +#if defined(PSB_OPENMP) nth = omp_get_num_threads() ith = omp_get_thread_num() #else @@ -445,7 +451,10 @@ contains !goto 9999 endif +#if 0 + !Disable parallel generation for the time being !$omp do schedule(dynamic) +#endif ! 
do ii=1, nlr,nb if(info /= psb_success_) cycle @@ -509,7 +518,7 @@ contains endif end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) !!$ write(0,*) omp_get_thread_num(),' Check insertion ',& !!$ & irow(1:icoeff-1),':',icol(1:icoeff-1) #endif @@ -521,10 +530,16 @@ contains call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) if(info /= psb_success_) cycle end do +#if 0 + !Disable parallel generation for the time being !$omp end do +#endif deallocate(val,irow,icol) end block +#if 0 + !Disable parallel generation for the time being !$omp end parallel +#endif tgen = psb_wtime()-t1 @@ -595,10 +610,10 @@ end module psb_s_pde2d_mod program psb_s_pde2d use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_util_mod use psb_s_pde2d_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -647,7 +662,7 @@ program psb_s_pde2d call psb_init(ctxt) call psb_info(ctxt,iam,np) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP parallel shared(nth) !$OMP master nth = omp_get_num_threads() @@ -757,8 +772,22 @@ program psb_s_pde2d call psb_barrier(ctxt) t1 = psb_wtime() eps = 1.d-6 - call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + select case(psb_toupper(trim(kmethd))) + case('RICHARDSON') + call psb_richardson(a,prec,bv,xxv,eps,& + & desc_a,info,itmax=itmax,iter=iter,& + & err=err,itrace=itrace,& + & istop=istopc) + case('BICGSTAB','BICGSTABL','BICG','CG','CGS','FCG','GCR','RGMRES') + call psb_krylov(kmethd,a,prec,bv,xxv,eps,& + & desc_a,info,itmax=itmax,iter=iter,err=err,itrace=itrace,& + & istop=istopc,irst=irst) + case default + write(psb_err_unit,*) 'Unknown method :"',trim(kmethd),'"' + info=psb_err_invalid_input_ + call psb_errpush(info,name) + goto 9999 + end select if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/test/pargen/psb_s_pde3d.F90 b/test/pdegen/psb_s_pde3d.F90 similarity index 96% rename from 
test/pargen/psb_s_pde3d.F90 rename to test/pdegen/psb_s_pde3d.F90 index 2938a4ff..49b7e979 100644 --- a/test/pargen/psb_s_pde3d.F90 +++ b/test/pdegen/psb_s_pde3d.F90 @@ -172,7 +172,7 @@ contains & f,amold,vmold,imold,partition,nrl,iv) use psb_base_mod use psb_util_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif ! @@ -297,6 +297,7 @@ contains ! contiguous rows ! call psb_cdall(ctxt,desc_a,info,nl=nr) + if (info /=0) goto 9999 myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -324,6 +325,7 @@ contains ! process that owns it ! call psb_cdall(ctxt,desc_a,info,vg=iv) + if (info /=0) goto 9999 myidx = desc_a%get_global_indices() nlr = size(myidx) @@ -332,7 +334,7 @@ contains ! A nifty MPI function will split the process list npdims = 0 -#if defined(SERIAL_MPI) +#if defined(PSB_SERIAL_MPI) npdims = 1 #else call mpi_dims_create(np,3,npdims,info) @@ -343,7 +345,7 @@ contains allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) ! We can reuse idx2ijk for process indices as well. - call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=mzero) ! Now let's split the 3D cube in hexahedra call dist1Didx(bndx,idim,npx) mynx = bndx(iamx+1)-bndx(iamx) @@ -378,6 +380,7 @@ contains ! the set of global indices it owns. ! call psb_cdall(ctxt,desc_a,info,vl=myidx) + if (info /=0) goto 9999 ! ! Specify process topology @@ -386,7 +389,7 @@ contains ! ! Use adjcncy methods ! - integer(psb_mpk_), allocatable :: neighbours(:) + integer(psb_ipk_), allocatable :: neighbours(:) integer(psb_mpk_) :: cnt logical, parameter :: debug_adj=.true. 
if (debug_adj.and.(np > 1)) then @@ -394,27 +397,27 @@ contains allocate(neighbours(np)) if (iamx < npx-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=mzero) end if if (iamy < npy-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=mzero) end if if (iamz < npz-1) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=mzero) end if if (iamx >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=mzero) end if if (iamy >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=mzero) end if if (iamz >0) then cnt = cnt + 1 - call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=0) + call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=mzero) end if call psb_realloc(cnt, neighbours,info) call desc_a%set_p_adjcncy(neighbours) @@ -450,7 +453,10 @@ contains call psb_barrier(ctxt) t1 = psb_wtime() +#if 0 + !Disable parallel generation for the time being !$omp parallel shared(deltah,myidx,a,desc_a) +#endif ! block integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth @@ -458,7 +464,7 @@ contains integer(psb_lpk_), allocatable :: irow(:),icol(:) real(psb_spk_), allocatable :: val(:) real(psb_spk_) :: x,y,z, zt(nb) -#if defined(OPENMP) +#if defined(PSB_OPENMP) nth = omp_get_num_threads() ith = omp_get_thread_num() #else @@ -473,7 +479,10 @@ contains !goto 9999 endif +#if 0 + !Disable parallel generation for the time being !$omp do schedule(dynamic) +#endif ! 
do ii=1, nlr, nb if(info /= psb_success_) cycle @@ -557,7 +566,7 @@ contains endif end do -#if defined(OPENMP) +#if defined(PSB_OPENMP) !!$ write(0,*) omp_get_thread_num(),' Check insertion ',& !!$ & irow(1:icoeff-1),':',icol(1:icoeff-1) #endif @@ -569,11 +578,16 @@ contains call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) if(info /= psb_success_) cycle end do +#if 0 + !Disable parallel generation for the time being !$omp end do +#endif deallocate(val,irow,icol) end block +#if 0 + !Disable parallel generation for the time being !$omp end parallel - +#endif tgen = psb_wtime()-t1 if(info /= psb_success_) then info=psb_err_from_subroutine_ @@ -653,10 +667,10 @@ end module psb_s_pde3d_mod program psb_s_pde3d use psb_base_mod use psb_prec_mod - use psb_krylov_mod + use psb_linsolve_mod use psb_util_mod use psb_s_pde3d_mod -#if defined(OPENMP) +#if defined(PSB_OPENMP) use omp_lib #endif implicit none @@ -705,7 +719,7 @@ program psb_s_pde3d call psb_init(ctxt) call psb_info(ctxt,iam,np) -#if defined(OPENMP) +#if defined(PSB_OPENMP) !$OMP parallel shared(nth) !$OMP master nth = omp_get_num_threads() @@ -814,8 +828,22 @@ program psb_s_pde3d call psb_barrier(ctxt) t1 = psb_wtime() eps = 1.d-6 - call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& - & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + select case(psb_toupper(trim(kmethd))) + case('RICHARDSON') + call psb_richardson(a,prec,bv,xxv,eps,& + & desc_a,info,itmax=itmax,iter=iter,& + & err=err,itrace=itrace,& + & istop=istopc) + case('BICGSTAB','BICGSTABL','BICG','CG','CGS','FCG','GCR','RGMRES') + call psb_krylov(kmethd,a,prec,bv,xxv,eps,& + & desc_a,info,itmax=itmax,iter=iter,err=err,itrace=itrace,& + & istop=istopc,irst=irst) + case default + write(psb_err_unit,*) 'Unknown method :"',trim(kmethd),'"' + info=psb_err_invalid_input_ + call psb_errpush(info,name) + goto 9999 + end select if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git 
a/test/pdegen/runs/psb_pde2d.inp b/test/pdegen/runs/psb_pde2d.inp new file mode 100644 index 00000000..e07773dc --- /dev/null +++ b/test/pdegen/runs/psb_pde2d.inp @@ -0,0 +1,18 @@ +17 Number of entries below this +BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG GCR RICHARDSON +BJAC Preconditioner NONE DIAG BJAC +CSR Storage format for matrix A: CSR COO +300 Domain size (actual system is this**2 (pde2d) ) +3 Partition: 1 BLOCK 3 3D +2 Stopping criterion 1 2 +0300 MAXIT +10 ITRACE +002 IRST restart for RGMRES and BiCGSTABL +INVK Block Solver ILU,ILUT,INVK,INVT,AINV +NONE If ILU : MILU or NONE otherwise ignored +NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored +0 Level of fill for forward factorization +1 Level of fill for inverse factorization (only INVK,INVT) +1E-1 Threshold for forward factorization +1E-1 Threshold for inverse factorization (Only INVK, INVT) +LLK What orthogonalization algorithm? (Only AINV) diff --git a/test/pargen/runs/ppde.inp b/test/pdegen/runs/psb_pde3d.inp similarity index 79% rename from test/pargen/runs/ppde.inp rename to test/pdegen/runs/psb_pde3d.inp index 40e3358d..a924677a 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pdegen/runs/psb_pde3d.inp @@ -1,18 +1,18 @@ 17 Number of entries below this -BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR +BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG GCR RICHARDSON BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -140 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +100 Domain size (actual system is this**3 (pde3d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 0200 MAXIT -05 ITRACE +10 ITRACE 002 IRST restart for RGMRES and BiCGSTABL -INVK Block Solver ILU,ILUT,INVK,AINVT,AORTH +INVK Block Solver ILU,ILUT,INVK,INVT,AINV NONE If ILU : MILU or NONE othewise ignored NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored 0 Level of fill for forward factorization -1 Level of fill for inverse 
factorization (only INVK) +1 Level of fill for inverse factorization (only INVK,INVT) 1E-1 Threshold for forward factorization -1E-1 Threshold for inverse factorization (Only INVK, AINVT) -LLK What orthogonalization algorithm? (Only AINVT) +1E-1 Threshold for inverse factorization (Only INVK, INVT) +LLK What orthogonalization algorithm? (Only AINV) diff --git a/test/serial/Makefile b/test/serial/Makefile index 0136949a..e714de4c 100644 --- a/test/serial/Makefile +++ b/test/serial/Makefile @@ -6,7 +6,7 @@ INCDIR=$(INSTALLDIR)/include/ MODDIR=$(INSTALLDIR)/modules/ include $(INCDIR)/Make.inc.psblas LIBDIR=$(INSTALLDIR)/lib/ -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). diff --git a/test/torture/Makefile b/test/torture/Makefile index 8a98657e..a6dd3165 100644 --- a/test/torture/Makefile +++ b/test/torture/Makefile @@ -3,7 +3,7 @@ INCDIR=$(INSTALLDIR)/include/ MODDIR=$(INSTALLDIR)/modules/ include $(INCDIR)/Make.inc.psblas LIBDIR=$(INSTALLDIR)/lib/ -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) CCOPT= -g FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). diff --git a/test/util/Makefile b/test/util/Makefile index 404170fb..5dbf305f 100644 --- a/test/util/Makefile +++ b/test/util/Makefile @@ -6,7 +6,7 @@ include $(INCDIR)/Make.inc.psblas # Libraries used # LIBDIR=$(INSTALLDIR)/lib/ -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). 
diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt new file mode 100644 index 00000000..7656355e --- /dev/null +++ b/util/CMakeLists.txt @@ -0,0 +1,59 @@ +# Fortran sources of the util library; collected below as absolute paths in util_source_files. +set(PSB_util_source_files + psb_s_mmio_impl.f90 + psb_s_mat_dist_mod.f90 + psb_renum_mod.f90 + psb_c_mmio_impl.f90 + psb_d_hbio_impl.f90 + psb_d_mat_dist_impl.f90 + psb_z_mat_dist_impl.f90 + psb_c_hbio_impl.f90 + psb_s_mat_dist_impl.f90 + psb_hbio_mod.f90 + psb_gps_mod.f90 + psb_z_renum_mod.f90 + psb_c_mat_dist_impl.f90 + psb_d_mat_dist_mod.f90 + psb_d_renum_mod.f90 + psb_s_renum_mod.f90 + psb_util_mod.f90 + psb_d_mmio_impl.f90 + psb_s_hbio_impl.f90 + psb_c_renum_mod.f90 + psb_mat_dist_mod.f90 + psb_z_mmio_impl.f90 + psb_c_mat_dist_mod.f90 + psb_blockpart_mod.f90 + psb_z_mat_dist_mod.f90 + psb_z_hbio_impl.f90 + psb_c_renum_impl.F90 + psb_partidx_mod.F90 + psi_build_mtpart.F90 + psb_z_renum_impl.F90 + psb_d_renum_impl.F90 + psb_i_mmio_impl.F90 + psb_metispart_mod.F90 + psb_mmio_mod.F90 + ) +foreach(file IN LISTS PSB_util_source_files) + list(APPEND util_source_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() + +set(PSB_util_source_C_files + + psb_amd_order.c + ) +# METIS interface source; the name must match the foreach() below that appends it to util_source_C_files. +set(PSB_util_source_C_metis_files + + psb_metis_int.c + ) + + +foreach(file IN LISTS PSB_util_source_C_files) + list(APPEND util_source_C_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() + +foreach(file IN LISTS PSB_util_source_C_metis_files) + list(APPEND util_source_C_files ${CMAKE_CURRENT_LIST_DIR}/${file}) +endforeach() diff --git a/util/Makefile b/util/Makefile index 9b70855f..96820899 100644 --- a/util/Makefile +++ b/util/Makefile @@ -27,7 +27,7 @@ OBJS=$(COBJS) $(MODOBJS) $(IMPLOBJS) LOCAL_MODS=$(MODOBJS:.o=$(.mod)) LIBNAME=$(UTILLIBNAME) FINCLUDES=$(FMFLAG). $(FMFLAG)$(MODDIR) - +CINCLUDES=-I.
-I$(INCDIR) objs: $(OBJS) /bin/cp -p $(CPUPDFLAG) *$(.mod) $(MODDIR) @@ -47,9 +47,9 @@ psb_renum_mod.o: psb_s_renum_mod.o psb_d_renum_mod.o psb_c_renum_mod.o psb_z_ren $(IMPLOBJS): $(BASEOBJS) veryclean: clean - /bin/rm -f $(HERE)/$(LIBNAME) + /bin/rm -f $(HERE)/$(LIBNAME) clean: - /bin/rm -f $(OBJS) $(LOCAL_MODS) + /bin/rm -f $(OBJS) $(LOCAL_MODS) *$(.mod) veryclean: clean diff --git a/util/psb_amd_order.c b/util/psb_amd_order.c index 62c7c49f..13130396 100644 --- a/util/psb_amd_order.c +++ b/util/psb_amd_order.c @@ -29,14 +29,16 @@ POSSIBILITY OF SUCH DAMAGE. */ -#ifdef HAVE_AMD_ +#include "psb_config.h" +#include "psb_types.h" +#ifdef PSB_HAVE_AMD #include "amd.h" #endif int psb_amd_order(int n, int Ap[], int Ai[], int P[]) { int i; -#ifdef HAVE_AMD_ +#ifdef PSB_HAVE_AMD i= amd_order(n,Ap,Ai, P,(double *)NULL, (double *)NULL); if ((i==AMD_OK)||(i==AMD_OK_BUT_JUMBLED)) return(0); #endif diff --git a/util/psb_c_mat_dist_impl.f90 b/util/psb_c_mat_dist_impl.f90 index e358bf25..26d04041 100644 --- a/util/psb_c_mat_dist_impl.f90 +++ b/util/psb_c_mat_dist_impl.f90 @@ -30,7 +30,7 @@ ! ! subroutine psb_cmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -81,16 +81,17 @@ subroutine psb_cmatdist(a_glob, a, ctxt, desc_a,& integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_c_base_sparse_mat), optional :: mold + class(psb_c_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) ! 
local variables logical :: use_parts, use_vg, use_vsz - integer(psb_ipk_) :: np, iam, np_sharing - integer(psb_ipk_) :: k_count, root, liwork, nnzero, nrhs,& - & i, ll, nz, isize, iproc, nnr, err, err_act + integer(psb_mpk_) :: np, iam, root, iproc + integer(psb_ipk_) :: k_count, inp, np_sharing, liwork, nnzero, nrhs,& + & i, ll, nz, isize, nnr, err, err_act integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp integer(psb_ipk_), allocatable :: iwork(:), iwrk2(:) integer(psb_lpk_), allocatable :: irow(:),icol(:) @@ -111,7 +112,7 @@ subroutine psb_cmatdist(a_glob, a, ctxt, desc_a,& root = psb_root_ end if call psb_info(ctxt, iam, np) - + use_parts = present(parts) use_vg = present(vg) use_vsz = present(vsz) @@ -194,8 +195,9 @@ subroutine psb_cmatdist(a_glob, a, ctxt, desc_a,& end if do while (i_count <= nrow) - if (use_parts) then - call parts(i_count,nrow,np,iwork, np_sharing) + if (use_parts) then + inp = np + call parts(i_count,nrow,inp,iwork, np_sharing) ! ! np_sharing allows for overlap in the data distribution. ! 
If an index is overlapped, then we have to send its row @@ -210,7 +212,7 @@ subroutine psb_cmatdist(a_glob, a, ctxt, desc_a,& j_count = j_count + 1 if (j_count-i_count >= nb) exit if (j_count > nrow) exit - call parts(j_count,nrow,np,iwrk2, np_sharing) + call parts(j_count,nrow,inp,iwrk2, np_sharing) if (np_sharing /= 1 ) exit if (iwrk2(1) /= iproc ) exit end do @@ -321,7 +323,7 @@ subroutine psb_cmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t0 = psb_wtime() - call psb_cdasb(desc_a,info) + call psb_cdasb(desc_a,info,mold=imold) t1 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -332,7 +334,7 @@ subroutine psb_cmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t2 = psb_wtime() - call psb_spasb(a,desc_a,info,afmt=fmt,mold=mold) + call psb_spasb(a,desc_a,info,afmt=fmt,mold=amold) t3 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -371,7 +373,7 @@ end subroutine psb_cmatdist subroutine psb_lcmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -422,15 +424,16 @@ subroutine psb_lcmatdist(a_glob, a, ctxt, desc_a,& integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_c_base_sparse_mat), optional :: mold + class(psb_c_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) ! 
local variables logical :: use_parts, use_vg, use_vsz - integer(psb_ipk_) :: np, iam, np_sharing, root, iproc - integer(psb_ipk_) :: err_act, il, inz + integer(psb_mpk_) :: np, iam, root, iproc + integer(psb_ipk_) :: err_act, il, inz, np_sharing, inp integer(psb_lpk_) :: k_count, liwork, nnzero, nrhs,& & i, ll, nz, isize, nnr, err integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp @@ -531,8 +534,9 @@ subroutine psb_lcmatdist(a_glob, a, ctxt, desc_a,& end if do while (i_count <= nrow) - if (use_parts) then - call parts(i_count,nrow,np,iwork, np_sharing) + if (use_parts) then + inp = np + call parts(i_count,nrow,inp,iwork, np_sharing) ! ! np_sharing allows for overlap in the data distribution. ! If an index is overlapped, then we have to send its row @@ -547,7 +551,7 @@ subroutine psb_lcmatdist(a_glob, a, ctxt, desc_a,& j_count = j_count + 1 if (j_count-i_count >= nb) exit if (j_count > nrow) exit - call parts(j_count,nrow,np,iwrk2, np_sharing) + call parts(j_count,nrow,inp,iwrk2, np_sharing) if (np_sharing /= 1 ) exit if (iwrk2(1) /= iproc ) exit end do @@ -660,7 +664,7 @@ subroutine psb_lcmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t0 = psb_wtime() - call psb_cdasb(desc_a,info) + call psb_cdasb(desc_a,info,mold=imold) t1 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -671,7 +675,7 @@ subroutine psb_lcmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t2 = psb_wtime() - call psb_spasb(a,desc_a,info,afmt=fmt,mold=mold) + call psb_spasb(a,desc_a,info,afmt=fmt,mold=amold) t3 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ diff --git a/util/psb_c_mat_dist_mod.f90 b/util/psb_c_mat_dist_mod.f90 index de48dabf..c0fa0031 100644 --- a/util/psb_c_mat_dist_mod.f90 +++ b/util/psb_c_mat_dist_mod.f90 @@ -32,11 +32,11 @@ module psb_c_mat_dist_mod use psb_base_mod, only : psb_ipk_, psb_spk_, psb_desc_type, psb_parts, & & psb_cspmat_type, psb_c_base_sparse_mat, psb_c_vect_type, & - & 
psb_lcspmat_type, psb_ctxt_type + & psb_lcspmat_type, psb_ctxt_type, psb_i_base_vect_type interface psb_matdist subroutine psb_cmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -87,13 +87,14 @@ module psb_c_mat_dist_mod integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_c_base_sparse_mat), optional :: mold + class(psb_c_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) end subroutine psb_cmatdist subroutine psb_lcmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -144,7 +145,8 @@ module psb_c_mat_dist_mod integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_c_base_sparse_mat), optional :: mold + class(psb_c_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) diff --git a/util/psb_c_renum_impl.F90 b/util/psb_c_renum_impl.F90 index f57003a1..9b342d6b 100644 --- a/util/psb_c_renum_impl.F90 +++ b/util/psb_c_renum_impl.F90 @@ -188,7 +188,7 @@ contains subroutine psb_mat_renum_amd(a,info,operm) -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) use iso_c_binding #endif use psb_base_mod @@ -198,7 +198,7 @@ contains integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) ! 
-#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) interface function psb_amd_order(n,ap,ai,p)& & result(res) bind(c,name='psb_amd_order') @@ -223,7 +223,7 @@ contains name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(IPK4) +#if defined(PSB_HAVE_AMD) && defined(PSB_IPK4) info = psb_success_ nr = a%get_nrows() @@ -451,7 +451,7 @@ contains subroutine psb_lmat_renum_amd(a,info,operm) -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) use iso_c_binding #endif use psb_base_mod @@ -461,7 +461,7 @@ contains integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) interface function psb_amd_order(n,ap,ai,p)& & result(res) bind(c,name='psb_amd_order') @@ -486,7 +486,7 @@ contains name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(LPK4) +#if defined(PSB_HAVE_AMD) && defined(PSB_LPK4) info = psb_success_ nr = a%get_nrows() diff --git a/util/psb_d_mat_dist_impl.f90 b/util/psb_d_mat_dist_impl.f90 index 3c683254..44d853e4 100644 --- a/util/psb_d_mat_dist_impl.f90 +++ b/util/psb_d_mat_dist_impl.f90 @@ -30,7 +30,7 @@ ! ! subroutine psb_dmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -81,16 +81,17 @@ subroutine psb_dmatdist(a_glob, a, ctxt, desc_a,& integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_d_base_sparse_mat), optional :: mold + class(psb_d_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) ! 
local variables logical :: use_parts, use_vg, use_vsz - integer(psb_ipk_) :: np, iam, np_sharing - integer(psb_ipk_) :: k_count, root, liwork, nnzero, nrhs,& - & i, ll, nz, isize, iproc, nnr, err, err_act + integer(psb_mpk_) :: np, iam, root, iproc + integer(psb_ipk_) :: k_count, inp, np_sharing, liwork, nnzero, nrhs,& + & i, ll, nz, isize, nnr, err, err_act integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp integer(psb_ipk_), allocatable :: iwork(:), iwrk2(:) integer(psb_lpk_), allocatable :: irow(:),icol(:) @@ -111,7 +112,7 @@ subroutine psb_dmatdist(a_glob, a, ctxt, desc_a,& root = psb_root_ end if call psb_info(ctxt, iam, np) - + use_parts = present(parts) use_vg = present(vg) use_vsz = present(vsz) @@ -194,8 +195,9 @@ subroutine psb_dmatdist(a_glob, a, ctxt, desc_a,& end if do while (i_count <= nrow) - if (use_parts) then - call parts(i_count,nrow,np,iwork, np_sharing) + if (use_parts) then + inp = np + call parts(i_count,nrow,inp,iwork, np_sharing) ! ! np_sharing allows for overlap in the data distribution. ! 
If an index is overlapped, then we have to send its row @@ -210,7 +212,7 @@ subroutine psb_dmatdist(a_glob, a, ctxt, desc_a,& j_count = j_count + 1 if (j_count-i_count >= nb) exit if (j_count > nrow) exit - call parts(j_count,nrow,np,iwrk2, np_sharing) + call parts(j_count,nrow,inp,iwrk2, np_sharing) if (np_sharing /= 1 ) exit if (iwrk2(1) /= iproc ) exit end do @@ -321,7 +323,7 @@ subroutine psb_dmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t0 = psb_wtime() - call psb_cdasb(desc_a,info) + call psb_cdasb(desc_a,info,mold=imold) t1 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -332,7 +334,7 @@ subroutine psb_dmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t2 = psb_wtime() - call psb_spasb(a,desc_a,info,afmt=fmt,mold=mold) + call psb_spasb(a,desc_a,info,afmt=fmt,mold=amold) t3 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -371,7 +373,7 @@ end subroutine psb_dmatdist subroutine psb_ldmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -422,15 +424,16 @@ subroutine psb_ldmatdist(a_glob, a, ctxt, desc_a,& integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_d_base_sparse_mat), optional :: mold + class(psb_d_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) ! 
local variables logical :: use_parts, use_vg, use_vsz - integer(psb_ipk_) :: np, iam, np_sharing, root, iproc - integer(psb_ipk_) :: err_act, il, inz + integer(psb_mpk_) :: np, iam, root, iproc + integer(psb_ipk_) :: err_act, il, inz, np_sharing, inp integer(psb_lpk_) :: k_count, liwork, nnzero, nrhs,& & i, ll, nz, isize, nnr, err integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp @@ -531,8 +534,9 @@ subroutine psb_ldmatdist(a_glob, a, ctxt, desc_a,& end if do while (i_count <= nrow) - if (use_parts) then - call parts(i_count,nrow,np,iwork, np_sharing) + if (use_parts) then + inp = np + call parts(i_count,nrow,inp,iwork, np_sharing) ! ! np_sharing allows for overlap in the data distribution. ! If an index is overlapped, then we have to send its row @@ -547,7 +551,7 @@ subroutine psb_ldmatdist(a_glob, a, ctxt, desc_a,& j_count = j_count + 1 if (j_count-i_count >= nb) exit if (j_count > nrow) exit - call parts(j_count,nrow,np,iwrk2, np_sharing) + call parts(j_count,nrow,inp,iwrk2, np_sharing) if (np_sharing /= 1 ) exit if (iwrk2(1) /= iproc ) exit end do @@ -660,7 +664,7 @@ subroutine psb_ldmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t0 = psb_wtime() - call psb_cdasb(desc_a,info) + call psb_cdasb(desc_a,info,mold=imold) t1 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -671,7 +675,7 @@ subroutine psb_ldmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t2 = psb_wtime() - call psb_spasb(a,desc_a,info,afmt=fmt,mold=mold) + call psb_spasb(a,desc_a,info,afmt=fmt,mold=amold) t3 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ diff --git a/util/psb_d_mat_dist_mod.f90 b/util/psb_d_mat_dist_mod.f90 index 2c7f9290..978c62dd 100644 --- a/util/psb_d_mat_dist_mod.f90 +++ b/util/psb_d_mat_dist_mod.f90 @@ -32,11 +32,11 @@ module psb_d_mat_dist_mod use psb_base_mod, only : psb_ipk_, psb_dpk_, psb_desc_type, psb_parts, & & psb_dspmat_type, psb_d_base_sparse_mat, psb_d_vect_type, & - & 
psb_ldspmat_type, psb_ctxt_type + & psb_ldspmat_type, psb_ctxt_type, psb_i_base_vect_type interface psb_matdist subroutine psb_dmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -87,13 +87,14 @@ module psb_d_mat_dist_mod integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_d_base_sparse_mat), optional :: mold + class(psb_d_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) end subroutine psb_dmatdist subroutine psb_ldmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -144,7 +145,8 @@ module psb_d_mat_dist_mod integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_d_base_sparse_mat), optional :: mold + class(psb_d_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) diff --git a/util/psb_d_renum_impl.F90 b/util/psb_d_renum_impl.F90 index 1c7928f5..08999fc9 100644 --- a/util/psb_d_renum_impl.F90 +++ b/util/psb_d_renum_impl.F90 @@ -188,7 +188,7 @@ contains subroutine psb_mat_renum_amd(a,info,operm) -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) use iso_c_binding #endif use psb_base_mod @@ -198,7 +198,7 @@ contains integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) ! 
-#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) interface function psb_amd_order(n,ap,ai,p)& & result(res) bind(c,name='psb_amd_order') @@ -223,7 +223,7 @@ contains name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(IPK4) +#if defined(PSB_HAVE_AMD) && defined(PSB_IPK4) info = psb_success_ nr = a%get_nrows() @@ -451,7 +451,7 @@ contains subroutine psb_lmat_renum_amd(a,info,operm) -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) use iso_c_binding #endif use psb_base_mod @@ -461,7 +461,7 @@ contains integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) interface function psb_amd_order(n,ap,ai,p)& & result(res) bind(c,name='psb_amd_order') @@ -486,7 +486,7 @@ contains name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(LPK4) +#if defined(PSB_HAVE_AMD) && defined(PSB_LPK4) info = psb_success_ nr = a%get_nrows() diff --git a/util/psb_i_mmio_impl.F90 b/util/psb_i_mmio_impl.F90 index b8a47eb9..c1998997 100644 --- a/util/psb_i_mmio_impl.F90 +++ b/util/psb_i_mmio_impl.F90 @@ -284,7 +284,7 @@ subroutine mm_ivet1_write(b, header, info, iunit, filename) end subroutine mm_ivet1_write -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine mm_lvet_read(b, info, iunit, filename) use psb_base_mod implicit none diff --git a/util/psb_metis_int.c b/util/psb_metis_int.c index 5f451cb7..4ecc55f6 100644 --- a/util/psb_metis_int.c +++ b/util/psb_metis_int.c @@ -1,8 +1,8 @@ #include -#if defined(HAVE_METIS_) #include "psb_metis_int.h" +#if defined(PSB_HAVE_METIS) -#if defined(METIS_REAL_32) +#if defined(PSB_METIS_REAL_32) int metis_PartGraphKway_C(idx_t *n, idx_t *ixadj, idx_t *iadj, idx_t *ivwg, idx_t *iajw, idx_t *nparts, float *weights, @@ -26,7 +26,7 @@ int metis_PartGraphKway_C(idx_t *n, idx_t *ixadj, idx_t *iadj, idx_t *ivwg, /* NULL,NULL,NULL,(idx_t *)nparts,NULL,NULL,NULL, */ /* &objval,(idx_t *)graphpart); 
*/ res = METIS_PartGraphKway((idx_t*)n,(idx_t *)&ncon,(idx_t *)ixadj,(idx_t *)iadj, - NULL,NULL,NULL,(idx_t *)nparts,weights,NULL,options, + NULL,NULL,NULL,(idx_t *)nparts,(void *)weights,NULL,options, &objval,(idx_t *)graphpart); } if (res == METIS_OK) { @@ -36,7 +36,7 @@ int metis_PartGraphKway_C(idx_t *n, idx_t *ixadj, idx_t *iadj, idx_t *ivwg, } } -#elif defined(METIS_REAL_64) +#elif defined(PSB_METIS_REAL_64) int metis_PartGraphKway_C(idx_t *n, idx_t *ixadj, idx_t *iadj, idx_t *ivwg, idx_t *iajw, idx_t *nparts, double *weights, diff --git a/util/psb_metis_int.h.in b/util/psb_metis_int.h.in index cd23c801..873fd3c9 100644 --- a/util/psb_metis_int.h.in +++ b/util/psb_metis_int.h.in @@ -1,23 +1,8 @@ #include #include +#include "psb_config.h" +#include "psb_types.h" +#if defined(PSB_HAVE_METIS) #include "@METISINCFILE@" - -typedef int32_t psb_m_t; - -#if defined(IPK4) && defined(LPK4) -typedef int32_t psb_i_t; -typedef int32_t psb_l_t; -#elif defined(IPK4) && defined(LPK8) -typedef int32_t psb_i_t; -typedef int64_t psb_l_t; -#elif defined(IPK8) && defined(LPK8) -typedef int64_t psb_i_t; -typedef int64_t psb_l_t; -#else #endif -typedef int64_t psb_e_t; -typedef float psb_s_t; -typedef double psb_d_t; -typedef float complex psb_c_t; -typedef double complex psb_z_t; diff --git a/util/psb_metispart_mod.F90 b/util/psb_metispart_mod.F90 index 69dae5a8..413bbdff 100644 --- a/util/psb_metispart_mod.F90 +++ b/util/psb_metispart_mod.F90 @@ -77,9 +77,9 @@ module psb_metispart_mod integer(psb_lpk_), intent(in) :: n, nparts integer(psb_lpk_), intent(in) :: ja(:), irp(:) integer(psb_lpk_), allocatable, intent(inout) :: vect(:) -#if defined(METIS_REAL_32) || !defined(HAVE_METIS) +#if defined(PSB_METIS_REAL_32) || !defined(PSB_HAVE_METIS) real(psb_spk_),optional, intent(in) :: weights(:) -#elif defined(METIS_REAL_64) +#elif defined(PSB_METIS_REAL_64) real(psb_dpk_),optional, intent(in) :: weights(:) #else choke on me; @@ -117,7 +117,8 @@ contains implicit none type(psb_ctxt_type) 
:: ctxt integer(psb_ipk_) :: root - integer(psb_ipk_) :: me, np, info + integer(psb_ipk_) :: info + integer(psb_mpk_) :: me, np, mroot integer(psb_lpk_) :: n call psb_info(ctxt,me,np) @@ -128,8 +129,8 @@ contains call psb_abort(ctxt) return endif - - if (me == root) then + mroot = root + if (me == mroot) then if (.not.allocated(graph_vect)) then write(psb_err_unit,*) 'Fatal error in DISTR_MTPART: vector GRAPH_VECT ',& & 'not initialized' @@ -137,9 +138,9 @@ contains return endif n = size(graph_vect) - call psb_bcast(ctxt,n,root=root) + call psb_bcast(ctxt,n,root=mroot) else - call psb_bcast(ctxt,n,root=root) + call psb_bcast(ctxt,n,root=mroot) allocate(graph_vect(n),stat=info) if (info /= psb_success_) then @@ -148,7 +149,7 @@ contains return endif endif - call psb_bcast(ctxt,graph_vect(1:n),root=root) + call psb_bcast(ctxt,graph_vect(1:n),root=mroot) return @@ -187,9 +188,9 @@ contains type(psb_ld_csr_sparse_mat), intent(in) :: a integer(psb_lpk_) :: nparts real(psb_dpk_), optional :: weights(:) -#if defined(METIS_REAL_32) || !defined(HAVE_METIS) +#if defined(PSB_METIS_REAL_32) || !defined(PSB_HAVE_METIS) real(psb_spk_), allocatable :: wgh_(:) -#elif defined(METIS_REAL_64) +#elif defined(PSB_METIS_REAL_64) real(psb_dpk_), allocatable :: wgh_(:) #else choke on me; @@ -231,9 +232,9 @@ contains type(psb_lz_csr_sparse_mat), intent(in) :: a integer(psb_lpk_) :: nparts real(psb_dpk_), optional :: weights(:) -#if defined(METIS_REAL_32) || !defined(HAVE_METIS) +#if defined(PSB_METIS_REAL_32) || !defined(PSB_HAVE_METIS) real(psb_spk_), allocatable :: wgh_(:) -#elif defined(METIS_REAL_64) +#elif defined(PSB_METIS_REAL_64) real(psb_dpk_), allocatable :: wgh_(:) #else choke on me; @@ -292,9 +293,9 @@ contains type(psb_lc_csr_sparse_mat), intent(in) :: a integer(psb_lpk_) :: nparts real(psb_spk_), optional :: weights(:) -#if defined(METIS_REAL_32) || !defined(HAVE_METIS) +#if defined(PSB_METIS_REAL_32) || !defined(PSB_HAVE_METIS) real(psb_spk_), allocatable :: wgh_(:) -#elif 
defined(METIS_REAL_64) +#elif defined(PSB_METIS_REAL_64) real(psb_dpk_), allocatable :: wgh_(:) #else choke on me; @@ -320,9 +321,9 @@ contains type(psb_ls_csr_sparse_mat), intent(in) :: a integer(psb_lpk_) :: nparts real(psb_spk_), optional :: weights(:) -#if defined(METIS_REAL_32) || !defined(HAVE_METIS) +#if defined(PSB_METIS_REAL_32) || !defined(PSB_HAVE_METIS) real(psb_spk_), allocatable :: wgh_(:) -#elif defined(METIS_REAL_64) +#elif defined(PSB_METIS_REAL_64) real(psb_dpk_), allocatable :: wgh_(:) #else choke on me; diff --git a/util/psb_mmio_mod.F90 b/util/psb_mmio_mod.F90 index f5d45120..533312bb 100644 --- a/util/psb_mmio_mod.F90 +++ b/util/psb_mmio_mod.F90 @@ -42,7 +42,7 @@ module psb_mmio_mod public mm_mat_read, mm_mat_write, mm_array_read, mm_array_write -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) public mm_vet_read, mm_vet_write #endif @@ -127,7 +127,7 @@ module psb_mmio_mod integer(psb_ipk_), optional, intent(in) :: iunit character(len=*), optional, intent(in) :: filename end subroutine mm_ivet2_read -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine mm_lvet_read(b, info, iunit, filename) import :: psb_dpk_, psb_ipk_, psb_lpk_ implicit none @@ -196,7 +196,7 @@ module psb_mmio_mod end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! defined(PSB_HAVE_BUGGY_GENERICS) interface mm_vet_read procedure mm_svet_read, mm_dvet_read, mm_cvet_read,& & mm_zvet_read, mm_svet2_read, mm_dvet2_read, & @@ -296,7 +296,7 @@ module psb_mmio_mod integer(psb_ipk_), optional, intent(in) :: iunit character(len=*), optional, intent(in) :: filename end subroutine mm_ivet1_write -#if defined(IPK4) && defined(LPK8) +#if defined(PSB_IPK4) && defined(PSB_LPK8) subroutine mm_lvet2_write(b, header, info, iunit, filename) import :: psb_dpk_, psb_ipk_, psb_lpk_ implicit none @@ -372,7 +372,7 @@ module psb_mmio_mod end subroutine mm_lvect_write end interface -#if ! defined(HAVE_BUGGY_GENERICS) +#if ! 
defined(PSB_HAVE_BUGGY_GENERICS) interface mm_vet_write procedure mm_svet1_write, mm_dvet1_write, mm_cvet1_write,& & mm_zvet1_write, mm_svet2_write, mm_dvet2_write, & diff --git a/util/psb_s_mat_dist_impl.f90 b/util/psb_s_mat_dist_impl.f90 index 6c8bd792..126938bf 100644 --- a/util/psb_s_mat_dist_impl.f90 +++ b/util/psb_s_mat_dist_impl.f90 @@ -30,7 +30,7 @@ ! ! subroutine psb_smatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -81,16 +81,17 @@ subroutine psb_smatdist(a_glob, a, ctxt, desc_a,& integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_s_base_sparse_mat), optional :: mold + class(psb_s_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) ! 
local variables logical :: use_parts, use_vg, use_vsz - integer(psb_ipk_) :: np, iam, np_sharing - integer(psb_ipk_) :: k_count, root, liwork, nnzero, nrhs,& - & i, ll, nz, isize, iproc, nnr, err, err_act + integer(psb_mpk_) :: np, iam, root, iproc + integer(psb_ipk_) :: k_count, inp, np_sharing, liwork, nnzero, nrhs,& + & i, ll, nz, isize, nnr, err, err_act integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp integer(psb_ipk_), allocatable :: iwork(:), iwrk2(:) integer(psb_lpk_), allocatable :: irow(:),icol(:) @@ -111,7 +112,7 @@ subroutine psb_smatdist(a_glob, a, ctxt, desc_a,& root = psb_root_ end if call psb_info(ctxt, iam, np) - + use_parts = present(parts) use_vg = present(vg) use_vsz = present(vsz) @@ -194,8 +195,9 @@ subroutine psb_smatdist(a_glob, a, ctxt, desc_a,& end if do while (i_count <= nrow) - if (use_parts) then - call parts(i_count,nrow,np,iwork, np_sharing) + if (use_parts) then + inp = np + call parts(i_count,nrow,inp,iwork, np_sharing) ! ! np_sharing allows for overlap in the data distribution. ! 
If an index is overlapped, then we have to send its row @@ -210,7 +212,7 @@ subroutine psb_smatdist(a_glob, a, ctxt, desc_a,& j_count = j_count + 1 if (j_count-i_count >= nb) exit if (j_count > nrow) exit - call parts(j_count,nrow,np,iwrk2, np_sharing) + call parts(j_count,nrow,inp,iwrk2, np_sharing) if (np_sharing /= 1 ) exit if (iwrk2(1) /= iproc ) exit end do @@ -321,7 +323,7 @@ subroutine psb_smatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t0 = psb_wtime() - call psb_cdasb(desc_a,info) + call psb_cdasb(desc_a,info,mold=imold) t1 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -332,7 +334,7 @@ subroutine psb_smatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t2 = psb_wtime() - call psb_spasb(a,desc_a,info,afmt=fmt,mold=mold) + call psb_spasb(a,desc_a,info,afmt=fmt,mold=amold) t3 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -371,7 +373,7 @@ end subroutine psb_smatdist subroutine psb_lsmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -422,15 +424,16 @@ subroutine psb_lsmatdist(a_glob, a, ctxt, desc_a,& integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_s_base_sparse_mat), optional :: mold + class(psb_s_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) ! 
local variables logical :: use_parts, use_vg, use_vsz - integer(psb_ipk_) :: np, iam, np_sharing, root, iproc - integer(psb_ipk_) :: err_act, il, inz + integer(psb_mpk_) :: np, iam, root, iproc + integer(psb_ipk_) :: err_act, il, inz, np_sharing, inp integer(psb_lpk_) :: k_count, liwork, nnzero, nrhs,& & i, ll, nz, isize, nnr, err integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp @@ -531,8 +534,9 @@ subroutine psb_lsmatdist(a_glob, a, ctxt, desc_a,& end if do while (i_count <= nrow) - if (use_parts) then - call parts(i_count,nrow,np,iwork, np_sharing) + if (use_parts) then + inp = np + call parts(i_count,nrow,inp,iwork, np_sharing) ! ! np_sharing allows for overlap in the data distribution. ! If an index is overlapped, then we have to send its row @@ -547,7 +551,7 @@ subroutine psb_lsmatdist(a_glob, a, ctxt, desc_a,& j_count = j_count + 1 if (j_count-i_count >= nb) exit if (j_count > nrow) exit - call parts(j_count,nrow,np,iwrk2, np_sharing) + call parts(j_count,nrow,inp,iwrk2, np_sharing) if (np_sharing /= 1 ) exit if (iwrk2(1) /= iproc ) exit end do @@ -660,7 +664,7 @@ subroutine psb_lsmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t0 = psb_wtime() - call psb_cdasb(desc_a,info) + call psb_cdasb(desc_a,info,mold=imold) t1 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -671,7 +675,7 @@ subroutine psb_lsmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t2 = psb_wtime() - call psb_spasb(a,desc_a,info,afmt=fmt,mold=mold) + call psb_spasb(a,desc_a,info,afmt=fmt,mold=amold) t3 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ diff --git a/util/psb_s_mat_dist_mod.f90 b/util/psb_s_mat_dist_mod.f90 index 47f6381f..9cb48058 100644 --- a/util/psb_s_mat_dist_mod.f90 +++ b/util/psb_s_mat_dist_mod.f90 @@ -32,11 +32,11 @@ module psb_s_mat_dist_mod use psb_base_mod, only : psb_ipk_, psb_spk_, psb_desc_type, psb_parts, & & psb_sspmat_type, psb_s_base_sparse_mat, psb_s_vect_type, & - & 
psb_lsspmat_type, psb_ctxt_type + & psb_lsspmat_type, psb_ctxt_type, psb_i_base_vect_type interface psb_matdist subroutine psb_smatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -87,13 +87,14 @@ module psb_s_mat_dist_mod integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_s_base_sparse_mat), optional :: mold + class(psb_s_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) end subroutine psb_smatdist subroutine psb_lsmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -144,7 +145,8 @@ module psb_s_mat_dist_mod integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_s_base_sparse_mat), optional :: mold + class(psb_s_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) diff --git a/util/psb_s_renum_impl.F90 b/util/psb_s_renum_impl.F90 index 6bb37a3c..f60e33a8 100644 --- a/util/psb_s_renum_impl.F90 +++ b/util/psb_s_renum_impl.F90 @@ -188,7 +188,7 @@ contains subroutine psb_mat_renum_amd(a,info,operm) -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) use iso_c_binding #endif use psb_base_mod @@ -198,7 +198,7 @@ contains integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) ! 
-#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) interface function psb_amd_order(n,ap,ai,p)& & result(res) bind(c,name='psb_amd_order') @@ -223,7 +223,7 @@ contains name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(IPK4) +#if defined(PSB_HAVE_AMD) && defined(PSB_IPK4) info = psb_success_ nr = a%get_nrows() @@ -451,7 +451,7 @@ contains subroutine psb_lmat_renum_amd(a,info,operm) -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) use iso_c_binding #endif use psb_base_mod @@ -461,7 +461,7 @@ contains integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) interface function psb_amd_order(n,ap,ai,p)& & result(res) bind(c,name='psb_amd_order') @@ -486,7 +486,7 @@ contains name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(LPK4) +#if defined(PSB_HAVE_AMD) && defined(PSB_LPK4) info = psb_success_ nr = a%get_nrows() diff --git a/util/psb_z_mat_dist_impl.f90 b/util/psb_z_mat_dist_impl.f90 index 5c83c66c..48abe46d 100644 --- a/util/psb_z_mat_dist_impl.f90 +++ b/util/psb_z_mat_dist_impl.f90 @@ -30,7 +30,7 @@ ! ! subroutine psb_zmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -81,16 +81,17 @@ subroutine psb_zmatdist(a_glob, a, ctxt, desc_a,& integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_z_base_sparse_mat), optional :: mold + class(psb_z_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) ! 
local variables logical :: use_parts, use_vg, use_vsz - integer(psb_ipk_) :: np, iam, np_sharing - integer(psb_ipk_) :: k_count, root, liwork, nnzero, nrhs,& - & i, ll, nz, isize, iproc, nnr, err, err_act + integer(psb_mpk_) :: np, iam, root, iproc + integer(psb_ipk_) :: k_count, inp, np_sharing, liwork, nnzero, nrhs,& + & i, ll, nz, isize, nnr, err, err_act integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp integer(psb_ipk_), allocatable :: iwork(:), iwrk2(:) integer(psb_lpk_), allocatable :: irow(:),icol(:) @@ -111,7 +112,7 @@ subroutine psb_zmatdist(a_glob, a, ctxt, desc_a,& root = psb_root_ end if call psb_info(ctxt, iam, np) - + use_parts = present(parts) use_vg = present(vg) use_vsz = present(vsz) @@ -194,8 +195,9 @@ subroutine psb_zmatdist(a_glob, a, ctxt, desc_a,& end if do while (i_count <= nrow) - if (use_parts) then - call parts(i_count,nrow,np,iwork, np_sharing) + if (use_parts) then + inp = np + call parts(i_count,nrow,inp,iwork, np_sharing) ! ! np_sharing allows for overlap in the data distribution. ! 
If an index is overlapped, then we have to send its row @@ -210,7 +212,7 @@ subroutine psb_zmatdist(a_glob, a, ctxt, desc_a,& j_count = j_count + 1 if (j_count-i_count >= nb) exit if (j_count > nrow) exit - call parts(j_count,nrow,np,iwrk2, np_sharing) + call parts(j_count,nrow,inp,iwrk2, np_sharing) if (np_sharing /= 1 ) exit if (iwrk2(1) /= iproc ) exit end do @@ -321,7 +323,7 @@ subroutine psb_zmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t0 = psb_wtime() - call psb_cdasb(desc_a,info) + call psb_cdasb(desc_a,info,mold=imold) t1 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -332,7 +334,7 @@ subroutine psb_zmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t2 = psb_wtime() - call psb_spasb(a,desc_a,info,afmt=fmt,mold=mold) + call psb_spasb(a,desc_a,info,afmt=fmt,mold=amold) t3 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -371,7 +373,7 @@ end subroutine psb_zmatdist subroutine psb_lzmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -422,15 +424,16 @@ subroutine psb_lzmatdist(a_glob, a, ctxt, desc_a,& integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_z_base_sparse_mat), optional :: mold + class(psb_z_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) ! 
local variables logical :: use_parts, use_vg, use_vsz - integer(psb_ipk_) :: np, iam, np_sharing, root, iproc - integer(psb_ipk_) :: err_act, il, inz + integer(psb_mpk_) :: np, iam, root, iproc + integer(psb_ipk_) :: err_act, il, inz, np_sharing, inp integer(psb_lpk_) :: k_count, liwork, nnzero, nrhs,& & i, ll, nz, isize, nnr, err integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp @@ -531,8 +534,9 @@ subroutine psb_lzmatdist(a_glob, a, ctxt, desc_a,& end if do while (i_count <= nrow) - if (use_parts) then - call parts(i_count,nrow,np,iwork, np_sharing) + if (use_parts) then + inp = np + call parts(i_count,nrow,inp,iwork, np_sharing) ! ! np_sharing allows for overlap in the data distribution. ! If an index is overlapped, then we have to send its row @@ -547,7 +551,7 @@ subroutine psb_lzmatdist(a_glob, a, ctxt, desc_a,& j_count = j_count + 1 if (j_count-i_count >= nb) exit if (j_count > nrow) exit - call parts(j_count,nrow,np,iwrk2, np_sharing) + call parts(j_count,nrow,inp,iwrk2, np_sharing) if (np_sharing /= 1 ) exit if (iwrk2(1) /= iproc ) exit end do @@ -660,7 +664,7 @@ subroutine psb_lzmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t0 = psb_wtime() - call psb_cdasb(desc_a,info) + call psb_cdasb(desc_a,info,mold=imold) t1 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ @@ -671,7 +675,7 @@ subroutine psb_lzmatdist(a_glob, a, ctxt, desc_a,& call psb_barrier(ctxt) t2 = psb_wtime() - call psb_spasb(a,desc_a,info,afmt=fmt,mold=mold) + call psb_spasb(a,desc_a,info,afmt=fmt,mold=amold) t3 = psb_wtime() if(info /= psb_success_)then info=psb_err_from_subroutine_ diff --git a/util/psb_z_mat_dist_mod.f90 b/util/psb_z_mat_dist_mod.f90 index 2f62899e..3be4f947 100644 --- a/util/psb_z_mat_dist_mod.f90 +++ b/util/psb_z_mat_dist_mod.f90 @@ -32,11 +32,11 @@ module psb_z_mat_dist_mod use psb_base_mod, only : psb_ipk_, psb_dpk_, psb_desc_type, psb_parts, & & psb_zspmat_type, psb_z_base_sparse_mat, psb_z_vect_type, & - & 
psb_lzspmat_type, psb_ctxt_type + & psb_lzspmat_type, psb_ctxt_type, psb_i_base_vect_type interface psb_matdist subroutine psb_zmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -87,13 +87,14 @@ module psb_z_mat_dist_mod integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_z_base_sparse_mat), optional :: mold + class(psb_z_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) end subroutine psb_zmatdist subroutine psb_lzmatdist(a_glob, a, ctxt, desc_a,& - & info, parts, vg, vsz, inroot,fmt,mold) + & info, parts, vg, vsz, inroot,fmt,amold,imold) ! ! an utility subroutine to distribute a matrix among processors ! according to a user defined data distribution, using @@ -144,7 +145,8 @@ module psb_z_mat_dist_mod integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), optional :: inroot character(len=*), optional :: fmt - class(psb_z_base_sparse_mat), optional :: mold + class(psb_z_base_sparse_mat), optional :: amold + class(psb_i_base_vect_type), optional, intent(in) :: imold procedure(psb_parts), optional :: parts integer(psb_ipk_), optional :: vg(:) integer(psb_ipk_), optional :: vsz(:) diff --git a/util/psb_z_renum_impl.F90 b/util/psb_z_renum_impl.F90 index a27d4523..ac6517fe 100644 --- a/util/psb_z_renum_impl.F90 +++ b/util/psb_z_renum_impl.F90 @@ -188,7 +188,7 @@ contains subroutine psb_mat_renum_amd(a,info,operm) -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) use iso_c_binding #endif use psb_base_mod @@ -198,7 +198,7 @@ contains integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:) ! 
-#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) interface function psb_amd_order(n,ap,ai,p)& & result(res) bind(c,name='psb_amd_order') @@ -223,7 +223,7 @@ contains name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(IPK4) +#if defined(PSB_HAVE_AMD) && defined(PSB_IPK4) info = psb_success_ nr = a%get_nrows() @@ -451,7 +451,7 @@ contains subroutine psb_lmat_renum_amd(a,info,operm) -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) use iso_c_binding #endif use psb_base_mod @@ -461,7 +461,7 @@ contains integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:) ! -#if defined(HAVE_AMD) +#if defined(PSB_HAVE_AMD) interface function psb_amd_order(n,ap,ai,p)& & result(res) bind(c,name='psb_amd_order') @@ -486,7 +486,7 @@ contains name = 'mat_renum_amd' call psb_erractionsave(err_act) -#if defined(HAVE_AMD) && defined(LPK4) +#if defined(PSB_HAVE_AMD) && defined(PSB_LPK4) info = psb_success_ nr = a%get_nrows() diff --git a/util/psi_build_mtpart.F90 b/util/psi_build_mtpart.F90 index d01c3d85..c9bd91b5 100644 --- a/util/psi_build_mtpart.F90 +++ b/util/psi_build_mtpart.F90 @@ -12,20 +12,28 @@ subroutine psi_l_build_mtpart(n,ja,irp,nparts,graph_vect,weights) integer(psb_ipk_) :: info integer(psb_lpk_) :: nl,nptl integer(psb_lpk_), allocatable :: irpl(:),jal(:),gvl(:) +#if defined(PSB_METIS_REAL_32) real(psb_spk_),allocatable :: wgh_(:) +#elif defined(PSB_METIS_REAL_64) + real(psb_dpk_),allocatable :: wgh_(:) +#endif -#if defined(HAVE_METIS) && defined(LPK4) && defined(METIS_32) +#if defined(PSB_HAVE_METIS) && defined(PSB_LPK4) && defined(PSB_METIS_32) interface - function METIS_PartGraphKway(n,ixadj,iadj,ivwg,iajw,& + function PSB_METIS_PartGraphKway(n,ixadj,iadj,ivwg,iajw,& & nparts,weights,part) bind(c,name="metis_PartGraphKway_C") result(res) use iso_c_binding integer(c_int) :: res integer(c_int) :: n,nparts integer(c_int) :: ixadj(*),iadj(*),ivwg(*),iajw(*),part(*) +#if defined(PSB_METIS_REAL_32) real(c_float) :: weights(*) 
+#elif defined(PSB_METIS_REAL_64) + real(c_double) :: weights(*) +#endif !integer(psb_ipk_) :: n,wgflag,numflag,nparts,nedc !integer(psb_ipk_) :: ixadj(*),iadj(*),ivwg(*),iajw(*),iopt(*),part(*) - end function METIS_PartGraphKway + end function PSB_METIS_PartGraphKway end interface call psb_realloc(n,graph_vect,info) @@ -49,23 +57,23 @@ subroutine psi_l_build_mtpart(n,ja,irp,nparts,graph_vect,weights) nptl = nparts wgh_ = -1.0 if(present(weights)) then - if (size(weights) == nptl) then + if (size(weights) == nptl) then + wgh_(:) = weights(:) !!$ write(*,*) 'weights present',weights - ! call METIS_PartGraphKway(n,irp,ja,idummy,jdummy,& - ! & wgflag,numflag,nparts,weights,iopt,nedc,graph_vect) - info = METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& - & nptl,weights,gvl) - - else -!!$ write(*,*) 'weights absent',wgh_ - info = METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& - & nptl,wgh_,gvl) +!!$ ! call PSB_METIS_PartGraphKway(n,irp,ja,idummy,jdummy,& +!!$ ! & wgflag,numflag,nparts,weights,iopt,nedc,graph_vect) +!!$ info = PSB_METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& +!!$ & nptl,weights,gvl) + +!!$ else + write(*,*) 'weights absent',wgh_ +!!$ info = PSB_METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& +!!$ & nptl,wgh_,gvl) end if - else + endif !!$ write(*,*) 'weights absent',wgh_ - info = METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& + info = PSB_METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& & nptl,wgh_,gvl) - endif !!$ write(*,*) 'after allocation',info do i=1, n @@ -77,19 +85,23 @@ subroutine psi_l_build_mtpart(n,ja,irp,nparts,graph_vect,weights) enddo endif -#elif defined(HAVE_METIS) && defined(LPK8) && defined(METIS_64) +#elif defined(PSB_HAVE_METIS) && defined(PSB_LPK8) && defined(PSB_METIS_64) interface - function METIS_PartGraphKway(n,ixadj,iadj,ivwg,iajw,& + function PSB_METIS_PartGraphKway(n,ixadj,iadj,ivwg,iajw,& & nparts,weights,part) bind(c,name="metis_PartGraphKway_C") result(res) use iso_c_binding integer(c_long_long) :: res 
integer(c_long_long) :: n,nparts integer(c_long_long) :: ixadj(*),iadj(*),ivwg(*),iajw(*),part(*) +#if defined(PSB_METIS_REAL_32) real(c_float) :: weights(*) +#elif defined(PSB_METIS_REAL_64) + real(c_double) :: weights(*) +#endif !integer(psb_ipk_) :: n,wgflag,numflag,nparts,nedc !integer(psb_ipk_) :: ixadj(*),iadj(*),ivwg(*),iajw(*),iopt(*),part(*) - end function METIS_PartGraphKway + end function PSB_METIS_PartGraphKway end interface call psb_realloc(n,graph_vect,info) @@ -113,25 +125,44 @@ subroutine psi_l_build_mtpart(n,ja,irp,nparts,graph_vect,weights) nptl = nparts wgh_ = -1.0 if(present(weights)) then + if (size(weights) == nptl) then + wgh_(:) = weights(:) +!!$ write(*,*) 'weights present',weights +!!$ ! call PSB_METIS_PartGraphKway(n,irp,ja,idummy,jdummy,& +!!$ ! & wgflag,numflag,nparts,weights,iopt,nedc,graph_vect) +!!$ info = PSB_METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& +!!$ & nptl,weights,gvl) + +!!$ else + write(*,*) 'weights absent',wgh_ +!!$ info = PSB_METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& +!!$ & nptl,wgh_,gvl) + end if + endif +!!$ write(*,*) 'weights absent',wgh_ + info = PSB_METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& + & nptl,wgh_,gvl) +#if 0 + if(present(weights)) then if (size(weights) == nptl) then !!$ write(*,*) 'weights present',weights - ! call METIS_PartGraphKway(n,irp,ja,idummy,jdummy,& + ! call PSB_METIS_PartGraphKway(n,irp,ja,idummy,jdummy,& ! 
& wgflag,numflag,nparts,weights,iopt,nedc,graph_vect) - info = METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& + info = PSB_METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& & nptl,weights,gvl) else !!$ write(*,*) 'weights absent',wgh_ - info = METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& + info = PSB_METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& & nptl,wgh_,gvl) end if else !!$ write(*,*) 'weights absent',wgh_ - info = METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& + info = PSB_METIS_PartGraphKway(nl,irpl,jal,idummy,jdummy,& & nptl,wgh_,gvl) endif !!$ write(*,*) 'after allocation',info - +#endif do i=1, n graph_vect(i) = gvl(i) - 1 enddo @@ -143,7 +174,7 @@ subroutine psi_l_build_mtpart(n,ja,irp,nparts,graph_vect,weights) #else - write(psb_err_unit,*) 'Warning: no suitable METIS interface for LPK indices' + write(psb_err_unit,*) 'Warning: no suitable METIS interface for PSB_LPK indices' #endif return